PyPI - ai-edge-torch-nightly - Versions diffs - 0.4.0.dev20250329__py3-none-any.whl → 0.4.0.dev20250331__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.4.0.dev20250329__py3-none-any.whl → 0.4.0.dev20250331__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py CHANGED Viewed

@@ -16,61 +16,25 @@
 """Example of converting AMD-Llama-135m model to multi-signature tflite model."""
 import os
-import pathlib
 from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities.model_builder import ExportConfig
-_CHECKPOINT_PATH = flags.DEFINE_string(
-    'checkpoint_path',
-    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/amd-llama-135m'),
-    'The path to the model checkpoint, or directory holding the checkpoint.',
-)
-_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
-    'kv_cache_max_len',
-    1280,
-    'The maximum size of KV cache buffer, including both prefill and decode.',
-)
-_OUTPUT_PATH = flags.DEFINE_string(
-    'output_path',
-    '/tmp/',
-    'The path to export the tflite model.',
-)
-_OUTPUT_NAME_PREFIX = flags.DEFINE_string(
-    'output_name_prefix',
-    'amd_llama',
-    'The prefix of the output tflite model name.',
-)
-_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-    'prefill_seq_lens',
-    (8, 64, 128, 256, 512, 1024),
-    'List of the maximum sizes of prefill input tensors.',
-)
-_QUANTIZE = flags.DEFINE_bool(
-    'quantize',
-    True,
-    'Whether the model should be quantized.',
-)
-_LORA_RANKS = flags.DEFINE_multi_integer(
-    'lora_ranks',
-    None,
-    'If set, the model will be converted with the provided list of LoRA ranks.',
-)
+flags = converter.define_conversion_flags("amd-llama-135m")
 def main(_):
   pytorch_model = amd_llama_135m.build_model(
-      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
   )
   converter.convert_to_tflite(
       pytorch_model,
-      output_path=_OUTPUT_PATH.value,
-      output_name_prefix=_OUTPUT_NAME_PREFIX.value,
-      prefill_seq_len=_PREFILL_SEQ_LENS.value,
-      quantize=_QUANTIZE.value,
-      lora_ranks=_LORA_RANKS.value,
+      output_path=flags.FLAGS.output_path,
+      output_name_prefix=flags.FLAGS.output_name_prefix,
+      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      quantize=flags.FLAGS.quantize,
+      lora_ranks=flags.FLAGS.lora_ranks,
       export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py CHANGED Viewed

@@ -24,54 +24,19 @@ from ai_edge_torch.generative.examples.deepseek import deepseek
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities.model_builder import ExportConfig
-_CHECKPOINT_PATH = flags.DEFINE_string(
-    'checkpoint_path',
-    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/deepseek'),
-    'The path to the model checkpoint, or directory holding the checkpoint.',
-)
-_OUTPUT_PATH = flags.DEFINE_string(
-    'output_path',
-    '/tmp/',
-    'The path to export the tflite model.',
-)
-_OUTPUT_NAME_PREFIX = flags.DEFINE_string(
-    'output_name_prefix',
-    'deepseek',
-    'The prefix of the output tflite model name.',
-)
-_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-    'prefill_seq_lens',
-    (8, 64, 128, 256, 512, 1024),
-    'List of the maximum sizes of prefill input tensors.',
-)
-_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
-    'kv_cache_max_len',
-    1280,
-    'The maximum size of KV cache buffer, including both prefill and decode.',
-)
-_QUANTIZE = flags.DEFINE_bool(
-    'quantize',
-    True,
-    'Whether the model should be quantized.',
-)
-_LORA_RANKS = flags.DEFINE_multi_integer(
-    'lora_ranks',
-    None,
-    'If set, the model will be converted with the provided list of LoRA ranks.',
-)
+flags = converter.define_conversion_flags("deepseek")
 def main(_):
   pytorch_model = deepseek.build_model(
-      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
   )
   converter.convert_to_tflite(
       pytorch_model,
-      output_path=_OUTPUT_PATH.value,
-      output_name_prefix=_OUTPUT_NAME_PREFIX.value,
-      prefill_seq_len=_PREFILL_SEQ_LENS.value,
-      quantize=_QUANTIZE.value,
-      lora_ranks=_LORA_RANKS.value,
+      output_path=flags.FLAGS.output_path,
+      output_name_prefix=flags.FLAGS.output_name_prefix,
+      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      quantize=flags.FLAGS.quantize,
+      lora_ranks=flags.FLAGS.lora_ranks,
       export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py CHANGED Viewed

@@ -16,62 +16,24 @@
 """Example of converting a Gemma1 model to multi-signature tflite model."""
 import os
-import pathlib
 from absl import app
-from absl import flags
 from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities.model_builder import ExportConfig
-_CHECKPOINT_PATH = flags.DEFINE_string(
-    'checkpoint_path',
-    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma-2b'),
-    'The path to the model checkpoint, or directory holding the checkpoint.',
-)
-_OUTPUT_PATH = flags.DEFINE_string(
-    'output_path',
-    '/tmp/',
-    'The path to export the tflite model.',
-)
-_OUTPUT_NAME_PREFIX = flags.DEFINE_string(
-    'output_name_prefix',
-    'gemma',
-    'The prefix of the output tflite model name.',
-)
-_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-    'prefill_seq_lens',
-    (8, 64, 128, 256, 512, 1024),
-    'List of the maximum sizes of prefill input tensors.',
-)
-_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
-    'kv_cache_max_len',
-    1280,
-    'The maximum size of KV cache buffer, including both prefill and decode.',
-)
-_QUANTIZE = flags.DEFINE_bool(
-    'quantize',
-    True,
-    'Whether the model should be quantized.',
-)
-_LORA_RANKS = flags.DEFINE_multi_integer(
-    'lora_ranks',
-    None,
-    'If set, the model will be converted with the provided list of LoRA ranks.',
-)
+flags = converter.define_conversion_flags("gemma-2b")
 def main(_):
   pytorch_model = gemma1.build_2b_model(
-      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
   )
   converter.convert_to_tflite(
       pytorch_model,
-      output_path=_OUTPUT_PATH.value,
-      output_name_prefix=_OUTPUT_NAME_PREFIX.value,
-      prefill_seq_len=_PREFILL_SEQ_LENS.value,
-      quantize=_QUANTIZE.value,
-      lora_ranks=_LORA_RANKS.value,
+      output_path=flags.FLAGS.output_path,
+      output_name_prefix=flags.FLAGS.output_name_prefix,
+      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      quantize=flags.FLAGS.quantize,
+      lora_ranks=flags.FLAGS.lora_ranks,
       export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py CHANGED Viewed

@@ -16,62 +16,25 @@
 """Example of converting a Gemma2 model to multi-signature tflite model."""
 import os
-import pathlib
 from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.gemma import gemma2
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities.model_builder import ExportConfig
-_CHECKPOINT_PATH = flags.DEFINE_string(
-    'checkpoint_path',
-    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma2-2b'),
-    'The path to the model checkpoint, or directory holding the checkpoint.',
-)
-_OUTPUT_PATH = flags.DEFINE_string(
-    'output_path',
-    '/tmp/',
-    'The path to export the tflite model.',
-)
-_OUTPUT_NAME_PREFIX = flags.DEFINE_string(
-    'output_name_prefix',
-    'gemma2',
-    'The prefix of the output tflite model name.',
-)
-_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-    'prefill_seq_lens',
-    (8, 64, 128, 256, 512, 1024),
-    'List of the maximum sizes of prefill input tensors.',
-)
-_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
-    'kv_cache_max_len',
-    1280,
-    'The maximum size of KV cache buffer, including both prefill and decode.',
-)
-_QUANTIZE = flags.DEFINE_bool(
-    'quantize',
-    True,
-    'Whether the model should be quantized.',
-)
-_LORA_RANKS = flags.DEFINE_multi_integer(
-    'lora_ranks',
-    None,
-    'If set, the model will be converted with the provided list of LoRA ranks.',
-)
+flags = converter.define_conversion_flags("gemma2-2b")
 def main(_):
   pytorch_model = gemma2.build_2b_model(
-      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
   )
   converter.convert_to_tflite(
       pytorch_model,
-      output_path=_OUTPUT_PATH.value,
-      output_name_prefix=_OUTPUT_NAME_PREFIX.value,
-      prefill_seq_len=_PREFILL_SEQ_LENS.value,
-      quantize=_QUANTIZE.value,
-      lora_ranks=_LORA_RANKS.value,
+      output_path=flags.FLAGS.output_path,
+      output_name_prefix=flags.FLAGS.output_name_prefix,
+      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      quantize=flags.FLAGS.quantize,
+      lora_ranks=flags.FLAGS.lora_ranks,
       export_config=ExportConfig(),
   )

ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py CHANGED Viewed

@@ -16,8 +16,6 @@
 """Example of converting a Gemma3 model to multi-signature tflite model."""
 import os
-import pathlib
 from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.gemma3 import gemma3
@@ -26,48 +24,14 @@ from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 import torch
+flags = converter.define_conversion_flags('gemma3-1b')
 _MODEL_SIZE = flags.DEFINE_string(
     'model_size',
     '1b',
     'The size of the model to convert.',
 )
-_CHECKPOINT_PATH = flags.DEFINE_string(
-    'checkpoint_path',
-    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma3-1b'),
-    'The path to the model checkpoint, or directory holding the checkpoint.',
-)
-_OUTPUT_PATH = flags.DEFINE_string(
-    'output_path',
-    '/tmp/',
-    'The path to export the tflite model.',
-)
-_OUTPUT_NAME_PREFIX = flags.DEFINE_string(
-    'output_name_prefix',
-    'gemma3',
-    'The prefix of the output tflite model name.',
-)
-_PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
-    'prefill_seq_lens',
-    (32, 64, 128, 256, 512, 1024),
-    'List of the maximum sizes of prefill input tensors.',
-)
-_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
-    'kv_cache_max_len',
-    2048,
-    'The maximum size of KV cache buffer, including both prefill and decode.',
-)
-_QUANTIZE = flags.DEFINE_bool(
-    'quantize',
-    False,
-    'Whether the model should be quantized.',
-)
-_LORA_RANKS = flags.DEFINE_multi_integer(
-    'lora_ranks',
-    None,
-    'If set, the model will be converted with the provided list of LoRA ranks.',
-)
 def _create_mask(mask_len, kv_cache_max_len):
   mask = torch.full(
@@ -101,21 +65,22 @@ def _create_export_config(
 def main(_):
   if _MODEL_SIZE.value == '1b':
     pytorch_model = gemma3.build_model_1b(
-        _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+        flags.FLAGS.checkpoint_path,
+        kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
     )
     config = pytorch_model.config
   else:
     raise ValueError(f'Unsupported model size: {_MODEL_SIZE.value}')
   converter.convert_to_tflite(
       pytorch_model,
-      output_path=_OUTPUT_PATH.value,
-      output_name_prefix=_OUTPUT_NAME_PREFIX.value,
-      prefill_seq_len=_PREFILL_SEQ_LENS.value,
-      quantize=_QUANTIZE.value,
+      output_path=flags.FLAGS.output_path,
+      output_name_prefix=flags.FLAGS.output_name_prefix,
+      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      quantize=flags.FLAGS.quantize,
       config=config,
-      lora_ranks=_LORA_RANKS.value,
+      lora_ranks=flags.FLAGS.lora_ranks,
       export_config=_create_export_config(
-          _PREFILL_SEQ_LENS.value, _KV_CACHE_MAX_LEN.value
+          flags.FLAGS.prefill_seq_lens, flags.FLAGS.kv_cache_max_len
       ),
   )

ai_edge_torch/generative/examples/gemma3/verify_gemma3.py ADDED Viewed

@@ -0,0 +1,90 @@
+# Copyright 2025 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Verifies the reauthored Gemma3 model."""
+import glob
+import logging
+import os
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.gemma3 import verify_util
+import kagglehub
+_PROMPTS = flags.DEFINE_multi_string(
+    "prompts",
+    "What is the meaning of life?",
+    "The input prompts to generate answers.",
+)
+_MAX_NEW_TOKENS = flags.DEFINE_integer(
+    "max_new_tokens",
+    30,
+    "The maximum size of the generated tokens.",
+)
+_CHECKPOINT = flags.DEFINE_string(
+    "checkpoint",
+    "",
+    "The checkpoint to verify.",
+)
+_VARIANT = flags.DEFINE_string(
+    "variant",
+    "1b",
+    "The variant of the model to verify.",
+)
+_WEIGHT_FILENAME = flags.DEFINE_string(
+    "weight_filename",
+    None,
+    "The weightfilename of the model to verify.",
+)
+def find_first_ckpt(folder):
+  """Finds the first .ckpt file in a folder."""
+  ckpt_files = sorted(glob.glob(os.path.join(folder, "*.ckpt")))
+  return os.path.basename(ckpt_files[0]) if ckpt_files else None
+def main(_):
+  if _CHECKPOINT.value:
+    checkpoint = _CHECKPOINT.value
+  else:
+    checkpoint = kagglehub.model_download(
+        "google/gemma-3/pyTorch/gemma-3-1b-it"
+    )
+  # If the weight filename is not specified, use the first checkpoint.
+  if _WEIGHT_FILENAME.value is None:
+    weight_filename = find_first_ckpt(checkpoint)
+    logging.info(
+        "NOTE: using the first weight file `%s` from `%s`",
+        weight_filename,
+        checkpoint,
+    )
+  else:
+    weight_filename = _WEIGHT_FILENAME.value
+  # Verify the reauthored model by comparing the outputs with the original one.
+  verify_util.verify_gemma3(
+      checkpoint,
+      _PROMPTS.value,
+      _MAX_NEW_TOKENS.value,
+      _VARIANT.value,
+      weight_filename,
+  )
+if __name__ == "__main__":
+  app.run(main)

ai-edge-torch-nightly 0.4.0.dev20250329__py3-none-any.whl → 0.4.0.dev20250331__py3-none-any.whl

ai-edge-torch-nightly 0.4.0.dev20250329__py3-none-any.whl → 0.4.0.dev20250331__py3-none-any.whl