PyPI - ai-edge-torch-nightly - Versions diffs - 0.7.0.dev20251006__py3-none-any.whl → 0.7.0.dev20251008__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.7.0.dev20251006__py3-none-any.whl → 0.7.0.dev20251008__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

ai_edge_torch/generative/utilities/converter.py CHANGED Viewed

@@ -143,9 +143,23 @@ def define_conversion_flags(
       '`prefill_seq_lens` as the maximum of kv_cache size and prefill lengths '
       'in the graph.',
   )
+  flags.DEFINE_bool(
+      'export_gpu_dynamic_shape_verifications',
+      False,
+      'If true, the conversion script will export signatures used only for '
+      'verification of GPU dynamic shapes.',
+  )
   return flags
+# Context length for verifying GPU dynamic shapes.
+_CONTEXT_LENGTH_TO_VERIFY_MAGIC_NUMBERS = 1280
+# Long prefill length for verifying GPU dynamic shapes.
+_LONG_PREFILL_LENGTH_TO_VERIFY_MAGIC_NUMBERS = 1024
+# Short prefill length for verifying GPU dynamic shapes.
+_SHORT_PREFILL_LENGTH_TO_VERIFY_MAGIC_NUMBERS = 64
 def is_magic_number_(num: int) -> bool:
   """Returns true if the number is a magic number, i.e. prime number > 10."""
   if num < 10:
@@ -263,6 +277,10 @@ def convert_to_tflite(
     config: cfg.ModelConfig = None,
     lora_ranks: Optional[list[int]] = None,
     export_config: ExportConfig = None,
+    extra_model: torch.nn.Module = None,
+    extra_prefill_seq_lens: list[int] = None,
+    extra_kv_cache_max_len: int = 0,
+    extra_signature_prefix: str = '',
 ):
   """Converts a nn.Module model to multi-signature tflite model.
@@ -315,6 +333,15 @@ def convert_to_tflite(
         no LoRA signatures will be added.
       export_config (ExportConfig, optional): The export configuration. If None,
         it uses the default export configuration.
+      extra_model (torch.nn.Module, optional): PyTorch model to export in
+        addition to the pytorch_model. This model can have different
+        prefill_seq_lens and kv_cache_max_len.
+      extra_prefill_seq_lens (list[int], optional): The prefill sequence
+        lengths for extra_model. Meaningful only when extra_model is not None.
+      extra_kv_cache_max_len (int, optional): The maximum size of KV cache
+        buffer for extra_model. Meaningful only when extra_model is not None.
+      extra_signature_prefix (str, optional): The prefix of the extra model
+        signatures. Meaningful only when extra_model is not None.
   """
   # pylint: disable=protected-access
   torch._dynamo.config.cache_size_limit = 64
@@ -353,32 +380,51 @@ def convert_to_tflite(
   )
   output_file = os.path.join(output_path, output_filename)
-  _export_helper(
+  converter = converter_utils.Converter()
+  _add_signatures(
+      converter,
       pytorch_model,
-      output_file,
       prefill_seq_lens,
       kv_cache_max_len,
       pixel_values_size,
       pixel_seq_len,
-      quantize,
       config,
       loras,
       export_config,
   )
+  if extra_model is not None and extra_prefill_seq_lens:
+    _add_signatures(
+        converter,
+        extra_model,
+        extra_prefill_seq_lens,
+        extra_kv_cache_max_len,
+        pixel_values_size,
+        pixel_seq_len,
+        config,
+        loras,
+        export_config,
+        signature_prefix=extra_signature_prefix,
+    )
+  edge_model = converter.convert(
+      quant_config=get_quant_recipe_from_flag(quantize, config),
+  )
+  edge_model.export(output_file)
   return output_file
-def _export_helper(
+def _add_signatures(
+    converter: converter_utils.Converter,
     pytorch_model: torch.nn.Module,
-    output_file: str,
     prefill_seq_lens: list[int],
     kv_cache_max_len: int,
     pixel_values_size: torch.Size,
     pixel_seq_len: int,
-    quantize: str,
     config: cfg.ModelConfig,
     loras: list[None | lora_utils.LoRA],
     export_config: ExportConfig,
+    signature_prefix: str = '',
 ):
   """Helper function to export a model to tflite."""
   prefill_tokens_list = []
@@ -423,17 +469,14 @@ def _export_helper(
       kv_layout=export_config.kvcache_layout,
   )
-  quant_config = get_quant_recipe_from_flag(quantize, config)
   # For export, we create a module that captures any non-exportable,
   # arugments, e.g. the generation config object.
   mod = ExportableModule(pytorch_model, export_config=export_config).eval()
-  converter = converter_utils.Converter()
   for lora in loras:
     for i in range(len(prefill_seq_lens)):
       prefill_seq_len = prefill_seq_lens[i]
-      prefill_signature_name = f'prefill_{prefill_seq_len}'
+      prefill_signature_name = f'{signature_prefix}prefill_{prefill_seq_len}'
       sample_kwargs = {
           'tokens': prefill_tokens_list[i],
@@ -488,17 +531,15 @@ def _export_helper(
     if lora is not None:
       sample_kwargs['lora'] = lora
+    decode_signature_name = f'{signature_prefix}decode'
+    if lora is not None:
+      decode_signature_name += f'_lora_r{lora.get_rank()}'
     converter.add_signature(
-        'decode' if lora is None else f'decode_lora_r{lora.get_rank()}',
+        decode_signature_name,
         mod,
         sample_kwargs=sample_kwargs,
     )
-  edge_model = converter.convert(
-      quant_config=quant_config,
-  )
-  edge_model.export(output_file)
 def build_and_convert_to_tflite_from_flags(
     model_builder: Callable[
@@ -521,11 +562,36 @@ def build_and_convert_to_tflite_from_flags(
       get_mask_cache_size_from_flags(),
   )
+  # Extra model for GPU dynamic shape verification if needed.
+  extra_model = None
+  extra_prefill_seq_lens = None
+  extra_kv_cache_max_len = 0
   if flags.FLAGS.gpu_dynamic_shapes:
     prefill_seq_lens = [
         get_magic_number_for(l) for l in flags.FLAGS.prefill_seq_lens
     ]
     kv_cache_max_len = get_magic_number_for(flags.FLAGS.kv_cache_max_len)
+    if flags.FLAGS.export_gpu_dynamic_shape_verifications:
+      extra_kv_cache_max_len = _CONTEXT_LENGTH_TO_VERIFY_MAGIC_NUMBERS
+      if extra_kv_cache_max_len > flags.FLAGS.kv_cache_max_len:
+        extra_kv_cache_max_len = flags.FLAGS.kv_cache_max_len
+      extra_model = model_builder(
+          checkpoint_path,
+          loader.maybe_get_custom_loader(
+              checkpoint_path, flags.FLAGS.custom_checkpoint_loader
+          ),
+          extra_kv_cache_max_len,
+      )
+      extra_prefill_seq_lens = []
+      if extra_kv_cache_max_len > _SHORT_PREFILL_LENGTH_TO_VERIFY_MAGIC_NUMBERS:
+        extra_prefill_seq_lens.append(
+            _SHORT_PREFILL_LENGTH_TO_VERIFY_MAGIC_NUMBERS
+        )
+      if extra_kv_cache_max_len > _LONG_PREFILL_LENGTH_TO_VERIFY_MAGIC_NUMBERS:
+        extra_prefill_seq_lens.append(
+            _LONG_PREFILL_LENGTH_TO_VERIFY_MAGIC_NUMBERS
+        )
   else:
     prefill_seq_lens = flags.FLAGS.prefill_seq_lens
     kv_cache_max_len = flags.FLAGS.kv_cache_max_len
@@ -539,6 +605,10 @@ def build_and_convert_to_tflite_from_flags(
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
       export_config=export_config_lib.get_from_flags(),
+      extra_model=extra_model,
+      extra_prefill_seq_lens=extra_prefill_seq_lens,
+      extra_kv_cache_max_len=extra_kv_cache_max_len,
+      extra_signature_prefix='test_' if extra_model is not None else '',
   )

ai_edge_torch/version.py CHANGED Viewed

@@ -15,4 +15,4 @@
 # The next version of ai-edge-torch.
 # The minor version code should be bumped after every release.
-__version__ = "0.7.0.dev20251006"
+__version__ = "0.7.0.dev20251008"

{ai_edge_torch_nightly-0.7.0.dev20251006.dist-info → ai_edge_torch_nightly-0.7.0.dev20251008.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.7.0.dev20251006
+Version: 0.7.0.dev20251008
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI

{ai_edge_torch_nightly-0.7.0.dev20251006.dist-info → ai_edge_torch_nightly-0.7.0.dev20251008.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=lemyLCNoGYRnJsmDuGZu7qOqLbLqG6CGDFtu3ue1syU,129
 ai_edge_torch/_config.py,sha256=AiqhbcheF7j_ozIGDLC89k1we95aVgFDa-tR6h7UI0s,2529
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/model.py,sha256=A7loFu8jE9CsXsfMmHYZ-KDFJiaD8Kkqwm_9d3IVzk0,5638
-ai_edge_torch/version.py,sha256=NWXVGUPY6DtGiDOywYxBG91TAP5aYrDt8Ayzv2kwLhs,806
+ai_edge_torch/version.py,sha256=Jd2ZmbryaZTSc314Yj8KLDdZImrRPNAWsBVxJ18z8dk,806
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/conversion.py,sha256=iQk3R-pLq4c1nfLqPB4xTRj78gghxPGzJCJtILLdg5o,6123
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -208,7 +208,7 @@ ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=NkEwrjO8vIcd
 ai_edge_torch/generative/test/test_quantize.py,sha256=kKJ01wscTC2t_Ylr7huO5gNKES01gm3dT1gx52z15PA,7356
 ai_edge_torch/generative/test/utils.py,sha256=tF6aCfAGJnc9dmzCnZCEOuKNVimfWOqscv9og0DDLHU,2656
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
-ai_edge_torch/generative/utilities/converter.py,sha256=Bt-48O1wf-7YcGVof53eVKI7wJwNvrc1Bv5zE3JuFdk,20093
+ai_edge_torch/generative/utilities/converter.py,sha256=d8pehTq6EzEdVR8ioL2b1ECGTR4G1K1fczc9amu_Oyk,23106
 ai_edge_torch/generative/utilities/export_config.py,sha256=5B15nYyqf96kjjYlHfPctUfsIdsBsh1f8rxKitJpwKQ,2384
 ai_edge_torch/generative/utilities/litertlm_builder.py,sha256=0cNuaqhc7cQcAa4NRalUXyoPQUQC9O3-aHAJEDV1Mps,4265
 ai_edge_torch/generative/utilities/loader.py,sha256=drgKBmNibuc3PCdc0kU0pVcp2Nt1_mjLYh67RyXOn7U,15952
@@ -270,8 +270,8 @@ ai_edge_torch/testing/__init__.py,sha256=_yGgvnBZWb7T3IN3mc4x1sS4vM96HZwM8pwIcPG
 ai_edge_torch/testing/export.py,sha256=k5mGDGzwc23Z4zaIVDs8CNh-oOt64gsf9MS9NjhbPy4,3293
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.7.0.dev20251006.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-ai_edge_torch_nightly-0.7.0.dev20251006.dist-info/METADATA,sha256=9-6TbSzTZaYh69AVVx-R794sgQjC01l9C0jLlWmTNOg,2074
-ai_edge_torch_nightly-0.7.0.dev20251006.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_torch_nightly-0.7.0.dev20251006.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
-ai_edge_torch_nightly-0.7.0.dev20251006.dist-info/RECORD,,
+ai_edge_torch_nightly-0.7.0.dev20251008.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.7.0.dev20251008.dist-info/METADATA,sha256=Xg4GCLMHL1FhKweyTtY2OAUdPGHFwpGlpZw-bUvs3FY,2074
+ai_edge_torch_nightly-0.7.0.dev20251008.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_torch_nightly-0.7.0.dev20251008.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.7.0.dev20251008.dist-info/RECORD,,

{ai_edge_torch_nightly-0.7.0.dev20251006.dist-info → ai_edge_torch_nightly-0.7.0.dev20251008.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.7.0.dev20251006.dist-info → ai_edge_torch_nightly-0.7.0.dev20251008.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.7.0.dev20251006.dist-info → ai_edge_torch_nightly-0.7.0.dev20251008.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-torch-nightly 0.7.0.dev20251006__py3-none-any.whl → 0.7.0.dev20251008__py3-none-any.whl

ai-edge-torch-nightly 0.7.0.dev20251006__py3-none-any.whl → 0.7.0.dev20251008__py3-none-any.whl