litert-torch-nightly 0.9.0.dev20260204__py3-none-any.whl → 0.9.0.dev20260206__py3-none-any.whl

@@ -80,6 +80,7 @@ def load_model(
     model_path: str,
     trust_remote_code: bool = False,
     auto_model_override: str | None = None,
+    task: str = 'text_generation',
 ):
   """Loads model from checkpoint."""
 
@@ -90,7 +91,12 @@ def load_model(
   )
   config._attn_implementation = 'lrt_transposed_attention'  # pylint: disable=protected-access
 
-  auto_model_cls = transformers.AutoModelForCausalLM
+  if task == 'text_generation':
+    auto_model_cls = transformers.AutoModelForCausalLM
+  elif task == 'image_text_to_text':
+    auto_model_cls = transformers.AutoModelForImageTextToText
+  else:
+    raise ValueError(f'Unsupported task: {task}')
   if auto_model_override is not None:
     auto_model_cls = transformers.__dict__[auto_model_override]
 
@@ -101,14 +107,16 @@ def load_model(
       trust_remote_code=trust_remote_code,
   )
 
-  model.generation_config.cache_implementation = 'static'
-  model.generation_config.do_sample = False
+  if task == 'text_generation':
+    model.generation_config.cache_implementation = 'static'
+    model.generation_config.do_sample = False
 
   text_model_config = config
   if hasattr(config, 'text_config'):
     text_model_config = config.text_config
 
-  verify_model_compatibility(model, config, text_model_config)
+  if task == 'text_generation':
+    verify_model_compatibility(model, config, text_model_config)
 
   # TODO(weiyiw): Refactor into a separate function.
   tokenizer = transformers.AutoTokenizer.from_pretrained(model_path)
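
Note on the load_model changes above: the new task argument selects which transformers Auto class materializes the checkpoint, and the text-generation-only setup (static cache, greedy decoding, the compatibility check) is now skipped for image-text-to-text models. A minimal sketch of the same dispatch pattern, assuming only the transformers import (the mapping and helper name are illustrative, not part of the package):

import transformers

# Illustrative mapping mirroring the if/elif chain added in load_model.
_AUTO_MODEL_BY_TASK = {
    'text_generation': transformers.AutoModelForCausalLM,
    'image_text_to_text': transformers.AutoModelForImageTextToText,
}

def resolve_auto_model_cls(task: str):
  """Hypothetical helper: same behavior as the branch in the hunk above."""
  if task not in _AUTO_MODEL_BY_TASK:
    raise ValueError(f'Unsupported task: {task}')
  return _AUTO_MODEL_BY_TASK[task]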
@@ -326,7 +334,7 @@ def export_embedder_model(
       sample_kwargs=sample_inputs,
   )
   lrt_model = converter.convert(strict_export=False)
-  model_path = os.path.join(work_dir, 'model.tflite')
+  model_path = os.path.join(work_dir, 'embedder.tflite')
   lrt_model.export(model_path)
   quantization_recipe_list = (
       quantization_recipe.split(',') if quantization_recipe else [None]
@@ -359,7 +367,10 @@ def export_auxiliary_model(
       sample_kwargs=sample_input,
   )
   # Attention Mask
-  attention_mask_module = split_cache_module.SplitAttentionMaskBuilder(model)
+  attention_mask_module = split_cache_module.SplitAttentionMaskBuilder(
+      export_config.cache_length,
+      # TODO(weiyiw): Add sliding window sizes.
+  )
   sample_inputs = attention_mask_module.get_sample_inputs(
       text_model_config, export_config
   )
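
SplitAttentionMaskBuilder is now constructed from export_config.cache_length instead of the loaded model, so the mask graph depends only on export-time shape information. For intuition, a causal mask over a fixed-size cache needs only the cache length and the query positions; the sketch below is a hedged, standalone illustration of that idea, not the builder's actual implementation (names, shapes, and the mask value are assumptions):

import torch

def causal_mask_for_static_cache(input_pos, cache_length, mask_value=-120.0):
  # input_pos: [batch, seq] int32 positions of the current query tokens.
  # Returns an additive mask of shape [batch, 1, seq, cache_length].
  cache_pos = torch.arange(cache_length, dtype=torch.int32)
  # A cache slot is masked out when it lies after the query token's position.
  future = cache_pos[None, None, :] > input_pos[:, :, None]
  mask = torch.where(future, torch.tensor(mask_value), torch.tensor(0.0))
  return mask.unsqueeze(1)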
@@ -370,7 +381,7 @@ def export_auxiliary_model(
       sample_kwargs=sample_input,
   )
   # Cache Update
-  cache_update_module = split_cache_module.CacheUpdate(model)
+  cache_update_module = split_cache_module.CacheUpdate()
   sample_inputs = cache_update_module.get_sample_inputs(
       text_model_config, export_config
   )
@@ -31,6 +31,7 @@ class ExportableModuleConfig:
 
   # Export configs
   externalize_embedder: bool = False
+  single_token_embedder: bool = False
   externalize_rope: bool = False
 
   split_cache: bool = False
@@ -94,3 +94,33 @@ class LiteRTExportableModuleForEmbedder(torch.nn.Module):
     token_ids = torch.maximum(token_ids, torch.tensor(0, dtype=torch.int32))
     output = self.model(token_ids)
     return {"embeddings": output}
+
+  @classmethod
+  def get_sample_inputs(
+      cls,
+      model_config,
+      export_config: base_exportable_module.ExportableModuleConfig,
+  ):
+    """Gets sample inputs."""
+    batch_size = export_config.batch_size
+    prefill_length = export_config.prefill_lengths[0]
+    prefill_length_dim = export_config.prefill_length_dim
+    del model_config  # Unused.
+    tokens = {"token_ids": torch.ones((batch_size, 1), dtype=torch.int32)}
+    tokens_dynamic_shape = {"token_ids": {1: 1}} if prefill_length_dim else {}
+    if export_config.single_token_embedder:
+      return {"embedder": (tokens, tokens_dynamic_shape)}
+    else:
+      ret = {}
+      ret["decode_embedder"] = (tokens, tokens_dynamic_shape)
+
+      tokens = {
+          "token_ids": torch.ones(
+              (batch_size, prefill_length), dtype=torch.int32
+          )
+      }
+      tokens_dynamic_shape = (
+          {"token_ids": {1: prefill_length_dim}} if prefill_length_dim else {}
+      )
+      ret[f"prefill_embedder_{prefill_length}"] = (tokens, tokens_dynamic_shape)
+      return ret
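
The get_sample_inputs method added above returns one entry per exported signature: a lone "embedder" signature when single_token_embedder is set, otherwise a single-token "decode_embedder" plus a "prefill_embedder_<N>" for the first configured prefill length. For a hypothetical config with batch_size=1, prefill_lengths=[128], and no dynamic prefill dimension, the returned value would look like:

{
    "decode_embedder": (
        {"token_ids": torch.ones((1, 1), dtype=torch.int32)},
        {},  # no dynamic dims: prefill_length_dim is unset
    ),
    "prefill_embedder_128": (
        {"token_ids": torch.ones((1, 128), dtype=torch.int32)},
        {},
    ),
}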
@@ -118,7 +118,8 @@ def build_llm_metadata(
   if isinstance(gen_config.eos_token_id, int):
     stop_tokens.add(gen_config.eos_token_id)
   elif isinstance(gen_config.eos_token_id, list):
-    stop_tokens.update(gen_config.eos_token_id)
+    for token_id in gen_config.eos_token_id:
+      stop_tokens.add(token_id)
   elif hasattr(tokenizer, 'eos_token') and tokenizer.eos_token:
     stop_tokens.add(tokenizer.eos_token)
   for stop_token in stop_tokens:
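
build_llm_metadata normalizes eos_token_id, which HuggingFace generation configs store either as a single int or as a list of ints, into a flat stop-token set; the set.update call is unrolled into an explicit loop, which is behavior-preserving for a flat list. A self-contained illustration (the token ids are made up):

stop_tokens = set()
for eos_token_id in (1, [1, 106]):  # the int form and the list form
  if isinstance(eos_token_id, int):
    stop_tokens.add(eos_token_id)
  elif isinstance(eos_token_id, list):
    for token_id in eos_token_id:
      stop_tokens.add(token_id)
print(sorted(stop_tokens))  # [1, 106]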
@@ -60,3 +60,32 @@ original_use_kernel_forward_from_hub = (
 transformers.integrations.use_kernel_forward_from_hub = (
     _use_kernel_forward_from_hub
 )
+
+
+# TODO(weiyiw): Find a better way to patch Gemma3RMSNorm.
+class Gemma3RMSNorm(torch.nn.Module):
+  """RMSNorm Layer."""
+
+  def __init__(self, dim: int, eps: float = 1e-6):
+    """RMSNorm Layer."""
+    super().__init__()
+    self.weight = torch.nn.Parameter(torch.ones(dim))
+    self.variance_epsilon = eps
+    self.hidden_size = dim
+
+  def forward(self, hidden_states):
+    return normalization.rms_norm_with_hlfb(
+        hidden_states,
+        self.weight + 1.0,
+        self.variance_epsilon,
+        torch.ones((self.hidden_size,), dtype=torch.float32),
+    )
+
+  def extra_repr(self):
+    return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"
+
+
+from transformers.models.gemma3 import modeling_gemma3
+
+original_gemma3_rms_norm = modeling_gemma3.Gemma3RMSNorm
+modeling_gemma3.Gemma3RMSNorm = Gemma3RMSNorm
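
The patch keeps Gemma's zero-centered weight convention, where the effective scale is 1 + weight, but routes the forward pass through normalization.rms_norm_with_hlfb so the converter sees one fused high-level op instead of the decomposed graph. Numerically, the forward should match this plain-PyTorch reference (a sketch of the math only, without the HLFB wrapper or its extra offset argument):

import torch

def gemma3_rms_norm_reference(x, weight, eps=1e-6):
  # RMS-normalize over the last dim, then scale by (1 + weight).
  variance = x.float().pow(2).mean(dim=-1, keepdim=True)
  normed = x.float() * torch.rsqrt(variance + eps)
  return (normed * (1.0 + weight.float())).type_as(x)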
@@ -31,6 +31,7 @@ def export(
     quantization_recipe: str = 'dynamic_wi8_afp32',
     enable_dynamic_shape: bool = False,
     externalize_embedder: bool = False,
+    single_token_embedder: bool = False,
     key_ts_idx: int = 2,
     value_ts_idx: int = 3,
     split_cache: bool = False,
@@ -38,6 +39,7 @@ def export(
     # target_accelerator: str | None = None,
     trust_remote_code: bool = False,
     use_jinja_template: bool = False,
+    task: str = 'text_generation',
 ):
   """Exports HuggingFace Transformers model to tflite."""
   # TODO(weiyiw): Use tmp dir for work_dir.
@@ -47,6 +49,7 @@ def export(
       model,
       trust_remote_code=trust_remote_code,
       auto_model_override=auto_model_override,
+      task=task,
   )
   del config  # Unused.
   if split_cache and not externalize_embedder:
@@ -62,6 +65,7 @@ def export(
       if enable_dynamic_shape
       else None,
       externalize_embedder=externalize_embedder,
+      single_token_embedder=single_token_embedder,
       k_ts_idx=key_ts_idx,
       v_ts_idx=value_ts_idx,
       split_cache=split_cache,
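
Taken together, the export() changes thread both new knobs end to end: task is forwarded into load_model, and single_token_embedder into the exportable-module config. A hypothetical invocation for a vision-language checkpoint (the checkpoint id and the positional model argument are illustrative; only the keyword parameters visible in the hunks above are confirmed):

export(
    'google/gemma-3-4b-it',  # illustrative checkpoint
    task='image_text_to_text',
    externalize_embedder=True,
    single_token_embedder=True,
    quantization_recipe='dynamic_wi8_afp32',
)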
litert_torch/version.py CHANGED
@@ -15,4 +15,4 @@
 
 # The next version of litert-torch.
 # The minor version code should be bumped after every release.
-__version__ = "0.9.0.dev20260204"
+__version__ = "0.9.0.dev20260206"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: litert-torch-nightly
-Version: 0.9.0.dev20260204
+Version: 0.9.0.dev20260206
 Summary: Support PyTorch model conversion with LiteRT.
 Home-page: https://github.com/google-ai-edge/litert-torch
 Keywords: On-Device ML,AI,Google,TFLite,LiteRT,PyTorch,LLMs,GenAI
@@ -3,7 +3,7 @@ litert_torch/_config.py,sha256=zDnki83sBsQzDAea6bvzwccylWHnPUzbEyGGRh6B14w,2526
 litert_torch/cli.py,sha256=TiAUbgbWm3ecTUJtJ1_hjKJuC1LrG-Qwnm8_zws-sVY,984
 litert_torch/conftest.py,sha256=gYmFrsR4c_fjIidbyrDnek26yS0crDP6-UoyMvy-WFg,757
 litert_torch/model.py,sha256=KXFTyyfPM6AnP0JoSwsTqQR3lUQbMkTGSr3dUsfQ5Jk,5635
-litert_torch/version.py,sha256=Cx4KT-tDB1CIsBgn-EX34IaDb7a_kd9beo1zv2eOhYQ,804
+litert_torch/version.py,sha256=20bJvaJMGX0olC644fZPyW86ti_tJxn-WOPVm9S1tZ4,804
 litert_torch/_convert/__init__.py,sha256=qdLdbj5NjhNG-QgY5O_8TzOr2XaDoWvmdY9JNPStQmw,670
 litert_torch/_convert/conversion.py,sha256=NuQEphyYp3W19IKvyTWo9pe7zt1-XmWM4zU9PDkUm54,6108
 litert_torch/_convert/conversion_utils.py,sha256=MWpB-3eN-rvQzTtXsPL30cDIK431SQuwvw3ia2K2ONM,2158
@@ -179,7 +179,7 @@ litert_torch/generative/examples/tiny_llama/verify.py,sha256=6geA8OUOSj8_sTRyoo0
 litert_torch/generative/examples/tiny_llama/verify_util.py,sha256=FKMC6Olex6bJbB8HXvC1KwxPbKgRBfT1CjoWcmyaPD8,2989
 litert_torch/generative/export_hf/__init__.py,sha256=5xWIp2ziIwapcZcjSKfeaFgBnIooa8ckhTQ7mazZC3c,670
 litert_torch/generative/export_hf/__main__.py,sha256=8VuBDkZ2sL-q2XdQ45qwzeHQk39-MM_6TdkxOU_23xE,782
-litert_torch/generative/export_hf/export.py,sha256=HC_nwBg3WMGL_qMfOn7OB2SATKed6UQe2KqqE-6CHIA,3656
+litert_torch/generative/export_hf/export.py,sha256=koqs0znGe9QXlEoRF7TuvDtrjbiXa79qgBhGK6MENwk,3800
 litert_torch/generative/export_hf/export_main.py,sha256=bQidNXz0MEP_gil86LSfnpCW0pUiqZq2-F9ZOrSb3Yk,1183
 litert_torch/generative/export_hf/core/__init__.py,sha256=5xWIp2ziIwapcZcjSKfeaFgBnIooa8ckhTQ7mazZC3c,670
 litert_torch/generative/export_hf/core/attention.py,sha256=bXuTHNeVtKwWf6YXgb5I2j08vvgb9M7r1RYGvdjl9QI,4798
@@ -187,14 +187,14 @@ litert_torch/generative/export_hf/core/attention_test.py,sha256=KBSyYjHoTKYi6Se6
 litert_torch/generative/export_hf/core/cache.py,sha256=UnuTBpJvplEyig1myrhA1d0QJ05pNJgWbm-GrsUu5Uk,11763
 litert_torch/generative/export_hf/core/cache_base.py,sha256=6s-6L6iSa-qn0PLdAAhpHdOU9qwqEE-JVdlIsYyCPt4,2180
 litert_torch/generative/export_hf/core/cache_test.py,sha256=y-v-oOGtRNPGWRfIfW3FcpDxvJbzrBU6Pb2o66FkUzU,6203
-litert_torch/generative/export_hf/core/export_lib.py,sha256=W1jG6L9oqu3hYnXaN0lQLtEqc5ZPkTyDVOzGOsLLkAU,14142
+litert_torch/generative/export_hf/core/export_lib.py,sha256=qDOyLxCMeQtRbJQlLcI8Gq3PmL9yMJ2gUvPhoEcVty8,14516
 litert_torch/generative/export_hf/core/exportable_module.py,sha256=niCS0na0VvFLiwebnL4JeXZh2hT8FCQmp-vnyTBh7pA,8257
-litert_torch/generative/export_hf/core/exportable_module_config.py,sha256=cpqtagzOglvbr91NFC0K_QX-7mr5Q7gnhQ8Srqral9Y,1284
-litert_torch/generative/export_hf/core/litert_lm_builder.py,sha256=f8Q2ifVyt65V-kRL0X9FRpQNKIer0R_Yx2lECZTMGPU,7965
-litert_torch/generative/export_hf/core/patches.py,sha256=i1fzs0anIFbBH-Q_PwCtp9VKXy64olJKwnGpnJUjkEo,1815
+litert_torch/generative/export_hf/core/exportable_module_config.py,sha256=oJOWBBKWYpLq5A5qXEAIZbLwvCpY22nstHx6L88CXqU,1322
+litert_torch/generative/export_hf/core/litert_lm_builder.py,sha256=ai-5Njn8fGKco_5jiRnmACBIKu1EL2b5SY5ArmsmttM,7998
+litert_torch/generative/export_hf/core/patches.py,sha256=h4TCTNPT0N9xcMFfJ54XnpCHt1iKwS8mU-GhAxdsUrc,2636
 litert_torch/generative/export_hf/core/utils.py,sha256=5Wgs9aAOKd2i8wmQF_IierLUuFG23v1T6zZPr-azQ7A,4018
 litert_torch/generative/export_hf/core/external_emb/__init__.py,sha256=5xWIp2ziIwapcZcjSKfeaFgBnIooa8ckhTQ7mazZC3c,670
-litert_torch/generative/export_hf/core/external_emb/exportable_module.py,sha256=mWn75lLms3BAeCTEvbkGZ2n4fxtwsqGA8PP4S8-JBdY,3058
+litert_torch/generative/export_hf/core/external_emb/exportable_module.py,sha256=1ke2mugD--1bIqeJhAJ4Ly7o6NRW8RZL79UzCqRHLNY,4113
 litert_torch/generative/export_hf/core/external_rope/__init__.py,sha256=5xWIp2ziIwapcZcjSKfeaFgBnIooa8ckhTQ7mazZC3c,670
 litert_torch/generative/export_hf/core/external_rope/exportable_module.py,sha256=czTf835b9Nw4XcDo6cd9chsmBdbIsdqMtnEkwuwMgX0,2478
 litert_torch/generative/export_hf/core/external_rope/preprocess_model.py,sha256=NL3zROb7EZNAvZfutIhLk4KqXid_HklQMUjHZqZYOH4,1735
@@ -319,9 +319,9 @@ litert_torch/testing/__init__.py,sha256=AfYP1HwTYSQmupveonEHCDV5dEyshzUgbwUrCUhb
 litert_torch/testing/export.py,sha256=3dR6oxnrdtX0MfqAfMv233cf3sHA4e0F2TBQotoo8xc,3292
 litert_torch/testing/model_coverage/__init__.py,sha256=uPXeAhWiD1O0aMDLCX7FTOSNQiea8yOtoIYPCuHEAG4,763
 litert_torch/testing/model_coverage/model_coverage.py,sha256=EPCI7PbNPb7GV28lo3qQvFdzJwJ_ZDrbCGdpeiBZhVo,4715
-litert_torch_nightly-0.9.0.dev20260204.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-litert_torch_nightly-0.9.0.dev20260204.dist-info/METADATA,sha256=-DAJh0KO6GPV9RjXiU3oOK4KKJTj1szkhHO-F6XI99o,2463
-litert_torch_nightly-0.9.0.dev20260204.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
-litert_torch_nightly-0.9.0.dev20260204.dist-info/entry_points.txt,sha256=roYAi9hp0uYrMudMR59hGNF2pz0TSAtqNl4vQLJzxnE,55
-litert_torch_nightly-0.9.0.dev20260204.dist-info/top_level.txt,sha256=mGrsl2SYcjQSLBJX4ZXrHnFqHZe6QLRR7uk0tLfzwfM,13
-litert_torch_nightly-0.9.0.dev20260204.dist-info/RECORD,,
+litert_torch_nightly-0.9.0.dev20260206.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+litert_torch_nightly-0.9.0.dev20260206.dist-info/METADATA,sha256=mRNKi6UzLyaT9u1_6oqV95e9vs3f_77JAhDxkBtU3Ao,2463
+litert_torch_nightly-0.9.0.dev20260206.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+litert_torch_nightly-0.9.0.dev20260206.dist-info/entry_points.txt,sha256=roYAi9hp0uYrMudMR59hGNF2pz0TSAtqNl4vQLJzxnE,55
+litert_torch_nightly-0.9.0.dev20260206.dist-info/top_level.txt,sha256=mGrsl2SYcjQSLBJX4ZXrHnFqHZe6QLRR7uk0tLfzwfM,13
+litert_torch_nightly-0.9.0.dev20260206.dist-info/RECORD,,