PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

Files changed (169) hide show

ai_edge_torch/generative/examples/gemma/verify_util.py ADDED Viewed

@@ -0,0 +1,143 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility functions to verify the reauthored Gemma model."""
+import dataclasses
+import logging
+import os
+from typing import List, Tuple
+import ai_edge_torch.generative.layers.attention_utils as attn_utils
+from ai_edge_torch.generative.utilities import verifier
+from gemma import config as gemma_config
+from gemma import model as gemma_model
+import torch
+@dataclasses.dataclass
+class _Output:
+  """Result container exposing the `logits` field of a forward pass."""
+  # Logits produced by GemmaWrapper.forward(), already cast to float.
+  logits: torch.Tensor
+class GemmaWrapper(verifier.ModelWrapper):
+  """Adapts the original Gemma model to the interface the verifier drives.
+  The verifier calls model.forward() with the maximum sequence length (1024)
+  and expects the output to have a 'logits' field, while Gemma takes the input
+  tokens with their actual length and returns the logits in a tuple.
+  The verifier runs the tokenizer before model.generate(), while Gemma runs the
+  tokenizer inside model.generate().
+  """
+  def __init__(self, model: torch.nn.Module, max_new_tokens: int):
+    """Wraps `model` and stores the output length used by generate()."""
+    super().__init__(model)
+    self.max_new_tokens = max_new_tokens
+  def _get_actual_input_len(self, tokens: torch.Tensor) -> int:
+    """Returns the index of the first 0 in row 0, or the row length if none."""
+    for position, token_id in enumerate(tokens[0].tolist()):
+      if token_id == 0:
+        return position
+    return tokens.shape[1]
+  def _get_kv_caches(
+      self, max_seq_len: int
+  ) -> List[Tuple[torch.Tensor, torch.Tensor]]:
+    """Builds one pair of zero-filled cache tensors per hidden layer."""
+    model_config = self.model.config
+    shape = (
+        1,
+        max_seq_len,
+        model_config.num_key_value_heads,
+        model_config.head_dim,
+    )
+    caches = []
+    for _ in range(model_config.num_hidden_layers):
+      # Every tensor is independent, so writes to one never alias another.
+      caches.append((torch.zeros(shape), torch.zeros(shape)))
+    return caches
+  def forward(self, tokens: torch.Tensor) -> _Output:
+    """Runs the model on the leading part of `tokens` before the first 0."""
+    padded_len = tokens.shape[1]
+    prompt_len = self._get_actual_input_len(tokens)
+    positions = torch.arange(0, prompt_len, dtype=torch.long)
+    # The mask and the KV caches are sized for the padded length; only the
+    # mask rows for the actual positions are handed to the model.
+    causal_mask = attn_utils.build_causal_mask_cache(padded_len)
+    outputs = self.model.forward(
+        input_token_ids=tokens[0, :prompt_len].unsqueeze(0),
+        input_positions=positions,
+        kv_write_indices=None,
+        kv_caches=self._get_kv_caches(padded_len),
+        mask=causal_mask.index_select(2, positions),
+        output_positions=positions,
+        temperatures=None,
+        top_ps=torch.tensor([1.0], dtype=torch.float),
+        top_ks=torch.tensor([1], dtype=torch.long),
+    )
+    # The model returns a 2-tuple; only its second element (logits) is used.
+    _, logits = outputs
+    return _Output(logits.float())
+  def generate(self, tokens: torch.Tensor) -> torch.Tensor:
+    """Decodes row 0 to text, generates with Gemma, and re-encodes the result."""
+    tokenizer = self.model.tokenizer
+    prompt_text = tokenizer.decode(tokens[0].tolist())
+    generated_text = self.model.generate(
+        prompt_text, device="cpu", output_len=self.max_new_tokens, top_k=1
+    )
+    # Re-encode prompt plus response and add a leading dimension of size 1.
+    full_text = prompt_text + generated_text
+    return torch.tensor([tokenizer.encode(full_text)])
+class TokenizerWrapper(torch.nn.Module):
+  """Bridges the verifier's tensor-based tokenizer calls to Gemma's tokenizer.
+  The verifier expects the tokenizer to handle tokens as torch.Tensor, while
+  the Gemma tokenizer expects tokens as a list.
+  """
+  def __init__(self, tokenizer: torch.nn.Module):
+    """Stores the tokenizer that encode() and decode() delegate to."""
+    super().__init__()
+    self.tokenizer = tokenizer
+  def encode(self, text: str, **_) -> torch.Tensor:
+    """Encodes `text` and wraps the ids in one extra leading dimension.
+    Extra keyword arguments are accepted and ignored.
+    """
+    token_ids = self.tokenizer.encode(text)
+    return torch.tensor([token_ids])
+  def decode(self, tokens: torch.Tensor) -> str:
+    """Converts `tokens` to a list and decodes it with the tokenizer."""
+    token_list = tokens.tolist()
+    return self.tokenizer.decode(token_list)
+def verify_reauthored_gemma_model(
+    checkpoint: str,
+    variant: str,
+    reauthored_model: torch.nn.Module,
+    generate_prompts: List[str],
+    forward_input_ids: List[List[int]],
+    weight_filename: str = "model.ckpt",
+    tokenizer_filename: str = "tokenizer.model",
+    max_new_tokens: int = 20,
+    rtol: float = 1e-05,
+    atol: float = 1e-05,
+):
+  """Checks a reauthored Gemma model against the original loaded from disk.
+  Args:
+    checkpoint: Directory holding the original weight and tokenizer files.
+    variant: Variant name handed to gemma_config.get_model_config().
+    reauthored_model: The reauthored model to verify.
+    generate_prompts: Prompts forwarded to the verifier.
+    forward_input_ids: Token id lists forwarded to the verifier.
+    weight_filename: Weight file name inside `checkpoint`.
+    tokenizer_filename: Tokenizer file name inside `checkpoint`.
+    max_new_tokens: Output length the original model generates per prompt.
+    rtol: Forwarded unchanged to verifier.verify_reauthored_model().
+    atol: Forwarded unchanged to verifier.verify_reauthored_model().
+  """
+  original_config = gemma_config.get_model_config(variant)
+  tokenizer_path = os.path.join(checkpoint, tokenizer_filename)
+  original_config.tokenizer = tokenizer_path
+  # The original model must run in float32 to be comparable with the
+  # reauthored model.
+  original_config.dtype = torch.float32
+  logging.info("Loading the original model from: %s", checkpoint)
+  original_model = gemma_model.GemmaForCausalLM(original_config).eval()
+  weight_path = os.path.join(checkpoint, weight_filename)
+  original_model.load_weights(weight_path)
+  # Adapt both the model and its tokenizer to what the verifier calls.
+  wrapped_original = GemmaWrapper(original_model, max_new_tokens)
+  wrapped_tokenizer = TokenizerWrapper(original_model.tokenizer)
+  verifier.verify_reauthored_model(
+      original_model=wrapped_original,
+      reauthored_model=reauthored_model,
+      tokenizer=wrapped_tokenizer,
+      generate_prompts=generate_prompts,
+      forward_input_ids=forward_input_ids,
+      rtol=rtol,
+      atol=atol,
+  )

ai_edge_torch/generative/examples/openelm/convert_to_tflite.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of converting OpenELM model to multi-signature tflite model."""
+import os
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.openelm import openelm
+from ai_edge_torch.generative.utilities import converter
+# Command-line flags of the OpenELM conversion example.
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/openelm'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+# Used as a directory: main() joins this value with a generated file name, so
+# the help text describes a directory rather than a file path.
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The directory to export the tflite file to.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+# Also selects the 'q8' / 'f32' part of the exported file name in main().
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+def main(_):
+  """Builds the OpenELM model from the checkpoint and exports it to tflite.
+  The output file name encodes the quantization mode, the prefill sequence
+  length and the KV cache size. The positional argument is unused.
+  """
+  pytorch_model = openelm.build_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  if _QUANTIZE.value:
+    quant_suffix = 'q8'
+  else:
+    quant_suffix = 'f32'
+  output_filename = (
+      f'openelm_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  )
+  # The tflite_path flag is the directory the file is written into.
+  export_path = os.path.join(_TFLITE_PATH.value, output_filename)
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=export_path,
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      quantize=_QUANTIZE.value,
+  )
+# Script entry point: absl parses the flags and then calls main().
+if __name__ == '__main__':
+  app.run(main)

ai_edge_torch/generative/examples/openelm/openelm.py ADDED Viewed

@@ -0,0 +1,206 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of building an OpenELM model."""
+from ai_edge_torch.generative.layers import attention
+from ai_edge_torch.generative.layers import builder
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+import ai_edge_torch.generative.layers.attention_utils as attn_utils
+import ai_edge_torch.generative.layers.model_config as cfg
+import ai_edge_torch.generative.utilities.loader as loading_utils
+import torch
+from torch import nn
+# Checkpoint tensor names handed to the model loader. The "{}" placeholder is
+# presumably filled with the layer index by the loader - verify against
+# loading_utils.ModelLoader.
+TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+    # Model-level tensors.
+    embedding="transformer.token_embeddings",
+    final_norm="transformer.norm",
+    # No separate head tensor: the model re-uses the embedding weights.
+    lm_head=None,
+    # Per-layer attention tensors.
+    pre_attn_norm="transformer.layers.{}.attn_norm",
+    attn_fused_qkv_proj="transformer.layers.{}.attn.qkv_proj",
+    attn_query_norm="transformer.layers.{}.attn.q_norm",
+    attn_key_norm="transformer.layers.{}.attn.k_norm",
+    attn_output_proj="transformer.layers.{}.attn.out_proj",
+    # Per-layer feed-forward tensors.
+    pre_ff_norm="transformer.layers.{}.ffn_norm",
+    ff_up_proj="transformer.layers.{}.ffn.proj_1",
+    ff_down_proj="transformer.layers.{}.ffn.proj_2",
+)
+class OpenELM(nn.Module):
+  """OpenELM model assembled from the Edge Generative API layers."""
+  def __init__(self, config: cfg.ModelConfig):
+    """Creates the layers, the RoPE cache and the causal mask from `config`."""
+    super().__init__()
+    embedding_dim = config.embedding_dim
+    vocab_size = config.vocab_size
+    # Token embedding and output projection.
+    self.tok_embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
+    self.lm_head = nn.Linear(
+        embedding_dim, vocab_size, bias=config.lm_head_use_bias
+    )
+    # OpenELM re-uses the embedding as the head projection layer.
+    self.lm_head.weight.data = self.tok_embedding.weight.data
+    blocks = [
+        attention.TransformerBlock(config.block_config(layer_idx), config)
+        for layer_idx in range(config.num_layers)
+    ]
+    self.transformer_blocks = nn.ModuleList(blocks)
+    self.final_norm = builder.build_norm(
+        embedding_dim, config.final_norm_config
+    )
+    # rotary_percentage and head_dim are the same for every layer block, so
+    # the first block is representative.
+    first_attn_config = config.block_config(0).attn_config
+    rope_dim = int(
+        first_attn_config.rotary_percentage * first_attn_config.head_dim
+    )
+    cpu = torch.device("cpu")
+    self.rope_cache = attn_utils.build_rope_cache(
+        size=config.kv_cache_max,
+        dim=rope_dim,
+        base=10_000,
+        condense_ratio=1,
+        dtype=torch.float32,
+        device=cpu,
+    )
+    self.mask_cache = attn_utils.build_causal_mask_cache(
+        size=config.kv_cache_max,
+        dtype=torch.float32,
+        device=cpu,
+    )
+    self.config = config
+  @torch.inference_mode
+  def forward(
+      self,
+      tokens: torch.Tensor,
+      input_pos: torch.Tensor,
+      kv_cache: kv_utils.KVCache,
+  ) -> dict[torch.Tensor, kv_utils.KVCache]:
+    """Runs the transformer stack over `tokens`.
+    Args:
+      tokens: Two-dimensional token tensor; the second dimension is the
+        sequence length.
+      input_pos: Indices used to select rows of the RoPE and mask caches.
+      kv_cache: KV cache holding one entry per transformer block.
+    Returns:
+      A dict with the string keys "logits" and "kv_cache" (the updated cache).
+    """
+    _, seq_len = tokens.shape
+    assert self.config.max_seq_len >= seq_len, (
+        f"Cannot forward sequence of length {seq_len}, max seq length is only"
+        f" {self.config.max_seq_len}"
+    )
+    assert len(self.transformer_blocks) == len(kv_cache.caches), (
+        "The number of transformer blocks and the number of KV cache entries"
+        " must be the same."
+    )
+    # Pick the rotary embedding rows and mask rows for the given positions.
+    rope_cos, rope_sin = self.rope_cache
+    rope = (
+        rope_cos.index_select(0, input_pos),
+        rope_sin.index_select(0, input_pos),
+    )
+    mask = self.mask_cache.index_select(2, input_pos)
+    mask = mask[:, :, :, : self.config.kv_cache_max]
+    # token embeddings of shape (b, t, n_embd)
+    hidden = self.tok_embedding(tokens)
+    new_entries = []
+    for layer_idx, block in enumerate(self.transformer_blocks):
+      if kv_cache:
+        layer_entry = kv_cache.caches[layer_idx]
+      else:
+        layer_entry = None
+      hidden, layer_entry = block(hidden, rope, mask, input_pos, layer_entry)
+      # Only entries the block actually returned are carried forward.
+      if layer_entry:
+        new_entries.append(layer_entry)
+    updated_kv_cache = kv_utils.KVCache(tuple(new_entries))
+    logits = self.lm_head(self.final_norm(hidden))  # (b, t, vocab_size)
+    return {"logits": logits, "kv_cache": updated_kv_cache}
+def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  """Returns the model config for an OpenELM model.
+  Args:
+    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
+      is 1024.
+  Returns:
+    The model config for an OpenELM model.
+  """
+  num_layers = 36
+  rms_norm = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, epsilon=1e-6
+  )
+  # Per-layer head counts; each list has one entry per layer (36 in total).
+  heads_per_layer = [12] * 4 + [16] * 14 + [20] * 12 + [24] * 6
+  query_groups_per_layer = [3] * 4 + [4] * 14 + [5] * 12 + [6] * 6
+  def round_to_multiple(value, multiple):
+    """Rounds `value` to a multiple of `multiple`, losing at most 10%."""
+    rounded = int(value + multiple / 2) // multiple * multiple
+    # Bump up one step if rounding shrank the value by more than 10%.
+    if rounded < 0.9 * value:
+      rounded += multiple
+    return rounded
+  def make_block_config(idx: int) -> cfg.TransformerBlockConfig:
+    """Builds the config of layer `idx`."""
+    attn_config = cfg.AttentionConfig(
+        num_heads=heads_per_layer[idx],
+        head_dim=128,
+        num_query_groups=query_groups_per_layer[idx],
+        rotary_percentage=1.0,
+        qkv_transpose_before_split=True,
+        query_norm_config=rms_norm,
+        key_norm_config=rms_norm,
+    )
+    # The way to get intermediate size is from
+    # https://huggingface.co/apple/OpenELM-3B/blob/main/modeling_openelm.py
+    intermediate_size = round_to_multiple((0.5 + 0.1 * idx) * 3072, 256)
+    ff_config = cfg.FeedForwardConfig(
+        type=cfg.FeedForwardType.SEQUENTIAL,
+        activation=cfg.ActivationConfig(cfg.ActivationType.SILU_GLU),
+        intermediate_size=intermediate_size,
+        pre_ff_norm_config=rms_norm,
+    )
+    return cfg.TransformerBlockConfig(
+        attn_config=attn_config,
+        ff_config=ff_config,
+        pre_attention_norm_config=rms_norm,
+    )
+  block_configs = []
+  for layer_idx in range(num_layers):
+    block_configs.append(make_block_config(layer_idx))
+  return cfg.ModelConfig(
+      vocab_size=32000,
+      num_layers=num_layers,
+      max_seq_len=2048,
+      embedding_dim=3072,
+      kv_cache_max_len=kv_cache_max_len,
+      block_configs=block_configs,
+      final_norm_config=rms_norm,
+  )
+def get_fake_model_config(kv_cache_max_len: int = 128) -> cfg.ModelConfig:
+  """Returns a scaled-down variant of the OpenELM model config.
+  It starts from get_model_config() and shrinks the vocabulary, layer count,
+  embedding size and per-block sizes.
+  Args:
+    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
+      is 128.
+  Returns:
+    The reduced model config.
+  """
+  fake_num_layers = 2
+  config = get_model_config(kv_cache_max_len)
+  config.vocab_size = 128
+  config.num_layers = fake_num_layers
+  config.max_seq_len = 2 * kv_cache_max_len
+  config.embedding_dim = 128
+  # Keep only as many block configs as there are layers left.
+  config.block_configs = config.block_configs[:fake_num_layers]
+  for block in config.block_configs:
+    attn = block.attn_config
+    attn.num_heads = 3
+    attn.head_dim = 64
+    block.ff_config.intermediate_size = 128
+  return config
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+  """Builds an OpenELM model and loads its weights from `checkpoint_path`.
+  Args:
+    checkpoint_path: Location of the checkpoint handed to the model loader.
+    **kwargs: Forwarded to get_model_config().
+  Returns:
+    The loaded model, switched to eval mode.
+  """
+  model = OpenELM(get_model_config(**kwargs))
+  # Since embedding and lm-head use the same weight, strict has to be False.
+  loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES).load(
+      model, strict=False
+  )
+  return model.eval()

ai_edge_torch/generative/examples/openelm/verify.py ADDED Viewed

@@ -0,0 +1,64 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Verifies the reauthored OpenELM-3B model."""
+import logging
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.openelm import openelm
+from ai_edge_torch.generative.utilities import verifier
+import transformers
+# Multi-string flag whose value main() passes to the verifier as the prompts.
+_PROMPTS = flags.DEFINE_multi_string(
+    name="prompts",
+    default="What is the meaning of life?",
+    help="The input prompts to generate answers.",
+)
+def main(_):
+  """Verifies the reauthored OpenELM-3B model against the original model.
+  The original model is loaded through transformers, the reauthored model is
+  built from the same cached checkpoint directory, and both are handed to the
+  verifier with the prompts flag. The positional argument is unused.
+  """
+  checkpoint = "apple/OpenELM-3B"
+  logging.info("Loading the original model from: %s", checkpoint)
+  # NOTE(review): trust_remote_code=True lets the checkpoint's own code run.
+  original_model = transformers.AutoModelForCausalLM.from_pretrained(
+      checkpoint, trust_remote_code=True
+  )
+  wrapper_model = verifier.ModelWrapper(model=original_model)
+  # The directory of the cached config file is used as the checkpoint
+  # location for the reauthored model.
+  cached_config_file = transformers.utils.cached_file(
+      checkpoint, transformers.utils.CONFIG_NAME
+  )
+  reauthored_checkpoint = pathlib.Path(cached_config_file).parent
+  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
+  reauthored_model = openelm.build_model(reauthored_checkpoint)
+  # The tokenizer is loaded from a separate checkpoint.
+  tokenizer_checkpoint = "meta-llama/Llama-2-7b-hf"
+  logging.info("Loading the tokenizer from: %s", tokenizer_checkpoint)
+  tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_checkpoint)
+  verifier.verify_reauthored_model(
+      original_model=wrapper_model,
+      reauthored_model=reauthored_model,
+      tokenizer=tokenizer,
+      generate_prompts=_PROMPTS.value,
+  )
+# Script entry point: absl parses the flags and then calls main().
+if __name__ == "__main__":
+  app.run(main)

ai_edge_torch/generative/examples/phi/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================

ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of converting a Phi-3.5 model to multi-signature tflite model."""
+import os
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.phi import phi3
+from ai_edge_torch.generative.utilities import converter
+# Command-line flags of the Phi-3.5 conversion example.
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/phi3'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+# Used as a directory: main() joins this value with a generated file name, so
+# the help text describes a directory rather than a file path.
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The directory to export the tflite file to.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+# Also selects the 'q8' / 'f32' part of the exported file name in main().
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+def main(_):
+  """Builds the Phi-3 model from the checkpoint and exports it to tflite.
+  The output file name encodes the quantization mode, the prefill sequence
+  length and the KV cache size. The positional argument is unused.
+  """
+  pytorch_model = phi3.build_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  if _QUANTIZE.value:
+    quant_suffix = 'q8'
+  else:
+    quant_suffix = 'f32'
+  output_filename = (
+      f'phi3_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  )
+  # The tflite_path flag is the directory the file is written into.
+  export_path = os.path.join(_TFLITE_PATH.value, output_filename)
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=export_path,
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      quantize=_QUANTIZE.value,
+  )
+# Script entry point: absl parses the flags and then calls main().
+if __name__ == '__main__':
+  app.run(main)

ai_edge_torch/generative/examples/phi/convert_to_tflite.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of converting a Phi-2 model to multi-signature tflite model."""
+import os
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.phi import phi2
+from ai_edge_torch.generative.utilities import converter
+# Command-line flags of the Phi-2 conversion example.
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/phi2'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+# Used as a directory: main() joins this value with a generated file name, so
+# the help text describes a directory rather than a file path.
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The directory to export the tflite file to.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+# Also selects the 'q8' / 'f32' part of the exported file name in main().
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+def main(_):
+  """Builds the Phi-2 model from the checkpoint and exports it to tflite.
+  The output file name encodes the quantization mode, the prefill sequence
+  length and the KV cache size. The positional argument is unused.
+  """
+  pytorch_model = phi2.build_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  if _QUANTIZE.value:
+    quant_suffix = 'q8'
+  else:
+    quant_suffix = 'f32'
+  output_filename = (
+      f'phi2_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  )
+  # The tflite_path flag is the directory the file is written into.
+  export_path = os.path.join(_TFLITE_PATH.value, output_filename)
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=export_path,
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      quantize=_QUANTIZE.value,
+  )
+# Script entry point: absl parses the flags and then calls main().
+if __name__ == '__main__':
+  app.run(main)

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl