PyPI - ai-edge-torch-nightly - Versions diffs - 0.3.0.dev20240909__py3-none-any.whl → 0.3.0.dev20240911__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.3.0.dev20240909__py3-none-any.whl → 0.3.0.dev20240911__py3-none-any.whl

Files changed (36) hide show

ai_edge_torch/_convert/test/test_convert.py CHANGED Viewed

@@ -25,6 +25,7 @@ from ai_edge_torch.testing import model_coverage
 import numpy as np
 import tensorflow as tf
 import torch
+from torch import nn
 import torchvision
 from absl.testing import absltest as googletest
@@ -51,7 +52,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_add(self):
     """Tests conversion of a simple Add module."""
-    class Add(torch.nn.Module):
+    class Add(nn.Module):
       def forward(self, a, b):
         return a + b
@@ -70,7 +71,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_dot_add(self):
     """Tests conversion of a matrix multiplication followed by an add."""
-    class DotAdd(torch.nn.Module):
+    class DotAdd(nn.Module):
       def forward(self, a, b, c):
         return a @ b + c
@@ -99,7 +100,7 @@ class TestConvert(googletest.TestCase):
   def test_signature_args_ordering(self):
     """Tests conversion of a model with more than 10 arguments."""
-    class AddChainWith11Args(torch.nn.Module):
+    class AddChainWith11Args(nn.Module):
       """A model with 11 arguments."""
       def forward(
@@ -152,7 +153,7 @@ class TestConvert(googletest.TestCase):
   def test_multi_output_model(self):
     """Tests conversion of a model that returns multiple outputs."""
-    class BasicAddModelWithMultipleOutputs(torch.nn.Module):
+    class BasicAddModelWithMultipleOutputs(nn.Module):
       """A model that returns multiple outputs."""
       def forward(self, arg0, arg1):
@@ -176,7 +177,7 @@ class TestConvert(googletest.TestCase):
   def test_12_outputs_model(self):
     """Tests conversion of a model that returns more than 10 outputs."""
-    class BasicAddModelWithMultipleOutputs(torch.nn.Module):
+    class BasicAddModelWithMultipleOutputs(nn.Module):
       """A model that returns multiple outputs."""
       def forward(self, arg0, arg1):
@@ -245,7 +246,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_add_converter_flags(self):
     """Tests conversion of an add module setting a tflite converter flag."""
-    class Add(torch.nn.Module):
+    class Add(nn.Module):
       def forward(self, a, b):
         return a + b
@@ -267,6 +268,27 @@ class TestConvert(googletest.TestCase):
     )
     self.assertTrue(os.path.isdir(ir_dump_path))
+  def test_convert_conv_transpose_batch_norm(self):
+    """Tests conversion of a model with ConvTranspose2d and BatchNorm2d."""
+    channels = 2
+    size = 2
+    torch_model = nn.Sequential(
+        nn.ConvTranspose2d(
+            channels, channels, 1, stride=2, dilation=1, bias=False
+        ),
+        nn.BatchNorm2d(channels),
+    )
+    torch_model.eval()
+    sample_input = (torch.rand(1, channels, size, size),)
+    edge_model = ai_edge_torch.convert(torch_model, sample_input)
+    result = model_coverage.compare_tflite_torch(
+        edge_model, torch_model, sample_input
+    )
+    self.assertTrue(result)
   @googletest.skipIf(
       not config.Config.use_torch_xla,
       reason="Shape polymorphism is not yet support with odml_torch.",
@@ -274,7 +296,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_with_dynamic_batch(self):
     """Test converting a simple model with dynamic batch size."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def __init__(self):
         super().__init__()
@@ -304,7 +326,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_with_kwargs(self):
     """Test converting a simple model with sample_kwargs."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def forward(self, x, y):
         return x + y
@@ -323,7 +345,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_with_args_kwargs(self):
     """Test converting a simple model with both sample_args and sample_kwargs."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def forward(self, x, y):
         return x + y
@@ -343,7 +365,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_with_args_nested_kwargs_1(self):
     """Test converting a simple model with both sample_args and nested sample_kwargs."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def forward(self, x: torch.Tensor, y: torch.Tensor, z: TestContainer1):
         return x + y + z.data_1 + z.data_2[0] + z.data_2[1]
@@ -370,7 +392,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_with_args_nested_kwargs_2(self):
     """Test converting a simple model with both sample_args and nested sample_kwargs."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def forward(self, x, y, z):
         return x + y + z.data_1 + z.data_2[0][0] + z.data_2[1]
@@ -397,7 +419,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_with_args_nested_kwargs_3(self):
     """Test converting a simple model with both sample_args and nested sample_kwargs."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def forward(self, x, y, z):
         return x + y + z.data_1 + z.data_2[0]["foo"] + z.data_2[1]
@@ -424,7 +446,7 @@ class TestConvert(googletest.TestCase):
   def test_convert_model_non_flat_output_dict(self):
     """Test converting a model with non-flat output structure."""
-    class SampleModel(torch.nn.Module):
+    class SampleModel(nn.Module):
       def forward(self, x, y, z):
         return {"x": x, "y": TestContainer1(data_1=y, data_2=[y, z])}

ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py CHANGED Viewed

@@ -13,32 +13,35 @@
 # limitations under the License.
 # ==============================================================================
+"""Example of converting a Gemma2 model to multi-signature tflite model."""
 import os
-from pathlib import Path
+import pathlib
 import ai_edge_torch
 from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.quantize import quant_recipes
 import torch
-def convert_gemma_to_tflite(
+def convert_gemma2_to_tflite(
     checkpoint_path: str,
     prefill_seq_len: int = 512,
     kv_cache_max_len: int = 1024,
     quantize: bool = True,
 ):
-  """Converting a Gemma 2 2B model to multi-signature
-  tflite model.
+  """Converts a Gemma2 2B model to multi-signature tflite model.
   Args:
-      checkpoint_path (str): The filepath to the model checkpoint, or directory holding the checkpoint.
+      checkpoint_path (str): The filepath to the model checkpoint, or directory
+        holding the checkpoint.
       prefill_seq_len (int, optional): The maximum size of prefill input tensor.
         Defaults to 512.
       kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
         including both prefill and decode. Defaults to 1024.
-      quantize (bool, optional): Whether the model should be quanized.
-        Defaults to True.
+      quantize (bool, optional): Whether the model should be quanized. Defaults
+        to True.
   """
   pytorch_model = gemma2.build_2b_model(
       checkpoint_path, kv_cache_max_len=kv_cache_max_len
@@ -48,20 +51,36 @@ def convert_gemma_to_tflite(
   prefill_input_pos = torch.arange(0, prefill_seq_len)
   decode_token = torch.tensor([[0]], dtype=torch.long)
   decode_input_pos = torch.tensor([0], dtype=torch.int64)
+  kv = kv_utils.KVCache.from_model_config(pytorch_model.config)
   quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
   edge_model = (
       ai_edge_torch.signature(
-          'prefill', pytorch_model, (prefill_tokens, prefill_input_pos)
+          'prefill',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': prefill_tokens,
+              'input_pos': prefill_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .signature(
+          'decode',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': decode_token,
+              'input_pos': decode_input_pos,
+              'kv_cache': kv,
+          },
       )
-      .signature('decode', pytorch_model, (decode_token, decode_input_pos))
       .convert(quant_config=quant_config)
   )
+  quant_suffix = 'q8' if quantize else 'f32'
   edge_model.export(
-      f'/tmp/gemma2_seq{prefill_seq_len}_kv{kv_cache_max_len}.tflite'
+      f'/tmp/gemma2_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite'
   )
 if __name__ == '__main__':
-  checkpoint_path = os.path.join(Path.home(), 'Downloads/llm_data/gemma2-2b')
-  convert_gemma_to_tflite(checkpoint_path)
+  path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma2-2b')
+  convert_gemma2_to_tflite(path)

ai_edge_torch/generative/examples/gemma/convert_to_tflite.py CHANGED Viewed

@@ -13,11 +13,14 @@
 # limitations under the License.
 # ==============================================================================
+"""Example of converting a Gemma model to multi-signature tflite model."""
 import os
-from pathlib import Path
+import pathlib
 import ai_edge_torch
 from ai_edge_torch.generative.examples.gemma import gemma
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.quantize import quant_recipes
 import torch
@@ -48,20 +51,36 @@ def convert_gemma_to_tflite(
   prefill_input_pos = torch.arange(0, prefill_seq_len)
   decode_token = torch.tensor([[0]], dtype=torch.long)
   decode_input_pos = torch.tensor([0], dtype=torch.int64)
+  kv = kv_utils.KVCache.from_model_config(pytorch_model.config)
   quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
   edge_model = (
       ai_edge_torch.signature(
-          'prefill', pytorch_model, (prefill_tokens, prefill_input_pos)
+          'prefill',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': prefill_tokens,
+              'input_pos': prefill_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .signature(
+          'decode',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': decode_token,
+              'input_pos': decode_input_pos,
+              'kv_cache': kv,
+          },
       )
-      .signature('decode', pytorch_model, (decode_token, decode_input_pos))
       .convert(quant_config=quant_config)
   )
+  quant_suffix = 'q8' if quantize else 'f32'
   edge_model.export(
-      f'/tmp/gemma_seq{prefill_seq_len}_kv{kv_cache_max_len}.tflite'
+      f'/tmp/gemma_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite'
   )
 if __name__ == '__main__':
-  checkpoint_path = os.path.join(Path.home(), 'Downloads/llm_data/gemma-2b')
-  convert_gemma_to_tflite(checkpoint_path)
+  path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma-2b')
+  convert_gemma_to_tflite(path)

ai_edge_torch/generative/examples/gemma/gemma.py CHANGED Viewed

@@ -12,13 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# Example of building a Gemma model.
+"""Example of building a Gemma model."""
 import os
-from pathlib import Path
+import pathlib
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.utilities.loader as loading_utils
@@ -84,16 +86,22 @@ class Gemma(nn.Module):
     )
     self.config = config
-  # The model's forward function takes in additional k/v cache tensors
-  # and returns the updated k/v cache tensors to the caller.
-  # This can be eliminated if we handle k/v cache updates inside the model itself.
   @torch.inference_mode
-  def forward(self, idx: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
-    _, seq_len = idx.size()
+  def forward(
+      self,
+      tokens: torch.Tensor,
+      input_pos: torch.Tensor,
+      kv_cache: kv_utils.KVCache,
+  ) -> dict[torch.Tensor, kv_utils.KVCache]:
+    _, seq_len = tokens.size()
     assert self.config.max_seq_len >= seq_len, (
         f"Cannot forward sequence of length {seq_len}, max seq length is only"
         f" {self.config.max_seq_len}"
     )
+    assert len(self.transformer_blocks) == len(kv_cache.caches), (
+        "The number of transformer blocks and the number of KV cache entries"
+        " must be the same."
+    )
     cos, sin = self.rope_cache
     cos = cos.index_select(0, input_pos)
@@ -102,15 +110,20 @@ class Gemma(nn.Module):
     mask = mask[:, :, :, : self.config.kv_cache_max]
     # token embeddings of shape (b, t, n_embd)
-    x = self.tok_embedding(idx)
+    x = self.tok_embedding(tokens)
     x = x * (self.config.embedding_dim**0.5)
-    for _, block in enumerate(self.transformer_blocks):
-      x = block(x, (cos, sin), mask, input_pos)
+    updated_kv_entires = []
+    for i, block in enumerate(self.transformer_blocks):
+      kv_entry = kv_cache.caches[i] if kv_cache else None
+      x, kv_entry = block(x, (cos, sin), mask, input_pos, kv_entry)
+      if kv_entry:
+        updated_kv_entires.append(kv_entry)
+    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
     x = self.final_norm(x)
-    res = self.lm_head(x)  # (b, t, vocab_size)
-    return res
+    logits = self.lm_head(x)  # (b, t, vocab_size)
+    return {"logits": logits, "kv_cache": updated_kv_cache}
 def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
@@ -177,25 +190,28 @@ def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model
-def define_and_run_2b() -> None:
+def define_and_run_2b(checkpoint_path: str) -> None:
   """Instantiates and runs a Gemma 2B model."""
-  current_dir = Path(__file__).parent.resolve()
+  current_dir = pathlib.Path(__file__).parent.resolve()
   gemma_goldens = torch.load(current_dir / "gemma_lm_logits.pt")
   kv_cache_max_len = 1024
-  checkpoint_path = os.path.join(Path.home(), "Downloads/llm_data/gemma-2b")
   model = build_2b_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
   idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
   tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.long, device="cpu")
   tokens[0, :4] = idx
   input_pos = torch.arange(0, kv_cache_max_len)
-  lm_logits = model.forward(tokens, input_pos)
+  kv = kv_utils.KVCache.from_model_config(model.config)
+  output = model.forward(tokens, input_pos, kv)
   print("comparing with goldens..")
   assert torch.allclose(
-      gemma_goldens, lm_logits[0, idx.shape[1] - 1, :], atol=1e-05
+      gemma_goldens, output["logits"][0, idx.shape[1] - 1, :], atol=1e-02
   )
 if __name__ == "__main__":
-  define_and_run_2b()
+  input_checkpoint_path = os.path.join(
+      pathlib.Path.home(), "Downloads/llm_data/gemma-2b"
+  )
+  define_and_run_2b(input_checkpoint_path)

ai_edge_torch/generative/examples/gemma/gemma2.py CHANGED Viewed

@@ -12,14 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# Example of building the Gemma2 2B model.
+"""Example of building a Gemma2 model."""
 import os
-from pathlib import Path
+import pathlib
 from typing import Optional, Tuple
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.utilities.loader as loading_utils
@@ -51,7 +53,8 @@ class Gemma2Block(attention.TransformerBlock):
       rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
       mask: Optional[torch.Tensor] = None,
       input_pos: Optional[torch.Tensor] = None,
-  ) -> torch.Tensor:
+      kv_cache: kv_utils.KVCacheEntry = None,
+  ) -> Tuple[torch.Tensor, Optional[kv_utils.KVCacheEntry]]:
     """Forward function of the Gemma2Block.
     Exactly the same as TransformerBlock but we call the post-attention norm
@@ -62,17 +65,19 @@ class Gemma2Block(attention.TransformerBlock):
       rope (Tuple[torch.Tensor, torch.Tensor]): the input rope tensor.
       mask (torch.Tensor): the optional mask tensor.
       input_pos (torch.Tensor): the optional input position tensor.
+      kv_cache (KVCacheEntry): the optional kv cache entry.
     Returns:
-      output activation from this transformer block.
+      output activation from this transformer block, and updated kv cache (if
+      passed in).
     """
     x_norm = self.pre_atten_norm(x)
-    attn_out = self.atten_func(x_norm, rope, mask, input_pos)
+    attn_out, kv = self.atten_func(x_norm, rope, mask, input_pos, kv_cache)
     attn_out_norm = self.post_atten_norm(attn_out)
     x = x + attn_out_norm
     output = x + self.ff(x)
-    return output
+    return output, kv
 class Gemma2(nn.Module):
@@ -138,24 +143,38 @@ class Gemma2(nn.Module):
     return self.mask_cache.index_select(2, input_pos)
   @torch.inference_mode
-  def forward(self, idx: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
-    _, seq_len = idx.size()
+  def forward(
+      self,
+      tokens: torch.Tensor,
+      input_pos: torch.Tensor,
+      kv_cache: kv_utils.KVCache,
+  ) -> dict[torch.Tensor, kv_utils.KVCache]:
+    _, seq_len = tokens.size()
     assert self.config.max_seq_len >= seq_len, (
         f"Cannot forward sequence of length {seq_len}, max seq length is only"
         f" {self.config.max_seq_len}"
     )
+    assert len(self.transformer_blocks) == len(kv_cache.caches), (
+        "The number of transformer blocks and the number of KV cache entries"
+        " must be the same."
+    )
     cos, sin = self.rope_cache
     cos = cos.index_select(0, input_pos)
     sin = sin.index_select(0, input_pos)
     # token embeddings of shape (b, t, n_embd)
-    x = self.tok_embedding(idx)
+    x = self.tok_embedding(tokens)
     x = x * (self.config.embedding_dim**0.5)
+    updated_kv_entires = []
     for i, block in enumerate(self.transformer_blocks):
       mask = self.get_attention_mask(i, input_pos)
-      x = block(x, (cos, sin), mask, input_pos)
+      kv_entry = kv_cache.caches[i] if kv_cache else None
+      x, kv_entry = block(x, (cos, sin), mask, input_pos, kv_entry)
+      if kv_entry:
+        updated_kv_entires.append(kv_entry)
+    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
     x = self.final_norm(x)
     res = self.lm_head(x)  # (b, t, vocab_size)
@@ -163,7 +182,8 @@ class Gemma2(nn.Module):
       res = res / self.config.final_logit_softcap
       res = torch.tanh(res)
       res = res * self.config.final_logit_softcap
-    return res
+    return {"logits": res, "kv_cache": updated_kv_cache}
 def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
@@ -243,14 +263,13 @@ def build_2b_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model
-def define_and_run_2b() -> None:
+def define_and_run_2b(checkpoint_path: str) -> None:
   """Instantiates and runs a Gemma2 2B model."""
-  current_dir = Path(__file__).parent.resolve()
+  current_dir = pathlib.Path(__file__).parent.resolve()
   gemma2_goldens = torch.load(current_dir / "gemma2it_2b_golden.pt")
   print("Running GEMMA 2")
   kv_cache_max_len = 1024
-  checkpoint_path = os.path.join(Path.home(), "Downloads/llm_data/gemma2-2b")
   model = build_2b_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
   toks = torch.from_numpy(
       np.array([2, 651, 9456, 576, 573, 3520, 3858, 603, 235248])
@@ -258,11 +277,13 @@ def define_and_run_2b() -> None:
   tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.long, device="cpu")
   tokens[0, :9] = toks
   input_pos = torch.arange(0, kv_cache_max_len)
-  out = model.forward(tokens, input_pos)
-  out_final = out[0, 8, :]
+  kv = kv_utils.KVCache.from_model_config(model.config)
+  out = model.forward(tokens, input_pos, kv)
+  out_final = out["logits"][0, 8, :]
   assert torch.allclose(gemma2_goldens, out_final, atol=1e-04)
 if __name__ == "__main__":
   torch.set_printoptions(sci_mode=True)
-  define_and_run_2b()
+  path = os.path.join(pathlib.Path.home(), "Downloads/llm_data/gemma2-2b")
+  define_and_run_2b(path)

ai_edge_torch/generative/examples/{experimental/phi → phi}/convert_to_tflite.py RENAMED Viewed

@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-#
-# Note: This is an experimental version of phi2 with external KV cache.
-# Please use with caution.
+"""Example of converting a Phi-2 model to multi-signature tflite model."""
 import os
-from pathlib import Path
+import pathlib
 import ai_edge_torch
-from ai_edge_torch.generative.examples.experimental.phi import phi2
-from ai_edge_torch.generative.layers.experimental import ekv_cache
+from ai_edge_torch.generative.examples.phi import phi2
+from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.quantize import quant_recipes
 import torch
@@ -32,9 +31,8 @@ def convert_phi2_to_tflite(
     kv_cache_max_len: int = 1024,
     quantize: bool = True,
 ):
-  """An example method for converting a Phi-2 model to multi-signature
+  """Converts a Phi-2 model to multi-signature tflite model.
-  tflite model.
   Args:
       checkpoint_path (str): The filepath to the model checkpoint, or directory
         holding the checkpoint.
@@ -53,7 +51,7 @@ def convert_phi2_to_tflite(
   prefill_input_pos = torch.arange(0, prefill_seq_len)
   decode_token = torch.tensor([[0]], dtype=torch.long)
   decode_input_pos = torch.tensor([0], dtype=torch.int64)
-  kv = ekv_cache.EKVCache.from_model_config(pytorch_model.config)
+  kv = kv_cache.KVCache.from_model_config(pytorch_model.config)
   quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
   edge_model = (
@@ -77,11 +75,12 @@ def convert_phi2_to_tflite(
       )
       .convert(quant_config=quant_config)
   )
+  quant_suffix = 'q8' if quantize else 'f32'
   edge_model.export(
-      f'/tmp/phi2_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite'
+      f'/tmp/phi2_{quant_suffix}_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite'
   )
 if __name__ == '__main__':
-  checkpoint_path = os.path.join(Path.home(), 'Downloads/llm_data/phi2')
-  convert_phi2_to_tflite(checkpoint_path)
+  path = os.path.join(pathlib.Path.home(), 'Downloads/llm_data/phi2')
+  convert_phi2_to_tflite(path)

ai-edge-torch-nightly 0.3.0.dev20240909__py3-none-any.whl → 0.3.0.dev20240911__py3-none-any.whl

ai-edge-torch-nightly 0.3.0.dev20240909__py3-none-any.whl → 0.3.0.dev20240911__py3-none-any.whl