ai-edge-torch-nightly 0.3.0.dev20240910__py3-none-any.whl → 0.3.0.dev20240914__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_torch/_convert/conversion.py +2 -1
- ai_edge_torch/_convert/fx_passes/__init__.py +5 -41
- ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +3 -4
- ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +3 -4
- ai_edge_torch/_convert/fx_passes/inject_mlir_debuginfo_pass.py +3 -4
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/pass_body.py +4 -5
- ai_edge_torch/config.py +4 -1
- ai_edge_torch/fx_pass_base.py +101 -0
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +35 -16
- ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +29 -10
- ai_edge_torch/generative/examples/gemma/gemma.py +52 -32
- ai_edge_torch/generative/examples/gemma/gemma2.py +87 -60
- ai_edge_torch/generative/examples/{experimental/gemma → openelm}/convert_to_tflite.py +16 -18
- ai_edge_torch/generative/examples/openelm/openelm.py +237 -0
- ai_edge_torch/generative/examples/{experimental/phi → phi}/convert_to_tflite.py +15 -16
- ai_edge_torch/generative/examples/{experimental/phi → phi}/phi2.py +48 -45
- ai_edge_torch/generative/examples/{experimental/tiny_llama → smollm}/convert_to_tflite.py +16 -17
- ai_edge_torch/generative/examples/smollm/smollm.py +131 -0
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +12 -6
- ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +1 -1
- ai_edge_torch/generative/examples/t5/convert_to_tflite.py +20 -20
- ai_edge_torch/generative/examples/t5/t5.py +43 -30
- ai_edge_torch/generative/examples/t5/t5_attention.py +18 -13
- ai_edge_torch/generative/examples/test_models/toy_model.py +15 -13
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +75 -34
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +29 -10
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +57 -36
- ai_edge_torch/generative/fx_passes/__init__.py +4 -4
- ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +3 -4
- ai_edge_torch/generative/layers/attention.py +84 -73
- ai_edge_torch/generative/layers/builder.py +38 -14
- ai_edge_torch/generative/layers/feed_forward.py +26 -8
- ai_edge_torch/generative/layers/kv_cache.py +163 -51
- ai_edge_torch/generative/layers/model_config.py +61 -33
- ai_edge_torch/generative/layers/normalization.py +158 -0
- ai_edge_torch/generative/layers/unet/blocks_2d.py +0 -2
- ai_edge_torch/generative/quantize/example.py +2 -2
- ai_edge_torch/generative/test/{test_experimental_ekv.py → test_kv_cache.py} +12 -24
- ai_edge_torch/generative/test/test_loader.py +1 -1
- ai_edge_torch/generative/test/test_model_conversion.py +77 -62
- ai_edge_torch/generative/test/test_model_conversion_large.py +61 -68
- ai_edge_torch/generative/test/test_quantize.py +5 -5
- ai_edge_torch/generative/test/utils.py +54 -0
- ai_edge_torch/generative/utilities/loader.py +28 -15
- ai_edge_torch/generative/utilities/t5_loader.py +21 -20
- ai_edge_torch/odml_torch/export.py +40 -0
- ai_edge_torch/odml_torch/lowerings/__init__.py +1 -0
- ai_edge_torch/odml_torch/lowerings/_basic.py +44 -0
- ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +0 -2
- ai_edge_torch/odml_torch/lowerings/_layer_norm.py +78 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/RECORD +59 -63
- ai_edge_torch/_convert/fx_passes/_pass_base.py +0 -53
- ai_edge_torch/_convert/fx_passes/canonicalize_pass.py +0 -35
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +0 -219
- ai_edge_torch/generative/examples/experimental/tiny_llama/__init__.py +0 -14
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +0 -205
- ai_edge_torch/generative/examples/phi2/__init__.py +0 -14
- ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +0 -67
- ai_edge_torch/generative/examples/phi2/phi2.py +0 -189
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +0 -176
- /ai_edge_torch/generative/examples/{experimental → openelm}/__init__.py +0 -0
- /ai_edge_torch/generative/examples/{experimental/gemma → phi}/__init__.py +0 -0
- /ai_edge_torch/generative/examples/{experimental/phi → smollm}/__init__.py +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/top_level.txt +0 -0
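Across these files, the central change is that the generative stack's KV cache moves from module-internal state (the old "experimental external KV cache") to an explicit `kv_cache` input that is threaded through `ai_edge_torch.convert` and `ai_edge_torch.signature` as a keyword sample input. A minimal sketch of the new conversion flow, pieced together from the test diffs below; the `tiny_llama` module, `get_fake_model_config()`, and `TinyLlama` are real names used by the tests, but the end-to-end flow and the output path are illustrative, not part of this diff:

```python
# Sketch of the externalized-KV-cache conversion flow, inferred from the
# test diffs below. The export path is made up for illustration.
import ai_edge_torch
import torch
from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
from ai_edge_torch.generative.layers import kv_cache

config = tiny_llama.get_fake_model_config()
pytorch_model = tiny_llama.TinyLlama(config).eval()

tokens = torch.full((1, 10), 0, dtype=torch.int)
input_pos = torch.arange(0, 10, dtype=torch.int)
kv = kv_cache.KVCache.from_model_config(config)  # the cache is now an explicit input

edge_model = ai_edge_torch.convert(
    pytorch_model,
    sample_kwargs={"tokens": tokens, "input_pos": input_pos, "kv_cache": kv},
)
edge_model.export("/tmp/tiny_llama.tflite")
```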
ai_edge_torch/generative/test/{test_experimental_ekv.py → test_kv_cache.py}

```diff
@@ -12,19 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# A suite of tests to validate experimental external KV Cache layers and models.
 
-
-
-from ai_edge_torch.generative.
-from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils
+"""A suite of tests to validate KV Cache layer."""
+
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
 
 from absl.testing import absltest as googletest
 
 
-class TestExternalKVLayers(googletest.TestCase):
+class TestKVLayers(googletest.TestCase):
 
   def _get_test_config(
       self, num_layers, head_dim, num_query_groups, kv_cache_max_len
@@ -32,14 +30,16 @@ class TestExternalKVLayers(googletest.TestCase):
     attn_config = cfg.AttentionConfig(
         num_heads=1, head_dim=head_dim, num_query_groups=num_query_groups
     )
+    block_config = cfg.TransformerBlockConfig(
+        attn_config=attn_config, ff_config=None
+    )
     config = cfg.ModelConfig(
         kv_cache_max_len=kv_cache_max_len,
         embedding_dim=head_dim,
-
+        block_configs=block_config,
         num_layers=num_layers,
         max_seq_len=None,
         vocab_size=None,
-        ff_config=None,
     )
     return config
 
@@ -54,7 +54,7 @@ class TestExternalKVLayers(googletest.TestCase):
         num_query_groups=NUM_QG,
         kv_cache_max_len=KV_LEN,
     )
-    kv = kv_utils.
+    kv = kv_utils.KVCache.from_model_config(config)
     entry = kv.caches[0]
     # single-slice update
     input_pos = torch.tensor([1])
@@ -88,14 +88,14 @@ class TestExternalKVLayers(googletest.TestCase):
   def test_serialization(self):
     class TestModel(torch.nn.Module):
 
-      def forward(self, kv: kv_utils.
+      def forward(self, kv: kv_utils.KVCache) -> kv_utils.KVCache:
         updated_kv_entries = [
             kv_utils.KVCacheEntry(
                 torch.zeros_like(entry.k_cache), torch.zeros_like(entry.v_cache)
             )
             for entry in kv.caches
         ]
-        return kv_utils.
+        return kv_utils.KVCache(updated_kv_entries)
 
     N = 1
     HEAD_DIM = 2
@@ -107,7 +107,7 @@ class TestExternalKVLayers(googletest.TestCase):
         num_query_groups=NUM_QG,
         kv_cache_max_len=KV_LEN,
     )
-    kv = kv_utils.
+    kv = kv_utils.KVCache.from_model_config(config)
     model = TestModel()
     exported_program = torch.export.export(model, (kv,))
     input_specs = exported_program.graph_signature.input_specs
@@ -116,17 +116,5 @@ class TestExternalKVLayers(googletest.TestCase):
     self.assertEqual(input_specs[1].arg.name, "kv_v_0")
 
 
-class TestExternalKVModels(googletest.TestCase):
-
-  def test_can_build_gemma(self):
-    gemma.define_and_run_2b(checkpoint_path=None, test_model=True)
-
-  def test_can_build_phi2(self):
-    phi2.define_and_run(checkpoint_path=None, test_model=True)
-
-  def test_can_build_tinyllama(self):
-    tiny_llama.define_and_run(checkpoint_path=None, test_model=True)
-
-
 if __name__ == "__main__":
   googletest.main()
```
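The serialization test above works because the new `KVCache` flattens under `torch.export` into per-layer tensor inputs named `kv_k_<i>`/`kv_v_<i>`. A standalone sketch mirroring that test; the config values are the test's fakes and `ZeroKV` is an illustrative name for the test's inner module:

```python
# Sketch mirroring test_serialization above: export a module that takes the
# new KVCache and observe the flattened kv_k_0/kv_v_0 input names.
import torch
import ai_edge_torch.generative.layers.model_config as cfg
from ai_edge_torch.generative.layers import kv_cache as kv_utils

attn_config = cfg.AttentionConfig(num_heads=1, head_dim=2, num_query_groups=1)
block_config = cfg.TransformerBlockConfig(attn_config=attn_config, ff_config=None)
config = cfg.ModelConfig(
    kv_cache_max_len=8,
    embedding_dim=2,
    block_configs=block_config,
    num_layers=1,
    max_seq_len=None,
    vocab_size=None,
)


class ZeroKV(torch.nn.Module):

  def forward(self, kv: kv_utils.KVCache) -> kv_utils.KVCache:
    # Return a fresh cache so the exported graph has real outputs.
    entries = [
        kv_utils.KVCacheEntry(
            torch.zeros_like(e.k_cache), torch.zeros_like(e.v_cache)
        )
        for e in kv.caches
    ]
    return kv_utils.KVCache(entries)


kv = kv_utils.KVCache.from_model_config(config)
ep = torch.export.export(ZeroKV(), (kv,))
print([spec.arg.name for spec in ep.graph_signature.input_specs])
# e.g. ['kv_k_0', 'kv_v_0']
```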
ai_edge_torch/generative/test/test_loader.py

```diff
@@ -71,7 +71,7 @@ class TestLoader(googletest.TestCase):
     safetensors.torch.save_file(test_weights, file_path)
     cfg = tiny_llama.get_model_config()
     cfg.num_layers = 1
-    model = tiny_llama.
+    model = tiny_llama.TinyLlama(cfg)
 
     loader = loading_utils.ModelLoader(file_path, tiny_llama.TENSOR_NAMES)
     # if returns successfully, it means all the tensors were initiallized.
```
ai_edge_torch/generative/test/test_model_conversion.py

```diff
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
+
+"""Testing model conversion for a few gen-ai models."""
 
 import ai_edge_torch
 from ai_edge_torch import config as ai_edge_config
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.generative.examples.phi2 import phi2
-from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache  # NOQA
+from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
-from ai_edge_torch.
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
 import numpy as np
 import torch
 
@@ -43,28 +42,32 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-
-
-
-
-
-
-
-
-
+  def _test_model_with_kv_cache(self, config, pytorch_model):
+    tokens, input_pos = torch.tensor([[1]], dtype=torch.int), torch.tensor(
+        [10], dtype=torch.int
+    )
+    kv = kv_cache.KVCache.from_model_config(config)
+
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
     )
-
-    edge_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
            edge_model,
            pytorch_model,
-
-
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
            atol=1e-5,
            rtol=1e-5,
        )
@@ -74,83 +77,95 @@ class TestModelConversion(googletest.TestCase):
       ai_edge_config.Config.use_torch_xla,
       reason="tests with custom ops are not supported on oss",
   )
-  def
+  def test_toy_model_with_kv_cache(self):
     config = toy_model_with_kv_cache.get_model_config()
-
-
-    idx, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
-        [10], dtype=torch.int64
-    )
-
-    edge_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
-    edge_model.set_interpreter_builder(
-        self._interpreter_builder(edge_model.tflite_model())
-    )
-
-    self.assertTrue(
-        model_coverage.compare_tflite_torch(
-            edge_model,
-            pytorch_model,
-            (idx, input_pos),
-            num_valid_inputs=1,
-            atol=1e-5,
-            rtol=1e-5,
-        )
-    )
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
+    self._test_model_with_kv_cache(config, pytorch_model)
 
   @googletest.skipIf(
       ai_edge_config.Config.use_torch_xla,
       reason="tests with custom ops are not supported on oss",
   )
-  def
-  config =
-
+  def test_toy_model_with_kv_cache_with_hlfb(self):
+    config = toy_model_with_kv_cache.get_model_config()
+    config.enable_hlfb = True
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
+    self._test_model_with_kv_cache(config, pytorch_model)
 
+  def _test_multisig_model(self, config, pytorch_model, atol, rtol):
     # prefill
     seq_len = 10
-    prefill_tokens = torch.full((1, seq_len), 0, dtype=torch.
+    prefill_tokens = torch.full((1, seq_len), 0, dtype=torch.int, device="cpu")
     prompt_token = torch.from_numpy(np.array([1, 2, 3, 4]))
     prefill_tokens[0, : len(prompt_token)] = prompt_token
-    prefill_input_pos = torch.arange(0, seq_len)
+    prefill_input_pos = torch.arange(0, seq_len, dtype=torch.int)
 
     # decode
-    decode_token = torch.tensor([[1]], dtype=torch.
-    decode_input_pos = torch.tensor([5], dtype=torch.
+    decode_token = torch.tensor([[1]], dtype=torch.int)
+    decode_input_pos = torch.tensor([5], dtype=torch.int)
+
+    kv = kv_cache.KVCache.from_model_config(config)
 
     edge_model = (
         ai_edge_torch.signature(
-            "prefill",
+            "prefill",
+            pytorch_model,
+            sample_kwargs={
+                "tokens": prefill_tokens,
+                "input_pos": prefill_input_pos,
+                "kv_cache": kv,
+            },
+        )
+        .signature(
+            "decode",
+            pytorch_model,
+            sample_kwargs={
+                "tokens": decode_token,
+                "input_pos": decode_input_pos,
+                "kv_cache": kv,
+            },
         )
-        .signature("decode", pytorch_model, (decode_token, decode_input_pos))
         .convert()
     )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
-    copied_model = copy.deepcopy(pytorch_model)
-    copied_edge = copy.deepcopy(edge_model)
-
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
            edge_model,
            pytorch_model,
-
+            prefill_tokens,
+            prefill_input_pos,
+            kv,
            signature_name="prefill",
-
+            atol=atol,
+            rtol=atol,
        )
    )
 
     self.assertTrue(
-
-
-
-
+        test_utils.compare_tflite_torch(
+            edge_model,
+            pytorch_model,
+            decode_token,
+            decode_input_pos,
+            kv,
            signature_name="decode",
-
+            atol=atol,
+            rtol=atol,
        )
    )
 
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_tiny_llama_multisig(self):
+    config = tiny_llama.get_fake_model_config()
+    pytorch_model = tiny_llama.TinyLlama(config).eval()
+    self._test_multisig_model(config, pytorch_model, atol=1e-5, rtol=1e-5)
+
 
 if __name__ == "__main__":
   googletest.main()
```
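The `_test_multisig_model` helper above is the clearest statement of the new multi-signature API: chain `signature()` calls, each with its own `sample_kwargs`, then call `convert()` once. Extracted from the hunk into a condensed, runnable sketch; TinyLlama stands in for any of the example models, and the shapes and dtypes simply mirror the test:

```python
# Sketch of the chained prefill/decode signature conversion used by
# _test_multisig_model above. Treat the flow as illustrative.
import ai_edge_torch
import torch
from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
from ai_edge_torch.generative.layers import kv_cache

config = tiny_llama.get_fake_model_config()
model = tiny_llama.TinyLlama(config).eval()

prefill_tokens = torch.full((1, 10), 0, dtype=torch.int)
prefill_input_pos = torch.arange(0, 10, dtype=torch.int)
decode_token = torch.tensor([[1]], dtype=torch.int)
decode_input_pos = torch.tensor([5], dtype=torch.int)
kv = kv_cache.KVCache.from_model_config(config)

edge_model = (
    ai_edge_torch.signature(
        "prefill",
        model,
        sample_kwargs={
            "tokens": prefill_tokens,
            "input_pos": prefill_input_pos,
            "kv_cache": kv,
        },
    )
    .signature(
        "decode",
        model,
        sample_kwargs={
            "tokens": decode_token,
            "input_pos": decode_input_pos,
            "kv_cache": kv,
        },
    )
    .convert()
)
```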
ai_edge_torch/generative/test/test_model_conversion_large.py

```diff
@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
+
+"""Testing model conversion for a few gen-ai models."""
 
 import ai_edge_torch
 from ai_edge_torch import config as ai_edge_config
-from ai_edge_torch.generative.examples.gemma import gemma
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.
+from ai_edge_torch.generative.examples.gemma import gemma
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.examples.openelm import openelm
+from ai_edge_torch.generative.examples.phi import phi2
+from ai_edge_torch.generative.examples.smollm import smollm
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
 import numpy as np
 import torch
 
@@ -43,32 +45,36 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
-  )
-  def test_gemma(self):
-    config = gemma.get_fake_model_config()
-    model = gemma.Gemma(config)
-
+  def _test_model(self, config, model, signature_name, atol, rtol):
     idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
-    tokens = torch.full((1, 10), 0, dtype=torch.
+    tokens = torch.full((1, 10), 0, dtype=torch.int, device="cpu")
     tokens[0, :4] = idx
-    input_pos = torch.arange(0, 10)
+    input_pos = torch.arange(0, 10, dtype=torch.int)
+    kv = kv_cache.KVCache.from_model_config(config)
 
-    edge_model = ai_edge_torch.
+    edge_model = ai_edge_torch.signature(
+        signature_name,
+        model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    ).convert()
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
            edge_model,
            model,
-
-
-
-
+            tokens,
+            input_pos,
+            kv,
+            signature_name=signature_name,
+            atol=atol,
+            rtol=rtol,
        )
    )
 
@@ -76,34 +82,21 @@ class TestModelConversion(googletest.TestCase):
       ai_edge_config.Config.use_torch_xla,
       reason="tests with custom ops are not supported on oss",
   )
-  def
-  config =
-
-
-
-    idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
-    prefill_tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
-    prefill_tokens[0, :4] = idx
-    prefill_input_pos = torch.arange(0, 10)
-
-    edge_model = ai_edge_torch.signature(
-        "prefill", model, (prefill_tokens, prefill_input_pos)
-    ).convert()
-    edge_model.set_interpreter_builder(
-        self._interpreter_builder(edge_model.tflite_model())
+  def test_gemma(self):
+    config = gemma.get_fake_model_config()
+    pytorch_model = gemma.Gemma(config).eval()
+    self._test_model(
+        config, pytorch_model, "serving_default", atol=1e-2, rtol=1e-5
     )
 
-
-
-
-
-
-
-
-
-        rtol=1e-5,
-    )
-  )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_gemma2(self):
+    config = gemma2.get_fake_model_config()
+    pytorch_model = gemma2.Gemma2(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-1, rtol=1e-3)
 
   @googletest.skipIf(
       ai_edge_config.Config.use_torch_xla,
@@ -112,27 +105,27 @@ class TestModelConversion(googletest.TestCase):
   def test_phi2(self):
     config = phi2.get_fake_model_config()
     pytorch_model = phi2.Phi2(config).eval()
-
-
-    tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
-    tokens[0, :4] = idx
-    input_pos = torch.arange(0, 10)
-
-    edge_model = ai_edge_torch.convert(pytorch_model, (tokens, input_pos))
-    edge_model.set_interpreter_builder(
-        self._interpreter_builder(edge_model.tflite_model())
+    self._test_model(
+        config, pytorch_model, "serving_default", atol=1e-3, rtol=1e-3
     )
 
-
-
-
-
-
-
-
-
-
-
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_smollm(self):
+    config = smollm.get_fake_model_config()
+    pytorch_model = smollm.SmolLM(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
+
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_openelm(self):
+    config = openelm.get_fake_model_config()
+    pytorch_model = openelm.OpenELM(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
 
 if __name__ == "__main__":
```
ai_edge_torch/generative/test/test_quantize.py

```diff
@@ -115,8 +115,8 @@ class TestQuantizeConvert(parameterized.TestCase):
   def test_quantize_convert_toy_sizes(self, quant_config):
     config = toy_model.get_model_config()
     pytorch_model = toy_model.ToySingleLayerModel(config)
-    idx = torch.unsqueeze(torch.arange(0, 100), 0)
-    input_pos = torch.arange(0, 100)
+    idx = torch.unsqueeze(torch.arange(0, 100, dtype=torch.int), 0)
+    input_pos = torch.arange(0, 100, dtype=torch.int)
 
     quantized_model = ai_edge_torch.convert(
         pytorch_model, (idx, input_pos), quant_config=quant_config
@@ -131,8 +131,8 @@ class TestQuantizeConvert(parameterized.TestCase):
   def test_quantize_convert_toy_weight_sharing(self):
     config = toy_model.get_model_config()
     pytorch_model = toy_model.ToySingleLayerModelWeightSharing(config)
-    idx = torch.unsqueeze(torch.arange(0, 100), 0)
-    input_pos = torch.arange(0, 100)
+    idx = torch.unsqueeze(torch.arange(0, 100, dtype=torch.int), 0)
+    input_pos = torch.arange(0, 100, dtype=torch.int)
 
     quant_config = quant_recipes.full_int8_dynamic_recipe()
     quantized_model = ai_edge_torch.convert(
@@ -149,7 +149,7 @@ class TestQuantizeConvert(parameterized.TestCase):
     self.skipTest("b/338288901")
     config = toy_model_with_kv_cache.get_model_config()
     pytorch_model = toy_model_with_kv_cache.ToyModelWithKV(config)
-    idx, input_pos = torch.tensor([[1]], dtype=torch.
+    idx, input_pos = torch.tensor([[1]], dtype=torch.int), torch.tensor(
         [10], dtype=torch.int64
     )
 
```
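The quantization tests only switch their sample inputs to `dtype=torch.int`, but they also document the quantized-conversion entry point. A sketch of that flow, using the toy model and recipe named in the hunks above; everything here appears in the diff, only the standalone arrangement is illustrative:

```python
# Sketch: dynamic int8 quantized conversion, as exercised by the quantize
# tests above. Only the dtype=torch.int inputs are new in this release.
import ai_edge_torch
import torch
from ai_edge_torch.generative.examples.test_models import toy_model
from ai_edge_torch.generative.quantize import quant_recipes

config = toy_model.get_model_config()
pytorch_model = toy_model.ToySingleLayerModel(config)
idx = torch.unsqueeze(torch.arange(0, 100, dtype=torch.int), 0)
input_pos = torch.arange(0, 100, dtype=torch.int)

quant_config = quant_recipes.full_int8_dynamic_recipe()
quantized_model = ai_edge_torch.convert(
    pytorch_model, (idx, input_pos), quant_config=quant_config
)
```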
ai_edge_torch/generative/test/utils.py

```diff
@@ -0,0 +1,54 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Common utils for testing."""
+
+from ai_edge_torch import model
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+from ai_edge_torch.lowertools import common_utils
+import numpy as np
+import torch
+from torch.utils import _pytree as pytree
+
+
+def compare_tflite_torch(
+    edge_model: model.Model,
+    torch_model: torch.nn.Module,
+    tokens: torch.Tensor,
+    input_pos: torch.Tensor,
+    kv_cache: kv_utils.KVCache,
+    signature_name: str,
+    atol: float = 1e-5,
+    rtol: float = 1e-5,
+):
+  """Compares torch models and TFLite models."""
+  values, spec = pytree.tree_flatten({"kv_cache": kv_cache})
+  flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
+  torch_output = torch_model(tokens, input_pos, kv_cache)
+
+  input_kv_flatten = {k: v.numpy() for k, v in zip(flat_names, values)}
+  edge_output = edge_model(
+      signature_name=signature_name,
+      tokens=tokens.numpy(),
+      input_pos=input_pos.numpy(),
+      **input_kv_flatten,
+  )
+
+  return np.allclose(
+      edge_output["logits"],
+      torch_output["logits"].detach().numpy(),
+      atol=atol,
+      rtol=rtol,
+  )
```
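A usage sketch for the new helper, mirroring how the conversion tests call it. It reuses the `edge_model`, `pytorch_model`, `tokens`, `input_pos`, and `kv` objects built in the conversion sketch after the file list above, so it is not standalone:

```python
# Sketch: validating a converted model with the new test utility.
# Assumes the variables from the conversion sketch near the top of this diff.
from ai_edge_torch.generative.test import utils as test_utils

assert test_utils.compare_tflite_torch(
    edge_model,
    pytorch_model,
    tokens,
    input_pos,
    kv,
    signature_name="serving_default",
    atol=1e-5,
    rtol=1e-5,
)
```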