PyPI - ai-edge-torch-nightly - Versions diffs - 0.3.0.dev20240910__py3-none-any.whl → 0.3.0.dev20240913__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.3.0.dev20240910__py3-none-any.whl → 0.3.0.dev20240913__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

ai_edge_torch/generative/layers/normalization.py CHANGED Viewed

@@ -14,7 +14,10 @@
 # ==============================================================================
 # Common normalization layers.
+from ai_edge_torch.hlfb import StableHLOCompositeBuilder
 import torch
+from torch import nn
+import torch.nn.functional as F
 # Implementation for RMSNorm from: https://arxiv.org/abs/1910.07467
@@ -58,3 +61,158 @@ class RMSNorm(torch.nn.Module):
       return output * (1 + self.weight)
     else:
       return output * self.weight
+class GroupNorm(torch.nn.Module):
+  def __init__(
+      self,
+      group_num: int,
+      dim: int,
+      eps: float = 1e-5,
+      enable_hlfb: bool = False,
+  ):
+    """Initialize the GroupNorm layer.
+    Args:
+      group_num (int): Number of groups to separate the channels into.
+      dim (int): Dimension of the input tensor.
+      eps (float): A small float value to ensure numerical stability (default:
+        1e-6).
+      enable_hlfb (bool): Whether to convert this normalization into a single
+        op.
+    """
+    super().__init__()
+    self.enable_hlfb = enable_hlfb
+    self.group_num = group_num
+    self.eps = eps
+    self.weight = torch.nn.Parameter(torch.ones(dim))
+    self.bias = torch.nn.Parameter(torch.ones(dim))
+  def forward(self, x):
+    """Running the forward pass of GroupNorm layer.
+    Args:
+      x (torch.Tensor): input tensor.
+    Returns:
+      torch.Tensor: output tensor after applying GroupNorm.
+    """
+    if self.enable_hlfb:
+      return group_norm_with_hlfb(
+          x,
+          self.weight,
+          self.bias,
+          self.group_num,
+          self.eps,
+      )
+    else:
+      return F.group_norm(x, self.group_num, self.weight, self.bias, self.eps)
+class LayerNorm(torch.nn.Module):
+  def __init__(self, dim: int, eps: float = 1e-5, enable_hlfb: bool = False):
+    """Initialize the LayerNorm layer.
+    Args:
+      dim (int): dimension of the input tensor.
+      eps (float): A small float value to ensure numerical stability (default:
+        1e-6).
+      enable_hlfb (bool): Whether to convert this normalization into a single
+        op.
+    """
+    super().__init__()
+    self.enable_hlfb = enable_hlfb
+    self.eps = eps
+    self.weight = torch.nn.Parameter(torch.ones(dim))
+    self.bias = torch.nn.Parameter(torch.ones(dim))
+  def forward(self, x):
+    """Running the forward pass of LayerNorm layer.
+    Args:
+      x (torch.Tensor): input tensor.
+    Returns:
+      torch.Tensor: output tensor after applying LayerNorm.
+    """
+    if self.enable_hlfb:
+      return layer_norm_with_hlfb(
+          x,
+          self.weight,
+          self.bias,
+          self.eps,
+      )
+    else:
+      return F.layer_norm(
+          x,
+          x.shape,
+          self.weight.broadcast_to(x.shape),
+          self.bias.broadcast_to(x.shape),
+          self.eps,
+      )
+def group_norm_with_hlfb(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    b: torch.Tensor,
+    num_groups: int,
+    eps: float,
+):
+  """Group Normalization with high-level function boundary enabled.
+  Args:
+    x (torch.Tensor): Input tensor for Group Normalization, with BCHW shape.
+    w (torch.Tensor): The weight tensor for the normalization.
+    b (torch.Tensor): The bias tensor for the normalization.
+    num_groups (int): Number of groups to separate the channels into.
+    eps (float): A small float value to ensure numerical stability.
+  Returns:
+    The output tensor of Group Normalization.
+  """
+  x = torch.permute(x, (0, 2, 3, 1))
+  builder = StableHLOCompositeBuilder(
+      name="odml.group_norm", attr={"num_groups": num_groups, "eps": eps}
+  )
+  x, w, b = builder.mark_inputs(x, w, b)
+  x = torch.permute(x, (0, 3, 1, 2))
+  y = F.group_norm(x, num_groups, weight=w, bias=b, eps=eps)
+  y = torch.permute(y, (0, 2, 3, 1))
+  y = builder.mark_outputs(y)
+  y = torch.permute(y, (0, 3, 1, 2))
+  return y
+def layer_norm_with_hlfb(
+    x: torch.Tensor,
+    w: torch.Tensor,
+    b: torch.Tensor,
+    eps: float,
+):
+  """Layer Normalization with high-level function boundary enabled.
+  Args:
+    x (torch.Tensor): Input tensor for Layer Normalization.
+    w (torch.Tensor): The weight tensor for the normalization.
+    b (torch.Tensor): The bias tensor for the normalization.
+    eps (float): A small float value to ensure numerical stability.
+  Returns:
+    The output tensor of Layer Normalization.
+  """
+  builder = StableHLOCompositeBuilder(name="odml.layer_norm", attr={"eps": eps})
+  x, w, b = builder.mark_inputs(x, w, b)
+  y = F.layer_norm(
+      x,
+      x.shape,
+      weight=w.broadcast_to(x.shape),
+      bias=b.broadcast_to(x.shape),
+      eps=eps,
+  )
+  y = builder.mark_outputs(y)
+  return y

ai_edge_torch/generative/layers/unet/blocks_2d.py CHANGED Viewed

@@ -122,7 +122,6 @@ class AttentionBlock2D(nn.Module):
         config.attention_batch_size,
         config.dim,
         config.attention_config,
-        0,
         enable_hlfb=config.enable_hlfb,
     )
@@ -180,7 +179,6 @@ class CrossAttentionBlock2D(nn.Module):
         config.query_dim,
         config.cross_dim,
         config.attention_config,
-        0,
         enable_hlfb=config.enable_hlfb,
     )

ai_edge_torch/generative/test/{test_experimental_ekv.py → test_kv_cache.py} RENAMED Viewed

@@ -12,19 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# A suite of tests to validate experimental external KV Cache layers and models.
-from ai_edge_torch.generative.examples.experimental.gemma import gemma
-from ai_edge_torch.generative.examples.experimental.phi import phi2
-from ai_edge_torch.generative.examples.experimental.tiny_llama import tiny_llama  # NOQA
-from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils
+"""A suite of tests to validate KV Cache layer."""
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
 from absl.testing import absltest as googletest
-class TestExternalKVLayers(googletest.TestCase):
+class TestKVLayers(googletest.TestCase):
   def _get_test_config(
       self, num_layers, head_dim, num_query_groups, kv_cache_max_len
@@ -32,14 +30,16 @@ class TestExternalKVLayers(googletest.TestCase):
     attn_config = cfg.AttentionConfig(
         num_heads=1, head_dim=head_dim, num_query_groups=num_query_groups
     )
+    block_config = cfg.TransformerBlockConfig(
+        attn_config=attn_config, ff_config=None
+    )
     config = cfg.ModelConfig(
         kv_cache_max_len=kv_cache_max_len,
         embedding_dim=head_dim,
-        attn_config=attn_config,
+        block_configs=block_config,
         num_layers=num_layers,
         max_seq_len=None,
         vocab_size=None,
-        ff_config=None,
     )
     return config
@@ -54,7 +54,7 @@ class TestExternalKVLayers(googletest.TestCase):
         num_query_groups=NUM_QG,
         kv_cache_max_len=KV_LEN,
     )
-    kv = kv_utils.EKVCache.from_model_config(config)
+    kv = kv_utils.KVCache.from_model_config(config)
     entry = kv.caches[0]
     # single-slice update
     input_pos = torch.tensor([1])
@@ -88,14 +88,14 @@ class TestExternalKVLayers(googletest.TestCase):
   def test_serialization(self):
     class TestModel(torch.nn.Module):
-      def forward(self, kv: kv_utils.EKVCache) -> kv_utils.EKVCache:
+      def forward(self, kv: kv_utils.KVCache) -> kv_utils.KVCache:
         updated_kv_entries = [
             kv_utils.KVCacheEntry(
                 torch.zeros_like(entry.k_cache), torch.zeros_like(entry.v_cache)
             )
             for entry in kv.caches
         ]
-        return kv_utils.EKVCache(updated_kv_entries)
+        return kv_utils.KVCache(updated_kv_entries)
     N = 1
     HEAD_DIM = 2
@@ -107,7 +107,7 @@ class TestExternalKVLayers(googletest.TestCase):
         num_query_groups=NUM_QG,
         kv_cache_max_len=KV_LEN,
     )
-    kv = kv_utils.EKVCache.from_model_config(config)
+    kv = kv_utils.KVCache.from_model_config(config)
     model = TestModel()
     exported_program = torch.export.export(model, (kv,))
     input_specs = exported_program.graph_signature.input_specs
@@ -116,17 +116,5 @@ class TestExternalKVLayers(googletest.TestCase):
     self.assertEqual(input_specs[1].arg.name, "kv_v_0")
-class TestExternalKVModels(googletest.TestCase):
-  def test_can_build_gemma(self):
-    gemma.define_and_run_2b(checkpoint_path=None, test_model=True)
-  def test_can_build_phi2(self):
-    phi2.define_and_run(checkpoint_path=None, test_model=True)
-  def test_can_build_tinyllama(self):
-    tiny_llama.define_and_run(checkpoint_path=None, test_model=True)
 if __name__ == "__main__":
   googletest.main()

ai_edge_torch/generative/test/test_loader.py CHANGED Viewed

@@ -71,7 +71,7 @@ class TestLoader(googletest.TestCase):
       safetensors.torch.save_file(test_weights, file_path)
       cfg = tiny_llama.get_model_config()
       cfg.num_layers = 1
-      model = tiny_llama.TinyLLamma(cfg)
+      model = tiny_llama.TinyLlama(cfg)
       loader = loading_utils.ModelLoader(file_path, tiny_llama.TENSOR_NAMES)
       # if returns successfully, it means all the tensors were initiallized.

ai_edge_torch/generative/test/test_model_conversion.py CHANGED Viewed

@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# Testing model conversion for a few gen-ai models.
-import copy
+"""Testing model conversion for a few gen-ai models."""
 import ai_edge_torch
 from ai_edge_torch import config as ai_edge_config
-from ai_edge_torch.generative.examples.gemma import gemma, gemma2
-from ai_edge_torch.generative.examples.phi2 import phi2
-from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache  # NOQA
+from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
-from ai_edge_torch.testing import model_coverage
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
 import numpy as np
 import torch
@@ -49,22 +48,32 @@ class TestModelConversion(googletest.TestCase):
   )
   def test_toy_model_with_kv_cache(self):
     config = toy_model_with_kv_cache.get_model_config()
-    pytorch_model = toy_model_with_kv_cache.ToyModelWithKV(config).eval()
-    idx, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
+    tokens, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
         [10], dtype=torch.int64
     )
-    edge_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
+    kv = kv_cache.KVCache.from_model_config(config)
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-            (idx, input_pos),
-            num_valid_inputs=1,
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-5,
             rtol=1e-5,
         )
@@ -77,22 +86,32 @@ class TestModelConversion(googletest.TestCase):
   def test_toy_model_with_kv_cache_with_hlfb(self):
     config = toy_model_with_kv_cache.get_model_config()
     config.enable_hlfb = True
-    pytorch_model = toy_model_with_kv_cache.ToyModelWithKV(config).eval()
-    idx, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
+    tokens, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
         [10], dtype=torch.int64
     )
-    edge_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
+    kv = kv_cache.KVCache.from_model_config(config)
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-            (idx, input_pos),
-            num_valid_inputs=1,
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-5,
             rtol=1e-5,
         )
@@ -104,7 +123,7 @@ class TestModelConversion(googletest.TestCase):
   )
   def test_tiny_llama_multisig(self):
     config = tiny_llama.get_fake_model_config()
-    pytorch_model = tiny_llama.TinyLLamma(config).eval()
+    pytorch_model = tiny_llama.TinyLlama(config).eval()
     # prefill
     seq_len = 10
@@ -117,37 +136,56 @@ class TestModelConversion(googletest.TestCase):
     decode_token = torch.tensor([[1]], dtype=torch.long)
     decode_input_pos = torch.tensor([5], dtype=torch.int64)
+    kv = kv_cache.KVCache.from_model_config(config)
     edge_model = (
         ai_edge_torch.signature(
-            "prefill", pytorch_model, (prefill_tokens, prefill_input_pos)
+            "prefill",
+            pytorch_model,
+            sample_kwargs={
+                "tokens": prefill_tokens,
+                "input_pos": prefill_input_pos,
+                "kv_cache": kv,
+            },
+        )
+        .signature(
+            "decode",
+            pytorch_model,
+            sample_kwargs={
+                "tokens": decode_token,
+                "input_pos": decode_input_pos,
+                "kv_cache": kv,
+            },
         )
-        .signature("decode", pytorch_model, (decode_token, decode_input_pos))
         .convert()
     )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
-    copied_model = copy.deepcopy(pytorch_model)
-    copied_edge = copy.deepcopy(edge_model)
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-            (prefill_tokens, prefill_input_pos),
+            prefill_tokens,
+            prefill_input_pos,
+            kv,
             signature_name="prefill",
-            num_valid_inputs=1,
+            atol=1e-5,
+            rtol=1e-5,
         )
     )
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
-            copied_edge,
-            copied_model,
-            (decode_token, decode_input_pos),
+        test_utils.compare_tflite_torch(
+            edge_model,
+            pytorch_model,
+            decode_token,
+            decode_input_pos,
+            kv,
             signature_name="decode",
-            num_valid_inputs=1,
+            atol=1e-5,
+            rtol=1e-5,
         )
     )

ai_edge_torch/generative/test/test_model_conversion_large.py CHANGED Viewed

@@ -12,16 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# Testing model conversion for a few gen-ai models.
-import copy
+"""Testing model conversion for a few gen-ai models."""
 import ai_edge_torch
 from ai_edge_torch import config as ai_edge_config
-from ai_edge_torch.generative.examples.gemma import gemma, gemma2
-from ai_edge_torch.generative.examples.phi2 import phi2
-from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache  # NOQA
-from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
-from ai_edge_torch.testing import model_coverage
+from ai_edge_torch.generative.examples.gemma import gemma
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.examples.phi import phi2
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
 import numpy as np
 import torch
@@ -55,18 +55,28 @@ class TestModelConversion(googletest.TestCase):
     tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
     tokens[0, :4] = idx
     input_pos = torch.arange(0, 10)
-    edge_model = ai_edge_torch.convert(model, (tokens, input_pos))
+    kv = kv_cache.KVCache.from_model_config(config)
+    edge_model = ai_edge_torch.convert(
+        model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
+        test_utils.compare_tflite_torch(
             edge_model,
             model,
-            (tokens, input_pos),
-            num_valid_inputs=1,
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-2,
             rtol=1e-5,
         )
@@ -85,23 +95,31 @@ class TestModelConversion(googletest.TestCase):
     prefill_tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
     prefill_tokens[0, :4] = idx
     prefill_input_pos = torch.arange(0, 10)
+    kv = kv_cache.KVCache.from_model_config(config)
     edge_model = ai_edge_torch.signature(
-        "prefill", model, (prefill_tokens, prefill_input_pos)
+        "prefill",
+        model,
+        sample_kwargs={
+            "tokens": prefill_tokens,
+            "input_pos": prefill_input_pos,
+            "kv_cache": kv,
+        },
     ).convert()
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
+        test_utils.compare_tflite_torch(
             edge_model,
             model,
-            (prefill_tokens, prefill_input_pos),
+            prefill_tokens,
+            prefill_input_pos,
+            kv,
             signature_name="prefill",
-            num_valid_inputs=1,
-            atol=1e-2,
-            rtol=1e-5,
+            atol=1e-1,
+            rtol=1e-3,
         )
     )
@@ -117,18 +135,28 @@ class TestModelConversion(googletest.TestCase):
     tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
     tokens[0, :4] = idx
     input_pos = torch.arange(0, 10)
-    edge_model = ai_edge_torch.convert(pytorch_model, (tokens, input_pos))
+    kv = kv_cache.KVCache.from_model_config(config)
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
     self.assertTrue(
-        model_coverage.compare_tflite_torch(
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-            (tokens, input_pos),
-            num_valid_inputs=1,
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-3,
             rtol=1e-3,
         )

ai_edge_torch/generative/test/utils.py ADDED Viewed

@@ -0,0 +1,54 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Common utils for testing."""
+from ai_edge_torch import model
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+from ai_edge_torch.lowertools import common_utils
+import numpy as np
+import torch
+from torch.utils import _pytree as pytree
+def compare_tflite_torch(
+    edge_model: model.Model,
+    torch_model: torch.nn.Module,
+    tokens: torch.Tensor,
+    input_pos: torch.Tensor,
+    kv_cache: kv_utils.KVCache,
+    signature_name: str,
+    atol: float = 1e-5,
+    rtol: float = 1e-5,
+):
+  """Compares torch models and TFLite models."""
+  values, spec = pytree.tree_flatten({"kv_cache": kv_cache})
+  flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
+  torch_output = torch_model(tokens, input_pos, kv_cache)
+  input_kv_flatten = {k: v.numpy() for k, v in zip(flat_names, values)}
+  edge_output = edge_model(
+      signature_name=signature_name,
+      tokens=tokens.numpy(),
+      input_pos=input_pos.numpy(),
+      **input_kv_flatten,
+  )
+  return np.allclose(
+      edge_output["logits"],
+      torch_output["logits"].detach().numpy(),
+      atol=atol,
+      rtol=rtol,
+  )

ai-edge-torch-nightly 0.3.0.dev20240910__py3-none-any.whl → 0.3.0.dev20240913__py3-none-any.whl

ai-edge-torch-nightly 0.3.0.dev20240910__py3-none-any.whl → 0.3.0.dev20240913__py3-none-any.whl