ai-edge-torch-nightly 0.3.0.dev20240909__py3-none-any.whl → 0.3.0.dev20240911__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- ai_edge_torch/_convert/test/test_convert.py +35 -13
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +31 -12
- ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +25 -6
- ai_edge_torch/generative/examples/gemma/gemma.py +34 -18
- ai_edge_torch/generative/examples/gemma/gemma2.py +38 -17
- ai_edge_torch/generative/examples/{experimental/phi → phi}/convert_to_tflite.py +11 -12
- ai_edge_torch/generative/examples/{experimental/phi → phi}/phi2.py +31 -33
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +58 -25
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +25 -6
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +38 -22
- ai_edge_torch/generative/layers/attention.py +60 -63
- ai_edge_torch/generative/layers/kv_cache.py +160 -51
- ai_edge_torch/generative/test/{test_experimental_ekv.py → test_kv_cache.py} +8 -22
- ai_edge_torch/generative/test/test_model_conversion.py +71 -33
- ai_edge_torch/generative/test/test_model_conversion_large.py +51 -23
- ai_edge_torch/generative/test/utils.py +54 -0
- ai_edge_torch/odml_torch/lowerings/_convolution.py +196 -74
- ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +0 -1
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240909.dist-info → ai_edge_torch_nightly-0.3.0.dev20240911.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240909.dist-info → ai_edge_torch_nightly-0.3.0.dev20240911.dist-info}/RECORD +25 -35
- ai_edge_torch/generative/examples/experimental/gemma/__init__.py +0 -14
- ai_edge_torch/generative/examples/experimental/gemma/convert_to_tflite.py +0 -88
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +0 -219
- ai_edge_torch/generative/examples/experimental/phi/__init__.py +0 -14
- ai_edge_torch/generative/examples/experimental/tiny_llama/__init__.py +0 -14
- ai_edge_torch/generative/examples/experimental/tiny_llama/convert_to_tflite.py +0 -87
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +0 -205
- ai_edge_torch/generative/examples/phi2/__init__.py +0 -14
- ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +0 -67
- ai_edge_torch/generative/examples/phi2/phi2.py +0 -189
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +0 -176
- /ai_edge_torch/generative/examples/{experimental → phi}/__init__.py +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240909.dist-info → ai_edge_torch_nightly-0.3.0.dev20240911.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240909.dist-info → ai_edge_torch_nightly-0.3.0.dev20240911.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240909.dist-info → ai_edge_torch_nightly-0.3.0.dev20240911.dist-info}/top_level.txt +0 -0
ai_edge_torch/generative/test/test_model_conversion.py
CHANGED
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
+
+"""Testing model conversion for a few gen-ai models."""
 
 import ai_edge_torch
 from ai_edge_torch import config as ai_edge_config
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.generative.examples.phi2 import phi2
-from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache  # NOQA
+from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
-from ai_edge_torch.
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
 import numpy as np
 import torch
 
@@ -49,22 +48,32 @@ class TestModelConversion(googletest.TestCase):
     )
   def test_toy_model_with_kv_cache(self):
     config = toy_model_with_kv_cache.get_model_config()
-    pytorch_model = toy_model_with_kv_cache.
-
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
+    tokens, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
         [10], dtype=torch.int64
     )
-
-
+    kv = kv_cache.KVCache.from_model_config(config)
+
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-
-
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-5,
             rtol=1e-5,
         )
@@ -77,22 +86,32 @@ class TestModelConversion(googletest.TestCase):
   def test_toy_model_with_kv_cache_with_hlfb(self):
     config = toy_model_with_kv_cache.get_model_config()
     config.enable_hlfb = True
-    pytorch_model = toy_model_with_kv_cache.
-
+    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
+    tokens, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
        [10], dtype=torch.int64
     )
-
-
+    kv = kv_cache.KVCache.from_model_config(config)
+
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-
-
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-5,
             rtol=1e-5,
         )
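
The two toy-model hunks above replace module-internal KV state with an explicit `kv_cache` input: the cache is built up front with `kv_cache.KVCache.from_model_config(config)` and traced through `ai_edge_torch.convert` via `sample_kwargs`. A minimal sketch of that flow outside the test harness, using only names that appear in this diff (the export call and path are illustrative):

    import ai_edge_torch
    import torch

    from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
    from ai_edge_torch.generative.layers import kv_cache

    config = toy_model_with_kv_cache.get_model_config()
    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()

    # The KV cache is now an explicit forward() argument, so conversion needs
    # a sample cache to trace with, sized from the model config.
    kv = kv_cache.KVCache.from_model_config(config)
    tokens = torch.tensor([[1]], dtype=torch.long)
    input_pos = torch.tensor([10], dtype=torch.int64)

    # Tracing inputs are passed by keyword, matching forward()'s signature.
    edge_model = ai_edge_torch.convert(
        pytorch_model,
        sample_kwargs={"tokens": tokens, "input_pos": input_pos, "kv_cache": kv},
    )
    edge_model.export("/tmp/toy_model_with_kv_cache.tflite")  # path illustrative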
@@ -117,37 +136,56 @@ class TestModelConversion(googletest.TestCase):
     decode_token = torch.tensor([[1]], dtype=torch.long)
     decode_input_pos = torch.tensor([5], dtype=torch.int64)
 
+    kv = kv_cache.KVCache.from_model_config(config)
+
     edge_model = (
         ai_edge_torch.signature(
-            "prefill",
+            "prefill",
+            pytorch_model,
+            sample_kwargs={
+                "tokens": prefill_tokens,
+                "input_pos": prefill_input_pos,
+                "kv_cache": kv,
+            },
+        )
+        .signature(
+            "decode",
+            pytorch_model,
+            sample_kwargs={
+                "tokens": decode_token,
+                "input_pos": decode_input_pos,
+                "kv_cache": kv,
+            },
         )
-        .signature("decode", pytorch_model, (decode_token, decode_input_pos))
         .convert()
     )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
-    copied_model = copy.deepcopy(pytorch_model)
-    copied_edge = copy.deepcopy(edge_model)
-
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-
+            prefill_tokens,
+            prefill_input_pos,
+            kv,
             signature_name="prefill",
-
+            atol=1e-5,
+            rtol=1e-5,
         )
     )
 
     self.assertTrue(
-
-
-
-
+        test_utils.compare_tflite_torch(
+            edge_model,
+            pytorch_model,
+            decode_token,
+            decode_input_pos,
+            kv,
             signature_name="decode",
-
+            atol=1e-5,
+            rtol=1e-5,
         )
     )
 
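
The multisignature hunk above chains `.signature()` calls so prefill and decode become separate entry points in one TFLite flatbuffer, sharing a single external KV cache. A sketch of the same pattern with the toy model from this diff (shapes are illustrative):

    import ai_edge_torch
    import torch

    from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
    from ai_edge_torch.generative.layers import kv_cache

    config = toy_model_with_kv_cache.get_model_config()
    pytorch_model = toy_model_with_kv_cache.ToyModelWithKVCache(config).eval()
    kv = kv_cache.KVCache.from_model_config(config)

    # Prefill consumes a chunk of prompt tokens; decode consumes one token.
    prefill_tokens = torch.zeros((1, 8), dtype=torch.long)
    prefill_input_pos = torch.arange(0, 8)
    decode_token = torch.tensor([[1]], dtype=torch.long)
    decode_input_pos = torch.tensor([8], dtype=torch.int64)

    # Each .signature() call adds one named entry point to the same model.
    edge_model = (
        ai_edge_torch.signature(
            "prefill",
            pytorch_model,
            sample_kwargs={
                "tokens": prefill_tokens,
                "input_pos": prefill_input_pos,
                "kv_cache": kv,
            },
        )
        .signature(
            "decode",
            pytorch_model,
            sample_kwargs={
                "tokens": decode_token,
                "input_pos": decode_input_pos,
                "kv_cache": kv,
            },
        )
        .convert()
    )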
ai_edge_torch/generative/test/test_model_conversion_large.py
CHANGED
@@ -12,16 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
+
+"""Testing model conversion for a few gen-ai models."""
 
 import ai_edge_torch
 from ai_edge_torch import config as ai_edge_config
-from ai_edge_torch.generative.examples.gemma import gemma
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.generative.examples.
-from ai_edge_torch.generative.
-from ai_edge_torch.
+from ai_edge_torch.generative.examples.gemma import gemma
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.examples.phi import phi2
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
 import numpy as np
 import torch
 
@@ -55,18 +55,28 @@ class TestModelConversion(googletest.TestCase):
     tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
     tokens[0, :4] = idx
     input_pos = torch.arange(0, 10)
-
-
+    kv = kv_cache.KVCache.from_model_config(config)
+
+    edge_model = ai_edge_torch.convert(
+        model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
             edge_model,
             model,
-
-
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-2,
             rtol=1e-5,
         )
@@ -85,23 +95,31 @@ class TestModelConversion(googletest.TestCase):
     prefill_tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
     prefill_tokens[0, :4] = idx
     prefill_input_pos = torch.arange(0, 10)
+    kv = kv_cache.KVCache.from_model_config(config)
 
     edge_model = ai_edge_torch.signature(
-        "prefill",
+        "prefill",
+        model,
+        sample_kwargs={
+            "tokens": prefill_tokens,
+            "input_pos": prefill_input_pos,
+            "kv_cache": kv,
+        },
     ).convert()
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
             edge_model,
             model,
-
+            prefill_tokens,
+            prefill_input_pos,
+            kv,
             signature_name="prefill",
-
-
-            rtol=1e-5,
+            atol=1e-1,
+            rtol=1e-3,
         )
     )
 
@@ -117,18 +135,28 @@ class TestModelConversion(googletest.TestCase):
     tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
     tokens[0, :4] = idx
     input_pos = torch.arange(0, 10)
-
-
+    kv = kv_cache.KVCache.from_model_config(config)
+
+    edge_model = ai_edge_torch.convert(
+        pytorch_model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    )
     edge_model.set_interpreter_builder(
         self._interpreter_builder(edge_model.tflite_model())
     )
 
     self.assertTrue(
-
+        test_utils.compare_tflite_torch(
             edge_model,
             pytorch_model,
-
-
+            tokens,
+            input_pos,
+            kv,
+            signature_name="serving_default",
             atol=1e-3,
             rtol=1e-3,
         )
ai_edge_torch/generative/test/utils.py
ADDED
@@ -0,0 +1,54 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Common utils for testing."""
+
+from ai_edge_torch import model
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+from ai_edge_torch.lowertools import common_utils
+import numpy as np
+import torch
+from torch.utils import _pytree as pytree
+
+
+def compare_tflite_torch(
+    edge_model: model.Model,
+    torch_model: torch.nn.Module,
+    tokens: torch.Tensor,
+    input_pos: torch.Tensor,
+    kv_cache: kv_utils.KVCache,
+    signature_name: str,
+    atol: float = 1e-5,
+    rtol: float = 1e-5,
+):
+  """Compares torch models and TFLite models."""
+  values, spec = pytree.tree_flatten({"kv_cache": kv_cache})
+  flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
+  torch_output = torch_model(tokens, input_pos, kv_cache)
+
+  input_kv_flatten = {k: v.numpy() for k, v in zip(flat_names, values)}
+  edge_output = edge_model(
+      signature_name=signature_name,
+      tokens=tokens.numpy(),
+      input_pos=input_pos.numpy(),
+      **input_kv_flatten,
+  )
+
+  return np.allclose(
+      edge_output["logits"],
+      torch_output["logits"].detach().numpy(),
+      atol=atol,
+      rtol=rtol,
+  )
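
This helper leans on KVCache being flattenable as a pytree: flattening `{"kv_cache": kv}` yields one plain tensor per per-layer cache buffer, and `common_utils.flat_dict_names` recovers the names under which the converter exposed those tensors as signature inputs. A small self-contained sketch of just that flattening step (the "one k and one v buffer per layer" comment is an assumption about how `KVCache.from_model_config` lays out the cache):

    from torch.utils import _pytree as pytree

    from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
    from ai_edge_torch.generative.layers import kv_cache
    from ai_edge_torch.lowertools import common_utils

    config = toy_model_with_kv_cache.get_model_config()
    kv = kv_cache.KVCache.from_model_config(config)

    # KVCache flattens to plain tensors plus a structure spec; the derived
    # names are what the converted TFLite signature expects as cache inputs.
    values, spec = pytree.tree_flatten({"kv_cache": kv})
    flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
    for name, tensor in zip(flat_names, values):
      print(name, tuple(tensor.shape))  # one k and one v buffer per layer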
ai_edge_torch/odml_torch/lowerings/_convolution.py
CHANGED
@@ -12,22 +12,171 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Provides lowering for coreaten to
+"""Provides lowering for coreaten to stablehlo for Convolution."""
 
 import math
 from typing import Optional
 
+from ai_edge_torch.odml_torch.lowerings import registry
 from jax._src.lib.mlir import ir
 from jax._src.lib.mlir.dialects import hlo as stablehlo
 import torch
 
-
+
+def make_padding(padding):
+  """Change the padding from pytorch to stablehlo style.
+
+  Stablehlo allows start and end padding for each dimension while aten only
+  allows symmetric padding and so only has one number per dimension.
+
+  Args:
+    padding: The padding of the convolution
+
+  Returns:
+    The padding in stablehlo style
+  """
+  return tuple((p, p) for p in padding)
+
+
+def create_conv_dimension_numbers(lhs, transposed: bool = False):
+  """Create the dimension numbers for the convolution.
+
+  Args:
+    lhs: The input tensor
+    transposed: Whether the convolution is transposed
+
+  Returns:
+    The dimension numbers for the convolution
+  """
+  num_spatial_dims = len(lhs.type.shape) - 2
+  spatial_dimensions = []
+  for i in range(0, num_spatial_dims):
+    spatial_dimensions.append(i + 2)
+
+  # Regular kernels are OIHW
+  # TransposedConv kernels are IOHW
+  dimension_numbers = stablehlo.ConvDimensionNumbers.get(
+      input_batch_dimension=0,
+      input_feature_dimension=1,
+      input_spatial_dimensions=spatial_dimensions,
+      kernel_input_feature_dimension=0 if transposed else 1,
+      kernel_output_feature_dimension=1 if transposed else 0,
+      kernel_spatial_dimensions=spatial_dimensions,
+      output_batch_dimension=0,
+      output_feature_dimension=1,
+      output_spatial_dimensions=spatial_dimensions,
+  )
+  return dimension_numbers
+
+
+def infer_output_shape(
+    lhs,
+    rhs,
+    stride,
+    dilation,
+    padding,
+    transposed: bool = False,
+    output_padding: list[int] = 0,
+):
+  """Infer the output shape of the convolution.
+
+  Args:
+    lhs: The input tensor
+    rhs: The kernel tensor
+    stride: The stride of the convolution (dilation of input in transposed conv)
+    dilation: The kernel dilation of the convolution
+    padding: The padding of the convolution
+    transposed: Whether the convolution is transposed
+    output_padding: The output padding of the convolution
+
+  Returns:
+    The output shape of the convolution
+  """
+  lhs_type: ir.RankedTensorType = lhs.type
+  lhs_shape: list[int] = lhs_type.shape
+  rhs_shape: list[int] = rhs.type.shape
+
+  # Input layout is: (N)CHW and Kernel layout is: (O)IHW for regular conv
+  # Input layout is: (N)CHW and Kernel layout is: I(O)HW for transposed conv
+  output_shape = (
+      [lhs_shape[0], rhs_shape[1]]
+      if transposed
+      else [lhs_shape[0], rhs_shape[0]]
+  )
+  num_spatial_dims = len(lhs.type.shape) - 2
+
+  # looping over the spatial dims (skipping the first 2 dims which are
+  # batch and features)
+  for spatial_dim in range(0, num_spatial_dims):
+    dim = spatial_dim + 2
+    dim_size = lhs_shape[dim]
+    kernel_dim_size = rhs_shape[dim]
+
+    if transposed:
+      output_dim_size = (
+          (dim_size - 1) * stride[spatial_dim]
+          - 2 * padding[spatial_dim]
+          + dilation[spatial_dim] * (kernel_dim_size - 1)
+          + output_padding[spatial_dim]
+          + 1
+      )
+    else:
+      output_dim_size = math.floor(
+          (
+              (
+                  dim_size
+                  + 2 * padding[spatial_dim]
+                  - dilation[spatial_dim] * (kernel_dim_size - 1)
+                  - 1
+              )
+              / stride[spatial_dim]
+          )
+          + 1
+      )
+
+    output_shape.append(output_dim_size)
+
+  return output_shape
+
+
+def build_transpose_conv(
+    lctx,
+    output_type: ir.RankedTensorType,
+    lhs: ir.Value,
+    rhs: ir.Value,
+    stride: list[int],
+    padding: list[int],
+    dilation: list[int],
+    output_padding: list[int],
+    groups: int,
+):
+  lhs_type: ir.RankedTensorType = lhs.type
+  num_spatial_dims = len(lhs_type.shape) - 2
+  rhs = stablehlo.reverse(rhs, list(range(2, 2 + num_spatial_dims)))
+
+  kernel_size = rhs.type.shape
+  # We need to additional padding on the input to get the right output size.
+  adjusted_padding = [
+      dilation[dim] * (kernel_size[dim + 2] - 1) - padding[dim]
+      for dim in range(num_spatial_dims)
+  ]
+  return stablehlo.convolution(
+      result=output_type,
+      lhs=lhs,
+      rhs=rhs,
+      dimension_numbers=create_conv_dimension_numbers(lhs, True),
+      feature_group_count=groups,
+      batch_group_count=1,
+      padding=make_padding(adjusted_padding),
+      lhs_dilation=stride,
+      rhs_dilation=dilation,
+  )
 
 
 # convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride,
 # SymInt[] padding, SymInt[] dilation, bool transposed,
 # SymInt[] output_padding, SymInt groups) -> Tensor
-
+@registry.lower(torch.ops.aten.convolution)
 def _aten_convolution(
     lctx,
     lhs: ir.Value,
@@ -40,80 +189,53 @@ def _aten_convolution(
     output_padding: list[int],
     groups: int,
 ):
-  if transposed:
-    raise NotImplementedError("Transposed convolution is not implemented.")
 
-
-
-
-
-  # allows symmetric padding and so only has one number per dimension.
-  def make_padding(padding):
-    return tuple((p, p) for p in padding)
-
-  def create_conv_dimension_numbers():
-    num_spatial_dims = len(lhs.type.shape) - 2
-    spatial_dimensions = []
-    for i in range(0, num_spatial_dims):
-      spatial_dimensions.append(i + 2)
-
-    dimension_numbers = stablehlo.ConvDimensionNumbers.get(
-        input_batch_dimension=0,
-        input_feature_dimension=1,
-        input_spatial_dimensions=spatial_dimensions,
-        kernel_input_feature_dimension=1,
-        kernel_output_feature_dimension=0,
-        kernel_spatial_dimensions=spatial_dimensions,
-        output_batch_dimension=0,
-        output_feature_dimension=1,
-        output_spatial_dimensions=spatial_dimensions,
+  # TODO(b/365559296) Add support for output_padding
+  if any(output_padding):
+    raise NotImplementedError(
+        "Output padding on convolution is not implemented."
     )
-    return dimension_numbers
-
-  def infer_output_shape():
-    lhs_type: ir.RankedTensorType = lhs.type
-    lhs_shape: list[int] = lhs_type.shape
-    rhs_shape: list[int] = rhs.type.shape
-
-    # Input layout is: (N)CHW and Kernel layout is: (O)IHW
-    output_shape = [lhs_shape[0], rhs_shape[0]]
-    num_spatial_dims = len(lhs.type.shape) - 2
-
-    # looping over the spatial dims (skipping the first 2 dims which are
-    # batch and features)
-    for spatial_dim in range(0, num_spatial_dims):
-      dim_size = lhs_shape[spatial_dim + 2]
-      kernel_dim_size = rhs_shape[spatial_dim + 2]
-
-      # for example, a dilation of 2 increases the dimension size by 2
-      dim_size *= dilation[spatial_dim]
-
-      # padding added to both sides
-      dim_size += 2 * padding[spatial_dim]
-
-      output_dim_size = math.ceil(
-          (dim_size - kernel_dim_size + 1) / stride[spatial_dim]
-      )
-
-      output_shape.append(output_dim_size)
-
-    return output_shape
 
   lhs_type: ir.RankedTensorType = lhs.type
-
-
-
-
-
-
-      rhs=rhs,
-      dimension_numbers=create_conv_dimension_numbers(),
-      feature_group_count=groups,
-      batch_group_count=1,
-      window_strides=stride,
-      padding=make_padding(padding),
-      lhs_dilation=(1,) * len(stride),
-      rhs_dilation=dilation,
+  output_shape = infer_output_shape(
+      lhs, rhs, stride, dilation, padding, transposed, output_padding
+  )
+  output_type = ir.RankedTensorType.get(
+      output_shape,
+      lhs_type.element_type,
   )
 
-
+  if transposed:
+    res = build_transpose_conv(
+        lctx,
+        output_type,
+        lhs,
+        rhs,
+        stride,
+        padding,
+        dilation,
+        output_padding,
+        groups,
+    )
+  else:
+    res = stablehlo.convolution(
+        result=output_type,
+        lhs=lhs,
+        rhs=rhs,
+        dimension_numbers=create_conv_dimension_numbers(lhs),
+        feature_group_count=groups,
+        batch_group_count=1,
+        window_strides=stride,
+        padding=make_padding(padding),
+        rhs_dilation=dilation,
+    )
+
+  if bias is not None:
+    # broadcast [C] to [NCHW]
+    broadcasted_bias = stablehlo.broadcast_in_dim(output_type, bias, [1])
+    res = stablehlo.add(
+        lhs=res,
+        rhs=broadcasted_bias,
+    )
+
+  return res
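
The two branches of infer_output_shape above implement the standard conv/deconv size arithmetic: floor((dim + 2p - d(k-1) - 1)/s) + 1 for the regular case and (dim-1)s - 2p + d(k-1) + output_padding + 1 for the transposed case. These match PyTorch's own modules, which makes for a quick sanity check (the sizes below are arbitrary):

    import torch

    x = torch.randn(1, 4, 5, 5)

    # Regular branch: floor((5 + 2*1 - 1*(3 - 1) - 1) / 2) + 1 = 3
    conv = torch.nn.Conv2d(4, 8, kernel_size=3, stride=2, padding=1)
    print(conv(x).shape)  # torch.Size([1, 8, 3, 3])

    # Transposed branch: (5 - 1)*2 - 2*1 + 1*(3 - 1) + 0 + 1 = 9
    deconv = torch.nn.ConvTranspose2d(4, 8, kernel_size=3, stride=2, padding=1)
    print(deconv(x).shape)  # torch.Size([1, 8, 9, 9])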
ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py
CHANGED
@@ -105,7 +105,6 @@ lower_by_torch_xla2(torch.ops.aten.clamp.default)
 lower_by_torch_xla2(torch.ops.aten.clone)
 lower_by_torch_xla2(torch.ops.aten.clone.default)
 lower_by_torch_xla2(torch.ops.aten.constant_pad_nd)
-lower_by_torch_xla2(torch.ops.aten.convolution)
 lower_by_torch_xla2(torch.ops.aten.cos)
 lower_by_torch_xla2(torch.ops.aten.cosh)
 lower_by_torch_xla2(torch.ops.aten.cumsum)
ai_edge_torch/version.py
CHANGED

{ai_edge_torch_nightly-0.3.0.dev20240909.dist-info → ai_edge_torch_nightly-0.3.0.dev20240911.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.3.0.dev20240909
+Version: 0.3.0.dev20240911
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI