PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

Files changed (169) hide show

ai_edge_torch/generative/test/test_model_conversion_large.py ADDED Viewed

@@ -0,0 +1,251 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing model conversion for a few gen-ai models."""
+import ai_edge_torch
+from ai_edge_torch import config as ai_edge_config
+from ai_edge_torch.generative.examples.gemma import gemma1
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.examples.openelm import openelm
+from ai_edge_torch.generative.examples.phi import phi2
+from ai_edge_torch.generative.examples.phi import phi3
+from ai_edge_torch.generative.examples.smollm import smollm
+from ai_edge_torch.generative.examples.stable_diffusion import clip as sd_clip
+from ai_edge_torch.generative.examples.stable_diffusion import decoder as sd_decoder
+from ai_edge_torch.generative.examples.stable_diffusion import diffusion as sd_diffusion
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.test import utils as test_utils
+import numpy as np
+import torch
+from absl.testing import absltest as googletest
+from ai_edge_litert import interpreter
+class TestModelConversion(googletest.TestCase):
+  """Unit tests that check for model conversion and correctness.
+  Each test builds a PyTorch model from an example module's
+  get_fake_model_config(), converts it with ai_edge_torch, and compares the
+  converted model's output against the PyTorch output within a tolerance.
+  Every test is skipped when ai_edge_config.Config.use_torch_xla is truthy.
+  """
+  def setUp(self):
+    """Stores a builder for interpreters that register the GenAI custom ops."""
+    super().setUp()
+    # Builder function for an Interpreter that supports custom ops.
+    # It takes the serialized TFLite model and returns a zero-argument
+    # callable, which is what set_interpreter_builder() is given below.
+    self._interpreter_builder = (
+        lambda tflite_model: lambda: interpreter.InterpreterWithCustomOps(
+            custom_op_registerers=["GenAIOpsRegisterer"],
+            model_content=tflite_model,
+            experimental_default_delegate_latest_features=True,
+        )
+    )
+  def _test_model(self, config, model, signature_name, atol, rtol):
+    """Converts `model` and asserts its logits match the PyTorch model.
+    Args:
+      config: Model config passed to kv_cache.KVCache.from_model_config().
+      model: The PyTorch model to convert and to use as the reference.
+      signature_name: Name of the signature to convert and then invoke.
+      atol: Absolute tolerance forwarded to test_utils.compare_tflite_torch().
+      rtol: Relative tolerance forwarded to test_utils.compare_tflite_torch().
+    """
+    # Sample input: a (1, 10) int tensor of zeros whose first four entries are
+    # overwritten with 1..4, plus positions 0..9.
+    idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
+    tokens = torch.full((1, 10), 0, dtype=torch.int, device="cpu")
+    tokens[0, :4] = idx
+    input_pos = torch.arange(0, 10, dtype=torch.int)
+    kv = kv_cache.KVCache.from_model_config(config)
+    edge_model = ai_edge_torch.signature(
+        signature_name,
+        model,
+        sample_kwargs={
+            "tokens": tokens,
+            "input_pos": input_pos,
+            "kv_cache": kv,
+        },
+    ).convert()
+    # Run the converted model through the custom-op-aware interpreter.
+    edge_model.set_interpreter_builder(
+        self._interpreter_builder(edge_model.tflite_model())
+    )
+    # The same tensors used for conversion are reused as comparison inputs.
+    self.assertTrue(
+        test_utils.compare_tflite_torch(
+            edge_model,
+            model,
+            tokens,
+            input_pos,
+            kv,
+            signature_name=signature_name,
+            atol=atol,
+            rtol=rtol,
+        )
+    )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_gemma1(self):
+    """Checks gemma1.Gemma under the "serving_default" signature."""
+    config = gemma1.get_fake_model_config()
+    pytorch_model = gemma1.Gemma(config).eval()
+    self._test_model(
+        config, pytorch_model, "serving_default", atol=1e-2, rtol=1e-5
+    )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_gemma2(self):
+    """Checks gemma2.Gemma2 under the "prefill" signature."""
+    config = gemma2.get_fake_model_config()
+    pytorch_model = gemma2.Gemma2(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_phi2(self):
+    """Checks phi2.Phi2 under the "serving_default" signature."""
+    config = phi2.get_fake_model_config()
+    pytorch_model = phi2.Phi2(config).eval()
+    self._test_model(
+        config, pytorch_model, "serving_default", atol=1e-3, rtol=1e-3
+    )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_phi3(self):
+    """Checks phi3.Phi3_5Mini under the "prefill" signature."""
+    config = phi3.get_fake_model_config()
+    pytorch_model = phi3.Phi3_5Mini(config).eval()
+    self._test_model(
+        config, pytorch_model, "prefill", atol=1e-5, rtol=1e-5
+    )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_smollm(self):
+    """Checks smollm.SmolLM under the "prefill" signature."""
+    config = smollm.get_fake_model_config()
+    pytorch_model = smollm.SmolLM(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_openelm(self):
+    """Checks openelm.OpenELM under the "prefill" signature."""
+    config = openelm.get_fake_model_config()
+    pytorch_model = openelm.OpenELM(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_stable_diffusion_clip(self):
+    """Checks sd_clip.CLIP converted under the "encode" signature."""
+    config = sd_clip.get_fake_model_config()
+    prompt_tokens = torch.from_numpy(
+        np.array([[1, 2, 3, 4, 5, 6]], dtype=np.int32)
+    )
+    pytorch_model = sd_clip.CLIP(config).eval()
+    # Reference output from the PyTorch model on the same tokens.
+    torch_output = pytorch_model(prompt_tokens)
+    edge_model = ai_edge_torch.signature(
+        "encode", pytorch_model, (prompt_tokens,)
+    ).convert()
+    edge_model.set_interpreter_builder(
+        self._interpreter_builder(edge_model.tflite_model())
+    )
+    edge_output = edge_model(
+        prompt_tokens.numpy(),
+        signature_name="encode",
+    )
+    self.assertTrue(
+        np.allclose(
+            edge_output,
+            torch_output.detach().numpy(),
+            atol=1e-4,
+            rtol=1e-5,
+        )
+    )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_stable_diffusion_diffusion(self):
+    """Checks sd_diffusion.Diffusion converted under "diffusion"."""
+    config = sd_diffusion.get_fake_model_config(2)
+    # NOTE(review): the inputs below come from np.random with no seed set in
+    # this file, so the sampled values differ from run to run.
+    latents = torch.from_numpy(
+        np.random.normal(size=(2, 4, 8, 8)).astype(np.float32)
+    )
+    context = torch.from_numpy(
+        np.random.normal(size=(2, 4, 4)).astype(np.float32)
+    )
+    time_embedding = torch.from_numpy(
+        np.random.normal(size=(2, 2)).astype(np.float32)
+    )
+    pytorch_model = sd_diffusion.Diffusion(config).eval()
+    # Reference output from the PyTorch model on the same inputs.
+    torch_output = pytorch_model(latents, context, time_embedding)
+    edge_model = ai_edge_torch.signature(
+        "diffusion", pytorch_model, (latents, context, time_embedding)
+    ).convert()
+    edge_model.set_interpreter_builder(
+        self._interpreter_builder(edge_model.tflite_model())
+    )
+    edge_output = edge_model(
+        latents.numpy(),
+        context.numpy(),
+        time_embedding.numpy(),
+        signature_name="diffusion",
+    )
+    self.assertTrue(
+        np.allclose(
+            edge_output,
+            torch_output.detach().numpy(),
+            atol=1e-4,
+            rtol=1e-5,
+        )
+    )
+  @googletest.skipIf(
+      ai_edge_config.Config.use_torch_xla,
+      reason="tests with custom ops are not supported on oss",
+  )
+  def test_stable_diffusion_decoder(self):
+    """Checks sd_decoder.Decoder converted under the "decode" signature."""
+    config = sd_decoder.get_fake_model_config()
+    # NOTE(review): unseeded np.random input; values differ from run to run.
+    latents = torch.from_numpy(
+        np.random.normal(size=(1, 4, 64, 64)).astype(np.float32)
+    )
+    pytorch_model = sd_decoder.Decoder(config).eval()
+    # Reference output from the PyTorch model on the same latents.
+    torch_output = pytorch_model(latents)
+    edge_model = ai_edge_torch.signature(
+        "decode", pytorch_model, (latents,)
+    ).convert()
+    edge_model.set_interpreter_builder(
+        self._interpreter_builder(edge_model.tflite_model())
+    )
+    edge_output = edge_model(
+        latents.numpy(),
+        signature_name="decode",
+    )
+    self.assertTrue(
+        np.allclose(
+            edge_output,
+            torch_output.detach().numpy(),
+            atol=1e-4,
+            rtol=1e-5,
+        )
+    )
+# Run the test cases through absltest when this file is executed directly.
+if __name__ == "__main__":
+  googletest.main()

ai_edge_torch/generative/test/test_quantize.py CHANGED Viewed

@@ -13,12 +13,8 @@
 # limitations under the License.
 # ==============================================================================
-import unittest
-from parameterized import parameterized
-import torch
 import ai_edge_torch
+from ai_edge_torch import config
 from ai_edge_torch.generative.examples.test_models import toy_model  # NOQA
 from ai_edge_torch.generative.quantize import quant_recipe
 from ai_edge_torch.generative.quantize import quant_recipe_utils
@@ -29,20 +25,22 @@ from ai_edge_torch.generative.quantize.quant_attrs import Granularity
 from ai_edge_torch.generative.quantize.quant_attrs import Mode
 from ai_edge_torch.quantize import quant_config
 from ai_edge_torch.testing import model_coverage
+import torch
+from absl.testing import absltest as googletest
+from absl.testing import parameterized
-class TestVerifyRecipes(unittest.TestCase):
+class TestVerifyRecipes(parameterized.TestCase):
   """Unit tests that check for model quantization recipes."""
-  @parameterized.expand(
-      [
-          (Dtype.FP32, Dtype.FP32),
-          (Dtype.INT8, Dtype.INT8),
-          (Dtype.INT8, Dtype.FP16),
-          (Dtype.FP16, Dtype.INT8),
-          (Dtype.FP16, Dtype.FP16),
-      ]
-  )
+  @parameterized.parameters([
+      (Dtype.FP32, Dtype.FP32),
+      (Dtype.INT8, Dtype.INT8),
+      (Dtype.INT8, Dtype.FP16),
+      (Dtype.FP16, Dtype.INT8),
+      (Dtype.FP16, Dtype.FP16),
+  ])
   def test_verify_invalid_recipes(
       self,
       activation,
@@ -54,31 +52,29 @@ class TestVerifyRecipes(unittest.TestCase):
           with self.assertRaises(ValueError):
             quant_recipe.LayerQuantRecipe(activation, weight, m, a, g).verify()
-  @parameterized.expand(
-      [
-          (
-              Dtype.FP32,
-              Dtype.INT8,
-              Mode.DYNAMIC_RANGE,
-              Algorithm.MIN_MAX,
-              Granularity.CHANNELWISE,
-          ),
-          (
-              Dtype.FP32,
-              Dtype.INT8,
-              Mode.WEIGHT_ONLY,
-              Algorithm.MIN_MAX,
-              Granularity.CHANNELWISE,
-          ),
-          (
-              Dtype.FP32,
-              Dtype.FP16,
-              Mode.WEIGHT_ONLY,
-              Algorithm.FLOAT_CAST,
-              Granularity.NONE,
-          ),
-      ]
-  )
+  @parameterized.parameters([
+      (
+          Dtype.FP32,
+          Dtype.INT8,
+          Mode.DYNAMIC_RANGE,
+          Algorithm.MIN_MAX,
+          Granularity.CHANNELWISE,
+      ),
+      (
+          Dtype.FP32,
+          Dtype.INT8,
+          Mode.WEIGHT_ONLY,
+          Algorithm.MIN_MAX,
+          Granularity.CHANNELWISE,
+      ),
+      (
+          Dtype.FP32,
+          Dtype.FP16,
+          Mode.WEIGHT_ONLY,
+          Algorithm.FLOAT_CAST,
+          Granularity.NONE,
+      ),
+  ])
   def test_verify_valid_recipes(
       self,
       activation,
@@ -87,10 +83,12 @@ class TestVerifyRecipes(unittest.TestCase):
       algo,
       granularity,
   ):
-    quant_recipe.LayerQuantRecipe(activation, weight, mode, algo, granularity).verify()
+    quant_recipe.LayerQuantRecipe(
+        activation, weight, mode, algo, granularity
+    ).verify()
-class TestQuantizeConvert(unittest.TestCase):
+class TestQuantizeConvert(parameterized.TestCase):
   """Test conversion with quantization."""
   def _attention_int8_dynamic_recipe() -> quant_config.QuantConfig:
@@ -107,35 +105,51 @@ class TestQuantizeConvert(unittest.TestCase):
         )
     )
-  @parameterized.expand(
-      [
-          (quant_recipes.full_fp16_recipe(), 0.65),
-          (quant_recipes.full_int8_dynamic_recipe(), 0.47),
-          (_attention_int8_dynamic_recipe(), 0.89),
-          (_feedforward_int8_dynamic_recipe(), 0.72),
-      ]
-  )
-  def test_quantize_convert_toy_sizes(self, quant_config, expected_compression):
+  @parameterized.parameters([
+      (quant_recipes.full_fp16_recipe()),
+      (quant_recipes.full_int8_dynamic_recipe()),
+      (quant_recipes.full_int8_weight_only_recipe()),
+      (_attention_int8_dynamic_recipe()),
+      (_feedforward_int8_dynamic_recipe()),
+  ])
+  def test_quantize_convert_toy_sizes(self, quant_config):
     config = toy_model.get_model_config()
     pytorch_model = toy_model.ToySingleLayerModel(config)
-    idx = torch.unsqueeze(torch.arange(0, 100), 0)
-    input_pos = torch.arange(0, 100)
+    idx = torch.unsqueeze(torch.arange(0, 100, dtype=torch.int), 0)
+    input_pos = torch.arange(0, 100, dtype=torch.int)
+    quantized_model = ai_edge_torch.convert(
+        pytorch_model, (idx, input_pos), quant_config=quant_config
+    )
+    float_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
+    self.assertLess(
+        len(quantized_model._tflite_model),
+        len(float_model._tflite_model),
+        "Quantized model isn't smaller than F32 model.",
+    )
+  def test_quantize_convert_toy_weight_sharing(self):
+    config = toy_model.get_model_config()
+    pytorch_model = toy_model.ToySingleLayerModelWeightSharing(config)
+    idx = torch.unsqueeze(torch.arange(0, 100, dtype=torch.int), 0)
+    input_pos = torch.arange(0, 100, dtype=torch.int)
+    quant_config = quant_recipes.full_int8_dynamic_recipe()
     quantized_model = ai_edge_torch.convert(
         pytorch_model, (idx, input_pos), quant_config=quant_config
     )
     float_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
-    self.assertAlmostEqual(
-        len(quantized_model._tflite_model) / len(float_model._tflite_model),
-        expected_compression,
-        delta=0.01,
+    self.assertLess(
+        len(quantized_model._tflite_model),
+        len(float_model._tflite_model),
+        "Quantized model isn't smaller than F32 model.",
     )
   def test_quantize_convert_compare_toy(self):
     self.skipTest("b/338288901")
     config = toy_model_with_kv_cache.get_model_config()
     pytorch_model = toy_model_with_kv_cache.ToyModelWithKV(config)
-    idx, input_pos = torch.tensor([[1]], dtype=torch.long), torch.tensor(
+    idx, input_pos = torch.tensor([[1]], dtype=torch.int), torch.tensor(
         [10], dtype=torch.int64
     )
@@ -145,7 +159,9 @@ class TestQuantizeConvert(unittest.TestCase):
     )
     float_model = ai_edge_torch.convert(pytorch_model, (idx, input_pos))
-    self.assertLess(len(quantized_model._tflite_model), len(float_model._tflite_model))
+    self.assertLess(
+        len(quantized_model._tflite_model), len(float_model._tflite_model)
+    )
     self.assertTrue(
         model_coverage.compare_tflite_torch(
             quantized_model,
@@ -159,4 +175,4 @@ class TestQuantizeConvert(unittest.TestCase):
 if __name__ == "__main__":
-  unittest.main()
+  googletest.main()

ai_edge_torch/generative/test/utils.py ADDED Viewed

@@ -0,0 +1,54 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Common utils for testing."""
+from ai_edge_torch import model
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+from ai_edge_torch.lowertools import common_utils
+import numpy as np
+import torch
+from torch.utils import _pytree as pytree
+def compare_tflite_torch(
+    edge_model: model.Model,
+    torch_model: torch.nn.Module,
+    tokens: torch.Tensor,
+    input_pos: torch.Tensor,
+    kv_cache: kv_utils.KVCache,
+    signature_name: str,
+    atol: float = 1e-5,
+    rtol: float = 1e-5,
+):
+  """Compares torch models and TFLite models.
+  Runs both models on the same inputs and compares their "logits" outputs.
+  Args:
+    edge_model: The converted model; called with keyword inputs and
+      `signature_name`.
+    torch_model: The PyTorch model; called as
+      torch_model(tokens, input_pos, kv_cache).
+    tokens: Token tensor fed to both models.
+    input_pos: Position tensor fed to both models.
+    kv_cache: KV cache passed as-is to the PyTorch model and flattened into
+      named numpy arrays for the converted model.
+    signature_name: Signature of `edge_model` to invoke.
+    atol: Absolute tolerance passed to np.allclose.
+    rtol: Relative tolerance passed to np.allclose.
+  Returns:
+    The result of np.allclose on the two models' "logits" outputs.
+  """
+  # Flatten the cache into leaf tensors and derive one keyword name per leaf.
+  # NOTE(review): flat_dict_names presumably yields the input names used by the
+  # converted signature - confirm against common_utils.
+  values, spec = pytree.tree_flatten({"kv_cache": kv_cache})
+  flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
+  torch_output = torch_model(tokens, input_pos, kv_cache)
+  input_kv_flatten = {k: v.numpy() for k, v in zip(flat_names, values)}
+  edge_output = edge_model(
+      signature_name=signature_name,
+      tokens=tokens.numpy(),
+      input_pos=input_pos.numpy(),
+      **input_kv_flatten,
+  )
+  return np.allclose(
+      edge_output["logits"],
+      torch_output["logits"].detach().numpy(),
+      atol=atol,
+      rtol=rtol,
+  )

ai_edge_torch/generative/utilities/converter.py ADDED Viewed

@@ -0,0 +1,82 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Common utility functions for model conversion."""
+import ai_edge_torch
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+from ai_edge_torch.generative.quantize import quant_recipes
+import torch
+def convert_to_tflite(
+    pytorch_model: torch.nn.Module,
+    tflite_path: str,
+    prefill_seq_len: int = 512,
+    quantize: bool = True,
+):
+  """Converts a nn.Module model to multi-signature tflite model.
+  A PyTorch model will be converted to a tflite model with two signatures:
+  "prefill" and "decode".
+  "prefill" signature takes a tensor of shape [1, prefill_seq_len] of token
+  sequence, a tensor of shape [prefill_seq_len] of token positions, and an
+  external KV cache as a sample input.
+  "decode" signature takes a tensor of shape [1, 1] of token sequence, a tensor
+  of shape [1] of the token position, and an external KV cache as a sample
+  input.
+  The final tflite model will be exported to tflite_path.
+  Args:
+      pytorch_model (torch.nn.Module): PyTorch model to convert to tflite. Its
+        `config` attribute is used to build the sample KV cache.
+      tflite_path (str): The tflite file path to export.
+      prefill_seq_len (int, optional): The maximum size of prefill input tensor.
+        Defaults to 512.
+      quantize (bool, optional): Whether the model should be quantized. Defaults
+        to True.
+  """
+  # Tensors used to trace the model graph during conversion.
+  prefill_tokens = torch.full((1, prefill_seq_len), 0, dtype=torch.int)
+  prefill_input_pos = torch.arange(0, prefill_seq_len, dtype=torch.int)
+  decode_token = torch.tensor([[0]], dtype=torch.int)
+  decode_input_pos = torch.tensor([0], dtype=torch.int)
+  # The same KV cache object is the sample input for both signatures.
+  kv = kv_utils.KVCache.from_model_config(pytorch_model.config)
+  # Use the full int8 dynamic recipe when quantizing; otherwise pass None.
+  quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
+  edge_model = (
+      ai_edge_torch.signature(
+          'prefill',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': prefill_tokens,
+              'input_pos': prefill_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .signature(
+          'decode',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': decode_token,
+              'input_pos': decode_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .convert(quant_config=quant_config)
+  )
+  edge_model.export(tflite_path)

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl