ai-edge-torch-nightly 0.3.0.dev20241206-py3-none-any.whl → 0.3.0.dev20241214-py3-none-any.whl

Files changed (56)
  1. ai_edge_torch/__init__.py +1 -1
  2. ai_edge_torch/_config.py +52 -0
  3. ai_edge_torch/_convert/test/test_convert.py +1 -2
  4. ai_edge_torch/debug/test/test_culprit.py +8 -3
  5. ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py +8 -3
  6. ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py +2 -0
  7. ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +2 -0
  8. ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +2 -0
  9. ai_edge_torch/generative/examples/gemma/gemma1.py +8 -3
  10. ai_edge_torch/generative/examples/gemma/gemma2.py +15 -8
  11. ai_edge_torch/generative/examples/llama/convert_to_tflite.py +2 -0
  12. ai_edge_torch/generative/examples/llama/llama.py +11 -17
  13. ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +2 -0
  14. ai_edge_torch/generative/examples/openelm/openelm.py +8 -3
  15. ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +2 -0
  16. ai_edge_torch/generative/examples/paligemma/decoder.py +10 -9
  17. ai_edge_torch/generative/examples/paligemma/paligemma.py +11 -1
  18. ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +2 -0
  19. ai_edge_torch/generative/examples/phi/convert_to_tflite.py +2 -0
  20. ai_edge_torch/generative/examples/phi/phi2.py +8 -3
  21. ai_edge_torch/generative/examples/phi/phi3.py +7 -9
  22. ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +2 -0
  23. ai_edge_torch/generative/examples/qwen/qwen.py +12 -9
  24. ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +3 -0
  25. ai_edge_torch/generative/examples/smollm/smollm.py +8 -3
  26. ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +12 -2
  27. ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +2 -0
  28. ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +8 -3
  29. ai_edge_torch/generative/layers/attention.py +2 -6
  30. ai_edge_torch/generative/layers/kv_cache.py +24 -18
  31. ai_edge_torch/generative/layers/normalization.py +1 -3
  32. ai_edge_torch/generative/test/test_kv_cache.py +3 -3
  33. ai_edge_torch/generative/test/test_model_conversion.py +12 -14
  34. ai_edge_torch/generative/test/test_model_conversion_large.py +63 -59
  35. ai_edge_torch/generative/test/utils.py +31 -6
  36. ai_edge_torch/generative/utilities/converter.py +25 -4
  37. ai_edge_torch/generative/utilities/model_builder.py +24 -4
  38. ai_edge_torch/generative/utilities/verifier.py +16 -2
  39. ai_edge_torch/lowertools/_shim.py +4 -2
  40. ai_edge_torch/lowertools/test_utils.py +4 -2
  41. ai_edge_torch/odml_torch/lowerings/__init__.py +1 -1
  42. ai_edge_torch/odml_torch/lowerings/_basic.py +5 -3
  43. ai_edge_torch/odml_torch/lowerings/_convolution.py +3 -1
  44. ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +28 -2
  45. ai_edge_torch/odml_torch/lowerings/_layer_norm.py +11 -2
  46. ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py +9 -9
  47. ai_edge_torch/odml_torch/lowerings/decomp.py +65 -0
  48. ai_edge_torch/odml_torch/lowerings/registry.py +0 -32
  49. ai_edge_torch/version.py +1 -1
  50. {ai_edge_torch_nightly-0.3.0.dev20241206.dist-info → ai_edge_torch_nightly-0.3.0.dev20241214.dist-info}/METADATA +7 -5
  51. {ai_edge_torch_nightly-0.3.0.dev20241206.dist-info → ai_edge_torch_nightly-0.3.0.dev20241214.dist-info}/RECORD +54 -54
  52. ai_edge_torch/config.py +0 -27
  53. ai_edge_torch/hlfb/test/test_stablehlo_composite_builder.py +0 -283
  54. {ai_edge_torch_nightly-0.3.0.dev20241206.dist-info → ai_edge_torch_nightly-0.3.0.dev20241214.dist-info}/LICENSE +0 -0
  55. {ai_edge_torch_nightly-0.3.0.dev20241206.dist-info → ai_edge_torch_nightly-0.3.0.dev20241214.dist-info}/WHEEL +0 -0
  56. {ai_edge_torch_nightly-0.3.0.dev20241206.dist-info → ai_edge_torch_nightly-0.3.0.dev20241214.dist-info}/top_level.txt +0 -0

ai_edge_torch/generative/examples/smollm/smollm.py
@@ -17,10 +17,16 @@
 
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES
 
 
+class SmolLM(model_builder.DecoderOnlyModel):
+  """A SmolLM model built from the Edge Generative API layers."""
+  pass
+
+
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for a SmolLM 135M model.
 
@@ -72,11 +78,10 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=SmolLM,
   )
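
Note (not part of the diff): the same build_model refactor is applied to the other example models in this release. A hedged usage sketch of what callers now get back; the checkpoint path is a placeholder, and kv_cache_max_len is simply forwarded to get_model_config(**kwargs):

# Placeholder checkpoint path, for illustration only.
model = build_model("/path/to/smollm_checkpoint", kv_cache_max_len=1024)
assert isinstance(model, SmolLM)                            # new concrete example class
assert isinstance(model, model_builder.DecoderOnlyModel)    # still a DecoderOnlyModel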

ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py
@@ -15,13 +15,14 @@
 
 """A toy example which has basic transformer block (w/ externalized KV-Cache)."""
 
-from typing import Tuple
+from typing import Optional, Tuple
 
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.model_config as cfg
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 import torch
 from torch import nn
 
@@ -62,6 +63,7 @@ class ToyModelWithKVCache(torch.nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      export_config: Optional[ExportConfig] = None,
   ) -> Tuple[torch.Tensor, kv_utils.KVCache]:
     x = self.tok_embedding(tokens)
     cos, sin = self.rope_cache
@@ -77,8 +79,16 @@ class ToyModelWithKVCache(torch.nn.Module):
       if kv_entry:
         updated_kv_entires.append(kv_entry)
 
-    x = self.final_norm(x)
     updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
+
+    if export_config is not None:
+      if (
+          torch.numel(input_pos) > 1
+          and not export_config.output_logits_on_prefill
+      ):
+        return {'kv_cache': updated_kv_cache}
+
+    x = self.final_norm(x)
     return {'logits': self.lm_head(x), 'kv_cache': updated_kv_cache}
 
 
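
Note (not part of the diff): the new export_config branch only changes what forward returns during prefill. A minimal self-contained sketch of that branching, assuming nothing beyond the output_logits_on_prefill field shown above:

import torch

def select_outputs(input_pos, logits, kv_cache, output_logits_on_prefill=False):
  # More than one position means a prefill call; skip logits unless requested.
  if torch.numel(input_pos) > 1 and not output_logits_on_prefill:
    return {'kv_cache': kv_cache}
  return {'logits': logits, 'kv_cache': kv_cache}

prefill = select_outputs(torch.arange(4), torch.zeros(1, 4, 32), kv_cache=object())
decode = select_outputs(torch.tensor([4]), torch.zeros(1, 1, 32), kv_cache=object())
print(sorted(prefill.keys()), sorted(decode.keys()))  # ['kv_cache'] ['kv_cache', 'logits']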

ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py
@@ -22,6 +22,7 @@ from absl import app
 from absl import flags
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
 from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 
 _CHECKPOINT_PATH = flags.DEFINE_string(
     'checkpoint_path',
@@ -63,6 +64,7 @@ def main(_):
       tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
       prefill_seq_len=_PREFILL_SEQ_LENS.value,
       quantize=_QUANTIZE.value,
+      export_config=ExportConfig(),
   )
 
 
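
Note (not part of the diff): a hedged sketch of the converter call that main() now makes. Only the keyword arguments come from the hunk above; the positional model argument, the path values, and the prefill length are assumptions for illustration:

pytorch_model = tiny_llama.build_model("/path/to/checkpoint", kv_cache_max_len=1024)
converter.convert_to_tflite(
    pytorch_model,                       # assumed positional model argument
    tflite_path="/tmp/tiny_llama.tflite",
    prefill_seq_len=[1024],
    quantize=True,
    export_config=ExportConfig(),        # new in this release
)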

ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py
@@ -17,10 +17,16 @@
 
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.utilities import model_builder
+from torch import nn
 
 TENSOR_NAMES = model_builder.TENSOR_NAMES_WITH_SEPARATE_LM_HEAD
 
 
+class TinyLlama(model_builder.DecoderOnlyModel):
+  """A TinyLlama model built from the Edge Generative API layers."""
+  pass
+
+
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   """Returns the model config for a TinyLlama model.
 
@@ -73,11 +79,10 @@ def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
   return config
 
 
-def build_model(
-    checkpoint_path: str, **kwargs
-) -> model_builder.DecoderOnlyModel:
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
   return model_builder.build_decoder_only_model(
       checkpoint_path=checkpoint_path,
       config=get_model_config(**kwargs),
       tensor_names=TENSOR_NAMES,
+      model_class=TinyLlama,
   )

ai_edge_torch/generative/layers/attention.py
@@ -241,9 +241,7 @@ class CausalSelfAttention(nn.Module):
       q, k = _embed_rope(q, k, n_elem, rope)
 
     if kv_cache is not None:
-      kv_cache = kv_utils.update(
-          kv_cache, input_pos, k, v, enable_hlfb=self.enable_hlfb
-      )
+      kv_cache = kv_utils.update(kv_cache, input_pos, k, v)
       k, v = kv_cache.k_cache, kv_cache.v_cache
 
     y = self.sdpa_func(
@@ -379,9 +377,7 @@ class CrossAttention(nn.Module):
       q, k = _embed_rope(q, k, n_elem, rope)
 
     if kv_cache is not None:
-      kv_cache = kv_utils.update(
-          kv_cache, input_pos, k, v, enable_hlfb=self.enable_hlfb
-      )
+      kv_cache = kv_utils.update(kv_cache, input_pos, k, v)
       k, v = kv_cache.k_cache, kv_cache.v_cache
     if mask is None:
       mask = torch.zeros(

ai_edge_torch/generative/layers/kv_cache.py
@@ -20,6 +20,7 @@ from typing import List, Tuple
 
 from ai_edge_torch import hlfb
 from ai_edge_torch.generative.layers import model_config
+from ai_edge_torch.generative.utilities.dynamic_update_slice import dynamic_update_slice
 import torch
 import torch.utils._pytree as pytree
 
@@ -146,7 +147,7 @@ def update(
     input_pos: torch.Tensor,
     k_slice: torch.Tensor,
     v_slice: torch.Tensor,
-    enable_hlfb: bool = True,
+    use_dus: bool = True,
 ) -> KVCacheEntry:
   """Out of place update of Cache buffer.
 
@@ -155,17 +156,12 @@
     input_pos (torch.Tensor): The update slice positions.
     k_slice (torch.Tensor): The K slice to be updated in the new cache.
     v_slice (torch.Tensor): The V slice to be updated in the new cache.
-    enable_hlfb (bool, optional): Whether the op is annotated for export with
-      High Level Function Boundary. Defaults to True.
 
   Returns:
     KVCacheEntry: The updated KVCache entry based on the passed inputs.
   """
-  # Don't enable HLFB for kv cache op for now, since it won't work with LLM
-  # inference engine. Remove this part once we ship a new LLM inference engine.
-  enable_hlfb=False
-  update_func = _update_kv_hlfb_impl if enable_hlfb else _update_kv_base_impl
-  return update_func(cache, input_pos, k_slice, v_slice)
+  update_kv_cache = _update_kv_impl if use_dus else _update_kv_base_impl
+  return update_kv_cache(cache, input_pos, k_slice, v_slice)
 
 
 def _update_kv_base_impl(
@@ -181,18 +177,28 @@ def _update_kv_base_impl(
   return updated_cache
 
 
-def _update_kv_hlfb_impl(
+def _get_slice_indices(positions: torch.Tensor) -> torch.Tensor:
+  """Dynamic Update Slice updates are a variadic sequence of 0-rank tensors."""
+
+  zero = torch.zeros([]).int()
+  positions = positions.int()[0].reshape([])
+  return [zero, positions, zero, zero]
+
+
+def _update_kv_impl(
     cache: KVCacheEntry,
     input_pos: torch.Tensor,
     k_slice: torch.Tensor,
     v_slice: torch.Tensor,
 ) -> KVCacheEntry:
-  """Update the cache buffer with High Level Function Boundary annotation."""
-  builder = hlfb.StableHLOCompositeBuilder(name="odml.update_external_kv_cache")
-  k_cache, v_cache, input_pos, k_slice, v_slice = builder.mark_inputs(
-      cache.k_cache, cache.v_cache, input_pos, k_slice, v_slice
-  )
-  k = k_cache.index_copy(1, input_pos.to(torch.long), k_slice)
-  v = v_cache.index_copy(1, input_pos.to(torch.long), v_slice)
-  k, v = builder.mark_outputs(k, v)
-  return KVCacheEntry(k, v)
+  """Update the cache buffer for K and V caches."""
+  # NB: Here assume that input_pos == range(input_pos[0], len(input_pos))
+
+  k_slice_indices = _get_slice_indices(input_pos)
+  v_slice_indices = _get_slice_indices(input_pos)
+
+  k = dynamic_update_slice(cache.k_cache, k_slice, k_slice_indices)
+  v = dynamic_update_slice(cache.v_cache, v_slice, v_slice_indices)
+
+  updated_cache = KVCacheEntry(k, v)
+  return updated_cache
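
Note (not part of the diff): a pure-PyTorch sketch of what the new dynamic-update-slice path computes, assuming a [batch, cache_len, heads, head_dim] cache layout and contiguous positions starting at input_pos[0]. The real implementation calls the odml dynamic_update_slice helper rather than tensor slicing:

import torch

def reference_kv_update(k_cache, k_slice, input_pos):
  """Out-of-place stand-in: write k_slice into a copy of k_cache at input_pos[0]."""
  start = int(input_pos[0])
  length = k_slice.shape[1]
  updated = k_cache.clone()
  updated[:, start:start + length] = k_slice
  return updated

cache = torch.zeros(1, 8, 4, 16)
new_kv = torch.ones(1, 2, 4, 16)
print(reference_kv_update(cache, new_kv, torch.tensor([0, 1])).sum().item())  # 128.0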

ai_edge_torch/generative/layers/normalization.py
@@ -190,14 +190,12 @@ def group_norm_with_hlfb(
   """
   x = torch.permute(x, (0, 2, 3, 1))
 
-  # TODO: b/366544750 - Change "reduction_axes" field as an array, rather than
-  # int32 when the bug is fixed.
   builder = StableHLOCompositeBuilder(
       name="odml.group_norm",
       attr={
           "num_groups": num_groups,
           "epsilon": eps,
-          "reduction_axes": 3,
+          "reduction_axes": [3],
           "channel_axis": 3,
       },
   )

ai_edge_torch/generative/test/test_kv_cache.py
@@ -71,18 +71,18 @@ class TestKVLayers(googletest.TestCase):
         [0, 0, 5, 5, 0, 0, 0, 0],
     )
     # multi-slice update
-    input_pos = torch.tensor([0, 3])
+    input_pos = torch.tensor([0, 1])
     k_slice = v_slice = torch.full(
         (1, 2, NUM_QG, HEAD_DIM), 7, dtype=torch.float
     )
     updated_entry = kv_utils.update(entry, input_pos, k_slice, v_slice)
     self.assertEqual(
         updated_entry.k_cache.numpy().flatten().tolist(),
-        [7, 7, 0, 0, 0, 0, 7, 7],
+        [7, 7, 7, 7, 0, 0, 0, 0],
     )
     self.assertEqual(
         updated_entry.v_cache.numpy().flatten().tolist(),
-        [7, 7, 0, 0, 0, 0, 7, 7],
+        [7, 7, 7, 7, 0, 0, 0, 0],
     )
 
   def test_serialization(self):
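
Note (not part of the diff): the expected values change because the dynamic-update-slice path writes the whole slice contiguously starting at input_pos[0], whereas the old index_copy path scattered to each listed index. A toy 1-D illustration of that assumption:

import torch

cache = torch.zeros(8)
k_slice = torch.full((2,), 7.0)
start = int(torch.tensor([0, 1])[0])   # only the first position is used
cache[start:start + k_slice.numel()] = k_slice
print(cache.tolist())  # [7.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]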

ai_edge_torch/generative/test/test_model_conversion.py
@@ -16,12 +16,10 @@
 """Testing model conversion for a few gen-ai models."""
 
 import ai_edge_torch
-from ai_edge_torch import config as ai_edge_config
 from ai_edge_torch.generative.examples.test_models import toy_model_with_kv_cache
 from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
 from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.test import utils as test_utils
-from ai_edge_torch.generative.utilities import model_builder
 import numpy as np
 import torch
 
@@ -84,25 +82,25 @@ class TestModelConversion(googletest.TestCase):
     )
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_toy_model_with_kv_cache(self):
     self._test_model_with_kv_cache(enable_hlfb=False)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_toy_model_with_kv_cache_with_hlfb(self):
     self._test_model_with_kv_cache(enable_hlfb=True)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
-  def test_toy_model_has_ekv_op(self):
-    """Tests that the model has the external kv cache op."""
+  def test_toy_model_has_dus_op(self):
+    """Tests that the model has the dynamic update slice op."""
     _, edge_model, _ = self._get_params(enable_hlfb=True)
     interpreter_ = interpreter.InterpreterWithCustomOps(
         custom_op_registerers=["GenAIOpsRegisterer"],
@@ -112,7 +110,7 @@ class TestModelConversion(googletest.TestCase):
 
     # pylint: disable=protected-access
     op_names = [op["op_name"] for op in interpreter_._get_ops_details()]
-    self.assertIn("odml.update_external_kv_cache", op_names)
+    self.assertIn("DYNAMIC_UPDATE_SLICE", op_names)
 
   def _test_multisig_model(self, config, pytorch_model, atol, rtol):
     # prefill
@@ -180,12 +178,12 @@ class TestModelConversion(googletest.TestCase):
     )
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_tiny_llama_multisig(self):
     config = tiny_llama.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
+    pytorch_model = tiny_llama.TinyLlama(config).eval()
     self._test_multisig_model(config, pytorch_model, atol=1e-5, rtol=1e-5)
 
 
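
Note (not part of the diff): a hedged sketch of the same op check without the custom-ops interpreter, assuming TensorFlow is available; like the test above, it relies on the private _get_ops_details() helper, so treat it as an illustration rather than a stable API:

import tensorflow as tf

def has_dus_op(tflite_path: str) -> bool:
  """Returns True if the flatbuffer contains a DYNAMIC_UPDATE_SLICE op."""
  interp = tf.lite.Interpreter(model_path=tflite_path)
  # pylint: disable=protected-access
  op_names = [op["op_name"] for op in interp._get_ops_details()]
  return "DYNAMIC_UPDATE_SLICE" in op_names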

ai_edge_torch/generative/test/test_model_conversion_large.py
@@ -16,7 +16,6 @@
 
 """Testing model conversion for a few gen-ai models."""
 import ai_edge_torch
-from ai_edge_torch import config as ai_edge_config
 from ai_edge_torch.generative.examples.amd_llama_135m import amd_llama_135m
 from ai_edge_torch.generative.examples.gemma import gemma1
 from ai_edge_torch.generative.examples.gemma import gemma2
@@ -32,7 +31,6 @@ from ai_edge_torch.generative.examples.stable_diffusion import decoder as sd_dec
 from ai_edge_torch.generative.examples.stable_diffusion import diffusion as sd_diffusion
 from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.test import utils as test_utils
-from ai_edge_torch.generative.utilities import model_builder
 import numpy as np
 import torch
 
@@ -53,12 +51,15 @@ class TestModelConversion(googletest.TestCase):
             experimental_default_delegate_latest_features=True,
         )
     )
+    # Default cache_size_limit, 8 is hit and aborts often when the tests are
+    # running all together. Doubles it to avoid abortion.
+    torch._dynamo.config.cache_size_limit = 16
+    np.random.seed(1234)  # Make np.random deterministic.
 
   def _test_model(self, config, model, signature_name, atol, rtol):
-    idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
-    tokens = torch.zeros((1, 10), dtype=torch.int, device="cpu")
-    tokens[0, :4] = idx
-    input_pos = torch.arange(0, 10, dtype=torch.int)
+    seq_len = 10
+    tokens = torch.zeros((1, seq_len), dtype=torch.int, device="cpu")
+    input_pos = torch.arange(0, seq_len, dtype=torch.int)
     kv = kv_cache.KVCache.from_model_config(config)
 
     edge_model = ai_edge_torch.signature(
@@ -74,6 +75,7 @@ class TestModelConversion(googletest.TestCase):
         self._interpreter_builder(edge_model.tflite_model())
     )
 
+    tokens = torch.arange(1, seq_len + 1, dtype=torch.int).unsqueeze(0)
    self.assertTrue(
        test_utils.compare_tflite_torch(
            edge_model,
@@ -88,19 +90,17 @@ class TestModelConversion(googletest.TestCase):
     )
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_gemma1(self):
     config = gemma1.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
-    self._test_model(
-        config, pytorch_model, "serving_default", atol=1e-2, rtol=1e-5
-    )
+    pytorch_model = gemma1.Gemma1(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_gemma2(self):
     config = gemma2.get_fake_model_config()
@@ -108,8 +108,8 @@ class TestModelConversion(googletest.TestCase):
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_llama(self):
     config = llama.get_fake_model_config()
@@ -117,19 +117,18 @@ class TestModelConversion(googletest.TestCase):
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_phi2(self):
     config = phi2.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
-    self._test_model(
-        config, pytorch_model, "serving_default", atol=1e-3, rtol=1e-3
-    )
+    pytorch_model = phi2.Phi2(config).eval()
+    # Phi-2 logits are very big, so we need a larger absolute tolerance.
+    self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_phi3(self):
     config = phi3.get_fake_model_config()
@@ -137,58 +136,58 @@ class TestModelConversion(googletest.TestCase):
     self._test_model(config, pytorch_model, "prefill", atol=1e-5, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_smollm(self):
     config = smollm.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
+    pytorch_model = smollm.SmolLM(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_openelm(self):
     config = openelm.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
+    pytorch_model = openelm.OpenELM(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_qwen(self):
     config = qwen.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
+    pytorch_model = qwen.Qwen(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_amd_llama_135m(self):
     config = amd_llama_135m.get_fake_model_config()
-    pytorch_model = model_builder.DecoderOnlyModel(config).eval()
-    self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
+    pytorch_model = amd_llama_135m.AmdLlama(config).eval()
+    self._test_model(config, pytorch_model, "prefill", atol=1e-5, rtol=1e-5)
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
  )
-  def test_paligemma(self):
+  def disabled_test_paligemma(self):
     config = paligemma.get_fake_model_config()
     pytorch_model = paligemma.PaliGemma(config).eval()
-    idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
+
     image_embedding_config = config.image_encoder_config.image_embedding
     num_patches = (
         image_embedding_config.image_size // image_embedding_config.patch_size
     ) ** 2
+
     # Make sure the token size is longer than the number of image patches.
-    tokens_len = num_patches + 10
-    tokens = torch.zeros((1, tokens_len), dtype=torch.int, device="cpu")
-    tokens[0, :4] = idx
-    input_pos = torch.arange(0, tokens_len, dtype=torch.int)
+    seq_len = num_patches + 10
+    tokens = torch.zeros((1, seq_len), dtype=torch.int, device="cpu")
+    input_pos = torch.arange(0, seq_len, dtype=torch.int)
     kv = kv_cache.KVCache.from_model_config(config.decoder_config)
     pixel_values = torch.zeros((1, 3, 8, 8), dtype=torch.float32, device="cpu")
 
@@ -206,6 +205,7 @@ class TestModelConversion(googletest.TestCase):
         self._interpreter_builder(edge_model.tflite_model())
     )
 
+    tokens = torch.arange(1, seq_len + 1, dtype=torch.int).unsqueeze(0)
    self.assertTrue(
        test_utils.compare_tflite_torch(
            edge_model,
@@ -221,8 +221,8 @@ class TestModelConversion(googletest.TestCase):
     )
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_stable_diffusion_clip(self):
     config = sd_clip.get_fake_model_config()
@@ -244,7 +244,7 @@ class TestModelConversion(googletest.TestCase):
         signature_name="encode",
     )
     self.assertTrue(
-        np.allclose(
+        test_utils.compare_logits(
            edge_output,
            torch_output.detach().numpy(),
            atol=1e-4,
@@ -253,19 +253,21 @@ class TestModelConversion(googletest.TestCase):
     )
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_stable_diffusion_diffusion(self):
     config = sd_diffusion.get_fake_model_config(2)
+    # Reduce stddev(scale) of input values to avoid too big output logits which
+    # fails comparisons with reasonable tolerances.
     latents = torch.from_numpy(
-        np.random.normal(size=(2, 4, 8, 8)).astype(np.float32)
+        np.random.normal(size=(2, 4, 8, 8), scale=0.1).astype(np.float32)
     )
     context = torch.from_numpy(
-        np.random.normal(size=(2, 4, 4)).astype(np.float32)
+        np.random.normal(size=(2, 4, 4), scale=0.1).astype(np.float32)
     )
     time_embedding = torch.from_numpy(
-        np.random.normal(size=(2, 2)).astype(np.float32)
+        np.random.normal(size=(2, 2), scale=0.1).astype(np.float32)
     )
 
     pytorch_model = sd_diffusion.Diffusion(config).eval()
@@ -284,7 +286,7 @@ class TestModelConversion(googletest.TestCase):
         signature_name="diffusion",
     )
     self.assertTrue(
-        np.allclose(
+        test_utils.compare_logits(
            edge_output,
            torch_output.detach().numpy(),
            atol=1e-4,
@@ -293,13 +295,15 @@ class TestModelConversion(googletest.TestCase):
     )
 
   @googletest.skipIf(
-      ai_edge_config.Config.use_torch_xla,
-      reason="tests with custom ops are not supported on oss",
+      ai_edge_torch.config.in_oss,
+      reason="tests with custom ops are not supported in oss",
   )
   def test_stable_diffusion_decoder(self):
     config = sd_decoder.get_fake_model_config()
+    # Reduce stddev(scale) of input values to avoid too big output logits which
+    # fails comparisons with reasonable tolerances.
     latents = torch.from_numpy(
-        np.random.normal(size=(1, 4, 64, 64)).astype(np.float32)
+        np.random.normal(size=(1, 4, 64, 64), scale=0.1).astype(np.float32)
     )
 
     pytorch_model = sd_decoder.Decoder(config).eval()
@@ -316,10 +320,10 @@ class TestModelConversion(googletest.TestCase):
         signature_name="decode",
     )
     self.assertTrue(
-        np.allclose(
+        test_utils.compare_logits(
            edge_output,
            torch_output.detach().numpy(),
-            atol=1e-4,
+            atol=1e-3,
            rtol=1e-5,
        )
    )

ai_edge_torch/generative/test/utils.py
@@ -15,6 +15,8 @@
 
 """Common utils for testing."""
 
+import logging
+
 from ai_edge_torch import model
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.lowertools import common_utils
@@ -33,7 +35,7 @@ def compare_tflite_torch(
     atol: float = 1e-5,
     rtol: float = 1e-5,
     **kwargs,
-):
+) -> bool:
   """Compares torch models and TFLite models."""
   values, spec = pytree.tree_flatten({"kv_cache": kv_cache})
   flat_names = common_utils.flat_dict_names(spec.children_specs, spec.context)
@@ -49,9 +51,32 @@
       **kwargs,
   )
 
-  return np.allclose(
-      edge_output["logits"],
-      torch_output["logits"].detach().numpy(),
-      atol=atol,
-      rtol=rtol,
+  return compare_logits(
+      edge_output["logits"], torch_output["logits"].detach().numpy(), atol, rtol
   )
+
+
+def compare_logits(
+    edge_logits: np.ndarray,
+    torch_logits: dict[str, torch.Tensor],
+    atol: float = 1e-5,
+    rtol: float = 1e-5,
+) -> bool:
+  """Compares logits from edge model and torch model."""
+  if np.allclose(edge_logits, torch_logits, rtol, atol, equal_nan=True):
+    return True
+
+  logging.info("edge_logits: %s", edge_logits)
+  logging.info("torch_logits: %s", torch_logits)
+
+  orig_atol = atol
+  while rtol < 1:
+    atol = orig_atol
+    while atol < 1:
+      if np.allclose(edge_logits, torch_logits, rtol, atol, equal_nan=True):
+        logging.info("Got allclose true with atol=%s, rtol=%s", atol, rtol)
+        return False
+      atol *= 10
+    rtol *= 10
+  logging.info("allclose failed with reasonable atol and rtol.")
+  return False
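
Note (not part of the diff): a standalone illustration of the escalation idea in compare_logits above; when the strict comparison fails it probes looser tolerances purely for logging and still reports failure. The helper below just returns the first atol that would pass:

from typing import Optional

import numpy as np

def smallest_passing_atol(a: np.ndarray, b: np.ndarray, rtol: float = 1e-5) -> Optional[float]:
  """Probe increasing atol values until np.allclose passes, as compare_logits logs."""
  atol = 1e-5
  while atol < 1:
    if np.allclose(a, b, rtol=rtol, atol=atol, equal_nan=True):
      return atol
    atol *= 10
  return None

print(smallest_passing_atol(np.array([1.0, 2.0]), np.array([1.0, 2.001])))  # 0.001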