ai-edge-torch-nightly 0.5.0.dev20250424__py3-none-any.whl → 0.5.0.dev20250425__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_torch/_convert/conversion.py +1 -3
- ai_edge_torch/_convert/fx_passes/__init__.py +0 -1
- ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +63 -2
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +2 -1
- ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py +38 -4
- ai_edge_torch/generative/examples/deepseek/deepseek.py +1 -0
- ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +37 -2
- ai_edge_torch/generative/examples/qwen/qwen.py +1 -0
- ai_edge_torch/generative/layers/scaled_dot_product_attention.py +51 -0
- ai_edge_torch/generative/layers/sdpa_with_kv_update.py +4 -6
- ai_edge_torch/generative/test/test_model_conversion.py +3 -33
- ai_edge_torch/generative/test/test_model_conversion_large.py +3 -75
- ai_edge_torch/generative/utilities/converter.py +5 -0
- ai_edge_torch/odml_torch/lowerings/_decomp_registry.py +2 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/RECORD +20 -22
- ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +0 -129
- ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py +0 -93
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/top_level.txt +0 -0
ai_edge_torch/_convert/conversion.py CHANGED

@@ -35,13 +35,11 @@ def _run_convert_passes(
   )
 
   passes = [
-      fx_passes.CastInputsBf16ToF32Pass(),
-      fx_passes.BuildInterpolateCompositePass(),
-      fx_passes.CanonicalizePass(),
       fx_passes.OptimizeLayoutTransposesPass(),
       fx_passes.CanonicalizePass(),
       fx_passes.BuildAtenCompositePass(),
       fx_passes.RemoveNonUserOutputsPass(),
+      fx_passes.CastInputsBf16ToF32Pass(),
   ]
 
   # Debuginfo is not injected automatically by odml_torch. Only inject
ai_edge_torch/_convert/fx_passes/__init__.py CHANGED

@@ -16,7 +16,6 @@
 from typing import Sequence, Union
 
 from ai_edge_torch._convert.fx_passes.build_aten_composite_pass import BuildAtenCompositePass
-from ai_edge_torch._convert.fx_passes.build_interpolate_composite_pass import BuildInterpolateCompositePass
 from ai_edge_torch._convert.fx_passes.cast_inputs_bf16_to_f32_pass import CastInputsBf16ToF32Pass
 from ai_edge_torch._convert.fx_passes.inject_mlir_debuginfo_pass import InjectMlirDebuginfoPass
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import OptimizeLayoutTransposesPass
ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py CHANGED

@@ -20,7 +20,8 @@ import torch
 import torch.utils._pytree as pytree
 
 _composite_builders: dict[
-    Callable
+    Callable[[Any, ...], Any],
+    Callable[[torch.fx.GraphModule, torch.fx.Node], None],
 ] = {}
 
 
@@ -272,13 +273,73 @@ def _aten_embedding(gm: torch.fx.GraphModule, node: torch.fx.Node):
     output = op(**full_kwargs)
     output = builder.mark_outputs(output)
 
-    # Explicitly reshape back to the original shape. This places the ReshapeOp
+    # Explicitly reshape back to the original shape. This places the ReshapeOp
+    # outside of the HLFB.
     output = torch.reshape(output, (*(original_idx_shape), embedding_dim))
     return output
 
   node.target = embedding
 
 
+@_register_composite_builder(torch.ops.aten.upsample_bilinear2d.vec)
+def _aten_upsample_bilinear2d_vec(_, node: torch.fx.Node):
+  """Build a composite for aten.upsample_bilinear2d.vec."""
+  op = node.target
+  args_mapper = TorchOpArgumentsMapper(op)
+  # Assumes later FX passes does not change the args/kwargs of the op.
+  # Which is a valid assumption for, given that composite/mark_tensor wrapper
+  # should semantically prevents any future mutations on the op.
+  output_h, output_w = node.meta["val"].shape[-2:]
+
+  def upsample_bilinear2d_vec(*args, **kwargs):
+    nonlocal op, args_mapper
+    full_kwargs = args_mapper.get_full_kwargs(args, kwargs)
+
+    builder = lowertools.StableHLOCompositeBuilder(
+        name="odml.upsample_bilinear2d",
+        attr={
+            "size": (int(output_h), int(output_w)),
+            "align_corners": full_kwargs["align_corners"],
+            "is_nchw_op": True,
+        },
+    )
+    full_kwargs["input"] = builder.mark_inputs(full_kwargs["input"])
+    output = op(**full_kwargs)
+    output = builder.mark_outputs(output)
+    return output
+
+  node.target = upsample_bilinear2d_vec
+
+
+@_register_composite_builder(torch.ops.aten.upsample_nearest2d.vec)
+def _aten_upsample_nearest2d_vec(_, node: torch.fx.Node):
+  """Build a composite for aten.upsample_nearest2d.vec."""
+  op = node.target
+  args_mapper = TorchOpArgumentsMapper(op)
+  # Assumes later FX passes does not change the args/kwargs of the op.
+  # Which is a valid assumption for, given that composite/mark_tensor wrapper
+  # should semantically prevents any future mutations on the op.
+  output_h, output_w = node.meta["val"].shape[-2:]
+
+  def upsample_nearest2d_vec(*args, **kwargs):
+    nonlocal op, args_mapper
+    full_kwargs = args_mapper.get_full_kwargs(args, kwargs)
+
+    builder = lowertools.StableHLOCompositeBuilder(
+        name="tfl.resize_nearest_neighbor",
+        attr={
+            "size": (int(output_h), int(output_w)),
+            "is_nchw_op": True,
+        },
+    )
+    full_kwargs["input"] = builder.mark_inputs(full_kwargs["input"])
+    output = op(**full_kwargs)
+    output = builder.mark_outputs(output)
+    return output
+
+  node.target = upsample_nearest2d_vec
+
+
 class BuildAtenCompositePass(fx_infra.PassBase):
 
   def call(self, graph_module: torch.fx.GraphModule):
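As a quick illustration of what the new composite builders do, here is a minimal, hypothetical smoke test (not part of the diff) that assumes the public `ai_edge_torch.convert` entry point; module and file names are illustrative. With this release, `aten.upsample_bilinear2d.vec` / `aten.upsample_nearest2d.vec` are wrapped into composites by `BuildAtenCompositePass` instead of the removed pattern-matching `BuildInterpolateCompositePass`.

```python
# Hedged sketch: convert a module that uses F.interpolate and export it.
# Only ai_edge_torch.convert/export are assumed here; everything else is toy code.
import ai_edge_torch
import torch
import torch.nn.functional as F


class Upsample2x(torch.nn.Module):

  def forward(self, x):
    # Exports as aten.upsample_bilinear2d.vec, which the new builder wraps
    # into the "odml.upsample_bilinear2d" composite.
    return F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)


edge_model = ai_edge_torch.convert(Upsample2x().eval(), (torch.rand(1, 3, 64, 64),))
edge_model.export("/tmp/upsample2x.tflite")
```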
ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py CHANGED

@@ -17,6 +17,7 @@
 import operator
 
 import ai_edge_torch
+from ai_edge_torch import lowertools
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_mark
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import op_func_registry
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import utils
@@ -24,7 +25,7 @@ import torch
 import torch.utils._pytree as pytree
 
 aten = torch.ops.aten
-StableHLOCompositeBuilder =
+StableHLOCompositeBuilder = lowertools.StableHLOCompositeBuilder
 
 __all__ = ["rewrite_nhwc_node", "has_nhwc_rewriter"]
 
ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py CHANGED

@@ -17,11 +17,43 @@
 
 from absl import app
 from ai_edge_torch.generative.examples.deepseek import deepseek
+from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
-from ai_edge_torch.generative.utilities import
+from ai_edge_torch.generative.utilities.model_builder import export_cfg
+import torch
+
+flags = converter.define_conversion_flags('deepseek')
+ExportConfig = export_cfg.ExportConfig
+
+
+def _create_mask(mask_len, kv_cache_max_len):
+  mask = torch.full(
+      (mask_len, kv_cache_max_len), float('-inf'), dtype=torch.float32
+  )
+  mask = torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)
+  return mask
+
+
+def _create_export_config(
+    prefill_seq_lens: list[int], kv_cache_max_len: int
+) -> ExportConfig:
+  """Creates the export config for the model."""
+  export_config = ExportConfig()
+  if isinstance(prefill_seq_lens, list):
+    prefill_mask = [_create_mask(i, kv_cache_max_len) for i in prefill_seq_lens]
+  else:
+    prefill_mask = _create_mask(prefill_seq_lens, kv_cache_max_len)
+
+  export_config.prefill_mask = prefill_mask
+
+  decode_mask = torch.full(
+      (1, kv_cache_max_len), float('-inf'), dtype=torch.float32
+  )
+  decode_mask = torch.triu(decode_mask, diagonal=1).unsqueeze(0).unsqueeze(0)
+  export_config.decode_mask = decode_mask
+  export_config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
+  return export_config
 
-flags = converter.define_conversion_flags("deepseek")
-ExportConfig = export_config.ExportConfig
 
 def main(_):
   pytorch_model = deepseek.build_model(
@@ -34,7 +66,9 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=_create_export_config(
+          flags.FLAGS.prefill_seq_lens, flags.FLAGS.kv_cache_max_len
+      ),
   )
 
 
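For reference, the causal masks built by the new `_create_mask` helper look like this for a toy configuration; the sizes below (mask_len=3, kv_cache_max_len=5) are chosen here purely for illustration.

```python
import torch

# Same construction as _create_mask above, with toy sizes.
mask = torch.full((3, 5), float('-inf'), dtype=torch.float32)
mask = torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)  # shape (1, 1, 3, 5)
print(mask[0, 0])
# tensor([[0., -inf, -inf, -inf, -inf],
#         [0., 0., -inf, -inf, -inf],
#         [0., 0., 0., -inf, -inf]])
```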
ai_edge_torch/generative/examples/deepseek/deepseek.py CHANGED

@@ -53,6 +53,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   norm_config = cfg.NormalizationConfig(
       type=cfg.NormalizationType.RMS_NORM,
       epsilon=1e-06,
+      enable_hlfb=True,
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
ai_edge_torch/generative/examples/qwen/convert_to_tflite.py CHANGED

@@ -17,13 +17,14 @@
 
 from absl import app
 from ai_edge_torch.generative.examples.qwen import qwen
+from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
+import torch
 
 flags = converter.define_conversion_flags('qwen')
 ExportConfig = export_config.ExportConfig
 
-
 _MODEL_SIZE = flags.DEFINE_enum(
     'model_size',
     '3b',
@@ -37,6 +38,36 @@ _BUILDER = {
     '3b': qwen.build_3b_model,
 }
 
+
+def _create_mask(mask_len, kv_cache_max_len):
+  mask = torch.full(
+      (mask_len, kv_cache_max_len), float('-inf'), dtype=torch.float32
+  )
+  mask = torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)
+  return mask
+
+
+def _create_export_config(
+    prefill_seq_lens: list[int], kv_cache_max_len: int
+) -> ExportConfig:
+  """Creates the export config for the model."""
+  export_config = ExportConfig()
+  if isinstance(prefill_seq_lens, list):
+    prefill_mask = [_create_mask(i, kv_cache_max_len) for i in prefill_seq_lens]
+  else:
+    prefill_mask = _create_mask(prefill_seq_lens, kv_cache_max_len)
+
+  export_config.prefill_mask = prefill_mask
+
+  decode_mask = torch.full(
+      (1, kv_cache_max_len), float('-inf'), dtype=torch.float32
+  )
+  decode_mask = torch.triu(decode_mask, diagonal=1).unsqueeze(0).unsqueeze(0)
+  export_config.decode_mask = decode_mask
+  export_config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
+  return export_config
+
+
 def main(_):
   pytorch_model = _BUILDER[_MODEL_SIZE.value](
       flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
@@ -48,7 +79,11 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=_create_export_config(
+          flags.FLAGS.prefill_seq_lens, flags.FLAGS.kv_cache_max_len
+      )
+      if flags.FLAGS.transpose_kv_cache
+      else ExportConfig(),
   )
 
 
ai_edge_torch/generative/examples/qwen/qwen.py CHANGED

@@ -53,6 +53,7 @@ def get_3b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   norm_config = cfg.NormalizationConfig(
       type=cfg.NormalizationType.RMS_NORM,
       epsilon=1e-06,
+      enable_hlfb=True,
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
ai_edge_torch/generative/layers/scaled_dot_product_attention.py CHANGED

@@ -17,6 +17,8 @@
 import math
 from typing import Optional
 
+from ai_edge_torch.generative.custom_ops import bmm_4d as bmm_lib
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.hlfb import StableHLOCompositeBuilder
 import torch
 import torch.nn.functional as F
@@ -142,3 +144,52 @@ def scaled_dot_product_attention_with_hlfb(
   result = y.transpose(1, 2)
   result = builder.mark_outputs(result)
   return result
+
+
+def scaled_dot_product_attention_transposed(
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    head_size: int,
+    mask: Optional[torch.Tensor] = None,
+    scale: Optional[float] = None,
+    softcap: Optional[float] = None,
+):
+  """Scaled dot product attention with transposed key and value.
+
+  Args:
+    query: Query tensor, with shape [B, T, N, H].
+    key: Key tensor, with shape [B, T, KV_LEN, H].
+    value: Value tensor, with shape [B, T, KV_LEN, H].
+    head_size (int): head dimension.
+    mask (torch.Tensor): the optional mask tensor.
+    scale (float): the optional scale factor.
+    softcap (float): the optional softcap for the logits.
+
+  Returns:
+    The output tensor of scaled_dot_product_attention_transposed.
+  """
+
+  if scale is None:
+    scale = 1.0 / math.sqrt(head_size)
+
+  query = query * scale
+
+  assert mask is not None, "Mask should not be None!"
+  t = mask.shape[2]
+
+  logits = bmm_lib.bmm_4d(query, key)
+
+  _, bk, gt, s = logits.shape
+  g = gt // t
+  logits = logits.reshape((bk, g, t, s))
+  if softcap is not None:
+    logits = torch.tanh(logits / softcap)
+    logits = logits * softcap
+
+  padded_logits = logits + mask
+  padded_logits = padded_logits.reshape(1, bk, gt, s)
+  probs = F.softmax(padded_logits, dim=-1).type_as(key)
+  encoded = bmm_lib.bmm_4d(probs, value)
+
+  return encoded  # 1, bk, gt, h
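The soft-capping step used in the function above is easy to check in isolation. A minimal, self-contained sketch (plain torch only; the `bmm_4d` custom op and the KV-cache plumbing are deliberately omitted, and the values are illustrative):

```python
import torch

softcap = 30.0
logits = torch.tensor([-100.0, -5.0, 0.0, 5.0, 100.0])
# tanh(logits / softcap) * softcap squashes arbitrarily large logits into (-softcap, softcap).
capped = torch.tanh(logits / softcap) * softcap
print(capped)  # roughly [-29.92, -4.95, 0.00, 4.95, 29.92]
```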
ai_edge_torch/generative/layers/sdpa_with_kv_update.py CHANGED

@@ -18,9 +18,8 @@
 from typing import Tuple
 
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
-from ai_edge_torch.generative.layers import scaled_dot_product_attention as
+from ai_edge_torch.generative.layers import scaled_dot_product_attention as sdpa
 from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils_experimental
-from ai_edge_torch.generative.layers.experimental import scaled_dot_product_attention as sdpa
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
 
@@ -72,8 +71,7 @@ def _sdpa_with_kv_update_transposed(
   kv = kv_utils_experimental.update(kv, input_pos, key, value)
   key, value = kv.k_cache, kv.v_cache
 
-  sdpa_out = sdpa.
-      kv,
+  sdpa_out = sdpa.scaled_dot_product_attention_transposed(
       query,
       key,
       value,
@@ -105,9 +103,9 @@ def _sdpa_with_kv_update_default(
   key, value = kv.k_cache, kv.v_cache
 
   if enable_hlfb:
-    sdpa_func =
+    sdpa_func = sdpa.scaled_dot_product_attention_with_hlfb
   else:
-    sdpa_func =
+    sdpa_func = sdpa.scaled_dot_product_attention
   sdpa_out = sdpa_func(
       query,
       key,
ai_edge_torch/generative/test/test_model_conversion.py CHANGED

@@ -32,10 +32,8 @@ class TestModelConversion(googletest.TestCase):
 
   def setUp(self):
     super().setUp()
-    # Builder function for an Interpreter that supports custom ops.
     self._interpreter_builder = (
-        lambda tflite_model: lambda: interpreter.
-            custom_op_registerers=["GenAIOpsRegisterer"],
+        lambda tflite_model: lambda: interpreter.Interpreter(
             model_content=tflite_model,
             experimental_default_delegate_latest_features=True,
         )
@@ -85,44 +83,24 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_toy_model_with_kv_cache(self):
     self._test_model_with_kv_cache(enable_hlfb=False)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_toy_model_with_kv_cache_with_hlfb(self):
     self._test_model_with_kv_cache(enable_hlfb=True)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_toy_model_with_kv_cache_transposed(self):
     self._test_model_with_kv_cache(kv_layout=kv_cache.KV_LAYOUT_TRANSPOSED)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_toy_model_has_dus_op(self):
     """Tests that the model has the dynamic update slice op."""
     _, edge_model, _ = self._get_params(
         enable_hlfb=True, kv_layout=kv_cache.KV_LAYOUT_DEFAULT
     )
-
-        custom_op_registerers=["GenAIOpsRegisterer"],
-        model_content=edge_model.tflite_model(),
-        experimental_default_delegate_latest_features=True,
-    )
+    interpreter = self._interpreter_builder(edge_model.tflite_model())()
 
     # pylint: disable=protected-access
-    op_names = [op["op_name"] for op in
+    op_names = [op["op_name"] for op in interpreter._get_ops_details()]
     self.assertIn("DYNAMIC_UPDATE_SLICE", op_names)
 
   def _test_multisig_model(
@@ -197,19 +175,11 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_tiny_llama_multisig(self):
     config = tiny_llama.get_fake_model_config()
     pytorch_model = tiny_llama.TinyLlama(config).eval()
     self._test_multisig_model(config, pytorch_model, atol=1e-5, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_tiny_llama_multisig_kv_layout_transposed(self):
     config = tiny_llama.get_fake_model_config()
     pytorch_model = tiny_llama.TinyLlama(config).eval()
ai_edge_torch/generative/test/test_model_conversion_large.py CHANGED

@@ -48,10 +48,8 @@ class TestModelConversion(googletest.TestCase):
 
   def setUp(self):
     super().setUp()
-    # Builder function for an Interpreter that supports custom ops.
     self._interpreter_builder = (
-        lambda tflite_model: lambda: interpreter.
-            custom_op_registerers=["GenAIOpsRegisterer"],
+        lambda tflite_model: lambda: interpreter.Interpreter(
             model_content=tflite_model,
             experimental_default_delegate_latest_features=True,
         )
@@ -94,110 +92,62 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_gemma1(self):
     config = gemma1.get_fake_model_config()
     pytorch_model = gemma1.Gemma1(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_gemma2(self):
     config = gemma2.get_fake_model_config()
     pytorch_model = gemma2.Gemma2(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_llama(self):
     config = llama.get_fake_model_config()
     pytorch_model = llama.Llama(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_phi2(self):
     config = phi2.get_fake_model_config()
     pytorch_model = phi2.Phi2(config).eval()
     # Phi-2 logits are very big, so we need a larger absolute tolerance.
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_phi3(self):
     config = phi3.get_fake_model_config()
     pytorch_model = phi3.Phi3_5Mini(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-5, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_phi4(self):
     config = phi4.get_fake_model_config()
     pytorch_model = phi4.Phi4Mini(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_smollm(self):
     config = smollm.get_fake_model_config()
     pytorch_model = smollm.SmolLM(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_smollm2(self):
     config = smollm.get_fake_model_config_v2()
     pytorch_model = smollm.SmolLM2(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_openelm(self):
     config = openelm.get_fake_model_config()
     pytorch_model = openelm.OpenELM(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-4, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_qwen(self):
     config = qwen.get_fake_model_config()
     pytorch_model = qwen.Qwen(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-3, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_deepseek(self):
     config = deepseek.get_fake_model_config()
     pytorch_model = deepseek.DeepSeekDistillQwen(config).eval()
     self._test_model(config, pytorch_model, "prefill", atol=1e-5, rtol=1e-5)
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_amd_llama_135m(self):
     config = amd_llama_135m.get_fake_model_config()
     pytorch_model = amd_llama_135m.AmdLlama(config).eval()
@@ -246,19 +196,11 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_paligemma1(self):
     self._test_paligemma_model(
         decoder.Decoder, decoder.get_fake_decoder_config, atol=1e-3, rtol=1e-5
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_paligemma2(self):
     self._test_paligemma_model(
         decoder2.Decoder2,
@@ -267,10 +209,6 @@ class TestModelConversion(googletest.TestCase):
         rtol=1e-5,
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_qwen_vl_model(self):
     config = qwen_vl.get_fake_model_config()
     pytorch_model = qwen_vl.QwenVL(config).eval()
@@ -316,10 +254,7 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
+  @googletest.skipIf(ai_edge_torch.config.in_oss, reason="flaky")
   def test_stable_diffusion_clip(self):
     config = sd_clip.get_fake_model_config()
     prompt_tokens = torch.from_numpy(
@@ -348,10 +283,7 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
+  @googletest.skipIf(ai_edge_torch.config.in_oss, reason="b/413106901")
   def test_stable_diffusion_diffusion(self):
     config = sd_diffusion.get_fake_model_config(2)
     # Reduce stddev(scale) of input values to avoid too big output logits which
@@ -390,10 +322,6 @@ class TestModelConversion(googletest.TestCase):
         )
     )
 
-  @googletest.skipIf(
-      ai_edge_torch.config.in_oss,
-      reason="tests with custom ops are not supported in oss",
-  )
   def test_stable_diffusion_decoder(self):
     config = sd_decoder.get_fake_model_config()
     # Reduce stddev(scale) of input values to avoid too big output logits which
ai_edge_torch/generative/utilities/converter.py CHANGED

@@ -81,6 +81,11 @@ def define_conversion_flags(model_name: str):
       'If set, the model will be converted with the provided list of LoRA'
       ' ranks.',
   )
+  flags.DEFINE_bool(
+      'transpose_kv_cache',
+      False,
+      'If set, the model will be converted with transposed KV cache.',
+  )
 
   return flags
 
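A hedged sketch of exercising the new flag without a full conversion run: flags registered by `define_conversion_flags` are ordinary absl flags, so they can be parsed programmatically. The model name and the argv-style list below are illustrative, not taken from the diff.

```python
from absl import flags as absl_flags
from ai_edge_torch.generative.utilities import converter

# Registers checkpoint_path, quantize, transpose_kv_cache, etc. on the global FLAGS.
flags = converter.define_conversion_flags('toy')
absl_flags.FLAGS(['prog', '--transpose_kv_cache'])  # parse an argv-style list
# Converter scripts (e.g. qwen/convert_to_tflite.py above) read this to pick the
# transposed-KV-cache export config.
print(flags.FLAGS.transpose_kv_cache)  # True
```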
ai_edge_torch/odml_torch/lowerings/_decomp_registry.py CHANGED

@@ -34,6 +34,8 @@ fx_infra.decomp.update_pre_lower_decomp(
         torch.ops.aten.replication_pad1d,
         torch.ops.aten.replication_pad2d,
         torch.ops.aten.replication_pad3d,
+        torch.ops.aten.upsample_bilinear2d.vec,
+        torch.ops.aten.upsample_nearest2d.vec,
         torch.ops.aten.addmm,
     ])
 )
ai_edge_torch/version.py CHANGED

{ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.5.0.
+Version: 0.5.0.dev20250425
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
{ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250425.dist-info}/RECORD CHANGED

@@ -2,16 +2,15 @@ ai_edge_torch/__init__.py,sha256=8sPR_5uXJA4NEE0nIwNdSl-ADOJEoR8hAgYvBQDY70Y,120
 ai_edge_torch/_config.py,sha256=AiqhbcheF7j_ozIGDLC89k1we95aVgFDa-tR6h7UI0s,2529
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
-ai_edge_torch/version.py,sha256=
+ai_edge_torch/version.py,sha256=_aF64u6MXH8zPBTEg6odQq2WazbUIxQYlfJNXzfkMdM,706
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/_convert/conversion.py,sha256=
+ai_edge_torch/_convert/conversion.py,sha256=QVugYVfbyaeBgSKKbhFzHG5oXA7t3M-40JcpcdSu6W8,5436
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
 ai_edge_torch/_convert/converter.py,sha256=075F8LRewk_033Ebsnft7FJr3KgtIbtZ_-8udIPy6ho,9980
 ai_edge_torch/_convert/signature.py,sha256=-YKJdLk-eNEHfhdPCtcQVtZf915SoVePEFxKXPPf16c,2572
 ai_edge_torch/_convert/to_channel_last_io.py,sha256=_31phf7TYgZY2ftpNbrdlB1RhDium1lz_BXEQ6IsMFc,2893
-ai_edge_torch/_convert/fx_passes/__init__.py,sha256=
-ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py,sha256=
-ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py,sha256=3JyjiHpn17Zhfq3yGQXK5LMH71DQPXHb_4GOkP9uAjY,4251
+ai_edge_torch/_convert/fx_passes/__init__.py,sha256=jbRCZmSduG_1qmngaEEtbofAyL1PKZ8P1uxzzsXQhsw,1253
+ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py,sha256=dgUO-lI9Id9hIOHP5XmegVlu5Fl79GR4_b-lDUehzoo,11428
 ai_edge_torch/_convert/fx_passes/cast_inputs_bf16_to_f32_pass.py,sha256=90YxLVAAkiA3qKr4Um__JmPeC1bTeA2PxBCj0GETq1Q,1748
 ai_edge_torch/_convert/fx_passes/inject_mlir_debuginfo_pass.py,sha256=Z6E3U7SYZvMl3Ivpqa3burVOLKFndEZuNmWKNxjq2mM,2386
 ai_edge_torch/_convert/fx_passes/remove_non_user_outputs_pass.py,sha256=HCOkj0k3NhaYbtfjE8HDXVmYhZ9fL5V_u6VunVh9mN4,2116
@@ -19,7 +18,7 @@ ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/__init__.py,sha
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/_decomp_registry.py,sha256=aWO_zHDF4j_hokoKJQNFIFmua4ysXztsgS6pcyBUht0,1082
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_check.py,sha256=7yEKSfXskXUk4tsd7c8vL155O-iU4eUjXCU5RSZHrbw,8204
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_mark.py,sha256=4RyGUwR22bZqkn_TnptenFJodc_Q43f4_SBG7gmTbos,1621
-ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py,sha256=
+ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py,sha256=IhEh3tTP3-AmQlpt24stKKEl0AIRyuo2REZIbhkmgJo,13940
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/op_func_registry.py,sha256=OhisegHY2j4cv_m9auCh9Mq9qmm1lUqpFLVO9X-oBlc,1032
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/pass_body.py,sha256=mr0MiLbaQmU-3S3KT-vb58kRWbNT3VJiCKY-K7_3tFg,10556
 ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/utils.py,sha256=YLMttMg5PdvXTtQ8lxpKb434UGVvYVALV1-xeuH4UGc,2131
@@ -54,8 +53,8 @@ ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py,sha256=urNif8
 ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py,sha256=z5MWiZLnsQzhNYMiQbcI9i0ki-dtkbimCptkiTFZxwo,1586
 ai_edge_torch/generative/examples/amd_llama_135m/verify.py,sha256=o13NkFlBgawBsjdJup05VMUjAPvDRAmig6VyEkX8q6U,2426
 ai_edge_torch/generative/examples/deepseek/__init__.py,sha256=JaAnrFoXTl3RJX97XspklkTyqOHVyAgRJsZtzNDd10c,671
-ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py,sha256=
-ai_edge_torch/generative/examples/deepseek/deepseek.py,sha256=
+ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py,sha256=1wz4h3bjyX2qMRZ310UKGNYTORegzxinVFmYz2Fupm4,2666
+ai_edge_torch/generative/examples/deepseek/deepseek.py,sha256=yhS_i2kR0GJWpWciCt4p9Z9nHYh6A5uJ8Ycy2ebFN9w,2909
 ai_edge_torch/generative/examples/deepseek/verify.py,sha256=iYldze-pvZGvPkkqr6zA7EmitPnH9sXkzjNVx353IcE,2403
 ai_edge_torch/generative/examples/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py,sha256=tSEtGeS-Ndcc_cTm7c4CT4FqRiwrHedEv1oJk4Y_zYU,1552
@@ -104,8 +103,8 @@ ai_edge_torch/generative/examples/phi/verify.py,sha256=YPFCdbnfmvq38fbpBNr0kHPfS
 ai_edge_torch/generative/examples/phi/verify_phi3.py,sha256=kVYaBVvddfQng0IyZGxyTJEzhiPO0G4VFJm2WOc2Q94,2360
 ai_edge_torch/generative/examples/phi/verify_phi4.py,sha256=BoCa5kUBRHtMQ-5ql6yD4pG4xHJMyUiQlpMOWVx-JgY,2356
 ai_edge_torch/generative/examples/qwen/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
-ai_edge_torch/generative/examples/qwen/convert_to_tflite.py,sha256
-ai_edge_torch/generative/examples/qwen/qwen.py,sha256=
+ai_edge_torch/generative/examples/qwen/convert_to_tflite.py,sha256=-Xe5koexhNUkWjS2XgS9Ggg7XOQAlMO8QcBJRTNjJa4,2972
+ai_edge_torch/generative/examples/qwen/qwen.py,sha256=m8APYzo9N0SXsdvCxC8HtCcbN3W7gLKkRBL-Tg0BWXU,4223
 ai_edge_torch/generative/examples/qwen/verify.py,sha256=9_AyEJTeUfvhhID64Rto2bflFPyXMFokdQLsseLUMiI,2775
 ai_edge_torch/generative/examples/qwen_vl/__init__.py,sha256=JaAnrFoXTl3RJX97XspklkTyqOHVyAgRJsZtzNDd10c,671
 ai_edge_torch/generative/examples/qwen_vl/convert_to_tflite.py,sha256=yVebRatt2SLCsGvrYTBXOM-0S2REhkpikHTyy5MCjUw,2222
@@ -159,11 +158,10 @@ ai_edge_torch/generative/layers/lora.py,sha256=hsvWLLOnW7HQ0AysOZu30x_cetMquDd1t
 ai_edge_torch/generative/layers/model_config.py,sha256=nLXvTkDAIHJQ0PTaWODF8oxJQoJ-K8D10cKR9229SAw,8355
 ai_edge_torch/generative/layers/normalization.py,sha256=MbwH-n80Fob5YvjBzdqDjBizMHLzSJGYRDdbD-rL5C0,6174
 ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=975zR202MdIrILJ7blceAcxrNqX1ZCN0ECKG1gz-bV8,2655
-ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=
-ai_edge_torch/generative/layers/sdpa_with_kv_update.py,sha256=
+ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=efqqGRZPJ55hKn1MQJ-cXfrJD85uS1v7W_juyGyts58,5648
+ai_edge_torch/generative/layers/sdpa_with_kv_update.py,sha256=Hn8Zw-jiB9GH2uZ-yaRMcDdpmjECcW4uCy-YNH9zV8c,3693
 ai_edge_torch/generative/layers/experimental/__init__.py,sha256=nz-K0h8DfiATHzR6s1_bCw2akUmHWffU1bDRSkIzSqI,592
 ai_edge_torch/generative/layers/experimental/kv_cache.py,sha256=zgpFVftOfllvjh9-UEBSvUbm152SnQETn29rUMMMvAM,2978
-ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py,sha256=YFcIGOkaNb-vvQKjI-G9-bC2Z1W0O_qRyIZPlsLl72U,2797
 ai_edge_torch/generative/layers/unet/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/layers/unet/blocks_2d.py,sha256=ZteHZXK6HKyxYji49DQ46sA9aIy7U3Jnz0HZp6hfevY,28996
 ai_edge_torch/generative/layers/unet/builder.py,sha256=zAqWXdimmMrQRhmE_t9XkS68mh6PSrzwb-2NZZXrR5I,1901
@@ -180,12 +178,12 @@ ai_edge_torch/generative/test/test_custom_dus.py,sha256=MjIhTvkTko872M35XMciobvI
 ai_edge_torch/generative/test/test_kv_cache.py,sha256=1sXN2RPntq0PP3IEy0NkvIbzQ0Y8JhPIwRSFwO9JLlE,5728
 ai_edge_torch/generative/test/test_loader.py,sha256=9mQUeeZKOVApOWSWl2cN9c10axZjMKM1-0Zd823CCS4,3449
 ai_edge_torch/generative/test/test_lora.py,sha256=6QIM6RLTc2HrodGpp_aS3OxM9Rco2KAzEnYgotkg41M,5310
-ai_edge_torch/generative/test/test_model_conversion.py,sha256=
-ai_edge_torch/generative/test/test_model_conversion_large.py,sha256
+ai_edge_torch/generative/test/test_model_conversion.py,sha256=mhNJikLnGVGi9NKmXB8FhnqeDy9gtrvC3yEbrTABZ4Y,6163
+ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=6LkLnFOvlnt7JVVDYKMaZClPRBEvdjq6xnSjIFYNdI8,12554
 ai_edge_torch/generative/test/test_quantize.py,sha256=bEJMhpQ9bIDUZVBXTW888728FcH-i3SyE4JSZZUgU0A,6071
 ai_edge_torch/generative/test/utils.py,sha256=tF6aCfAGJnc9dmzCnZCEOuKNVimfWOqscv9og0DDLHU,2656
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
-ai_edge_torch/generative/utilities/converter.py,sha256=
+ai_edge_torch/generative/utilities/converter.py,sha256=z3CvNJxKzglu1BU_5ri91RUeGHh7urhoWFbk0oq7i2M,10768
 ai_edge_torch/generative/utilities/export_config.py,sha256=8-795nyd3M34LkGhgW7hwHlJyTc2Oz1iipHK8yBhdFs,1633
 ai_edge_torch/generative/utilities/loader.py,sha256=7p__m2JryWphGlYOuRxdoT4id4_tWJEVOV7y2X4H-Ak,13737
 ai_edge_torch/generative/utilities/model_builder.py,sha256=ZYX1TxpFdj573du2QCyHJlFjx4q1m12R74fp4Gwl92A,6343
@@ -227,7 +225,7 @@ ai_edge_torch/odml_torch/lowerings/__init__.py,sha256=uJ-niilt1c-D6QJzLwgvCUf62l
 ai_edge_torch/odml_torch/lowerings/_basic.py,sha256=fEWjIdEpDIqT1EYLZE13O9A41OuaNdbfBrv3vNxS9gI,11601
 ai_edge_torch/odml_torch/lowerings/_batch_norm.py,sha256=PaLI0BB6pdBW1VyfW8VTOT_Be-ZcqYdNOsyfzKfq8Cg,2064
 ai_edge_torch/odml_torch/lowerings/_convolution.py,sha256=Q0aDzyUcZMoSzSbOU-r3LJMgPe6fble0QwdYVIOHHHk,6887
-ai_edge_torch/odml_torch/lowerings/_decomp_registry.py,sha256=
+ai_edge_torch/odml_torch/lowerings/_decomp_registry.py,sha256=ybOdoFE5HIJTkyiYcc73zpyUyUpioVnAca6k0wyJPs4,2572
 ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py,sha256=tkaDo232HjuZvJHyua0n6tdHecifUuVzclJAGq7PPYs,11428
 ai_edge_torch/odml_torch/lowerings/_layer_norm.py,sha256=khJIvDVk2s332Nd2Be-5dM6-wp5DGff61HCV5lskHmQ,3011
 ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py,sha256=XDZ0zLej_XaQDJnaAAxhNFAd7NfQm5SOVEp_nno_krA,6178
@@ -244,8 +242,8 @@ ai_edge_torch/testing/__init__.py,sha256=_yGgvnBZWb7T3IN3mc4x1sS4vM96HZwM8pwIcPG
 ai_edge_torch/testing/export.py,sha256=k5mGDGzwc23Z4zaIVDs8CNh-oOt64gsf9MS9NjhbPy4,3293
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.5.0.
-ai_edge_torch_nightly-0.5.0.
-ai_edge_torch_nightly-0.5.0.
-ai_edge_torch_nightly-0.5.0.
-ai_edge_torch_nightly-0.5.0.
+ai_edge_torch_nightly-0.5.0.dev20250425.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.5.0.dev20250425.dist-info/METADATA,sha256=owGeoLcv0XFf4tXFatFjXLSisoaRBBwrtyLx3LFq8PM,2051
+ai_edge_torch_nightly-0.5.0.dev20250425.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_torch_nightly-0.5.0.dev20250425.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.5.0.dev20250425.dist-info/RECORD,,
ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py DELETED

@@ -1,129 +0,0 @@
-# Copyright 2024 The AI Edge Torch Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Build interpolate composite pass."""
-
-import functools
-
-from ai_edge_torch import fx_infra
-from ai_edge_torch.hlfb import mark_pattern
-from ai_edge_torch.hlfb.mark_pattern import pattern as pattern_module
-import torch
-
-# For torch nightly released after mid June 2024,
-# torch.nn.functional.interpolate no longer gets exported into decomposed graph
-# but a single aten op:
-# torch.ops.aten.upsample_nearest2d.vec/torch.ops.aten.upsample_bilinear2d.vec.
-# This would interefere with our pattern matching based composite builder.
-# Here we register the now missing decompositions first.
-_INTERPOLATE_DECOMPOSITIONS = torch._decomp.get_decompositions([
-    torch.ops.aten.upsample_bilinear2d.vec,
-    torch.ops.aten.upsample_nearest2d.vec,
-])
-
-
-@functools.cache
-def _get_upsample_bilinear2d_pattern():
-  pattern = pattern_module.Pattern(
-      "odml.upsample_bilinear2d",
-      lambda x: torch.nn.functional.interpolate(
-          x, scale_factor=2, mode="bilinear", align_corners=False
-      ),
-      export_args=(torch.rand(1, 3, 100, 100),),
-      extra_decomp_table=_INTERPOLATE_DECOMPOSITIONS,
-  )
-
-  @pattern.register_attr_builder
-  def attr_builder(pattern, graph_module, internal_match):
-    output = internal_match.returning_nodes[0]
-    output_h, output_w = output.meta["val"].shape[-2:]
-    return {
-        "size": (int(output_h), int(output_w)),
-        "align_corners": False,
-        "is_nchw_op": True,
-    }
-
-  return pattern
-
-
-@functools.cache
-def _get_upsample_bilinear2d_align_corners_pattern():
-  pattern = pattern_module.Pattern(
-      "odml.upsample_bilinear2d",
-      lambda x: torch.nn.functional.interpolate(
-          x, scale_factor=2, mode="bilinear", align_corners=True
-      ),
-      export_args=(torch.rand(1, 3, 100, 100),),
-      extra_decomp_table=_INTERPOLATE_DECOMPOSITIONS,
-  )
-
-  @pattern.register_attr_builder
-  def attr_builder(graph_module, pattern, internal_match):
-    output = internal_match.returning_nodes[0]
-    output_h, output_w = output.meta["val"].shape[-2:]
-    return {
-        "size": (int(output_h), int(output_w)),
-        "align_corners": True,
-        "is_nchw_op": True,
-    }
-
-  return pattern
-
-
-@functools.cache
-def _get_interpolate_nearest2d_pattern():
-  pattern = pattern_module.Pattern(
-      "tfl.resize_nearest_neighbor",
-      lambda x: torch.nn.functional.interpolate(
-          x, scale_factor=2, mode="nearest"
-      ),
-      export_args=(torch.rand(1, 3, 100, 100),),
-      extra_decomp_table=_INTERPOLATE_DECOMPOSITIONS,
-  )
-
-  @pattern.register_attr_builder
-  def attr_builder(pattern, graph_module, internal_match):
-    output = internal_match.returning_nodes[0]
-    output_h, output_w = output.meta["val"].shape[-2:]
-    return {
-        "size": (int(output_h), int(output_w)),
-        "is_nchw_op": True,
-    }
-
-  return pattern
-
-
-class BuildInterpolateCompositePass(fx_infra.ExportedProgramPassBase):
-
-  def __init__(self):
-    super().__init__()
-    self._patterns = [
-        _get_upsample_bilinear2d_pattern(),
-        _get_upsample_bilinear2d_align_corners_pattern(),
-        _get_interpolate_nearest2d_pattern(),
-    ]
-
-  def call(self, exported_program: torch.export.ExportedProgram):
-    exported_program = fx_infra.safe_run_decompositions(
-        exported_program,
-        _INTERPOLATE_DECOMPOSITIONS,
-    )
-
-    graph_module = exported_program.graph_module
-    for pattern in self._patterns:
-      graph_module = mark_pattern.mark_pattern(graph_module, pattern)
-
-    graph_module.graph.lint()
-    graph_module.recompile()
-    return fx_infra.ExportedProgramPassResult(exported_program, True)
ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py DELETED

@@ -1,93 +0,0 @@
-# Copyright 2025 The AI Edge Torch Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# Implements scaled dot product attention. This is experimental and
-# GPU-specific code.
-
-import math
-from typing import Optional
-
-from ai_edge_torch.generative.custom_ops import bmm_4d as bmm_lib
-from ai_edge_torch.generative.layers import kv_cache as kv_utils
-from ai_edge_torch.generative.utilities import types
-from ai_edge_torch.hlfb import StableHLOCompositeBuilder
-from multipledispatch import dispatch
-import torch
-import torch.nn.functional as F
-
-
-def scaled_dot_product_attention(
-    kv: kv_utils.KVCacheEntry,
-    query: torch.Tensor,
-    key: torch.Tensor,
-    value: torch.Tensor,
-    head_size: int,
-    mask: Optional[torch.Tensor] = None,
-    scale: Optional[float] = None,
-    softcap: Optional[float] = None,
-):
-  if hasattr(kv, "kv_layout"):
-    return _sdpa(
-        kv.kv_layout[0](),  # key layout
-        kv.kv_layout[1](),  # value layout
-        query=query,
-        key=key,
-        value=value,
-        head_size=head_size,
-        mask=mask,
-        scale=scale,
-        softcap=softcap,
-    )
-  raise ValueError("No kv_layout attribute found in kv.")
-
-
-@dispatch(types.BNTH, types.BNHT)
-def _sdpa(k_type, v_type, *args, **kwargs):
-  query = kwargs["query"]
-  key = kwargs["key"]
-  value = kwargs["value"]
-  head_size = kwargs["head_size"]
-  mask = kwargs.get("mask", None)
-  scale = kwargs.get("scale", None)
-  softcap = kwargs.get("softcap", None)
-
-  if scale is None:
-    scale = 1.0 / math.sqrt(head_size)
-
-  query = query * scale
-
-  assert mask is not None, "Mask should not be None!"
-  t = mask.shape[2]
-
-  logits = bmm_lib.bmm_4d(query, key)
-
-  _, bk, gt, s = logits.shape
-  g = gt // t
-  logits = logits.reshape((bk, g, t, s))
-  if softcap is not None:
-    logits = torch.tanh(logits / softcap)
-    logits = logits * softcap
-
-  padded_logits = logits + mask
-  padded_logits = padded_logits.reshape(1, bk, gt, s)
-  probs = F.softmax(padded_logits, dim=-1).type_as(key)
-  encoded = bmm_lib.bmm_4d(probs, value)
-
-  return encoded  # 1, bk, gt, h
-
-
-@dispatch(object, object)
-def _sdpa(k_type, v_type, *args, **kwargs):
-
-  raise ValueError(f"No implementations for k={k_type} and v={v_type}")