PyPI - ai-edge-torch-nightly - Versions diffs - 0.4.0.dev20250407__py3-none-any.whl → 0.5.0.dev20250409__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.4.0.dev20250407__py3-none-any.whl → 0.5.0.dev20250409__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py CHANGED Viewed

@@ -17,7 +17,7 @@
 from absl import app
 from ai_edge_torch.generative.examples.gemma3 import gemma3
-from ai_edge_torch.generative.layers.experimental import kv_cache
+from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
 import torch
@@ -58,7 +58,7 @@ def _create_export_config(
   )
   decode_mask = torch.triu(decode_mask, diagonal=1).unsqueeze(0).unsqueeze(0)
   export_config.decode_mask = decode_mask
-  export_config.kvcache_cls = kv_cache.KVCacheTransposed
+  export_config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
   return export_config

ai_edge_torch/generative/examples/gemma3/decoder.py CHANGED Viewed

@@ -18,9 +18,9 @@
 from typing import List, Optional, Tuple
 from ai_edge_torch.generative.layers import builder
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 from ai_edge_torch.generative.layers.experimental import attention
-from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.layers.rotary_position_embedding as rotary_pos_emb
 from ai_edge_torch.generative.utilities import export_config as export_cfg
@@ -81,8 +81,8 @@ class DecoderBlock(attention.TransformerBlock):
       rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
       mask: Optional[torch.Tensor] = None,
       input_pos: Optional[torch.Tensor] = None,
-      kv_cache: kv_utils.KVCacheEntryBase = None,
-  ) -> Tuple[torch.Tensor, Optional[kv_utils.KVCacheEntryBase]]:
+      kv_cache: kv_utils.KVCacheEntry = None,
+  ) -> Tuple[torch.Tensor, Optional[kv_utils.KVCacheEntry]]:
     """Forward function of the Gemma3Block.
     Exactly the same as TransformerBlock but we call the post-attention norm
@@ -241,13 +241,12 @@ class Decoder(nn.Module):
       self,
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
-      kv_cache: kv_utils.KVCacheBase,
+      kv_cache: kv_utils.KVCache,
       input_embeds: Optional[torch.Tensor] = None,
       mask: Optional[torch.Tensor] = None,
       image_indices: Optional[torch.Tensor] = None,
       export_config: Optional[export_cfg.ExportConfig] = None,
-  ) -> dict[torch.Tensor, kv_utils.KVCacheBase]:
+  ) -> dict[torch.Tensor, kv_utils.KVCache]:
     pixel_mask = None
     if input_embeds is None:
       # token embeddings of shape (b, t, n_embd)
@@ -287,10 +286,10 @@ class Decoder(nn.Module):
       rope: List[Tuple[torch.Tensor, torch.Tensor]],
       mask: torch.Tensor | List[torch.Tensor],
       input_pos: torch.Tensor,
-      kv_cache: kv_utils.KVCacheBase,
+      kv_cache: kv_utils.KVCache,
       pixel_mask: Optional[torch.Tensor] = None,
       export_config: Optional[export_cfg.ExportConfig] = None,
-  ) -> dict[torch.Tensor, kv_utils.KVCacheBase]:
+  ) -> dict[torch.Tensor, kv_utils.KVCache]:
     """Forwards the model with input embeddings."""
     assert len(self.transformer_blocks) == len(kv_cache.caches), (
         "The number of transformer blocks and the number of KV cache entries"
@@ -326,7 +325,7 @@ class Decoder(nn.Module):
       x, kv_entry = block(x, rope[i], mask_entry, input_pos, kv_entry)
       if kv_entry:
         updated_kv_entries.append(kv_entry)
-    updated_kv_cache = kv_utils.KVCacheBase(tuple(updated_kv_entries))
+    updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entries))
     if export_config is not None:
       if (
           torch.numel(input_pos) > 1

ai_edge_torch/generative/examples/gemma3/verify_util.py CHANGED Viewed

@@ -20,8 +20,8 @@ import os
 from typing import List, Optional, Tuple
 from ai_edge_torch.generative.examples.gemma3 import gemma3
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
-from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils
 from ai_edge_torch.generative.utilities.experimental import verifier
 from gemma import config as gemma_config
 from gemma import model as gemma_model
@@ -94,7 +94,9 @@ class UnifiedGemma3Wrapper(verifier.ReauthoredModelWrapper):
   def _init_kv_cache(self):
     """Returns an initialized KV cache."""
-    return kv_utils.KVCacheTransposed.from_model_config(self.model.model.config)
+    return kv_utils.KVCache.from_model_config(
+        self.model.model.config, kv_layout=kv_utils.KV_LAYOUT_TRANSPOSED
+    )
   def forward(
       self, tokens: torch.Tensor, pixel_values: torch.Tensor = None

ai_edge_torch/generative/layers/experimental/attention.py CHANGED Viewed

@@ -22,8 +22,9 @@ at any time.
 from typing import Optional, Tuple, Union
 from ai_edge_torch.generative.layers import builder
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.layers import lora as lora_utils
-from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils
+from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils_experimental
 from ai_edge_torch.generative.layers.experimental import scaled_dot_product_attention as sdpa
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.layers.rotary_position_embedding as rotary_pos_emb
@@ -69,9 +70,9 @@ class TransformerBlock(nn.Module):
       rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
       mask: Optional[torch.Tensor] = None,
       input_pos: Optional[torch.Tensor] = None,
-      kv_cache: kv_utils.KVCacheEntryBase = None,
+      kv_cache: kv_utils.KVCacheEntry = None,
       lora: Optional[lora_utils.LoRAEntry] = None,
-  ) -> Union[torch.Tensor, Tuple[torch.Tensor, kv_utils.KVCacheEntryBase]]:
+  ) -> Union[torch.Tensor, Tuple[torch.Tensor, kv_utils.KVCacheEntry]]:
     """Forward function of the TransformerBlock.
     Args:
@@ -79,7 +80,7 @@ class TransformerBlock(nn.Module):
       rope (Tuple[torch.Tensor, torch.Tensor]): the input rope tensor.
       mask (torch.Tensor): the optional mask tensor.
       input_pos (torch.Tensor): the optional input position tensor.
-      kv_cache (KVCacheEntryBase): the optional kv cache entry.
+      kv_cache (KVCacheEntry): the optional kv cache entry.
       lora (LoRAEntry): the optional lora entry.
     Returns:
@@ -154,9 +155,9 @@ class CausalSelfAttention(nn.Module):
       rope: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
       mask: Optional[torch.Tensor] = None,
       input_pos: Optional[torch.Tensor] = None,
-      kv_cache: Optional[kv_utils.KVCacheEntryBase] = None,
+      kv_cache: Optional[kv_utils.KVCacheEntry] = None,
       lora: Optional[lora_utils.LoRAEntry] = None,
-  ) -> Union[torch.Tensor, Tuple[torch.Tensor, kv_utils.KVCacheEntryBase]]:
+  ) -> Union[torch.Tensor, Tuple[torch.Tensor, kv_utils.KVCacheEntry]]:
     """Forward function of the CausalSelfAttention layer, which can support
        MQA, GQA and MHA.
@@ -166,8 +167,7 @@ class CausalSelfAttention(nn.Module):
       rope (Tuple[torch.Tensor, torch.Tensor]): the input rope tensor.
       mask (torch.Tensor): the optional mask tensor.
       input_pos (torch.Tensor): the optional input position tensor.
-      kv_cache (KVCacheEntryBase): the KV cache entry corresponding to this
-        module.
+      kv_cache (KVCacheEntry): the KV cache entry corresponding to this module.
       lora (LoRAEntry): the optional lora entry.
     Returns:
@@ -237,7 +237,7 @@ class CausalSelfAttention(nn.Module):
     )  # 1, bk, h, s
     if kv_cache is not None:
-      kv_cache = kv_utils.update(kv_cache, input_pos, k, v)
+      kv_cache = kv_utils_experimental.update(kv_cache, input_pos, k, v)
       k, v = kv_cache.k_cache, kv_cache.v_cache
     sdpa_out = self.sdpa_func(

ai_edge_torch/generative/layers/experimental/kv_cache.py CHANGED Viewed

@@ -18,304 +18,33 @@
 This is an experimental implementation and is subject to change at any time.
 """
-import dataclasses
-import functools
-from typing import Any, List, Tuple, Type
-from ai_edge_torch.generative.layers import model_config
-from ai_edge_torch.generative.layers.experimental import types
 from ai_edge_torch.generative.custom_ops import dynamic_update_slice as dus_utils
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 import torch
-import torch.utils._pytree as pytree
-@dataclasses.dataclass
-class KVCacheEntryBase:
-  """A single cache entry that includes K and V caches.
-  The chaches are built based on the provided config with the shape of
-  (batch_size, kv_cache_max, num_query_groups, head_dim).
-  """
-  k_cache: torch.Tensor
-  v_cache: torch.Tensor
-  @classmethod
-  def _from_model_config(
-      cls,
-      k_shape: Tuple[int, ...],
-      v_shape: Tuple[int, ...],
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-  ):
-    """Build an instance of the class based on model config."""
-    k = torch.zeros(k_shape, dtype=dtype, device=device)
-    v = torch.zeros(v_shape, dtype=dtype, device=device)
-    obj = cls(k_cache=k, v_cache=v)
-    return obj
-  @classmethod
-  def from_model_config(
-      cls,
-      kv_cache_max: int,
-      config: model_config.AttentionConfig,
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-      batch_size: int = 1,
-  ):
-    """Build an instance of the class based on model config."""
-    shape = (batch_size, kv_cache_max, config.num_query_groups, config.head_dim)
-    return cls._from_model_config(shape, shape, dtype, device)
-@dataclasses.dataclass
-class KVCacheEntryBTNH(KVCacheEntryBase):
-  k_type = types.BTNH()
-  v_type = types.BTNH()
-@dataclasses.dataclass
-class KVCacheEntryTransposed(KVCacheEntryBase):
-  k_type = types.BNTH()
-  v_type = types.BNHT()
-  @classmethod
-  def from_model_config(
-      cls,
-      kv_cache_max: int,
-      config: model_config.AttentionConfig,
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-      batch_size: int = 1,
-  ):
-    """Build an instance of the class based on model config."""
-    k_shape = (
-        batch_size,
-        config.num_query_groups,
-        kv_cache_max,
-        config.head_dim,
-    )  # b, k, s, h
-    v_shape = (
-        batch_size,
-        config.num_query_groups,
-        config.head_dim,
-        kv_cache_max,
-    )  # b, k, h, s
-    return cls._from_model_config(k_shape, v_shape, dtype, device)
-def _flatten_kv_entry(
-    kv_e: KVCacheEntryBase,
-) -> Tuple[List[torch.Tensor], Any]:
-  return ([kv_e.k_cache, kv_e.v_cache], None)
-def _unflatten_kv_entry(
-    kv_entry_ty: Type[KVCacheEntryBase],
-    values: List[torch.Tensor],
-    unused_context: Any,
-) -> KVCacheEntryBase:
-  return kv_entry_ty(*values)
-pytree.register_pytree_node(
-    KVCacheEntryTransposed,
-    _flatten_kv_entry,
-    functools.partial(_unflatten_kv_entry, KVCacheEntryTransposed),
-    serialized_type_name="",
-)
-pytree.register_pytree_node(
-    KVCacheEntryBase,
-    _flatten_kv_entry,
-    functools.partial(_unflatten_kv_entry, KVCacheEntryBase),
-    serialized_type_name="",
-)
-@dataclasses.dataclass
-class KVCacheBase:
-  """A utility class for holding KV cache entries per layer."""
-  caches: Tuple[KVCacheEntryBase, ...]
-  @classmethod
-  def _from_model_config(
-      cls,
-      kv_entry_cls,
-      config: model_config.ModelConfig,
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-      batch_size: int = 1,
-  ):
-    caches = [
-        kv_entry_cls.from_model_config(
-            config.kv_cache_max,
-            config.block_config(idx).attn_config,
-            dtype,
-            device,
-            batch_size,
-        )
-        for idx in range(config.num_layers)
-    ]
-    obj = cls(caches=tuple(caches))
-    return obj
-  @classmethod
-  def from_model_config(
-      cls,
-      config: model_config.ModelConfig,
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-      batch_size: int = 1,
-  ):
-    """Build an instance of the class based on model config.
-    Args:
-        config (ModelConfig): Model config used for building the cache.
-        dtype (torch.dtype, optional): The data type of the cache tensor.
-          Defaults to torch.float32.
-        device (torch.device, optional): The device placement of the cache
-          tensors. Defaults to None.
-        batch_size (int, optional): The batch size of the cache tensors.
-          Defaults to 1.
-    Returns:
-        KVCacheBase: The created cache object.
-    """
-    assert batch_size == 1, "Batch size must be 1 for KV Cache."
-    return cls._from_model_config(
-        KVCacheEntryBase,
-        config=config,
-        dtype=dtype,
-        device=device,
-        batch_size=batch_size,
-    )
-  def flatten(self) -> List[torch.Tensor]:
-    """Flatten the cache entries into a list of tensors with order k_i, v_i."""
-    flattened, _ = _flatten_kvc(self)
-    return flattened
-@dataclasses.dataclass
-class KVCacheBTNH(KVCacheBase):
-  @classmethod
-  def from_model_config(
-      cls,
-      config: model_config.ModelConfig,
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-      batch_size: int = 1,
-  ):
-    return cls._from_model_config(
-        KVCacheEntryBTNH,
-        config=config,
-        dtype=dtype,
-        device=device,
-        batch_size=batch_size,
-    )
-@dataclasses.dataclass
-class KVCacheTransposed(KVCacheBase):
-  @classmethod
-  def from_model_config(
-      cls,
-      config: model_config.ModelConfig,
-      dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
-      batch_size: int = 1,
-  ):
-    return cls._from_model_config(
-        KVCacheEntryTransposed,
-        config=config,
-        dtype=dtype,
-        device=device,
-        batch_size=batch_size,
-    )
-def _flatten_kvc(kvc: KVCacheBase) -> Tuple[List[str], List[str]]:
-  flattened = []
-  flat_names = []
-  none_names = []
-  for i, kv_entry in enumerate(kvc.caches):
-    flattened.append(kv_entry.k_cache)
-    flat_names.append(f"k_{i}")
-    flattened.append(kv_entry.v_cache)
-    flat_names.append(f"v_{i}")
-  return flattened, [flat_names, none_names]
-def _flatten_kvc_with_keys(kvc: KVCacheBase) -> Tuple[List, List]:
-  flattened, (flat_names, none_names) = _flatten_kvc(kvc)
-  return [
-      (pytree.MappingKey(k), v) for k, v in zip(flat_names, flattened)
-  ], flat_names
-def _unflatten_kvc(
-    kv_ty: Type[KVCacheBase],
-    kv_entry_type: Type[KVCacheEntryBase],
-    values: List[torch.Tensor],
-    context: Tuple[List, List],
-) -> KVCacheBase:
-  assert len(values) % 2 == 0, "Found odd number of K and V entries."
-  num_layers = len(values) // 2
-  flat_names = context[0]
-  kv_entries = []
-  for i in range(num_layers):
-    k_cache_idx = flat_names.index(f"k_{i}")
-    v_cache_idx = flat_names.index(f"v_{i}")
-    kv_entries.append(
-        kv_entry_type(k_cache=values[k_cache_idx], v_cache=values[v_cache_idx])
-    )
-  obj = kv_ty(tuple(kv_entries))
-  return obj
-pytree.register_pytree_node(
-    KVCacheTransposed,
-    _flatten_kvc,
-    functools.partial(
-        _unflatten_kvc, KVCacheTransposed, KVCacheEntryTransposed
-    ),
-    flatten_with_keys_fn=_flatten_kvc_with_keys,
-    serialized_type_name="",
-)
-pytree.register_pytree_node(
-    KVCacheBase,
-    _flatten_kvc,
-    functools.partial(_unflatten_kvc, KVCacheBase, KVCacheEntryBase),
-    flatten_with_keys_fn=_flatten_kvc_with_keys,
-    serialized_type_name="",
-)
 def update(
-    cache: KVCacheEntryBase,
+    cache: kv_utils.KVCacheEntry,
     input_pos: torch.Tensor,
     k_slice: torch.Tensor,
     v_slice: torch.Tensor,
-) -> KVCacheEntryBase:
+) -> kv_utils.KVCacheEntry:
   """Out of place update of Cache buffer.
   Args:
-      cache (KVCacheEntryBase): The original cache buffer.
+      cache (kv_utils.KVCacheEntry): The original cache buffer.
       input_pos (torch.Tensor): The update slice positions.
       k_slice (torch.Tensor): The K slice to be updated in the new cache.
       v_slice (torch.Tensor): The V slice to be updated in the new cache.
   Returns:
-      KVCacheEntryBase: The updated KVCacheBase entry based on the passed
+      kv_utils.KVCacheEntry: The updated KVCacheBase entry based on the passed
       inputs.
   """
-  update_kv_cache = _update_kv_impl
-  return update_kv_cache(cache, input_pos, k_slice, v_slice)
+  assert (
+      cache.kv_layout == kv_utils.KV_LAYOUT_TRANSPOSED
+  ), "KV entry must have transposed layout."
+  return _update_kv_impl_transposed(cache, input_pos, k_slice, v_slice)
 def _get_slice_indices(
@@ -338,12 +67,12 @@ def _get_slice_indices(
   return slice_indices
-def _update_kv_impl(
-    cache: KVCacheEntryTransposed,
+def _update_kv_impl_transposed(
+    cache: kv_utils.KVCacheEntry,
     input_pos: torch.Tensor,
     k_slice: torch.Tensor,
     v_slice: torch.Tensor,
-) -> KVCacheEntryTransposed:
+) -> kv_utils.KVCacheEntry:
   """Update the cache buffer with High Level Function Boundary annotation."""
   cache_dim = 4
   k_ts_idx = 2
@@ -357,4 +86,4 @@ def _update_kv_impl(
   v = dus_utils.dynamic_update_slice(
       cache.v_cache, v_slice, [x for x in v_slice_indices]
   )
-  return KVCacheEntryTransposed(k, v)
+  return kv_utils.KVCacheEntry(k, v, cache.kv_layout)

ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py CHANGED Viewed

@@ -19,7 +19,7 @@ import math
 from typing import Optional
 from ai_edge_torch.generative.custom_ops import bmm_4d as bmm_lib
-from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.layers.experimental import types
 from ai_edge_torch.hlfb import StableHLOCompositeBuilder
 from multipledispatch import dispatch
@@ -28,7 +28,7 @@ import torch.nn.functional as F
 def scaled_dot_product_attention(
-    kv: kv_utils.KVCacheBase,
+    kv: kv_utils.KVCacheEntry,
     query: torch.Tensor,
     key: torch.Tensor,
     value: torch.Tensor,
@@ -37,10 +37,10 @@ def scaled_dot_product_attention(
     scale: Optional[float] = None,
     softcap: Optional[float] = None,
 ):
-  if hasattr(kv, "k_type") and hasattr(kv, "v_type"):
+  if hasattr(kv, "kv_layout"):
     return _sdpa(
-        kv.k_type,
-        kv.v_type,
+        kv.kv_layout[0](),  # key layout
+        kv.kv_layout[1](),  # value layout
         query=query,
         key=key,
         value=value,
@@ -49,10 +49,7 @@ def scaled_dot_product_attention(
         scale=scale,
         softcap=softcap,
     )
-  raise ValueError(
-      f"SDPA for K type {type(kv.caches[0].k_type)} and V type"
-      f" {type(kv.caches[0].v_type)} not supported."
-  )
+  raise ValueError("No kv_layout attribute found in kv.")
 @dispatch(types.BNTH, types.BNHT)

ai_edge_torch/generative/layers/experimental/types.py CHANGED Viewed

@@ -62,6 +62,9 @@ class TensorDimensionMeta(type):
   def __repr__(cls):
     return f'{cls.__name__}'
+  def __iter__(cls):
+    return iter(getattr(cls, 'dimensions'))
 def create_tensor_dimension_order_class(dims: Tuple[TensorDims]):
   """Creates a TensorDimensionMeta class with the specified dimensions.

ai_edge_torch/generative/layers/kv_cache.py CHANGED Viewed

@@ -16,24 +16,58 @@
 """Utility functions for externalized KV Cache."""
 import dataclasses
-from typing import List, Tuple
+from typing import Any, List, Tuple
 from ai_edge_torch.generative.custom_ops.dynamic_update_slice import dynamic_update_slice
 from ai_edge_torch.generative.layers import model_config
+from ai_edge_torch.generative.layers.experimental import types
 import torch
 import torch.utils._pytree as pytree
+KVLayout = Tuple[types.TensorDimensionMeta, types.TensorDimensionMeta]
+# Define common layouts for KV Cache.
+KV_LAYOUT_DEFAULT = (types.BTNH, types.BTNH)
+KV_LAYOUT_TRANSPOSED = (types.BNTH, types.BNHT)
 @dataclasses.dataclass
 class KVCacheEntry:
   """A single cache entry that includes K and V caches.
-  The chaches are built based on the provided config with the shape of
-  (batch_size=1, kv_cache_max, num_query_groups, head_dim).
+  The cache layout can be customized based on different use cases.
   """
   k_cache: torch.Tensor
   v_cache: torch.Tensor
+  kv_layout: KVLayout = KV_LAYOUT_DEFAULT
+  @classmethod
+  def construct_kv_shape_from_layout(
+      cls,
+      shape_spec: types.TensorDimensionMeta,
+      kv_cache_max: int,
+      config: model_config.AttentionConfig,
+      batch_size: int,
+  ) -> List[int]:
+    """Constructs the shape of the key or value cache entry based on
+    the specified layout.
+    """
+    output_shape = []
+    for dim_spec in shape_spec:
+      if dim_spec is types.TensorDims.BATCH:
+        output_shape.append(batch_size)
+      elif dim_spec is types.TensorDims.SEQUENCE:
+        output_shape.append(kv_cache_max)
+      elif dim_spec is types.TensorDims.NUM_HEADS:
+        output_shape.append(config.num_query_groups)
+      elif dim_spec is types.TensorDims.HEAD_DIM:
+        output_shape.append(config.head_dim)
+      else:
+        raise ValueError(f"Unsupported dimension spec: {dim_spec}")
+    return output_shape
   @classmethod
   def from_model_config(
@@ -41,14 +75,20 @@ class KVCacheEntry:
       kv_cache_max: int,
       config: model_config.AttentionConfig,
       dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
+      device: torch.device | None = None,
       batch_size: int = 1,
+      kv_layout: KVLayout = KV_LAYOUT_DEFAULT,
   ) -> "KVCacheEntry":
     """Build an instance of the class based on model config."""
-    shape = (batch_size, kv_cache_max, config.num_query_groups, config.head_dim)
-    k = torch.zeros(shape, dtype=dtype, device=device)
-    v = torch.zeros(shape, dtype=dtype, device=device)
-    obj = cls(k_cache=k, v_cache=v)
+    k_shape = cls.construct_kv_shape_from_layout(
+        kv_layout[0], kv_cache_max, config, batch_size
+    )
+    v_shape = cls.construct_kv_shape_from_layout(
+        kv_layout[1], kv_cache_max, config, batch_size
+    )
+    k = torch.zeros(k_shape, dtype=dtype, device=device)
+    v = torch.zeros(v_shape, dtype=dtype, device=device)
+    obj = cls(k_cache=k, v_cache=v, kv_layout=kv_layout)
     return obj
@@ -63,8 +103,9 @@ class KVCache:
       cls,
       config: model_config.ModelConfig,
       dtype: torch.dtype = torch.float32,
-      device: torch.device = None,
+      device: torch.device | None = None,
       batch_size: int = 1,
+      kv_layout: KVLayout = KV_LAYOUT_DEFAULT,
   ) -> "KVCache":
     """Build an instance of the class based on model config.
@@ -89,6 +130,7 @@ class KVCache:
             dtype,
             device,
             batch_size,
+            kv_layout,
         )
         for idx in range(config.num_layers)
     ]
@@ -104,7 +146,7 @@ class KVCache:
 def _flatten_kvc(kvc: KVCache) -> Tuple[List[str], List[str]]:
   flattened = []
   flat_names = []
-  none_names = []
+  none_names = [kvc.caches[0].kv_layout]
   for i, kv_entry in enumerate(kvc.caches):
     flattened.append(kv_entry.k_cache)
     flat_names.append(f"k_{i}")
@@ -121,22 +163,48 @@ def _flatten_kvc_with_keys(kvc: KVCache) -> Tuple[List, List]:
 def _unflatten_kvc(
-    values: List[torch.Tensor], context: Tuple[List, List]
+    values: List[torch.Tensor],
+    context: Tuple[List, List],
 ) -> KVCache:
   assert len(values) % 2 == 0, "Found odd number of K and V entries."
   num_layers = len(values) // 2
   flat_names = context[0]
+  kv_layout = context[1][0]
   kv_entries = []
   for i in range(num_layers):
     k_cache_idx = flat_names.index(f"k_{i}")
     v_cache_idx = flat_names.index(f"v_{i}")
     kv_entries.append(
-        KVCacheEntry(k_cache=values[k_cache_idx], v_cache=values[v_cache_idx])
+        KVCacheEntry(
+            k_cache=values[k_cache_idx],
+            v_cache=values[v_cache_idx],
+            kv_layout=kv_layout,
+        )
     )
   obj = KVCache(tuple(kv_entries))
   return obj
+def _flatten_kv_entry(
+    kv_e: KVCacheEntry,
+) -> Tuple[List[torch.Tensor], Any]:
+  return ([kv_e.k_cache, kv_e.v_cache], kv_e.kv_layout)
+def _unflatten_kv_entry(
+    values: List[torch.Tensor],
+    context: Any,
+) -> KVCacheEntry:
+  return KVCacheEntry(*values, kv_layout=context)
+pytree.register_pytree_node(
+    KVCacheEntry,
+    _flatten_kv_entry,
+    _unflatten_kv_entry,
+    serialized_type_name="",
+)
 pytree.register_pytree_node(
     KVCache,
     _flatten_kvc,
@@ -145,7 +213,6 @@ pytree.register_pytree_node(
     serialized_type_name="",
 )
 def update(
     cache: KVCacheEntry,
     input_pos: torch.Tensor,
@@ -204,5 +271,5 @@ def _update_kv_impl(
   k = dynamic_update_slice(cache.k_cache, k_slice, k_slice_indices)
   v = dynamic_update_slice(cache.v_cache, v_slice, v_slice_indices)
-  updated_cache = KVCacheEntry(k, v)
+  updated_cache = KVCacheEntry(k, v, cache.kv_layout)
   return updated_cache

ai_edge_torch/generative/test/test_kv_cache.py CHANGED Viewed

@@ -16,7 +16,6 @@
 """A suite of tests to validate KV Cache layer."""
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
-from ai_edge_torch.generative.layers.experimental import kv_cache as kv_utils_experimental
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
 import torch.utils._pytree as pytree
@@ -117,7 +116,7 @@ class TestKVLayers(googletest.TestCase):
     self.assertEqual(input_specs[0].arg.name, "kv_k_0")
     self.assertEqual(input_specs[1].arg.name, "kv_v_0")
-  def test_pytree_roundtrip_experimental_kv_cache_base(self):
+  def test_pytree_roundtrip_kv_cache(self):
     NUM_LAYERS = 4
     config = self._get_test_config(
         num_layers=NUM_LAYERS,
@@ -125,15 +124,13 @@ class TestKVLayers(googletest.TestCase):
         num_query_groups=1,
         kv_cache_max_len=4,
     )
-    kv = kv_utils_experimental.KVCacheBase.from_model_config(
-        config, batch_size=1
-    )
+    kv = kv_utils.KVCache.from_model_config(config, batch_size=1)
     flat, treespec = pytree.tree_flatten(kv)
     self.assertLen(flat, NUM_LAYERS * 2)
     kv_unflat = pytree.tree_unflatten(flat, treespec)
     self.assertEqual(kv, kv_unflat)
-  def test_pytree_roundtrip_experimental_kv_cache_derived(self):
+  def test_pytree_roundtrip_kv_cache_derived(self):
     NUM_LAYERS = 4
     config = self._get_test_config(
         num_layers=NUM_LAYERS,
@@ -141,41 +138,37 @@ class TestKVLayers(googletest.TestCase):
         num_query_groups=1,
         kv_cache_max_len=4,
     )
-    kv = kv_utils_experimental.KVCacheTransposed.from_model_config(
-        config, batch_size=1
+    kv = kv_utils.KVCache.from_model_config(
+        config, batch_size=1, kv_layout=kv_utils.KV_LAYOUT_TRANSPOSED
     )
     flat, treespec = pytree.tree_flatten(kv)
     self.assertLen(flat, NUM_LAYERS * 2)
     kv_unflat = pytree.tree_unflatten(flat, treespec)
     self.assertEqual(kv, kv_unflat)
-  def test_pytree_roundtrip_experimental_kv_entry_base(self):
+  def test_pytree_roundtrip_kv_entry(self):
     attn_config = cfg.AttentionConfig(
         num_heads=1, head_dim=1, num_query_groups=1
     )
-    kv = kv_utils_experimental.KVCacheEntryBase.from_model_config(
-        32, attn_config
-    )
+    kv = kv_utils.KVCacheEntry.from_model_config(32, attn_config)
     flat, treespec = pytree.tree_flatten(kv)
     self.assertLen(flat, 2)
     kv_unflat = pytree.tree_unflatten(flat, treespec)
     self.assertEqual(kv, kv_unflat)
-    self.assertIsInstance(kv_unflat, kv_utils_experimental.KVCacheEntryBase)
+    self.assertIsInstance(kv_unflat, kv_utils.KVCacheEntry)
-  def test_pytree_roundtrip_experimental_kv_entry_derived(self):
+  def test_pytree_roundtrip_kv_entry_derived(self):
     attn_config = cfg.AttentionConfig(
         num_heads=1, head_dim=1, num_query_groups=1
     )
-    kv = kv_utils_experimental.KVCacheEntryTransposed.from_model_config(
-        32, attn_config
+    kv = kv_utils.KVCacheEntry.from_model_config(
+        32, attn_config, kv_layout=kv_utils.KV_LAYOUT_TRANSPOSED
     )
     flat, treespec = pytree.tree_flatten(kv)
     self.assertLen(flat, 2)
     kv_unflat = pytree.tree_unflatten(flat, treespec)
     self.assertEqual(kv, kv_unflat)
-    self.assertIsInstance(
-        kv_unflat, kv_utils_experimental.KVCacheEntryTransposed
-    )
+    self.assertIsInstance(kv_unflat, kv_utils.KVCacheEntry)
 if __name__ == "__main__":

ai_edge_torch/generative/utilities/converter.py CHANGED Viewed

@@ -20,6 +20,7 @@ import pathlib
 from typing import Optional, Union
 from absl import flags
 from ai_edge_torch._convert import converter as converter_utils
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.layers import lora as lora_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.quantize import quant_recipes
@@ -218,9 +219,13 @@ def _export_helper(
       [[0] for _ in range(export_config.decode_batch_size)], dtype=torch.int
   )
   decode_input_pos = torch.tensor([0], dtype=torch.int)
-  prefill_kv = export_config.kvcache_cls.from_model_config(config)
-  decode_kv = export_config.kvcache_cls.from_model_config(
-      config, batch_size=export_config.decode_batch_size
+  prefill_kv = kv_utils.KVCache.from_model_config(
+      config, kv_layout=export_config.kvcache_layout
+  )
+  decode_kv = kv_utils.KVCache.from_model_config(
+      config,
+      batch_size=export_config.decode_batch_size,
+      kv_layout=export_config.kvcache_layout,
   )
   quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None

ai_edge_torch/generative/utilities/export_config.py CHANGED Viewed

@@ -32,7 +32,9 @@ class ExportConfig:
   # Attention masks given as inputs to the model.
   prefill_mask: Optional[torch.Tensor | List[torch.Tensor]] = None
   decode_mask: Optional[torch.Tensor | List[torch.Tensor]] = None
-  # The KV Cache class for K and V buffers in attention.
+  # The KV Cache layout for K and V buffers in attention.
+  kvcache_layout: kv_utils.KVLayout = kv_utils.KV_LAYOUT_DEFAULT
+  # TODO(b/409373223): The KV Cache class for K and V buffers in attention.
   kvcache_cls: type = kv_utils.KVCache
   # The batch size of the decode signature.
   decode_batch_size: int = 1

ai_edge_torch/odml_torch/export.py CHANGED Viewed

@@ -209,7 +209,10 @@ class MlirLowered:
   def get_text(self, enable_debug_info=False):
     return str(
-        self.module.operation.get_asm(enable_debug_info=enable_debug_info)
+        self.module.operation.get_asm(
+            enable_debug_info=enable_debug_info,
+            large_elements_limit=16,
+        )
     )
   @property
@@ -326,8 +329,24 @@ def _convert_q_dq_per_channel_args_to_list(
 def exported_program_to_mlir(
     exported_program: torch.export.ExportedProgram,
+    *,
+    ir_context: ir.Context | None = None,
+    _pre_lower_pass: (
+        Callable[[torch.export.ExportedProgram], None] | None
+    ) = None,
 ) -> MlirLowered:
-  """Lower the exported program to MLIR."""
+  """Lower the exported program to MLIR.
+  Args:
+    exported_program: The exported program to lower.
+    ir_context: The MLIR context to use. If not provided, a new context will be
+      created.
+    _pre_lower_pass: A function to run on exported program before lowering.
+  Returns:
+    The lowered MLIR module, metadata, and weight tensors bundle from exported
+    program.
+  """
   exported_program = fx_infra.safe_run_decompositions(
       exported_program,
       fx_infra.decomp.pre_lower_decomp(),
@@ -340,10 +359,16 @@ def exported_program_to_mlir(
   # Do not call run_decompositions after applying the passes.
   _convert_q_dq_per_channel_args_to_list(exported_program)
-  with export_utils.create_ir_context() as context, ir.Location.unknown():
+  if _pre_lower_pass:
+    _pre_lower_pass(exported_program)
+  if not ir_context:
+    ir_context = export_utils.create_ir_context()
+  with ir_context, ir.Location.unknown():
     module = ir.Module.create()
-    lctx = LoweringContext(context, module)
+    lctx = LoweringContext(ir_context, module)
     interpreter = LoweringInterpreter(exported_program.graph_module, lctx)
     ir_flat_inputs, export_flat_args, tensor_metas = _build_flat_inputs(
         exported_program
@@ -382,7 +407,6 @@ def exported_program_to_mlir(
     main_func.attributes["sym_visibility"] = ir.StringAttr.get("public")
     temp_func.erase()
     module.operation.verify()
   input_signature = []
@@ -422,5 +446,5 @@ def exported_program_to_mlir(
       for tensor_meta in _get_output_metas(exported_program)
   ]
   return MlirLowered(
-      context, module, state_dict, input_signature, output_signature
+      ir_context, module, state_dict, input_signature, output_signature
   )

ai_edge_torch/version.py CHANGED Viewed

@@ -13,4 +13,4 @@
 # limitations under the License.
 # ==============================================================================
-__version__ = "0.4.0.dev20250407"
+__version__ = "0.5.0.dev20250409"

{ai_edge_torch_nightly-0.4.0.dev20250407.dist-info → ai_edge_torch_nightly-0.5.0.dev20250409.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.4.0.dev20250407
+Version: 0.5.0.dev20250409
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
@@ -25,6 +25,9 @@ License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: scipy
 Requires-Dist: safetensors
+Requires-Dist: multipledispatch
+Requires-Dist: transformers
+Requires-Dist: kagglehub
 Requires-Dist: tabulate
 Requires-Dist: torch>=2.4.0
 Requires-Dist: tf-nightly>=2.19.0.dev20250101

{ai_edge_torch_nightly-0.4.0.dev20250407.dist-info → ai_edge_torch_nightly-0.5.0.dev20250409.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=8sPR_5uXJA4NEE0nIwNdSl-ADOJEoR8hAgYvBQDY70Y,120
 ai_edge_torch/_config.py,sha256=AiqhbcheF7j_ozIGDLC89k1we95aVgFDa-tR6h7UI0s,2529
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
-ai_edge_torch/version.py,sha256=fJ75mse1mSxcWdxcUrb7b5ab1ko3Jhnbbvqz_EJKxl0,706
+ai_edge_torch/version.py,sha256=DEYqmCDZNmwuMxnxrFvcTEaDp6Z_BVHJaZMYjVQ2ijU,706
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/conversion.py,sha256=gpXQnifODU-mWxkUZw_3ov1lEYBw1SPVIcqj5k7pTGo,5550
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -65,12 +65,12 @@ ai_edge_torch/generative/examples/gemma/verify_gemma1.py,sha256=ip-Gmk4CI5f0GWSd
 ai_edge_torch/generative/examples/gemma/verify_gemma2.py,sha256=IoBhEMwH07-tFm5-U6F2hpCsI8xynglhq1x9tIOdaPQ,1322
 ai_edge_torch/generative/examples/gemma/verify_util.py,sha256=tR8RflXocDZqvuStyw9aFlzuiTllEC8rNnjrxms6_Is,5727
 ai_edge_torch/generative/examples/gemma3/__init__.py,sha256=JaAnrFoXTl3RJX97XspklkTyqOHVyAgRJsZtzNDd10c,671
-ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py,sha256=6Dkxi7Vs8xBaqMif00ATQSr_hTPhYXMdDqHwzOsAzq8,2952
-ai_edge_torch/generative/examples/gemma3/decoder.py,sha256=__kpzArZ0mLfX7IzpHPmYFuhKTP9uI_9Lrzk_EfFDlE,15701
+ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py,sha256=szssSBrIUYdNIoU7LHdAq7wCqgjaY6qbV8yvTgg796Q,2945
+ai_edge_torch/generative/examples/gemma3/decoder.py,sha256=n6ZQfqNEHuOhY7Pu21bb8Eax8yn2Sx5osTKJKmhonXY,15659
 ai_edge_torch/generative/examples/gemma3/gemma3.py,sha256=5PEt0aWJ5wkUBvMoWFOJ-C48ZhG7uCVb8PCKQtZ8Fvw,6485
 ai_edge_torch/generative/examples/gemma3/image_encoder.py,sha256=uRoLoBWzFtQz5wFZfPCxbkvZsgPAqSkUUsV3977GbYc,5184
 ai_edge_torch/generative/examples/gemma3/verify_gemma3.py,sha256=v8oNXFICmVOtQxfO7IhZ8GnbvotEkDi9lzYHjoQyOso,2464
-ai_edge_torch/generative/examples/gemma3/verify_util.py,sha256=u30qiZu3HJCTt5noWqtf9PgGLKQ87ke4Zpa4cpG6-As,8883
+ai_edge_torch/generative/examples/gemma3/verify_util.py,sha256=nEv0qQ0l6gSXKxP5mNwkd2lRGxpFfD4e7FNV3V76zhw,8915
 ai_edge_torch/generative/examples/llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/llama/convert_to_tflite.py,sha256=A4uLUdqvU1NKo3seqZlWSS3fqYahnEKqNBQBJO6yXvE,1762
 ai_edge_torch/generative/examples/llama/llama.py,sha256=UKvMO85_5z1vEY5MVu6QBW_vpQYA8LWHbJI4Yx6BrCc,6592
@@ -153,17 +153,17 @@ ai_edge_torch/generative/layers/attention.py,sha256=wLZ1jgUlcODBWgK3hnnhclHuuQDq
 ai_edge_torch/generative/layers/attention_utils.py,sha256=zBVwlBUTs-nStIKCZG0ks5ra7tsqc9ShfakFJKH5rds,7344
 ai_edge_torch/generative/layers/builder.py,sha256=LXGuSHIx6QZAzLFm7aJvlzoMPgQwbXLFchGEKYwOOUA,5090
 ai_edge_torch/generative/layers/feed_forward.py,sha256=hdICat-8gW7-vxDAevJQ8NQ-mynllPiqLdXQMF6JMnc,4189
-ai_edge_torch/generative/layers/kv_cache.py,sha256=zjdovWqgEKtx7cvbA0apOwXaNft5AXxNTbJhBT4CXyg,6541
+ai_edge_torch/generative/layers/kv_cache.py,sha256=9kkFpB9msgUDStFxEyQYYsavKPP4Dgqb_NFcd4hA4aU,8502
 ai_edge_torch/generative/layers/lora.py,sha256=hsvWLLOnW7HQ0AysOZu30x_cetMquDd1tjfyLz8HCSU,17892
 ai_edge_torch/generative/layers/model_config.py,sha256=nLXvTkDAIHJQ0PTaWODF8oxJQoJ-K8D10cKR9229SAw,8355
 ai_edge_torch/generative/layers/normalization.py,sha256=MbwH-n80Fob5YvjBzdqDjBizMHLzSJGYRDdbD-rL5C0,6174
 ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=975zR202MdIrILJ7blceAcxrNqX1ZCN0ECKG1gz-bV8,2655
 ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=vp8dVx6tOe99neJhpbrtIt5fvN5NFw19JVH1v0yi5Mg,4154
 ai_edge_torch/generative/layers/experimental/__init__.py,sha256=nz-K0h8DfiATHzR6s1_bCw2akUmHWffU1bDRSkIzSqI,592
-ai_edge_torch/generative/layers/experimental/attention.py,sha256=95djjlJItDVuSNE3BL0b6u3lQoIhmmdvaik7qBBvQA0,8909
-ai_edge_torch/generative/layers/experimental/kv_cache.py,sha256=uXUxiQjPndXYZVGKgm9FxzHgQDal8GdY7cUZDpc_Sno,9997
-ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py,sha256=YFW0iGcZjTuej6VFIkwdSY28fIQi_KTAVdT8gWNmq7o,2880
-ai_edge_torch/generative/layers/experimental/types.py,sha256=bPPxw6TOCZVWdeDP3vCbOnjNP5-bdUMmfsfO-EtdazQ,2847
+ai_edge_torch/generative/layers/experimental/attention.py,sha256=oW8cxv0pXcesnyGz6bXacRmlvHPfKNnJnls_Qb4L_aQ,8968
+ai_edge_torch/generative/layers/experimental/kv_cache.py,sha256=PlgL2bNNKasu3wFr3Iu9wbATWluWZt3_s4tzglJu2tM,2942
+ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py,sha256=-ztTIgdec5gXkOVe6FXk3PMeS2HoL6-mBfDBdjQIcLQ,2808
+ai_edge_torch/generative/layers/experimental/types.py,sha256=gZI9hIPB3XAo4oecKIIoVDfiyibLaSNFhecPFx4VDTM,2913
 ai_edge_torch/generative/layers/unet/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/layers/unet/blocks_2d.py,sha256=ZteHZXK6HKyxYji49DQ46sA9aIy7U3Jnz0HZp6hfevY,28996
 ai_edge_torch/generative/layers/unet/builder.py,sha256=zAqWXdimmMrQRhmE_t9XkS68mh6PSrzwb-2NZZXrR5I,1901
@@ -177,7 +177,7 @@ ai_edge_torch/generative/quantize/quant_recipes.py,sha256=0Kvr_o7pbMnE8VMe6Ml0FB
 ai_edge_torch/generative/quantize/supported_schemes.py,sha256=FjdycEOvxRgBmQdZVufetPvkDoD7rUowIOSKV9oV5Kk,1418
 ai_edge_torch/generative/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/test/test_custom_dus.py,sha256=MjIhTvkTko872M35XMciobvICcDWTcIDJ3rociko-wM,3267
-ai_edge_torch/generative/test/test_kv_cache.py,sha256=MBPS-0bDXB0tQSKHa1XwDQeVIfabRbc8JQA99h9fzlQ,5961
+ai_edge_torch/generative/test/test_kv_cache.py,sha256=1sXN2RPntq0PP3IEy0NkvIbzQ0Y8JhPIwRSFwO9JLlE,5728
 ai_edge_torch/generative/test/test_loader.py,sha256=9mQUeeZKOVApOWSWl2cN9c10axZjMKM1-0Zd823CCS4,3449
 ai_edge_torch/generative/test/test_lora.py,sha256=6QIM6RLTc2HrodGpp_aS3OxM9Rco2KAzEnYgotkg41M,5310
 ai_edge_torch/generative/test/test_model_conversion.py,sha256=jfqkECCX7XKHeBAuDXrkwQJf0vM72eG3LMc5rluha84,6191
@@ -185,8 +185,8 @@ ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=-v2Vj7Qdd3Gy
 ai_edge_torch/generative/test/test_quantize.py,sha256=bEJMhpQ9bIDUZVBXTW888728FcH-i3SyE4JSZZUgU0A,6071
 ai_edge_torch/generative/test/utils.py,sha256=tF6aCfAGJnc9dmzCnZCEOuKNVimfWOqscv9og0DDLHU,2656
 ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
-ai_edge_torch/generative/utilities/converter.py,sha256=ycXDcd3ZE-EdjksDjHi4ru3JpfhtrfOompg_990qvWI,9607
-ai_edge_torch/generative/utilities/export_config.py,sha256=-UuukWqUUj8RM8lTtMCa_PD6SqCZv97i4BMiJA2zBPg,1491
+ai_edge_torch/generative/utilities/converter.py,sha256=87Tzj-gLydx8_xnHxKlCbMmM1XHShstpKi8RH3xY7Xw,9757
+ai_edge_torch/generative/utilities/export_config.py,sha256=8-795nyd3M34LkGhgW7hwHlJyTc2Oz1iipHK8yBhdFs,1633
 ai_edge_torch/generative/utilities/loader.py,sha256=7p__m2JryWphGlYOuRxdoT4id4_tWJEVOV7y2X4H-Ak,13737
 ai_edge_torch/generative/utilities/model_builder.py,sha256=ZYX1TxpFdj573du2QCyHJlFjx4q1m12R74fp4Gwl92A,6343
 ai_edge_torch/generative/utilities/moonshine_loader.py,sha256=_RpFabSqtGH5PHiP3_1f6QfO14qMADUxr_HGRlVDFB0,4891
@@ -210,7 +210,7 @@ ai_edge_torch/lowertools/translate_recipe.py,sha256=ymkBpFqAUiupRWqrPOWiVphKcXR1
 ai_edge_torch/odml_torch/__init__.py,sha256=S8jOzE9nLof-6es3XDiGJRN-9H_XTxsVm9dE7lD3RWo,812
 ai_edge_torch/odml_torch/_torch_future.py,sha256=jSYHf1CMTJzMizPMbu2b39hAt0ZTR6gQLq67GMe9KTo,2336
 ai_edge_torch/odml_torch/_torch_library.py,sha256=Lw1gqL2HWNRspdTwNhIkYAHDyafHedHtkXyKKxn-Wss,805
-ai_edge_torch/odml_torch/export.py,sha256=7l8R0DEq_vfns8iWpruMlIyaIKZAFzoAy369-7iRrl0,14164
+ai_edge_torch/odml_torch/export.py,sha256=rxsyVagQgb-DDIVtwZwSTSVFINqwIZleOOfmPkBoPKg,14817
 ai_edge_torch/odml_torch/export_utils.py,sha256=QeA37Irlty6AiIBuqmHmJgn3lqahBQ5xsh6IKRoKm1g,4774
 ai_edge_torch/odml_torch/tf_integration.py,sha256=NN29WeXmHZ0S1RPDFHUnBi2DEjMvAtwczStPYIsQ1w8,4849
 ai_edge_torch/odml_torch/composite/__init__.py,sha256=71GM_gDZxJyo38ZSoYSwhZX3xKA9rknO93JS9kw9w_c,778
@@ -243,8 +243,8 @@ ai_edge_torch/testing/__init__.py,sha256=_yGgvnBZWb7T3IN3mc4x1sS4vM96HZwM8pwIcPG
 ai_edge_torch/testing/export.py,sha256=dguMa-aEi-WDPnmGBUs2IPdEmt2IVmHOELH19uiJ1uU,3014
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.4.0.dev20250407.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-ai_edge_torch_nightly-0.4.0.dev20250407.dist-info/METADATA,sha256=NKNyCEoVoa3kan1IjHA8JXvBbAfEW-H4VMjekJPMlkM,1966
-ai_edge_torch_nightly-0.4.0.dev20250407.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_torch_nightly-0.4.0.dev20250407.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
-ai_edge_torch_nightly-0.4.0.dev20250407.dist-info/RECORD,,
+ai_edge_torch_nightly-0.5.0.dev20250409.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.5.0.dev20250409.dist-info/METADATA,sha256=kZwo6E79HLuM7_4E-Yw9erTzOnAAzio3Vy45hXNiC48,2051
+ai_edge_torch_nightly-0.5.0.dev20250409.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_torch_nightly-0.5.0.dev20250409.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.5.0.dev20250409.dist-info/RECORD,,

{ai_edge_torch_nightly-0.4.0.dev20250407.dist-info → ai_edge_torch_nightly-0.5.0.dev20250409.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.4.0.dev20250407.dist-info → ai_edge_torch_nightly-0.5.0.dev20250409.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.4.0.dev20250407.dist-info → ai_edge_torch_nightly-0.5.0.dev20250409.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-torch-nightly 0.4.0.dev20250407__py3-none-any.whl → 0.5.0.dev20250409__py3-none-any.whl

ai-edge-torch-nightly 0.4.0.dev20250407__py3-none-any.whl → 0.5.0.dev20250409__py3-none-any.whl