PyPI - ai-edge-torch-nightly - Versions diffs - 0.4.0.dev20250227__py3-none-any.whl → 0.4.0.dev20250228__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.4.0.dev20250227__py3-none-any.whl → 0.4.0.dev20250228__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

ai_edge_torch/generative/layers/experimental/attention.py CHANGED Viewed

@@ -52,7 +52,6 @@ class TransformerBlock(nn.Module):
         config.pre_attention_norm_config,
     )
     self.atten_func = CausalSelfAttention(
-        model_config.batch_size,
         model_config.embedding_dim,
         config.attn_config,
         model_config.enable_hlfb,
@@ -119,7 +118,6 @@ class CausalSelfAttention(nn.Module):
   def __init__(
       self,
-      batch_size: int,
       dim: int,
       config: cfg.AttentionConfig,
       enable_hlfb: bool,
@@ -127,14 +125,12 @@ class CausalSelfAttention(nn.Module):
     """Initialize an instance of CausalSelfAttention.
     Args:
-      batch_size (int): batch size of the input tensor.
       dim (int): causal attention's input/output dimension.
       config (cfg.AttentionConfig): attention specific configurations.
       enable_hlfb (bool): whether hlfb is enabled or not.
     """
     super().__init__()
     self.kv_cache = None
-    self.batch_size = batch_size
     qkv_shape = (
         config.num_heads + 2 * config.num_query_groups
     ) * config.head_dim
@@ -180,10 +176,6 @@ class CausalSelfAttention(nn.Module):
     """
     # Batch size, sequence length, embedding dimensionality.
     B, T, E = x.size()
-    assert B == self.batch_size, (
-        "batch size of input tensor must match with the batch size specified in"
-        " the model configuration."
-    )
     qkv = self.qkv_projection(x)

ai_edge_torch/generative/layers/experimental/kv_cache.py CHANGED Viewed

@@ -21,23 +21,19 @@ This is an experimental implementation and is subject to change at any time.
 import dataclasses
 from typing import List, Tuple
-from ai_edge_torch import hlfb
 from ai_edge_torch.generative.layers import model_config
-from ai_edge_torch.generative.layers.experimental import types as types
-from ai_edge_torch.generative.utilities.dynamic_update_slice import dynamic_update_slice
+from ai_edge_torch.generative.layers.experimental import types
+from ai_edge_torch.generative.utilities import dynamic_update_slice as dus_utils
 import torch
-import torch.nn as nn
 import torch.utils._pytree as pytree
-BATCH_SIZE = 1
 @dataclasses.dataclass
 class KVCacheEntryBase:
   """A single cache entry that includes K and V caches.
   The caches are built based on the provided config with the shape of
-  (batch_size=1, kv_cache_max, num_query_groups, head_dim).
+  (batch_size, kv_cache_max, num_query_groups, head_dim).
   """
   k_cache: torch.Tensor
@@ -46,10 +42,8 @@ class KVCacheEntryBase:
   @classmethod
   def _from_model_config(
       cls,
-      kv_cache_max: int,
-      config: model_config.AttentionConfig,
-      k_shape: Tuple,
-      v_shape: Tuple,
+      k_shape: Tuple[int, ...],
+      v_shape: Tuple[int, ...],
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
   ) -> "KVCacheEntryBase":
@@ -66,12 +60,11 @@ class KVCacheEntryBase:
       config: model_config.AttentionConfig,
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
+      batch_size: int = 1,
   ) -> "KVCacheEntryBase":
     """Build an instance of the class based on model config."""
-    shape = (BATCH_SIZE, kv_cache_max, config.num_query_groups, config.head_dim)
-    return cls._from_model_config(
-        kv_cache_max, config, shape, shape, dtype, device
-    )
+    shape = (batch_size, kv_cache_max, config.num_query_groups, config.head_dim)
+    return cls._from_model_config(shape, shape, dtype, device)
 @dataclasses.dataclass
@@ -93,24 +86,22 @@ class KVCacheEntryTransposed(KVCacheEntryBase):
       config: model_config.AttentionConfig,
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
+      batch_size: int = 1,
   ) -> "KVCacheEntryBase":
     """Build an instance of the class based on model config."""
-    num_kv_heads = config.num_query_groups
     k_shape = (
-        1,
-        BATCH_SIZE * num_kv_heads,
+        batch_size,
+        config.num_query_groups,
         kv_cache_max,
         config.head_dim,
-    )  # 1, bk, s, h
+    )  # b, k, s, h
     v_shape = (
-        1,
-        BATCH_SIZE * num_kv_heads,
+        batch_size,
+        config.num_query_groups,
         config.head_dim,
         kv_cache_max,
-    )  # 1, bk, h, s
-    return cls._from_model_config(
-        kv_cache_max, config, k_shape, v_shape, dtype, device
-    )
+    )  # b, k, h, s
+    return cls._from_model_config(k_shape, v_shape, dtype, device)
 @dataclasses.dataclass
@@ -126,6 +117,7 @@ class KVCacheBase:
       config: model_config.ModelConfig,
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
+      batch_size: int = 1,
   ) -> "KVCacheBase":
     caches = [
         kv_entry_cls.from_model_config(
@@ -133,6 +125,7 @@ class KVCacheBase:
             config.block_config(idx).attn_config,
             dtype,
             device,
+            batch_size,
         )
         for idx in range(config.num_layers)
     ]
@@ -145,6 +138,7 @@ class KVCacheBase:
       config: model_config.ModelConfig,
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
+      batch_size: int = 1,
   ) -> "KVCacheBase":
     """Build an instance of the class based on model config.
@@ -154,12 +148,19 @@ class KVCacheBase:
           Defaults to torch.float32.
         device (torch.device, optional): The device placement of the cache
           tensors. Defaults to None.
+        batch_size (int, optional): The batch size of the cache tensors.
+          Defaults to 1.
     Returns:
         KVCacheBase: The created cache object.
     """
+    assert batch_size == 1, "Batch size must be 1 for KV Cache."
     return cls._from_model_config(
-        KVCacheEntryBase, config=config, dtype=dtype, device=device
+        KVCacheEntryBase,
+        config=config,
+        dtype=dtype,
+        device=device,
+        batch_size=batch_size,
     )
   def flatten(self) -> List[torch.Tensor]:
@@ -177,9 +178,14 @@ class KVCacheBTNH(KVCacheBase):
       config: model_config.ModelConfig,
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
+      batch_size: int = 1,
   ) -> "KVCacheBTNH":
     return cls._from_model_config(
-        KVCacheEntryBTNH, config=config, dtype=dtype, device=device
+        KVCacheEntryBTNH,
+        config=config,
+        dtype=dtype,
+        device=device,
+        batch_size=batch_size,
     )
@@ -192,9 +198,14 @@ class KVCacheTransposed(KVCacheBase):
       config: model_config.ModelConfig,
       dtype: torch.dtype = torch.float32,
       device: torch.device = None,
+      batch_size: int = 1,
   ) -> "KVCacheBTNH":
     return cls._from_model_config(
-        KVCacheEntryTransposed, config=config, dtype=dtype, device=device
+        KVCacheEntryTransposed,
+        config=config,
+        dtype=dtype,
+        device=device,
+        batch_size=batch_size,
     )
@@ -258,7 +269,6 @@ def update(
     input_pos: torch.Tensor,
     k_slice: torch.Tensor,
     v_slice: torch.Tensor,
-    use_dus: bool = True,
 ) -> KVCacheEntryBase:
   """Out of place update of Cache buffer.
@@ -309,6 +319,10 @@ def _update_kv_impl(
   positions = input_pos.clone()
   k_slice_indices = _get_slice_indices(positions, cache_dim, k_ts_idx)
   v_slice_indices = _get_slice_indices(positions, cache_dim, v_ts_idx)
-  k = dynamic_update_slice(cache.k_cache, k_slice, [x for x in k_slice_indices])
-  v = dynamic_update_slice(cache.v_cache, v_slice, [x for x in v_slice_indices])
+  k = dus_utils.dynamic_update_slice(
+      cache.k_cache, k_slice, [x for x in k_slice_indices]
+  )
+  v = dus_utils.dynamic_update_slice(
+      cache.v_cache, v_slice, [x for x in v_slice_indices]
+  )
   return KVCacheEntryTransposed(k, v)

ai_edge_torch/version.py CHANGED Viewed

@@ -13,4 +13,4 @@
 # limitations under the License.
 # ==============================================================================
-__version__ = "0.4.0.dev20250227"
+__version__ = "0.4.0.dev20250228"

{ai_edge_torch_nightly-0.4.0.dev20250227.dist-info → ai_edge_torch_nightly-0.4.0.dev20250228.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.4.0.dev20250227
+Version: 0.4.0.dev20250228
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI

{ai_edge_torch_nightly-0.4.0.dev20250227.dist-info → ai_edge_torch_nightly-0.4.0.dev20250228.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ ai_edge_torch/__init__.py,sha256=8sPR_5uXJA4NEE0nIwNdSl-ADOJEoR8hAgYvBQDY70Y,120
 ai_edge_torch/_config.py,sha256=AiqhbcheF7j_ozIGDLC89k1we95aVgFDa-tR6h7UI0s,2529
 ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
 ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
-ai_edge_torch/version.py,sha256=K2jtDrBNGi74j_uQYVUT6MJ2-aQFKkKy5ZYur9iWdVU,706
+ai_edge_torch/version.py,sha256=-EqWeDLQh8HxiqQxA-N-t0YXsYU9QT1iaq2h-kCDBdo,706
 ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/_convert/conversion.py,sha256=gpXQnifODU-mWxkUZw_3ov1lEYBw1SPVIcqj5k7pTGo,5550
 ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -147,8 +147,8 @@ ai_edge_torch/generative/layers/normalization.py,sha256=MbwH-n80Fob5YvjBzdqDjBiz
 ai_edge_torch/generative/layers/rotary_position_embedding.py,sha256=975zR202MdIrILJ7blceAcxrNqX1ZCN0ECKG1gz-bV8,2655
 ai_edge_torch/generative/layers/scaled_dot_product_attention.py,sha256=vp8dVx6tOe99neJhpbrtIt5fvN5NFw19JVH1v0yi5Mg,4154
 ai_edge_torch/generative/layers/experimental/__init__.py,sha256=nz-K0h8DfiATHzR6s1_bCw2akUmHWffU1bDRSkIzSqI,592
-ai_edge_torch/generative/layers/experimental/attention.py,sha256=KC1UkIhaPx2DNRfkxCXO7eZZMeNm2UxkjFi-fB8HVhw,9212
-ai_edge_torch/generative/layers/experimental/kv_cache.py,sha256=gE_q8YoSzOhGgbSm0K91jXkbFKnFJpuYf-hxMzLNw78,8976
+ai_edge_torch/generative/layers/experimental/attention.py,sha256=95djjlJItDVuSNE3BL0b6u3lQoIhmmdvaik7qBBvQA0,8909
+ai_edge_torch/generative/layers/experimental/kv_cache.py,sha256=VN4gn4ylaVOwaTR5EXKv0YTVgpQ850bmjGLCgCCI1ps,9267
 ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py,sha256=1vMh1L3uYX4ptKQMWcAjxkL1v2-g0jmOiuai8ydp0dc,2879
 ai_edge_torch/generative/layers/experimental/types.py,sha256=bPPxw6TOCZVWdeDP3vCbOnjNP5-bdUMmfsfO-EtdazQ,2847
 ai_edge_torch/generative/layers/unet/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
@@ -230,8 +230,8 @@ ai_edge_torch/quantize/quant_config.py,sha256=U0KisSW-uZkoMJcy-ZP9W57p3tsa594fr9
 ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
-ai_edge_torch_nightly-0.4.0.dev20250227.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-ai_edge_torch_nightly-0.4.0.dev20250227.dist-info/METADATA,sha256=cHcz3adq1WwVddazAJ06h7SKITJm70eMpFVjoNa2Jw4,1966
-ai_edge_torch_nightly-0.4.0.dev20250227.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-ai_edge_torch_nightly-0.4.0.dev20250227.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
-ai_edge_torch_nightly-0.4.0.dev20250227.dist-info/RECORD,,
+ai_edge_torch_nightly-0.4.0.dev20250228.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.4.0.dev20250228.dist-info/METADATA,sha256=oGVZ_Z3zOzdyxj4cJ5XTT-YzPpTa99SBgFJo5zUBqJU,1966
+ai_edge_torch_nightly-0.4.0.dev20250228.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ai_edge_torch_nightly-0.4.0.dev20250228.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.4.0.dev20250228.dist-info/RECORD,,

{ai_edge_torch_nightly-0.4.0.dev20250227.dist-info → ai_edge_torch_nightly-0.4.0.dev20250228.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.4.0.dev20250227.dist-info → ai_edge_torch_nightly-0.4.0.dev20250228.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.4.0.dev20250227.dist-info → ai_edge_torch_nightly-0.4.0.dev20250228.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-torch-nightly 0.4.0.dev20250227__py3-none-any.whl → 0.4.0.dev20250228__py3-none-any.whl

ai-edge-torch-nightly 0.4.0.dev20250227__py3-none-any.whl → 0.4.0.dev20250228__py3-none-any.whl