ai-edge-torch-nightly 0.2.0.dev20240806__py3-none-any.whl → 0.2.0.dev20240808__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-edge-torch-nightly might be problematic.
- ai_edge_torch/__init__.py +5 -5
- ai_edge_torch/{convert → _convert}/conversion.py +40 -50
- ai_edge_torch/_convert/conversion_utils.py +64 -0
- ai_edge_torch/{convert → _convert}/converter.py +83 -43
- ai_edge_torch/{convert → _convert}/fx_passes/__init__.py +9 -9
- ai_edge_torch/{convert → _convert}/fx_passes/build_aten_composite_pass.py +51 -26
- ai_edge_torch/{convert → _convert}/fx_passes/build_interpolate_composite_pass.py +11 -8
- ai_edge_torch/{convert → _convert}/fx_passes/canonicalize_pass.py +3 -4
- ai_edge_torch/{convert → _convert}/fx_passes/inject_mlir_debuginfo_pass.py +2 -2
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/__init__.py +16 -0
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_check.py +7 -5
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_mark.py +2 -0
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +1 -0
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +14 -6
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +5 -6
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +17 -14
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/op_func_registry.py +3 -2
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/pass_body.py +15 -17
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/utils.py +2 -0
- ai_edge_torch/_convert/signature.py +100 -0
- ai_edge_torch/{convert → _convert}/test/test_convert.py +50 -52
- ai_edge_torch/{convert → _convert}/test/test_convert_composites.py +16 -12
- ai_edge_torch/{convert → _convert}/test/test_convert_multisig.py +6 -4
- ai_edge_torch/{convert → _convert}/test/test_to_channel_last_io.py +5 -4
- ai_edge_torch/{convert → _convert}/to_channel_last_io.py +4 -1
- ai_edge_torch/config.py +24 -0
- ai_edge_torch/conftest.py +20 -0
- ai_edge_torch/debug/culprit.py +22 -22
- ai_edge_torch/debug/test/test_culprit.py +4 -3
- ai_edge_torch/debug/test/test_search_model.py +5 -5
- ai_edge_torch/debug/utils.py +11 -2
- ai_edge_torch/generative/examples/experimental/gemma/convert_to_tflite.py +3 -3
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +4 -1
- ai_edge_torch/generative/examples/experimental/phi/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/experimental/phi/phi2.py +4 -1
- ai_edge_torch/generative/examples/experimental/tiny_llama/convert_to_tflite.py +4 -5
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +4 -1
- ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/gemma/gemma.py +4 -1
- ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/phi2/phi2.py +4 -1
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +2 -0
- ai_edge_torch/generative/examples/stable_diffusion/decoder.py +3 -2
- ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +57 -20
- ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +20 -9
- ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +1 -0
- ai_edge_torch/generative/examples/t5/t5.py +2 -2
- ai_edge_torch/generative/examples/t5/t5_attention.py +15 -13
- ai_edge_torch/generative/examples/test_models/toy_model.py +4 -1
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +6 -5
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +7 -7
- ai_edge_torch/generative/examples/tiny_llama/__init__.py +14 -0
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +4 -1
- ai_edge_torch/generative/fx_passes/__init__.py +2 -2
- ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +4 -3
- ai_edge_torch/generative/layers/attention.py +35 -26
- ai_edge_torch/generative/layers/attention_utils.py +23 -12
- ai_edge_torch/generative/layers/builder.py +0 -1
- ai_edge_torch/generative/layers/feed_forward.py +6 -10
- ai_edge_torch/generative/layers/kv_cache.py +0 -1
- ai_edge_torch/generative/layers/model_config.py +2 -5
- ai_edge_torch/generative/layers/normalization.py +5 -7
- ai_edge_torch/generative/layers/rotary_position_embedding.py +3 -3
- ai_edge_torch/generative/layers/unet/blocks_2d.py +33 -26
- ai_edge_torch/generative/layers/unet/model_config.py +14 -15
- ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/__init__.py +14 -0
- ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py +0 -2
- ai_edge_torch/generative/quantize/quant_recipe.py +8 -6
- ai_edge_torch/generative/quantize/quant_recipe_utils.py +2 -1
- ai_edge_torch/generative/test/test_experimental_ekv.py +6 -7
- ai_edge_torch/generative/test/{loader_test.py → test_loader.py} +4 -3
- ai_edge_torch/generative/test/test_model_conversion.py +24 -25
- ai_edge_torch/generative/test/test_quantize.py +10 -5
- ai_edge_torch/generative/utilities/loader.py +12 -12
- ai_edge_torch/generative/utilities/stable_diffusion_loader.py +69 -24
- ai_edge_torch/generative/utilities/t5_loader.py +12 -13
- ai_edge_torch/hlfb/__init__.py +1 -1
- ai_edge_torch/hlfb/mark_pattern/__init__.py +9 -6
- ai_edge_torch/hlfb/mark_pattern/passes.py +23 -3
- ai_edge_torch/hlfb/mark_pattern/pattern.py +23 -23
- ai_edge_torch/hlfb/test/test_mark_pattern.py +13 -12
- ai_edge_torch/hlfb/test/test_stablehlo_composite_builder.py +8 -6
- ai_edge_torch/{convert/fx_passes/optimize_layout_transposes_pass → lowertools}/__init__.py +1 -1
- ai_edge_torch/lowertools/_shim.py +80 -0
- ai_edge_torch/lowertools/common_utils.py +89 -0
- ai_edge_torch/lowertools/odml_torch_utils.py +211 -0
- ai_edge_torch/lowertools/torch_xla_utils.py +273 -0
- ai_edge_torch/model.py +14 -9
- ai_edge_torch/quantize/pt2e_quantizer.py +22 -9
- ai_edge_torch/quantize/pt2e_quantizer_utils.py +13 -12
- ai_edge_torch/quantize/quant_config.py +7 -7
- ai_edge_torch/testing/model_coverage/model_coverage.py +19 -10
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.2.0.dev20240806.dist-info → ai_edge_torch_nightly-0.2.0.dev20240808.dist-info}/METADATA +1 -1
- ai_edge_torch_nightly-0.2.0.dev20240808.dist-info/RECORD +141 -0
- ai_edge_torch/convert/conversion_utils.py +0 -439
- ai_edge_torch_nightly-0.2.0.dev20240806.dist-info/RECORD +0 -133
- /ai_edge_torch/{convert → _convert}/__init__.py +0 -0
- /ai_edge_torch/{convert → _convert}/fx_passes/_pass_base.py +0 -0
- /ai_edge_torch/{convert → _convert}/test/__init__.py +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240806.dist-info → ai_edge_torch_nightly-0.2.0.dev20240808.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240806.dist-info → ai_edge_torch_nightly-0.2.0.dev20240808.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240806.dist-info → ai_edge_torch_nightly-0.2.0.dev20240808.dist-info}/top_level.txt +0 -0
@@ -28,17 +28,17 @@ def convert_tiny_llama_to_tflite(
 kv_cache_max_len: int = 1024,
 quantize: bool = True,
 ):
-"""
-tflite model.
+"""Converts TinyLlama model to multi-signature tflite model.
 
 Args:
-checkpoint_path (str): The filepath to the model checkpoint, or directory
+checkpoint_path (str): The filepath to the model checkpoint, or directory
+holding the checkpoint.
 prefill_seq_len (int, optional): The maximum size of prefill input tensor.
 Defaults to 512.
 kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
 including both prefill and decode. Defaults to 1024.
-quantize (bool, optional): Whether the model should be quanized.
-
+quantize (bool, optional): Whether the model should be quanized. Defaults
+to True.
 """
 pytorch_model = tiny_llama.build_model(
 checkpoint_path, kv_cache_max_len=kv_cache_max_len
@@ -64,7 +64,9 @@ class TinyLLamma(nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.kv_cache_max,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,
@@ -109,6 +111,7 @@ class TinyLLamma(nn.Module):
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=32,
+head_dim=64,
 num_query_groups=4,
 rotary_percentage=1.0,
 )
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from ai_edge_torch.
-from ai_edge_torch.
+from ai_edge_torch._convert.fx_passes import CanonicalizePass
+from ai_edge_torch._convert.fx_passes import run_passes
 from ai_edge_torch.generative.fx_passes.remove_sdpa_zero_mask_pass import RemoveSDPACompositeZeroMaskPass # NOQA
 import torch
 
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from ai_edge_torch
-from ai_edge_torch.
+from ai_edge_torch import lowertools
+from ai_edge_torch._convert.fx_passes._pass_base import ExportedProgramPassBase
+from ai_edge_torch._convert.fx_passes._pass_base import ExportedProgramPassResult
 import torch
 
 
@@ -27,7 +28,7 @@ class RemoveSDPACompositeZeroMaskPass(ExportedProgramPassBase):
 for node in graph.nodes:
 if not (
 node.op == "call_function"
-and node.target ==
+and node.target == lowertools.mark_tensor_op
 ):
 continue
 
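
The check above is the usual torch.fx node-matching pattern: walk the exported graph and keep only call_function nodes whose target is the marker op. A minimal sketch of that pattern, with a hypothetical helper name that is not part of the package:

import torch

def find_nodes_with_target(graph_module: torch.fx.GraphModule, target):
  # Collect call_function nodes whose target matches, e.g. lowertools.mark_tensor_op.
  return [
      node
      for node in graph_module.graph.nodes
      if node.op == "call_function" and node.target is target
  ]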
@@ -24,7 +24,6 @@ from ai_edge_torch.generative.layers.scaled_dot_product_attention import scaled_
 from ai_edge_torch.generative.layers.scaled_dot_product_attention import scaled_dot_product_attention_with_hlfb # NOQA
 import torch
 from torch import nn
-import torch.nn.functional as F
 
 
 def _embed_rope(
@@ -60,8 +59,8 @@ class TransformerBlock(nn.Module):
 """Initialize an instance of the TransformerBlock.
 
 Args:
-config (cfg.ModelConfig): the configuration object
-
+config (cfg.ModelConfig): the configuration object for this transformer
+block.
 """
 
 super().__init__()
@@ -131,20 +130,23 @@ class CausalSelfAttention(nn.Module):
 batch_size (int): batch size of the input tensor.
 dim (int): causal attention's input/output dimmension.
 config (cfg.AttentionConfig): attention specific configurations.
-kv_cache_max (int): determines the size of the KV Cache buffer, if
+kv_cache_max (int): determines the size of the KV Cache buffer, if
+enabled.
 enable_hlfb (bool): whether hlfb is enabled or not.
 """
 super().__init__()
-self.head_dim = dim // config.num_heads
-shape = (config.num_heads + 2 * config.num_query_groups) * self.head_dim
-# Key, query, value projections for all heads.
-self.qkv_projection = nn.Linear(dim, shape, bias=config.qkv_use_bias)
-self.output_projection = nn.Linear(
-dim, dim, bias=config.output_proj_use_bias
-)
 self.config = config
 self.kv_cache = None
 self.batch_size = batch_size
+qkv_shape = (
+config.num_heads + 2 * config.num_query_groups
+) * config.head_dim
+output_shape = config.num_heads * config.head_dim
+# Key, query, value projections for all heads.
+self.qkv_projection = nn.Linear(dim, qkv_shape, bias=config.qkv_use_bias)
+self.output_projection = nn.Linear(
+output_shape, dim, bias=config.output_proj_use_bias
+)
 
 # Build a k/v cache with size (batch_size, kv_cache_max, n_heads, head_dim).
 if config.enable_kv_cache:
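
The new projection sizes follow directly from the explicit head_dim. A rough arithmetic check using the TinyLlama values shown in an earlier hunk (num_heads=32, num_query_groups=4, head_dim=64); the numbers come from that hunk, the snippet itself is illustrative:

num_heads, num_query_groups, head_dim = 32, 4, 64
qkv_shape = (num_heads + 2 * num_query_groups) * head_dim  # (32 + 8) * 64 = 2560
output_shape = num_heads * head_dim  # 32 * 64 = 2048
print(qkv_shape, output_shape)  # 2560 2048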
@@ -152,7 +154,7 @@ class CausalSelfAttention(nn.Module):
 batch_size,
 kv_cache_max,
 config.num_query_groups,
-
+config.head_dim,
 enable_hlfb,
 )
 
@@ -169,6 +171,7 @@ class CausalSelfAttention(nn.Module):
 input_pos: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
 """Forward function of the CausalSelfAttention layer, which can support
+
 MQA, GQA and MHA.
 
 Args:
@@ -193,7 +196,7 @@ class CausalSelfAttention(nn.Module):
 q_per_kv = self.config.num_heads // self.config.num_query_groups
 # Each group has >=1 queries, 1 key, and 1 value.
 if self.config.qkv_transpose_before_split:
-qkv = qkv.view(B, T, -1, self.head_dim)
+qkv = qkv.view(B, T, -1, self.config.head_dim)
 q, k, v = qkv.split(
 (
 q_per_kv * self.config.num_query_groups,
@@ -205,22 +208,27 @@ class CausalSelfAttention(nn.Module):
 else:
 qkv = qkv.view(B, T, self.config.num_query_groups, -1)
 q, k, v = qkv.split(
-(
+(
+q_per_kv * self.config.head_dim,
+self.config.head_dim,
+self.config.head_dim,
+),
+dim=-1,
 )
 
-q = q.reshape(B, T, -1, self.head_dim)
-k = k.reshape(B, T, -1, self.head_dim)
-v = v.reshape(B, T, -1, self.head_dim)
+q = q.reshape(B, T, -1, self.config.head_dim)
+k = k.reshape(B, T, -1, self.config.head_dim)
+v = v.reshape(B, T, -1, self.config.head_dim)
 
 # Compute rotary positional embedding for query and key.
-n_elem = int(self.config.rotary_percentage * self.head_dim)
+n_elem = int(self.config.rotary_percentage * self.config.head_dim)
 q, k = _embed_rope(q, k, n_elem, rope)
 
 if self.kv_cache is not None:
 # TODO(haoliang): Handle when execeeding max sequence length.
 k, v = self.kv_cache.update_cache(input_pos, k, v)
 
-y = self.sdpa_func(q, k, v, self.head_dim, mask=mask)
+y = self.sdpa_func(q, k, v, self.config.head_dim, mask=mask)
 y = y.reshape(B, T, E)
 
 # Compute the output projection.
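
To see what the per-group split above does, here is a small self-contained sketch with the same TinyLlama-style numbers (num_heads=32, num_query_groups=4, head_dim=64, so q_per_kv=8); the shapes in the comments are the point, the tensor contents are random:

import torch

B, T = 1, 16
num_heads, num_query_groups, head_dim = 32, 4, 64
q_per_kv = num_heads // num_query_groups  # 8 query heads share each k/v head
qkv = torch.randn(B, T, (num_heads + 2 * num_query_groups) * head_dim)
qkv = qkv.view(B, T, num_query_groups, -1)  # last dim: (8 + 1 + 1) * 64 = 640
q, k, v = qkv.split((q_per_kv * head_dim, head_dim, head_dim), dim=-1)
q = q.reshape(B, T, -1, head_dim)  # (1, 16, 32, 64)
k = k.reshape(B, T, -1, head_dim)  # (1, 16, 4, 64)
v = v.reshape(B, T, -1, head_dim)  # (1, 16, 4, 64)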
@@ -274,12 +282,12 @@ class CrossAttention(nn.Module):
 query_dim (int): query tensor's dimension.
 cross_dim (int): cross attention's dimensions, for key and value tensors.
 config (cfg.AttentionConfig): attention specific configurations.
-kv_cache_max (int): determines the size of the KV Cache buffer, if
+kv_cache_max (int): determines the size of the KV Cache buffer, if
+enabled.
 enable_hlfb (bool): whether hlfb is enabled or not.
 """
 super().__init__()
 self.config = config
-self.head_dim = query_dim // config.num_heads
 self.n_heads = config.num_heads
 self.q_projection = nn.Linear(
 query_dim, query_dim, bias=config.qkv_use_bias
@@ -301,7 +309,7 @@ class CrossAttention(nn.Module):
 batch_size,
 kv_cache_max,
 config.num_query_groups,
-self.head_dim,
+self.config.head_dim,
 enable_hlfb,
 )
 
@@ -324,7 +332,8 @@ class CrossAttention(nn.Module):
 x (torch.Tensor): the target tensor, with shape [B, target_seq_len, ...].
 y (torch.Tensor): the source tensor, with shape [B, source_seq_len, ...].
 rope (Tuple[torch.Tensor, torch.Tensor]): the optional input rope tensor.
-mask (torch.Tensor): the optional mask tensor can be broadcaseted to shape
+mask (torch.Tensor): the optional mask tensor can be broadcaseted to shape
+[B, n_heads, target_seq_len, source_seq_len].
 input_pos (torch.Tensor): the optional input position tensor.
 
 Returns:
@@ -338,13 +347,13 @@ class CrossAttention(nn.Module):
 k = self.k_projection(y)
 v = self.v_projection(y)
 
-interim_shape = (batch_size, -1, self.n_heads, self.head_dim)
+interim_shape = (batch_size, -1, self.n_heads, self.config.head_dim)
 q = q.view(interim_shape)
 k = k.view(interim_shape)
 v = v.view(interim_shape)
 
 # Compute rotary positional embedding for query and key.
-n_elem = int(self.config.rotary_percentage * self.head_dim)
+n_elem = int(self.config.rotary_percentage * self.config.head_dim)
 q, k = _embed_rope(q, k, n_elem, rope)
 
 if self.kv_cache is not None:
@@ -354,7 +363,7 @@ class CrossAttention(nn.Module):
 mask = torch.zeros(
 (batch_size, 1, target_seq_len, source_seq_len), dtype=torch.float32
 )
-y = self.sdpa_func(q, k, v, self.head_dim, mask=mask)
+y = self.sdpa_func(q, k, v, self.config.head_dim, mask=mask)
 y = y.reshape(batch_size, target_seq_len, -1)
 
 # Compute the output projection.
@@ -28,7 +28,9 @@ def build_rope_cache(
 dtype: torch.dtype = torch.float32,
 device: torch.device = None,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-"""
+"""Precomputes Rotary Positional Embeddings.
+
+Precompute Rotary Positional Embedding Sin and Cos values for quick lookup
 during the inference.
 
 Args:
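
The sin/cos lookup the docstring refers to is the usual RoPE precomputation. A minimal sketch, not the package's exact implementation; the parameter names mirror the call site shown in an earlier hunk (size, dim, base, condense_ratio, dtype):

import torch

def rope_cache_sketch(size, dim, base=10_000, condense_ratio=1, dtype=torch.float32):
  # One (cos, sin) pair per position and per rotary channel pair.
  theta = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
  seq_idx = torch.arange(size) / condense_ratio
  idx_theta = torch.outer(seq_idx, theta)
  return torch.cos(idx_theta).to(dtype), torch.sin(idx_theta).to(dtype)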
@@ -84,16 +86,22 @@ def relative_position_bucket(
 num_buckets: int,
 max_distance: int,
 ) -> torch.Tensor:
-"""
-
+"""Adapted from Mesh Tensorflow:
+
 https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593
 
-Translate relative position to a bucket number for relative attention. The
-
-
-
-
-
+Translate relative position to a bucket number for relative attention. The
+relative position is defined as
+memory_position - query_position, i.e. the distance in tokens from the
+attending position to the attended-to
+position. If bidirectional=False, then positive relative positions are
+invalid. We use smaller buckets for
+small absolute relative_position and larger buckets for larger absolute
+relative_positions. All relative
+positions >=max_distance map to the same bucket. All relative positions
+<=-max_distance map to the same bucket.
+This should allow for more graceful generalization to longer sequences than
+the model has been trained on
 
 Args:
 relative_position: an int32 Tensor
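
The reflowed docstring describes the standard Mesh-TensorFlow/T5 bucketing scheme: half of the buckets cover small exact offsets, the rest cover exponentially growing ranges up to max_distance. A hedged sketch of that scheme for non-negative offsets, based on the referenced Mesh Tensorflow code rather than copied from this package (the clamp is only there to avoid log(0)):

import math
import torch

def bucket_large_positions(relative_position, num_buckets=32, max_distance=128):
  # relative_position is assumed non-negative here; sign handling happens earlier upstream.
  max_exact = num_buckets // 2
  is_small = relative_position < max_exact
  if_large = max_exact + (
      torch.log(relative_position.float().clamp(min=1) / max_exact)
      / math.log(max_distance / max_exact)
      * (num_buckets - max_exact)
  ).to(torch.long)
  if_large = torch.min(if_large, torch.full_like(if_large, num_buckets - 1))
  return torch.where(is_small, relative_position.to(torch.long), if_large)

Under this sketch, with the defaults num_buckets=32 and max_distance=128, offsets 0 through 15 keep their own buckets, while offsets 16, 32, 64 and 127 land in log-spaced buckets 16, 21, 26 and 31.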
@@ -102,7 +110,8 @@ def relative_position_bucket(
 max_distance: an integer for max distance.
 
 Returns:
-a Tensor with the same shape as relative_position, containing int32 values
+a Tensor with the same shape as relative_position, containing int32 values
+in the range [0, num_buckets)
 """
 relative_buckets = 0
 if bidirectional:
@@ -119,7 +128,8 @@ def relative_position_bucket(
 max_exact = num_buckets // 2
 is_small = relative_position < max_exact
 
-# The other half of the buckets are for logarithmically bigger bins in
+# The other half of the buckets are for logarithmically bigger bins in
+# positions up to max_distance
 relative_position_if_large = max_exact + (
 torch.log(relative_position.float() / max_exact)
 / math.log(max_distance / max_exact)
@@ -148,7 +158,8 @@ def build_relative_position_buckets(
 Args:
 query_length: an integer of length of current query tensor.
 key_length: an integer of length of current key tensor.
-bidirectional: a boolean - whether the attention is bidirectional, default
+bidirectional: a boolean - whether the attention is bidirectional, default
+is True.
 num_buckets: an integer for number of buckets, default is 32.
 max_distance: an integer for max distance, default is 128.
 
@@ -33,11 +33,9 @@ class SequentialFeedForward(nn.Module):
 ):
 """Init function for feedforward layer.
 
-Args:
-
-
-activation(Callable): activation function used in this block.
-use_bias(Boolean): whether to use bias. Default is false.
+Args: dim(int): embedding size. hidden_dim(int): hidden dim size of the
+feedforward layer. activation(Callable): activation function used in this
+block. use_bias(Boolean): whether to use bias. Default is false.
 """
 super().__init__()
 self.act = activation
@@ -71,11 +69,9 @@ class GatedFeedForward(nn.Module):
 ):
 """Init function for feedforward layer.
 
-Args:
-
-
-activation(Callable): activation function used in this block.
-use_bias(Boolean): whether to use bias. Default is false.
+Args: dim(int): embedding size. hidden_dim(int): hidden dim size of the
+feedforward layer. activation(Callable): activation function used in this
+block. use_bias(Boolean): whether to use bias. Default is false.
 """
 super().__init__()
 self.act = activation
@@ -55,9 +55,10 @@ class FeedForwardType(enum.Enum):
 
 @dataclass
 class AttentionConfig:
-"""Attention
+"""Attention model's parameters."""
 
 num_heads: int
+head_dim: int
 # Used to determine number of groups in grouped query attention (GQA)
 # https://arxiv.org/pdf/2305.13245.pdf
 num_query_groups: Optional[int]
@@ -156,7 +157,3 @@ class ModelConfig:
 return self.kv_cache_max_len
 else:
 return self.max_seq_len
-
-@property
-def head_dim(self) -> int:
-return self.embedding_dim // self.attn_config.num_heads
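
Taken together with the AttentionConfig change above, this moves head_dim from a value derived inside ModelConfig to an explicit per-attention field that callers set directly (as the TinyLlama hunk does with head_dim=64). A small before/after sketch; the 2048-wide embedding is an assumption used only to show the two values agree:

embedding_dim, num_heads = 2048, 32  # assumed TinyLlama-style widths, not read from the package

head_dim_derived = embedding_dim // num_heads  # old ModelConfig.head_dim property
head_dim_explicit = 64                         # new AttentionConfig.head_dim field
assert head_dim_derived == head_dim_explicit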
@@ -21,12 +21,12 @@ import torch
 class RMSNorm(torch.nn.Module):
 
 def __init__(self, dim: int, eps: float = 1e-6, zero_centered_gamma=False):
-"""
-Initialize the RMSNorm layer.
+"""Initialize the RMSNorm layer.
 
 Args:
 dim (int): dimension of the input tensor.
-eps (float): A small float value to ensure numerical stability (default:
+eps (float): A small float value to ensure numerical stability (default:
+1e-6).
 """
 super().__init__()
 self.eps = eps
@@ -34,8 +34,7 @@ class RMSNorm(torch.nn.Module):
 self.zero_centered_gamma = zero_centered_gamma
 
 def _norm(self, x):
-"""
-Apply RMSNorm normalization.
+"""Apply RMSNorm normalization.
 
 Args:
 x (torch.Tensor): input tensor.
@@ -46,8 +45,7 @@ class RMSNorm(torch.nn.Module):
 return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
 
 def forward(self, x):
-"""
-Running the forward pass of RMSNorm layer.
+"""Running the forward pass of RMSNorm layer.
 
 Args:
 x (torch.Tensor): input tensor.
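
As a quick sanity check of the _norm expression shown in context above, the normalization drives the per-row RMS to roughly 1; a tiny illustrative snippet, not taken from the package:

import torch

x = torch.randn(2, 8)
y = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + 1e-6)
print(y.pow(2).mean(-1))  # both values are ~1.0: the RMS has been normalized away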
@@ -22,9 +22,9 @@ def apply_rope(
 """Computes rotary positional embedding.
 
 Args:
-x
-cos
-sin
+x: the input tensor.
+cos: cosine value for the rope.
+sin: sin value for the rope.
 
 Returns:
 output tensor of RoPE.
@@ -105,7 +105,6 @@ class AttentionBlock2D(nn.Module):
 """2D self attention block
 
 x = SelfAttention(Norm(input_tensor)) + x
-
 """
 
 def __init__(self, config: unet_cfg.AttentionBlock2DConfig):
@@ -161,14 +160,14 @@ class CrossAttentionBlock2D(nn.Module):
 """2D cross attention block
 
 x = CrossAttention(Norm(input_tensor), context) + x
-
 """
 
 def __init__(self, config: unet_cfg.CrossAttentionBlock2DConfig):
 """Initialize an instance of the AttentionBlock2D.
 
 Args:
-config (unet_cfg.CrossAttentionBlock2DConfig): the configuration of this
+config (unet_cfg.CrossAttentionBlock2DConfig): the configuration of this
+block.
 """
 super().__init__()
 self.config = config
@@ -191,7 +190,8 @@ class CrossAttentionBlock2D(nn.Module):
 
 Args:
 input_tensor (torch.Tensor): the input tensor.
-context_tensor (torch.Tensor): the context tensor to apply cross attention
+context_tensor (torch.Tensor): the context tensor to apply cross attention
+on.
 
 Returns:
 output activation tensor after cross attention.
@@ -220,7 +220,6 @@ class FeedForwardBlock2D(nn.Module):
 """2D feed forward block
 
 x = w2(Activation(w1(Norm(x)))) + x
-
 """
 
 def __init__(
@@ -291,15 +290,14 @@ class TransformerBlock2D(nn.Module):
 └─────────┬─────────┘
 ▼
 hidden_states
-
-
 """
 
 def __init__(self, config: unet_cfg.TransformerBlock2DConfig):
 """Initialize an instance of the TransformerBlock2D.
 
 Args:
-config (unet_cfg.TransformerBlock2Dconfig): the configuration of this
+config (unet_cfg.TransformerBlock2Dconfig): the configuration of this
+block.
 """
 super().__init__()
 self.config = config
@@ -329,7 +327,8 @@ class TransformerBlock2D(nn.Module):
 
 Args:
 input_tensor (torch.Tensor): the input tensor.
-context_tensor (torch.Tensor): the context tensor to apply cross attention
+context_tensor (torch.Tensor): the context tensor to apply cross attention
+on.
 
 Returns:
 output activation tensor after transformer block.
@@ -377,7 +376,8 @@ class DownEncoderBlock2D(nn.Module):
 """Initialize an instance of the DownEncoderBlock2D.
 
 Args:
-config (unet_cfg.DownEncoderBlock2DConfig): the configuration of this
+config (unet_cfg.DownEncoderBlock2DConfig): the configuration of this
+block.
 """
 super().__init__()
 self.config = config
@@ -418,10 +418,13 @@ class DownEncoderBlock2D(nn.Module):
 
 Args:
 input_tensor (torch.Tensor): the input tensor.
-time_emb (torch.Tensor): optional time embedding tensor, if the block is
-time embedding.
-context_tensor (torch.Tensor): optional context tensor, if the block if
-
+time_emb (torch.Tensor): optional time embedding tensor, if the block is
+configured to accept time embedding.
+context_tensor (torch.Tensor): optional context tensor, if the block if
+configured to use transofrmer block.
+output_hidden_states (bool): whether to output hidden states, usually for
+skip connections.
+
 Returns:
 output hidden_states tensor after DownEncoderBlock2D.
 """
@@ -523,9 +526,10 @@ class UpDecoderBlock2D(nn.Module):
 
 Args:
 input_tensor (torch.Tensor): the input tensor.
-time_emb (torch.Tensor): optional time embedding tensor, if the block is
-time embedding.
-context_tensor (torch.Tensor): optional context tensor, if the block if
+time_emb (torch.Tensor): optional time embedding tensor, if the block is
+configured to accept time embedding.
+context_tensor (torch.Tensor): optional context tensor, if the block if
+configured to use transofrmer block.
 
 Returns:
 output hidden_states tensor after UpDecoderBlock2D.
@@ -576,7 +580,8 @@ class SkipUpDecoderBlock2D(nn.Module):
 """Initialize an instance of the SkipUpDecoderBlock2D.
 
 Args:
-config (unet_cfg.SkipUpDecoderBlock2DConfig): the configuration of this
+config (unet_cfg.SkipUpDecoderBlock2DConfig): the configuration of this
+block.
 """
 super().__init__()
 self.config = config
@@ -632,10 +637,12 @@ class SkipUpDecoderBlock2D(nn.Module):
 
 Args:
 input_tensor (torch.Tensor): the input tensor.
-skip_connection_tensors (List[torch.Tensor]): the skip connection tensors
-
-
-
+skip_connection_tensors (List[torch.Tensor]): the skip connection tensors
+from encoder blocks.
+time_emb (torch.Tensor): optional time embedding tensor, if the block is
+configured to accept time embedding.
+context_tensor (torch.Tensor): optional context tensor, if the block if
+configured to use transofrmer block.
 
 Returns:
 output hidden_states tensor after SkipUpDecoderBlock2D.
@@ -738,10 +745,10 @@ class MidBlock2D(nn.Module):
 
 Args:
 input_tensor (torch.Tensor): the input tensor.
-time_emb (torch.Tensor): optional time embedding tensor, if the block is
-time embedding.
-context_tensor (torch.Tensor): optional context tensor, if the block if
-transofrmer block.
+time_emb (torch.Tensor): optional time embedding tensor, if the block is
+configured to accept time embedding.
+context_tensor (torch.Tensor): optional context tensor, if the block if
+configured to use transofrmer block.
 
 Returns:
 output hidden_states tensor after MidBlock2D.