ai-edge-torch-nightly 0.2.0.dev20240805__py3-none-any.whl → 0.2.0.dev20240807__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-edge-torch-nightly might be problematic.
- ai_edge_torch/__init__.py +5 -5
- ai_edge_torch/{convert → _convert}/conversion.py +40 -50
- ai_edge_torch/_convert/conversion_utils.py +64 -0
- ai_edge_torch/{convert → _convert}/converter.py +83 -43
- ai_edge_torch/{convert → _convert}/fx_passes/__init__.py +9 -9
- ai_edge_torch/{convert → _convert}/fx_passes/build_aten_composite_pass.py +51 -26
- ai_edge_torch/{convert → _convert}/fx_passes/build_interpolate_composite_pass.py +11 -8
- ai_edge_torch/{convert → _convert}/fx_passes/canonicalize_pass.py +3 -4
- ai_edge_torch/{convert → _convert}/fx_passes/inject_mlir_debuginfo_pass.py +2 -2
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/__init__.py +16 -0
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_check.py +7 -5
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_mark.py +2 -0
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +1 -0
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +14 -6
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +5 -6
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +17 -14
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/op_func_registry.py +3 -2
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/pass_body.py +15 -17
- ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/utils.py +2 -0
- ai_edge_torch/_convert/signature.py +100 -0
- ai_edge_torch/{convert → _convert}/test/test_convert.py +50 -52
- ai_edge_torch/{convert → _convert}/test/test_convert_composites.py +16 -12
- ai_edge_torch/{convert → _convert}/test/test_convert_multisig.py +6 -4
- ai_edge_torch/{convert → _convert}/test/test_to_channel_last_io.py +5 -4
- ai_edge_torch/{convert → _convert}/to_channel_last_io.py +4 -1
- ai_edge_torch/config.py +24 -0
- ai_edge_torch/conftest.py +20 -0
- ai_edge_torch/debug/culprit.py +22 -22
- ai_edge_torch/debug/test/test_culprit.py +4 -3
- ai_edge_torch/debug/test/test_search_model.py +5 -5
- ai_edge_torch/debug/utils.py +11 -2
- ai_edge_torch/generative/examples/experimental/gemma/convert_to_tflite.py +3 -3
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +4 -1
- ai_edge_torch/generative/examples/experimental/phi/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/experimental/phi/phi2.py +4 -1
- ai_edge_torch/generative/examples/experimental/tiny_llama/convert_to_tflite.py +4 -5
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +4 -1
- ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/gemma/gemma.py +4 -1
- ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/phi2/phi2.py +4 -1
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +2 -0
- ai_edge_torch/generative/examples/stable_diffusion/decoder.py +3 -2
- ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +57 -20
- ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +20 -9
- ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +1 -0
- ai_edge_torch/generative/examples/t5/t5.py +2 -2
- ai_edge_torch/generative/examples/t5/t5_attention.py +15 -13
- ai_edge_torch/generative/examples/test_models/toy_model.py +4 -1
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +6 -5
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +7 -7
- ai_edge_torch/generative/examples/tiny_llama/__init__.py +14 -0
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +5 -5
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +4 -1
- ai_edge_torch/generative/fx_passes/__init__.py +2 -2
- ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +4 -3
- ai_edge_torch/generative/layers/attention.py +35 -26
- ai_edge_torch/generative/layers/attention_utils.py +23 -12
- ai_edge_torch/generative/layers/builder.py +0 -1
- ai_edge_torch/generative/layers/feed_forward.py +6 -10
- ai_edge_torch/generative/layers/kv_cache.py +0 -1
- ai_edge_torch/generative/layers/model_config.py +2 -5
- ai_edge_torch/generative/layers/normalization.py +5 -7
- ai_edge_torch/generative/layers/rotary_position_embedding.py +3 -3
- ai_edge_torch/generative/layers/unet/blocks_2d.py +33 -26
- ai_edge_torch/generative/layers/unet/model_config.py +14 -15
- ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/__init__.py +14 -0
- ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py +0 -2
- ai_edge_torch/generative/quantize/quant_recipe.py +8 -6
- ai_edge_torch/generative/quantize/quant_recipe_utils.py +2 -1
- ai_edge_torch/generative/test/test_experimental_ekv.py +6 -7
- ai_edge_torch/generative/test/{loader_test.py → test_loader.py} +4 -3
- ai_edge_torch/generative/test/test_model_conversion.py +24 -25
- ai_edge_torch/generative/test/test_quantize.py +10 -5
- ai_edge_torch/generative/utilities/loader.py +12 -12
- ai_edge_torch/generative/utilities/stable_diffusion_loader.py +69 -24
- ai_edge_torch/generative/utilities/t5_loader.py +12 -13
- ai_edge_torch/hlfb/__init__.py +1 -1
- ai_edge_torch/hlfb/mark_pattern/__init__.py +9 -6
- ai_edge_torch/hlfb/mark_pattern/passes.py +23 -3
- ai_edge_torch/hlfb/mark_pattern/pattern.py +23 -23
- ai_edge_torch/hlfb/test/test_mark_pattern.py +13 -12
- ai_edge_torch/hlfb/test/test_stablehlo_composite_builder.py +8 -6
- ai_edge_torch/{convert/fx_passes/optimize_layout_transposes_pass → lowertools}/__init__.py +1 -1
- ai_edge_torch/lowertools/_shim.py +80 -0
- ai_edge_torch/lowertools/common_utils.py +89 -0
- ai_edge_torch/lowertools/odml_torch_utils.py +201 -0
- ai_edge_torch/{convert/conversion_utils.py → lowertools/torch_xla_utils.py} +35 -214
- ai_edge_torch/model.py +14 -9
- ai_edge_torch/quantize/pt2e_quantizer.py +22 -9
- ai_edge_torch/quantize/pt2e_quantizer_utils.py +13 -12
- ai_edge_torch/quantize/quant_config.py +7 -7
- ai_edge_torch/testing/model_coverage/model_coverage.py +19 -10
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.2.0.dev20240805.dist-info → ai_edge_torch_nightly-0.2.0.dev20240807.dist-info}/METADATA +1 -1
- ai_edge_torch_nightly-0.2.0.dev20240807.dist-info/RECORD +141 -0
- ai_edge_torch_nightly-0.2.0.dev20240805.dist-info/RECORD +0 -133
- /ai_edge_torch/{convert → _convert}/__init__.py +0 -0
- /ai_edge_torch/{convert → _convert}/fx_passes/_pass_base.py +0 -0
- /ai_edge_torch/{convert → _convert}/test/__init__.py +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240805.dist-info → ai_edge_torch_nightly-0.2.0.dev20240807.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240805.dist-info → ai_edge_torch_nightly-0.2.0.dev20240807.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240805.dist-info → ai_edge_torch_nightly-0.2.0.dev20240807.dist-info}/top_level.txt +0 -0
@@ -33,17 +33,17 @@ def convert_phi2_to_tflite(
 quantize: bool = True,
 ):
 """An example method for converting a Phi-2 model to multi-signature
-tflite model.
 
+tflite model.
 Args:
-checkpoint_path (str): The filepath to the model checkpoint, or
-
+checkpoint_path (str): The filepath to the model checkpoint, or directory
+holding the checkpoint.
 prefill_seq_len (int, optional): The maximum size of prefill input tensor.
 Defaults to 512.
 kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
 including both prefill and decode. Defaults to 1024.
-quantize (bool, optional): Whether the model should be quanized.
-
+quantize (bool, optional): Whether the model should be quanized. Defaults
+to True.
 """
 pytorch_model = phi2.build_model(
 checkpoint_path, kv_cache_max_len=kv_cache_max_len

@@ -68,7 +68,9 @@ class Phi2(nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.kv_cache_max,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -118,6 +120,7 @@ class Phi2(nn.Module):
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=32,
+head_dim=80,
 num_query_groups=32,
 rotary_percentage=0.4,
 qkv_use_bias=True,

@@ -21,7 +21,7 @@ import os
 from pathlib import Path
 
 import ai_edge_torch
-from ai_edge_torch.generative.examples.experimental.tiny_llama import tiny_llama
+from ai_edge_torch.generative.examples.experimental.tiny_llama import tiny_llama
 from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils
 from ai_edge_torch.generative.quantize import quant_recipes
 import torch

@@ -33,8 +33,7 @@ def convert_tiny_llama_to_tflite(
 kv_cache_max_len: int = 1024,
 quantize: bool = True,
 ):
-"""An example
-tflite model.
+"""An example for converting TinyLlama model to multi-signature tflite model.
 
 Args:
 checkpoint_path (str): The filepath to the model checkpoint, or directory

@@ -43,8 +42,8 @@ def convert_tiny_llama_to_tflite(
 Defaults to 512.
 kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
 including both prefill and decode. Defaults to 1024.
-quantize (bool, optional): Whether the model should be quanized.
-
+quantize (bool, optional): Whether the model should be quanized. Defaults
+to True.
 """
 pytorch_model = tiny_llama.build_model(
 checkpoint_path, kv_cache_max_len=kv_cache_max_len

@@ -70,7 +70,9 @@ class TinyLLamma(nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.kv_cache_max,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -121,6 +123,7 @@ class TinyLLamma(nn.Module):
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=32,
+head_dim=64,
 num_query_groups=4,
 rotary_percentage=1.0,
 )

@@ -28,17 +28,17 @@ def convert_gemma_to_tflite(
 kv_cache_max_len: int = 1024,
 quantize: bool = True,
 ):
-"""
-tflite model.
+"""Converts a Gemma 2B model to multi-signature tflite model.
 
 Args:
-checkpoint_path (str): The filepath to the model checkpoint, or directory
+checkpoint_path (str): The filepath to the model checkpoint, or directory
+holding the checkpoint.
 prefill_seq_len (int, optional): The maximum size of prefill input tensor.
 Defaults to 512.
 kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
 including both prefill and decode. Defaults to 1024.
-quantize (bool, optional): Whether the model should be quanized.
-
+quantize (bool, optional): Whether the model should be quanized. Defaults
+to True.
 """
 pytorch_model = gemma.build_2b_model(
 checkpoint_path, kv_cache_max_len=kv_cache_max_len

@@ -68,7 +68,9 @@ class Gemma(nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.kv_cache_max,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -113,6 +115,7 @@ class Gemma(nn.Module):
 def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=8,
+head_dim=256,
 num_query_groups=1,
 rotary_percentage=1.0,
 )

@@ -28,17 +28,17 @@ def convert_phi2_to_tflite(
 kv_cache_max_len: int = 1024,
 quantize: bool = True,
 ):
-"""
-tflite model.
+"""Converts a Phi-2 model to multi-signature tflite model.
 
 Args:
-checkpoint_path (str): The filepath to the model checkpoint, or directory
+checkpoint_path (str): The filepath to the model checkpoint, or directory
+holding the checkpoint.
 prefill_seq_len (int, optional): The maximum size of prefill input tensor.
 Defaults to 512.
 kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
 including both prefill and decode. Defaults to 1024.
-quantize (bool, optional): Whether the model should be quanized.
-
+quantize (bool, optional): Whether the model should be quanized. Defaults
+to True.
 """
 pytorch_model = phi2.build_model(
 checkpoint_path, kv_cache_max_len=kv_cache_max_len

@@ -63,7 +63,9 @@ class Phi2(nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.kv_cache_max,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -107,6 +109,7 @@ class Phi2(nn.Module):
 def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=32,
+head_dim=80,
 num_query_groups=32,
 rotary_percentage=0.4,
 qkv_use_bias=True,

@@ -49,6 +49,7 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 
 class CLIP(nn.Module):
 """CLIP text encoder
+
 For details, see https://arxiv.org/abs/2103.00020
 """
 

@@ -92,6 +93,7 @@ def get_model_config() -> cfg.ModelConfig:
 
 attn_config = cfg.AttentionConfig(
 num_heads=num_heads,
+head_dim=embedding_dim // num_heads,
 num_query_groups=num_query_groups,
 rotary_percentage=0.0,
 qkv_use_bias=True,

@@ -15,9 +15,9 @@
 
 import ai_edge_torch.generative.layers.builder as layers_builder
 import ai_edge_torch.generative.layers.model_config as layers_cfg
-
+from ai_edge_torch.generative.layers.unet import blocks_2d
 import ai_edge_torch.generative.layers.unet.model_config as unet_cfg
-
+from ai_edge_torch.generative.utilities import stable_diffusion_loader
 import torch
 from torch import nn
 

@@ -288,6 +288,7 @@ def get_model_config() -> unet_cfg.AutoEncoderConfig:
 normalization_config=norm_config,
 attention_config=layers_cfg.AttentionConfig(
 num_heads=1,
+head_dim=block_out_channels[-1],
 num_query_groups=1,
 qkv_use_bias=True,
 output_proj_use_bias=True,

@@ -15,9 +15,9 @@
 
 import ai_edge_torch.generative.layers.builder as layers_builder
 import ai_edge_torch.generative.layers.model_config as layers_cfg
-
+from ai_edge_torch.generative.layers.unet import blocks_2d
 import ai_edge_torch.generative.layers.unet.model_config as unet_cfg
-
+from ai_edge_torch.generative.utilities import stable_diffusion_loader
 import torch
 from torch import nn
 

@@ -195,6 +195,31 @@ TENSOR_NAMES = stable_diffusion_loader.DiffusionModelLoader.TensorNames(
 )
 
 
+def build_attention_config(
+num_heads,
+dim,
+num_query_groups,
+rotary_percentage=0.0,
+qkv_transpose_before_split=True,
+qkv_use_bias=False,
+output_proj_use_bias=True,
+enable_kv_cache=False,
+qkv_fused_interleaved=False,
+):
+
+return layers_cfg.AttentionConfig(
+num_heads=num_heads,
+head_dim=dim // num_heads,
+num_query_groups=num_query_groups,
+rotary_percentage=rotary_percentage,
+qkv_transpose_before_split=qkv_transpose_before_split,
+qkv_use_bias=qkv_use_bias,
+output_proj_use_bias=output_proj_use_bias,
+enable_kv_cache=enable_kv_cache,
+qkv_fused_interleaved=qkv_fused_interleaved,
+)
+
+
 class TimeEmbedding(nn.Module):
 
 def __init__(self, in_dim, out_dim):

@@ -267,17 +292,6 @@ class Diffusion(nn.Module):
 config.in_channels, block_out_channels[0], kernel_size=3, padding=1
 )
 
-attention_config = layers_cfg.AttentionConfig(
-num_heads=config.transformer_num_attention_heads,
-num_query_groups=config.transformer_num_attention_heads,
-rotary_percentage=0.0,
-qkv_transpose_before_split=True,
-qkv_use_bias=False,
-output_proj_use_bias=True,
-enable_kv_cache=False,
-qkv_fused_interleaved=False,
-)
-
 # Down encoders.
 down_encoders = []
 output_channel = block_out_channels[0]

@@ -312,7 +326,11 @@ class Diffusion(nn.Module):
 dim=output_channel,
 attention_batch_size=config.transformer_batch_size,
 normalization_config=config.transformer_norm_config,
-attention_config=
+attention_config=build_attention_config(
+num_heads=config.transformer_num_attention_heads,
+dim=output_channel,
+num_query_groups=config.transformer_num_attention_heads,
+),
 enable_hlfb=False,
 ),
 cross_attention_block_config=unet_cfg.CrossAttentionBlock2DConfig(

@@ -320,7 +338,11 @@ class Diffusion(nn.Module):
 cross_dim=config.transformer_cross_attention_dim,
 attention_batch_size=config.transformer_batch_size,
 normalization_config=config.transformer_norm_config,
-attention_config=
+attention_config=build_attention_config(
+num_heads=config.transformer_num_attention_heads,
+dim=output_channel,
+num_query_groups=config.transformer_num_attention_heads,
+),
 enable_hlfb=False,
 ),
 pre_conv_normalization_config=config.transformer_pre_conv_norm_config,

@@ -374,7 +396,11 @@ class Diffusion(nn.Module):
 dim=mid_block_channels,
 attention_batch_size=config.transformer_batch_size,
 normalization_config=config.transformer_norm_config,
-attention_config=
+attention_config=build_attention_config(
+num_heads=config.transformer_num_attention_heads,
+dim=mid_block_channels,
+num_query_groups=config.transformer_num_attention_heads,
+),
 enable_hlfb=False,
 ),
 cross_attention_block_config=unet_cfg.CrossAttentionBlock2DConfig(

@@ -382,7 +408,11 @@ class Diffusion(nn.Module):
 cross_dim=config.transformer_cross_attention_dim,
 attention_batch_size=config.transformer_batch_size,
 normalization_config=config.transformer_norm_config,
-attention_config=
+attention_config=build_attention_config(
+num_heads=config.transformer_num_attention_heads,
+dim=mid_block_channels,
+num_query_groups=config.transformer_num_attention_heads,
+),
 enable_hlfb=False,
 ),
 pre_conv_normalization_config=config.transformer_pre_conv_norm_config,

@@ -437,7 +467,11 @@ class Diffusion(nn.Module):
 dim=output_channel,
 attention_batch_size=config.transformer_batch_size,
 normalization_config=config.transformer_norm_config,
-attention_config=
+attention_config=build_attention_config(
+num_heads=config.transformer_num_attention_heads,
+dim=output_channel,
+num_query_groups=config.transformer_num_attention_heads,
+),
 enable_hlfb=False,
 ),
 cross_attention_block_config=unet_cfg.CrossAttentionBlock2DConfig(

@@ -445,7 +479,11 @@ class Diffusion(nn.Module):
 cross_dim=config.transformer_cross_attention_dim,
 attention_batch_size=config.transformer_batch_size,
 normalization_config=config.transformer_norm_config,
-attention_config=
+attention_config=build_attention_config(
+num_heads=config.transformer_num_attention_heads,
+dim=output_channel,
+num_query_groups=config.transformer_num_attention_heads,
+),
 enable_hlfb=False,
 ),
 pre_conv_normalization_config=config.transformer_pre_conv_norm_config,

@@ -543,7 +581,6 @@ def get_model_config(batch_size: int) -> unet_cfg.DiffusionModelConfig:
 
 Retruns:
 The configuration of diffusion model of Stable Diffusion v1.5.
-
 """
 in_channels = 4
 out_channels = 4

@@ -127,7 +127,9 @@ def run_tflite_pipeline(
 input_image: Optional[Image.Image] = None,
 ):
 """Run stable diffusion pipeline with tflite model.
+
 model:
+
 StableDiffsuion model.
 prompt:
 The prompt to guide the image generation.

@@ -136,27 +138,36 @@ def run_tflite_pipeline(
 uncond_prompt:
 The prompt not to guide the image generation.
 cfg_scale:
-Guidance scale of classifier-free guidance. Higher guidance scale encourages
-
+Guidance scale of classifier-free guidance. Higher guidance scale encourages
+to generate
+images that are closely linked to the text `prompt`, usually at the expense
+of lower
 image quality.
 height:
 The height in pixels of the generated image.
 width:
 The width in pixels of the generated image.
 sampler:
-A sampler to be used to denoise the encoded image latents. Can be one of
+A sampler to be used to denoise the encoded image latents. Can be one of
+`k_lms, `k_euler`,
 or `k_euler_ancestral`.
 n_inference_steps:
-The number of denoising steps. More denoising steps usually lead to a higher
+The number of denoising steps. More denoising steps usually lead to a higher
+quality image at the
 expense of slower inference. This parameter will be modulated by `strength`.
 seed:
 A seed to make generation deterministic.
 strength:
-Conceptually, indicates how much to transform the reference `input_image`.
-
-
-
-
+Conceptually, indicates how much to transform the reference `input_image`.
+Must be between 0 and 1.
+`input_image` will be used as a starting point, adding more noise to it the
+larger the `strength`.
+The number of denoising steps depends on the amount of noise initially
+added. When `strength` is 1,
+added noise will be maximum and the denoising process will run for the full
+number of iterations
+specified in `n_inference_steps`. A value of 1, therefore, essentially
+ignores `input_image`.
 input_image:
 Image which is served as the starting point for the image generation.
 """

@@ -28,6 +28,7 @@ class SamplerInterface(abc.ABC):
 @abc.abstractmethod
 def set_strength(self, strength: float = 1) -> None:
 """Set the strength of initial step.
+
 Conceptually, indicates how much to transform the reference `input_images`.
 """
 return NotImplemented

@@ -17,14 +17,13 @@
 import copy
 import os
 from pathlib import Path
-from typing import Optional
+from typing import Optional
 
 from ai_edge_torch.generative.examples.t5.t5_attention import EncoderDecoderBlock # NOQA
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.builder as builder
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.utilities.t5_loader as loading_utils
-import numpy as np
 import torch
 import torch.nn as nn
 

@@ -371,6 +370,7 @@ class T5Decoder(nn.Module):
 def get_model_config_t5() -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=12,
+head_dim=64,
 num_query_groups=12,
 qkv_use_bias=False,
 relative_attention_num_buckets=32,

@@ -37,10 +37,10 @@ class EncoderDecoderBlock(nn.Module):
 """Initialize an instance of the EncoderDecoderBlock.
 
 Args:
-config (cfg.ModelConfig): the configuration object
-
-has_relative_attention_bias (bool): whether the
-
+config (cfg.ModelConfig): the configuration object for this transformer
+block.
+has_relative_attention_bias (bool): whether the self attention block has
+relative bias.
 """
 
 super().__init__()

@@ -143,8 +143,10 @@ class T5Attention(CrossAttention):
 Args:
 dim (int): causal attention's input/output dimmension.
 config (cfg.AttentionConfig): attention specific configurations.
-norm_config (cfg.NormalizationConfig): normalization configure before
-
+norm_config (cfg.NormalizationConfig): normalization configure before
+attention.
+kv_cache_max (int): determines the size of the KV Cache buffer, if
+enabled.
 enable_hlfb (bool): whether hlfb is enabled or not.
 has_relative_attention_bias (bool): whether we compute relative bias.
 """

@@ -185,7 +187,7 @@ class T5Attention(CrossAttention):
 ) # batch size, sequence length, embedding dimensionality (n_embd)
 query_states = self.q_projection(x)
 query_states = query_states.reshape(
-B, T, -1, self.head_dim
+B, T, -1, self.config.head_dim
 ) # (B, T, nh_q, hs)
 
 if key_value_states is not None:

@@ -198,13 +200,13 @@ class T5Attention(CrossAttention):
 ) # batch size, sequence length, embedding dimensionality (n_embd)
 key_states = self.k_projection(key_value_states)
 value_states = self.v_projection(key_value_states)
-key_states = key_states.reshape(kvB, kvT, -1, self.head_dim)
-value_states = value_states.reshape(kvB, kvT, -1, self.head_dim)
+key_states = key_states.reshape(kvB, kvT, -1, self.config.head_dim)
+value_states = value_states.reshape(kvB, kvT, -1, self.config.head_dim)
 else:
 key_states = self.k_projection(x)
 value_states = self.v_projection(x)
-key_states = key_states.reshape(B, T, -1, self.head_dim)
-value_states = value_states.reshape(B, T, -1, self.head_dim)
+key_states = key_states.reshape(B, T, -1, self.config.head_dim)
+value_states = value_states.reshape(B, T, -1, self.config.head_dim)
 
 if key_value_states is None and self.kv_cache is not None:
 key_states, value_states = self.kv_cache.update_cache(

@@ -221,7 +223,7 @@ class T5Attention(CrossAttention):
 0
 ) # shape (1, num_heads, query_length, key_length)
 else:
-# position_bias = torch.zeros(B, self.n_heads, T, self.head_dim, dtype=torch.float32)
+# position_bias = torch.zeros(B, self.n_heads, T, self.config.head_dim, dtype=torch.float32)
 position_bias = torch.zeros_like(mask, dtype=torch.float32)
 
 mask = mask + position_bias

@@ -229,7 +231,7 @@ class T5Attention(CrossAttention):
 query_states,
 key_states,
 value_states,
-self.head_dim,
+self.config.head_dim,
 mask=mask,
 scale=1.0,
 )

@@ -43,7 +43,9 @@ class ToySingleLayerModel(torch.nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.max_seq_len,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -72,6 +74,7 @@ class ToySingleLayerModel(torch.nn.Module):
 def get_model_config() -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
 num_heads=32,
+head_dim=4,
 num_query_groups=4,
 rotary_percentage=1.0,
 enable_kv_cache=False,

@@ -17,6 +17,7 @@
 from typing import Tuple
 
 import ai_edge_torch
+from ai_edge_torch import lowertools
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.builder as builder
 from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils

@@ -24,7 +25,6 @@ from ai_edge_torch.generative.layers.experimental.attention import TransformerBl
 import ai_edge_torch.generative.layers.model_config as cfg
 import torch
 import torch.nn as nn
-import torch_xla
 
 RoPECache = Tuple[torch.Tensor, torch.Tensor]
 

@@ -46,7 +46,9 @@ class ToyModelWithExternalKV(torch.nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.max_seq_len,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -84,13 +86,12 @@ class ToyModelWithExternalKV(torch.nn.Module):
 
 def _export_stablehlo_mlir(model, args):
 ep = torch.export.export(model, args)
-
-return stablehlo_gm.get_stablehlo_text()
+return lowertools.exported_program_to_mlir_text(ep)
 
 
 def get_model_config() -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
-num_heads=32, num_query_groups=4, rotary_percentage=1.0
+num_heads=32, head_dim=4, num_query_groups=4, rotary_percentage=1.0
 )
 ff_config = cfg.FeedForwardConfig(
 type=cfg.FeedForwardType.GATED,
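The hunk above replaces the removed torch_xla-based export in the toy example with the new ai_edge_torch.lowertools helper (the same change appears again in the toy_model_with_kv_cache hunks below). A minimal, self-contained sketch of that flow follows; the TinyModel module and the example input shape are illustrative stand-ins, not code from the package:

import torch
import torch.nn as nn

from ai_edge_torch import lowertools


class TinyModel(nn.Module):
  """Illustrative stand-in for the toy models in this diff."""

  def forward(self, x):
    return torch.nn.functional.relu(x)


model = TinyModel().eval()
example_args = (torch.randn(1, 8),)

# Export to an ExportedProgram, then lower it to StableHLO MLIR text,
# mirroring _export_stablehlo_mlir in the hunks above.
ep = torch.export.export(model, example_args)
mlir_text = lowertools.exported_program_to_mlir_text(ep)
print(mlir_text)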
@@ -13,17 +13,16 @@
 # limitations under the License.
 # ==============================================================================
 # A toy example which has basic transformer block (w/ KV-Cache).
-from typing import
+from typing import Tuple
 
 import ai_edge_torch
+from ai_edge_torch import lowertools
 from ai_edge_torch.generative.layers.attention import TransformerBlock
 import ai_edge_torch.generative.layers.attention_utils as attn_utils
 import ai_edge_torch.generative.layers.builder as builder
 import ai_edge_torch.generative.layers.model_config as cfg
-import numpy as np
 import torch
 import torch.nn as nn
-import torch_xla
 
 RoPECache = Tuple[torch.Tensor, torch.Tensor]
 

@@ -45,7 +44,9 @@ class ToyModelWithKV(torch.nn.Module):
 )
 self.rope_cache = attn_utils.build_rope_cache(
 size=config.max_seq_len,
-dim=int(
+dim=int(
+config.attn_config.rotary_percentage * config.attn_config.head_dim
+),
 base=10_000,
 condense_ratio=1,
 dtype=torch.float32,

@@ -72,13 +73,12 @@ class ToyModelWithKV(torch.nn.Module):
 
 def _export_stablehlo_mlir(model, args):
 ep = torch.export.export(model, args)
-
-return stablehlo_gm.get_stablehlo_text()
+return lowertools.exported_program_to_mlir_text(ep)
 
 
 def get_model_config() -> cfg.ModelConfig:
 attn_config = cfg.AttentionConfig(
-num_heads=32, num_query_groups=4, rotary_percentage=1.0
+num_heads=32, head_dim=4, num_query_groups=4, rotary_percentage=1.0
 )
 ff_config = cfg.FeedForwardConfig(
 type=cfg.FeedForwardType.GATED,

@@ -0,0 +1,14 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
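A change that recurs across most of the model hunks in this diff: cfg.AttentionConfig now carries an explicit head_dim field, and the RoPE cache dimension is computed from config.attn_config.head_dim rather than a model-level head dimension. A minimal sketch of the new pattern, using the Phi-2 numbers shown in the hunks above (the size of 1024 matches the kv_cache_max default documented in the convert hunks and is used here only for illustration):

import torch

import ai_edge_torch.generative.layers.attention_utils as attn_utils
import ai_edge_torch.generative.layers.model_config as cfg

# head_dim is now an explicit field on AttentionConfig (values taken from the Phi-2 hunks).
attn_config = cfg.AttentionConfig(
    num_heads=32,
    head_dim=80,
    num_query_groups=32,
    rotary_percentage=0.4,
    qkv_use_bias=True,
)

# The RoPE cache dimension is derived from the explicit head_dim.
rope_cache = attn_utils.build_rope_cache(
    size=1024,  # kv_cache_max; 1024 is the documented default
    dim=int(attn_config.rotary_percentage * attn_config.head_dim),
    base=10_000,
    condense_ratio=1,
    dtype=torch.float32,
)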