PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

Files changed (169) hide show

ai_edge_torch/generative/examples/smollm/convert_to_tflite.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of converting SmolLM model to multi-signature tflite model."""
+import os
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.smollm import smollm
+from ai_edge_torch.generative.utilities import converter
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/smollm'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The tflite file path to export.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+def main(_):
+  pytorch_model = smollm.build_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
+  output_filename = f'smollm_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      quantize=_QUANTIZE.value,
+  )
+if __name__ == '__main__':
+  app.run(main)

ai_edge_torch/generative/examples/smollm/smollm.py ADDED Viewed

@@ -0,0 +1,101 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of building a SmolLM model."""
+import copy
+from ai_edge_torch.generative.examples.tiny_llama import tiny_llama
+import ai_edge_torch.generative.layers.model_config as cfg
+import ai_edge_torch.generative.utilities.loader as loading_utils
+from torch import nn
+TENSOR_NAMES = copy.copy(tiny_llama.TENSOR_NAMES)
+# SmolLM re-uses the embedding as the head projection layer.
+TENSOR_NAMES.lm_head = None
+class SmolLM(tiny_llama.TinyLlama):
+  """A SmolLM model built from the Edge Generative API layers.
+  SmolLM shares the same architecture as TinyLlama, but with different model
+  sizes.
+  """
+  def __init__(self, config: cfg.ModelConfig):
+    super().__init__(config)
+    # SmolLM re-uses the embedding as the head projection layer.
+    self.lm_head.weight.data = self.tok_embedding.weight.data
+def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  """Returns the model config for a SmolLM 135M model.
+  Args:
+    kv_cache_max_len (int): The maximum sequence length of the KV cache. Default
+      is 1024.
+  Returns:
+    The model config for a SmolLM model.
+  """
+  attn_config = cfg.AttentionConfig(
+      num_heads=9,
+      head_dim=64,
+      num_query_groups=3,
+      rotary_percentage=1.0,
+  )
+  ff_config = cfg.FeedForwardConfig(
+      type=cfg.FeedForwardType.GATED,
+      activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
+      intermediate_size=1536,
+  )
+  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.RMS_NORM)
+  block_config = cfg.TransformerBlockConfig(
+      attn_config=attn_config,
+      ff_config=ff_config,
+      pre_attention_norm_config=norm_config,
+      post_attention_norm_config=norm_config,
+  )
+  config = cfg.ModelConfig(
+      vocab_size=49152,
+      num_layers=30,
+      max_seq_len=2048,
+      embedding_dim=576,
+      kv_cache_max_len=kv_cache_max_len,
+      block_configs=block_config,
+      final_norm_config=norm_config,
+      enable_hlfb=True,
+  )
+  return config
+def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
+  config = get_model_config(**kwargs)
+  config.vocab_size = 128
+  config.num_layers = 2
+  # SmolLM has only one block config.
+  config.block_config(0).ff_config.intermediate_size = 64
+  return config
+def build_model(checkpoint_path: str, **kwargs) -> nn.Module:
+  config = get_model_config(**kwargs)
+  model = SmolLM(config)
+  loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
+  # Since embedding and lm-head use the same weight, we need to set strict
+  # to False.
+  loader.load(model, strict=False)
+  model.eval()
+  return model

ai_edge_torch/generative/examples/smollm/verify.py ADDED Viewed

@@ -0,0 +1,62 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Verifies the reauthored SmolLM-135M model."""
+import logging
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.smollm import smollm
+from ai_edge_torch.generative.utilities import verifier
+import transformers
+_PROMPTS = flags.DEFINE_multi_string(
+    "prompts",
+    "What is the meaning of life?",
+    "The input prompts to generate answers.",
+)
+def main(_):
+  checkpoint = "HuggingFaceTB/SmolLM-135M"
+  logging.info("Loading the original model from: %s", checkpoint)
+  wrapper_model = verifier.ModelWrapper(
+      model=transformers.AutoModelForCausalLM.from_pretrained(checkpoint),
+  )
+  # Locate the cached dir.
+  cached_config_file = transformers.utils.cached_file(
+      checkpoint, transformers.utils.CONFIG_NAME
+  )
+  reauthored_checkpoint = pathlib.Path(cached_config_file).parent
+  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
+  reauthored_model = smollm.build_model(reauthored_checkpoint)
+  logging.info("Loading the tokenizer from: %s", checkpoint)
+  tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
+  verifier.verify_reauthored_model(
+      original_model=wrapper_model,
+      reauthored_model=reauthored_model,
+      tokenizer=tokenizer,
+      generate_prompts=_PROMPTS.value,
+      atol=1e-04,
+  )
+if __name__ == "__main__":
+  app.run(main)

ai_edge_torch/generative/examples/stable_diffusion/attention.py CHANGED Viewed

@@ -73,7 +73,9 @@ class SelfAttention(nn.Module):
 class CrossAttention(nn.Module):
-  def __init__(self, n_heads, d_embed, d_cross, in_proj_bias=True, out_proj_bias=True):
+  def __init__(
+      self, n_heads, d_embed, d_cross, in_proj_bias=True, out_proj_bias=True
+  ):
     super().__init__()
     self.q_proj = nn.Linear(d_embed, d_embed, bias=in_proj_bias)
     self.k_proj = nn.Linear(d_cross, d_embed, bias=in_proj_bias)

ai_edge_torch/generative/examples/stable_diffusion/clip.py CHANGED Viewed

@@ -13,25 +13,34 @@
 # limitations under the License.
 # ==============================================================================
-import torch
-from torch import nn
 from ai_edge_torch.generative.layers.attention import TransformerBlock
 import ai_edge_torch.generative.layers.attention_utils as attention_utils
 import ai_edge_torch.generative.layers.builder as builder
 import ai_edge_torch.generative.layers.model_config as cfg
 import ai_edge_torch.generative.utilities.loader as loading_utils
+import torch
+from torch import nn
 TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
-    ff_up_proj="cond_stage_model.transformer.text_model.encoder.layers.{}.mlp.fc1",
-    ff_down_proj="cond_stage_model.transformer.text_model.encoder.layers.{}.mlp.fc2",
+    ff_up_proj=(
+        "cond_stage_model.transformer.text_model.encoder.layers.{}.mlp.fc1"
+    ),
+    ff_down_proj=(
+        "cond_stage_model.transformer.text_model.encoder.layers.{}.mlp.fc2"
+    ),
     attn_query_proj="cond_stage_model.transformer.text_model.encoder.layers.{}.self_attn.q_proj",
     attn_key_proj="cond_stage_model.transformer.text_model.encoder.layers.{}.self_attn.k_proj",
     attn_value_proj="cond_stage_model.transformer.text_model.encoder.layers.{}.self_attn.v_proj",
     attn_output_proj="cond_stage_model.transformer.text_model.encoder.layers.{}.self_attn.out_proj",
-    pre_attn_norm="cond_stage_model.transformer.text_model.encoder.layers.{}.layer_norm1",
-    pre_ff_norm="cond_stage_model.transformer.text_model.encoder.layers.{}.layer_norm2",
-    embedding="cond_stage_model.transformer.text_model.embeddings.token_embedding",
+    pre_attn_norm=(
+        "cond_stage_model.transformer.text_model.encoder.layers.{}.layer_norm1"
+    ),
+    post_attn_norm=(
+        "cond_stage_model.transformer.text_model.encoder.layers.{}.layer_norm2"
+    ),
+    embedding=(
+        "cond_stage_model.transformer.text_model.embeddings.token_embedding"
+    ),
     embedding_position="cond_stage_model.transformer.text_model.embeddings.position_embedding.weight",
     final_norm="cond_stage_model.transformer.text_model.final_layer_norm",
     lm_head=None,
@@ -39,7 +48,8 @@ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
 class CLIP(nn.Module):
-  """CLIP text encoder
+  """CLIP text encoder.
   For details, see https://arxiv.org/abs/2103.00020
   """
@@ -51,10 +61,14 @@ class CLIP(nn.Module):
     )
     self.config = config
+    # CLIP has only one block config.
+    block_config = config.block_config(0)
     self.transformer_blocks = nn.ModuleList(
-        TransformerBlock(config) for _ in range(config.num_layers)
+        TransformerBlock(block_config, config) for _ in range(config.num_layers)
+    )
+    self.final_norm = builder.build_norm(
+        config.embedding_dim, config.final_norm_config
     )
-    self.final_norm = builder.build_norm(config.embedding_dim, config.final_norm_config)
     self.mask_cache = attention_utils.build_causal_mask_cache(
         size=config.max_seq_len, dtype=torch.float32
@@ -62,7 +76,7 @@ class CLIP(nn.Module):
   @torch.inference_mode
   def forward(self, tokens: torch.LongTensor) -> torch.FloatTensor:
-    tokens = tokens.type(torch.long)
+    tokens = tokens.type(torch.int)
     state = self.tok_embedding(tokens) + self.tok_embedding_position
     for layer in self.transformer_blocks:
@@ -72,6 +86,7 @@ class CLIP(nn.Module):
 def get_model_config() -> cfg.ModelConfig:
+  """Get configs for the CLIP of Stable Diffusion v1.5."""
   max_seq_len = 77
   vocab_size = 49408
   num_layers = 12
@@ -81,6 +96,7 @@ def get_model_config() -> cfg.ModelConfig:
   attn_config = cfg.AttentionConfig(
       num_heads=num_heads,
+      head_dim=embedding_dim // num_heads,
       num_query_groups=num_query_groups,
       rotary_percentage=0.0,
       qkv_use_bias=True,
@@ -99,15 +115,69 @@ def get_model_config() -> cfg.ModelConfig:
   norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.LAYER_NORM)
+  block_config = cfg.TransformerBlockConfig(
+      attn_config=attn_config,
+      ff_config=ff_config,
+      pre_attention_norm_config=norm_config,
+      post_attention_norm_config=norm_config,
+  )
   config = cfg.ModelConfig(
       vocab_size=vocab_size,
       num_layers=num_layers,
       max_seq_len=max_seq_len,
       embedding_dim=embedding_dim,
+      block_configs=block_config,
+      final_norm_config=norm_config,
+      enable_hlfb=True,
+  )
+  return config
+def get_fake_model_config() -> cfg.ModelConfig:
+  """Get fake configs for the CLIP of Stable Diffusion v1.5 for testing."""
+  max_seq_len = 6
+  vocab_size = 100
+  num_layers = 2
+  num_heads = 12
+  num_query_groups = 12
+  embedding_dim = 24
+  attn_config = cfg.AttentionConfig(
+      num_heads=num_heads,
+      head_dim=embedding_dim // num_heads,
+      num_query_groups=num_query_groups,
+      rotary_percentage=0.0,
+      qkv_use_bias=True,
+      qkv_transpose_before_split=True,
+      qkv_fused_interleaved=False,
+      output_proj_use_bias=True,
+      enable_kv_cache=False,
+  )
+  ff_config = cfg.FeedForwardConfig(
+      type=cfg.FeedForwardType.SEQUENTIAL,
+      activation=cfg.ActivationConfig(cfg.ActivationType.GELU_QUICK),
+      intermediate_size=embedding_dim * 4,
+      use_bias=True,
+  )
+  norm_config = cfg.NormalizationConfig(type=cfg.NormalizationType.LAYER_NORM)
+  block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
       pre_attention_norm_config=norm_config,
-      pre_ff_norm_config=norm_config,
+      post_attention_norm_config=norm_config,
+  )
+  config = cfg.ModelConfig(
+      vocab_size=vocab_size,
+      num_layers=num_layers,
+      max_seq_len=max_seq_len,
+      embedding_dim=embedding_dim,
+      block_configs=block_config,
       final_norm_config=norm_config,
       enable_hlfb=True,
   )

ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py CHANGED Viewed

@@ -18,19 +18,22 @@ import os
 from pathlib import Path
 from typing import Optional
-import torch
 import ai_edge_torch
 import ai_edge_torch.generative.examples.stable_diffusion.clip as clip
 import ai_edge_torch.generative.examples.stable_diffusion.decoder as decoder
 import ai_edge_torch.generative.examples.stable_diffusion.diffusion as diffusion
 from ai_edge_torch.generative.examples.stable_diffusion.encoder import Encoder
 import ai_edge_torch.generative.examples.stable_diffusion.util as util
+from ai_edge_torch.generative.quantize import quant_recipes
 import ai_edge_torch.generative.utilities.stable_diffusion_loader as stable_diffusion_loader
+import torch
 arg_parser = argparse.ArgumentParser()
 arg_parser.add_argument(
-    '--clip_ckpt', type=str, help='Path to source CLIP model checkpoint', required=True
+    '--clip_ckpt',
+    type=str,
+    help='Path to source CLIP model checkpoint',
+    required=True,
 )
 arg_parser.add_argument(
     '--diffusion_ckpt',
@@ -60,6 +63,7 @@ def convert_stable_diffusion_to_tflite(
     decoder_ckpt_path: str,
     image_height: int = 512,
     image_width: int = 512,
+    quantize: bool = True,
 ):
   clip_model = clip.CLIP(clip.get_model_config())
@@ -90,10 +94,14 @@ def convert_stable_diffusion_to_tflite(
   n_tokens = 77
   timestamp = 0
   len_prompt = 1
-  prompt_tokens = torch.full((1, n_tokens), 0, dtype=torch.long)
-  input_image = torch.full((1, 3, image_height, image_width), 0, dtype=torch.float32)
+  prompt_tokens = torch.full((1, n_tokens), 0, dtype=torch.int)
+  input_image = torch.full(
+      (1, 3, image_height, image_width), 0, dtype=torch.float32
+  )
   noise = torch.full(
-      (len_prompt, 4, image_height // 8, image_width // 8), 0, dtype=torch.float32
+      (len_prompt, 4, image_height // 8, image_width // 8),
+      0,
+      dtype=torch.float32,
   )
   input_latents = torch.zeros_like(noise)
@@ -105,15 +113,19 @@ def convert_stable_diffusion_to_tflite(
   if not os.path.exists(output_dir):
     Path(output_dir).mkdir(parents=True, exist_ok=True)
+  quant_config = (
+      quant_recipes.full_int8_weight_only_recipe() if quantize else None
+  )
   # TODO(yichunk): convert to multi signature tflite model.
   # CLIP text encoder
-  ai_edge_torch.signature('encode', clip_model, (prompt_tokens,)).convert().export(
-      f'{output_dir}/clip.tflite'
-  )
+  ai_edge_torch.signature('encode', clip_model, (prompt_tokens,)).convert(
+      quant_config=quant_config
+  ).export(f'{output_dir}/clip.tflite')
   # TODO(yichunk): enable image encoder conversion
   # Image encoder
-  # ai_edge_torch.signature('encode', encoder, (input_image, noise)).convert().export(
+  # ai_edge_torch.signature('encode', encoder, (input_image, noise)).convert(quant_config=quant_config).export(
   #     f'{output_dir}/encoder.tflite'
   # )
@@ -122,12 +134,12 @@ def convert_stable_diffusion_to_tflite(
       'diffusion',
       diffusion_model,
       (torch.repeat_interleave(input_latents, 2, 0), context, time_embedding),
-  ).convert().export(f'{output_dir}/diffusion.tflite')
+  ).convert(quant_config=quant_config).export(f'{output_dir}/diffusion.tflite')
   # Image decoder
-  ai_edge_torch.signature('decode', decoder_model, (input_latents,)).convert().export(
-      f'{output_dir}/decoder.tflite'
-  )
+  ai_edge_torch.signature('decode', decoder_model, (input_latents,)).convert(
+      quant_config=quant_config
+  ).export(f'{output_dir}/decoder.tflite')
 if __name__ == '__main__':
@@ -139,4 +151,5 @@ if __name__ == '__main__':
       decoder_ckpt_path=args.decoder_ckpt,
       image_height=512,
       image_width=512,
+      quantize=True,
   )

ai_edge_torch/generative/examples/stable_diffusion/decoder.py CHANGED Viewed

@@ -13,14 +13,13 @@
 # limitations under the License.
 # ==============================================================================
-import torch
-from torch import nn
 import ai_edge_torch.generative.layers.builder as layers_builder
 import ai_edge_torch.generative.layers.model_config as layers_cfg
-import ai_edge_torch.generative.layers.unet.blocks_2d as blocks_2d
+from ai_edge_torch.generative.layers.unet import blocks_2d
 import ai_edge_torch.generative.layers.unet.model_config as unet_cfg
-import ai_edge_torch.generative.utilities.stable_diffusion_loader as stable_diffusion_loader
+from ai_edge_torch.generative.utilities import stable_diffusion_loader
+import torch
+from torch import nn
 TENSOR_NAMES = stable_diffusion_loader.AutoEncoderModelLoader.TensorNames(
     post_quant_conv="first_stage_model.post_quant_conv",
@@ -104,7 +103,9 @@ TENSOR_NAMES = stable_diffusion_loader.AutoEncoderModelLoader.TensorNames(
                     norm_2="first_stage_model.decoder.up.1.block.0.norm2",
                     conv_1="first_stage_model.decoder.up.1.block.0.conv1",
                     conv_2="first_stage_model.decoder.up.1.block.0.conv2",
-                    residual_layer="first_stage_model.decoder.up.1.block.0.nin_shortcut",
+                    residual_layer=(
+                        "first_stage_model.decoder.up.1.block.0.nin_shortcut"
+                    ),
                 ),
                 stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="first_stage_model.decoder.up.1.block.1.norm1",
@@ -128,7 +129,9 @@ TENSOR_NAMES = stable_diffusion_loader.AutoEncoderModelLoader.TensorNames(
                     norm_2="first_stage_model.decoder.up.0.block.0.norm2",
                     conv_1="first_stage_model.decoder.up.0.block.0.conv1",
                     conv_2="first_stage_model.decoder.up.0.block.0.conv2",
-                    residual_layer="first_stage_model.decoder.up.0.block.0.nin_shortcut",
+                    residual_layer=(
+                        "first_stage_model.decoder.up.0.block.0.nin_shortcut"
+                    ),
                 ),
                 stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="first_stage_model.decoder.up.0.block.1.norm1",
@@ -285,6 +288,63 @@ def get_model_config() -> unet_cfg.AutoEncoderConfig:
       normalization_config=norm_config,
       attention_config=layers_cfg.AttentionConfig(
           num_heads=1,
+          head_dim=block_out_channels[-1],
+          num_query_groups=1,
+          qkv_use_bias=True,
+          output_proj_use_bias=True,
+          enable_kv_cache=False,
+          qkv_transpose_before_split=True,
+          qkv_fused_interleaved=False,
+          rotary_percentage=0.0,
+      ),
+      enable_hlfb=False,
+  )
+  mid_block_config = unet_cfg.MidBlock2DConfig(
+      in_channels=block_out_channels[-1],
+      normalization_config=norm_config,
+      activation_config=layers_cfg.ActivationConfig(
+          layers_cfg.ActivationType.SILU
+      ),
+      num_layers=1,
+      attention_block_config=att_config,
+  )
+  config = unet_cfg.AutoEncoderConfig(
+      in_channels=in_channels,
+      latent_channels=latent_channels,
+      out_channels=out_channels,
+      activation_config=layers_cfg.ActivationConfig(
+          layers_cfg.ActivationType.SILU
+      ),
+      block_out_channels=block_out_channels,
+      scaling_factor=scaling_factor,
+      layers_per_block=layers_per_block,
+      normalization_config=norm_config,
+      mid_block_config=mid_block_config,
+  )
+  return config
+def get_fake_model_config() -> unet_cfg.AutoEncoderConfig:
+  """Get fake configs for the Decoder of Stable Diffusion v1.5 for testing."""
+  in_channels = 3
+  latent_channels = 4
+  out_channels = 3
+  block_out_channels = [2, 4]
+  scaling_factor = 0.18215
+  layers_per_block = 2
+  norm_config = layers_cfg.NormalizationConfig(
+      layers_cfg.NormalizationType.GROUP_NORM, group_num=2
+  )
+  att_config = unet_cfg.AttentionBlock2DConfig(
+      dim=block_out_channels[-1],
+      normalization_config=norm_config,
+      attention_config=layers_cfg.AttentionConfig(
+          num_heads=1,
+          head_dim=block_out_channels[-1],
           num_query_groups=1,
           qkv_use_bias=True,
           output_proj_use_bias=True,
@@ -293,12 +353,15 @@ def get_model_config() -> unet_cfg.AutoEncoderConfig:
           qkv_fused_interleaved=False,
           rotary_percentage=0.0,
       ),
+      enable_hlfb=False,
   )
   mid_block_config = unet_cfg.MidBlock2DConfig(
       in_channels=block_out_channels[-1],
       normalization_config=norm_config,
-      activation_config=layers_cfg.ActivationConfig(layers_cfg.ActivationType.SILU),
+      activation_config=layers_cfg.ActivationConfig(
+          layers_cfg.ActivationType.SILU
+      ),
       num_layers=1,
       attention_block_config=att_config,
   )
@@ -307,7 +370,9 @@ def get_model_config() -> unet_cfg.AutoEncoderConfig:
       in_channels=in_channels,
       latent_channels=latent_channels,
       out_channels=out_channels,
-      activation_config=layers_cfg.ActivationConfig(layers_cfg.ActivationType.SILU),
+      activation_config=layers_cfg.ActivationConfig(
+          layers_cfg.ActivationType.SILU
+      ),
       block_out_channels=block_out_channels,
       scaling_factor=scaling_factor,
       layers_per_block=layers_per_block,

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl