ai-edge-torch-nightly 0.5.0.dev20250424__py3-none-any.whl → 0.5.0.dev20250426__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_torch/_convert/conversion.py +1 -3
- ai_edge_torch/_convert/fx_passes/__init__.py +0 -1
- ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +63 -2
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +2 -1
- ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py +3 -3
- ai_edge_torch/generative/examples/deepseek/deepseek.py +1 -0
- ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py +2 -38
- ai_edge_torch/generative/examples/hammer/__init__.py +14 -0
- ai_edge_torch/generative/examples/hammer/convert_to_tflite.py +92 -0
- ai_edge_torch/generative/examples/hammer/hammer.py +107 -0
- ai_edge_torch/generative/examples/hammer/verify.py +86 -0
- ai_edge_torch/generative/examples/llama/convert_to_tflite.py +1 -3
- ai_edge_torch/generative/examples/llama/llama.py +3 -1
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +1 -2
- ai_edge_torch/generative/examples/phi/convert_phi4_to_tflite.py +1 -2
- ai_edge_torch/generative/examples/phi/convert_to_tflite.py +1 -2
- ai_edge_torch/generative/examples/phi/phi2.py +1 -1
- ai_edge_torch/generative/examples/phi/phi3.py +3 -1
- ai_edge_torch/generative/examples/phi/phi4.py +3 -1
- ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +2 -3
- ai_edge_torch/generative/examples/qwen/qwen.py +1 -0
- ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +5 -3
- ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +4 -3
- ai_edge_torch/generative/examples/smollm/smollm.py +3 -1
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +1 -2
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +3 -1
- ai_edge_torch/generative/layers/kv_cache.py +2 -4
- ai_edge_torch/generative/layers/scaled_dot_product_attention.py +51 -0
- ai_edge_torch/generative/layers/sdpa_with_kv_update.py +4 -6
- ai_edge_torch/generative/test/test_model_conversion.py +3 -33
- ai_edge_torch/generative/test/test_model_conversion_large.py +10 -75
- ai_edge_torch/generative/utilities/converter.py +11 -1
- ai_edge_torch/generative/utilities/export_config.py +30 -0
- ai_edge_torch/model.py +2 -0
- ai_edge_torch/odml_torch/lowerings/_decomp_registry.py +2 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250426.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250426.dist-info}/RECORD +41 -39
- ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +0 -129
- ai_edge_torch/generative/layers/experimental/scaled_dot_product_attention.py +0 -93
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250426.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250426.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.5.0.dev20250424.dist-info → ai_edge_torch_nightly-0.5.0.dev20250426.dist-info}/top_level.txt +0 -0
ai_edge_torch/_convert/conversion.py
@@ -35,13 +35,11 @@ def _run_convert_passes(
   )

   passes = [
-      fx_passes.CastInputsBf16ToF32Pass(),
-      fx_passes.BuildInterpolateCompositePass(),
-      fx_passes.CanonicalizePass(),
       fx_passes.OptimizeLayoutTransposesPass(),
       fx_passes.CanonicalizePass(),
       fx_passes.BuildAtenCompositePass(),
       fx_passes.RemoveNonUserOutputsPass(),
+      fx_passes.CastInputsBf16ToF32Pass(),
   ]

   # Debuginfo is not injected automatically by odml_torch. Only inject
ai_edge_torch/_convert/fx_passes/__init__.py
@@ -16,7 +16,6 @@
 from typing import Sequence, Union

 from ai_edge_torch._convert.fx_passes.build_aten_composite_pass import BuildAtenCompositePass
-from ai_edge_torch._convert.fx_passes.build_interpolate_composite_pass import BuildInterpolateCompositePass
 from ai_edge_torch._convert.fx_passes.cast_inputs_bf16_to_f32_pass import CastInputsBf16ToF32Pass
 from ai_edge_torch._convert.fx_passes.inject_mlir_debuginfo_pass import InjectMlirDebuginfoPass
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import OptimizeLayoutTransposesPass
ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py
@@ -20,7 +20,8 @@ import torch
 import torch.utils._pytree as pytree

 _composite_builders: dict[
-    Callable,
+    Callable[[Any, ...], Any],
+    Callable[[torch.fx.GraphModule, torch.fx.Node], None],
 ] = {}


@@ -272,13 +273,73 @@ def _aten_embedding(gm: torch.fx.GraphModule, node: torch.fx.Node):
     output = op(**full_kwargs)
     output = builder.mark_outputs(output)

-    # Explicitly reshape back to the original shape. This places the ReshapeOp
+    # Explicitly reshape back to the original shape. This places the ReshapeOp
+    # outside of the HLFB.
     output = torch.reshape(output, (*(original_idx_shape), embedding_dim))
     return output

   node.target = embedding


+@_register_composite_builder(torch.ops.aten.upsample_bilinear2d.vec)
+def _aten_upsample_bilinear2d_vec(_, node: torch.fx.Node):
+  """Build a composite for aten.upsample_bilinear2d.vec."""
+  op = node.target
+  args_mapper = TorchOpArgumentsMapper(op)
+  # Assumes later FX passes does not change the args/kwargs of the op.
+  # Which is a valid assumption for, given that composite/mark_tensor wrapper
+  # should semantically prevents any future mutations on the op.
+  output_h, output_w = node.meta["val"].shape[-2:]
+
+  def upsample_bilinear2d_vec(*args, **kwargs):
+    nonlocal op, args_mapper
+    full_kwargs = args_mapper.get_full_kwargs(args, kwargs)
+
+    builder = lowertools.StableHLOCompositeBuilder(
+        name="odml.upsample_bilinear2d",
+        attr={
+            "size": (int(output_h), int(output_w)),
+            "align_corners": full_kwargs["align_corners"],
+            "is_nchw_op": True,
+        },
+    )
+    full_kwargs["input"] = builder.mark_inputs(full_kwargs["input"])
+    output = op(**full_kwargs)
+    output = builder.mark_outputs(output)
+    return output
+
+  node.target = upsample_bilinear2d_vec
+
+
+@_register_composite_builder(torch.ops.aten.upsample_nearest2d.vec)
+def _aten_upsample_nearest2d_vec(_, node: torch.fx.Node):
+  """Build a composite for aten.upsample_nearest2d.vec."""
+  op = node.target
+  args_mapper = TorchOpArgumentsMapper(op)
+  # Assumes later FX passes does not change the args/kwargs of the op.
+  # Which is a valid assumption for, given that composite/mark_tensor wrapper
+  # should semantically prevents any future mutations on the op.
+  output_h, output_w = node.meta["val"].shape[-2:]
+
+  def upsample_nearest2d_vec(*args, **kwargs):
+    nonlocal op, args_mapper
+    full_kwargs = args_mapper.get_full_kwargs(args, kwargs)
+
+    builder = lowertools.StableHLOCompositeBuilder(
+        name="tfl.resize_nearest_neighbor",
+        attr={
+            "size": (int(output_h), int(output_w)),
+            "is_nchw_op": True,
+        },
+    )
+    full_kwargs["input"] = builder.mark_inputs(full_kwargs["input"])
+    output = op(**full_kwargs)
+    output = builder.mark_outputs(output)
+    return output
+
+  node.target = upsample_nearest2d_vec
+
+
 class BuildAtenCompositePass(fx_infra.PassBase):

   def call(self, graph_module: torch.fx.GraphModule):
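For context, these two new builders fire on the FX nodes that torch.nn.functional.interpolate lowers to, taking over from the removed BuildInterpolateCompositePass. A minimal sketch (not part of this diff) of a module that should exercise the bilinear path:

import torch
import torch.nn.functional as F


class Upsample2x(torch.nn.Module):

  def forward(self, x):
    # With a 4D NCHW input, this should lower to aten.upsample_bilinear2d.vec
    # in the exported graph, which BuildAtenCompositePass then wraps in an
    # odml.upsample_bilinear2d composite.
    return F.interpolate(x, scale_factor=2, mode="bilinear")


ep = torch.export.export(Upsample2x(), (torch.randn(1, 3, 8, 8),))
print(ep.graph)  # look for upsample_bilinear2d in the printed graph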
ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py
@@ -17,6 +17,7 @@
 import operator

 import ai_edge_torch
+from ai_edge_torch import lowertools
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_mark
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import op_func_registry
 from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import utils
@@ -24,7 +25,7 @@ import torch
 import torch.utils._pytree as pytree

 aten = torch.ops.aten
-StableHLOCompositeBuilder =
+StableHLOCompositeBuilder = lowertools.StableHLOCompositeBuilder

 __all__ = ["rewrite_nhwc_node", "has_nhwc_rewriter"]

ai_edge_torch/generative/examples/deepseek/convert_to_tflite.py
@@ -17,11 +17,11 @@

 from absl import app
 from ai_edge_torch.generative.examples.deepseek import deepseek
+from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config

-flags = converter.define_conversion_flags(
-ExportConfig = export_config.ExportConfig
+flags = converter.define_conversion_flags('deepseek')

 def main(_):
   pytorch_model = deepseek.build_model(
@@ -34,7 +34,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=export_config.get_from_flags(),
   )

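The export_config.get_from_flags() call that this and the other conversion scripts now share comes from export_config.py (+30 lines; its body is not shown in this section). Judging from the per-example helpers it replaces (see the gemma3 hunk below) and the transpose_kv_cache flag used in the new hammer script, it presumably maps the shared conversion flags onto an ExportConfig. A hypothetical sketch only, not the shipped implementation:

# Assumption: flag plumbing, parameters, and defaults below are illustrative.
from ai_edge_torch.generative.layers import kv_cache
from ai_edge_torch.generative.utilities import export_config


def get_from_flags_sketch(transpose_kv_cache: bool = False):
  config = export_config.ExportConfig()
  if transpose_kv_cache:
    # Mirrors the removed per-example helpers, which switched the KV cache
    # to the transposed layout.
    config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
  return config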
ai_edge_torch/generative/examples/deepseek/deepseek.py
@@ -53,6 +53,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
   norm_config = cfg.NormalizationConfig(
       type=cfg.NormalizationType.RMS_NORM,
       epsilon=1e-06,
+      enable_hlfb=True,
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
ai_edge_torch/generative/examples/gemma3/convert_gemma3_to_tflite.py
@@ -17,14 +17,10 @@

 from absl import app
 from ai_edge_torch.generative.examples.gemma3 import gemma3
-from ai_edge_torch.generative.layers import kv_cache
 from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config
-import torch

 flags = converter.define_conversion_flags('gemma3-1b')
-ExportConfig = export_config.ExportConfig
-

 _MODEL_SIZE = flags.DEFINE_string(
     'model_size',
@@ -33,55 +29,23 @@ _MODEL_SIZE = flags.DEFINE_string(
 )


-def _create_mask(mask_len, kv_cache_max_len):
-  mask = torch.full(
-      (mask_len, kv_cache_max_len), float('-inf'), dtype=torch.float32
-  )
-  mask = torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)
-  return mask
-
-
-def _create_export_config(
-    prefill_seq_lens: list[int], kv_cache_max_len: int
-) -> ExportConfig:
-  """Creates the export config for the model."""
-  export_config = ExportConfig()
-  if isinstance(prefill_seq_lens, list):
-    prefill_mask = [_create_mask(i, kv_cache_max_len) for i in prefill_seq_lens]
-  else:
-    prefill_mask = _create_mask(prefill_seq_lens, kv_cache_max_len)
-
-  export_config.prefill_mask = prefill_mask
-
-  decode_mask = torch.full(
-      (1, kv_cache_max_len), float('-inf'), dtype=torch.float32
-  )
-  decode_mask = torch.triu(decode_mask, diagonal=1).unsqueeze(0).unsqueeze(0)
-  export_config.decode_mask = decode_mask
-  export_config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
-  return export_config
-
-
 def main(_):
   if _MODEL_SIZE.value == '1b':
     pytorch_model = gemma3.build_model_1b(
         flags.FLAGS.checkpoint_path,
         kv_cache_max_len=flags.FLAGS.kv_cache_max_len,
     )
-    config = pytorch_model.config
   else:
     raise ValueError(f'Unsupported model size: {_MODEL_SIZE.value}')
+
   converter.convert_to_tflite(
       pytorch_model,
       output_path=flags.FLAGS.output_path,
       output_name_prefix=flags.FLAGS.output_name_prefix,
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
-      config=config,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=_create_export_config(
-          flags.FLAGS.prefill_seq_lens, flags.FLAGS.kv_cache_max_len
-      ),
+      export_config=export_config.get_from_flags(),
   )

ai_edge_torch/generative/examples/hammer/__init__.py (new file)
@@ -0,0 +1,14 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
ai_edge_torch/generative/examples/hammer/convert_to_tflite.py (new file)
@@ -0,0 +1,92 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Example of converting hammer 2.1 models to multi-signature tflite model."""
+
+from absl import app
+from ai_edge_torch.generative.examples.hammer import hammer
+from ai_edge_torch.generative.layers import kv_cache
+from ai_edge_torch.generative.utilities import converter
+from ai_edge_torch.generative.utilities import export_config as export_cfg
+import torch
+
+
+flags = converter.define_conversion_flags('hammer')
+ExportConfig = export_cfg.ExportConfig
+
+
+_MODEL_SIZE = flags.DEFINE_enum(
+    'model_size',
+    '1.5b',
+    ['0.5b', '1.5b'],
+    'The size of the model to convert.',
+)
+
+_BUILDER = {
+    '0.5b': hammer.build_0_5b_model,
+    '1.5b': hammer.build_1_5b_model,
+}
+
+
+def _create_mask(mask_len, kv_cache_max_len):
+  mask = torch.full(
+      (mask_len, kv_cache_max_len), float('-inf'), dtype=torch.float32
+  )
+  mask = torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)
+  return mask
+
+
+def _create_export_config(
+    prefill_seq_lens: list[int], kv_cache_max_len: int
+) -> ExportConfig:
+  """Creates the export config for the model."""
+  export_config = ExportConfig()
+  if isinstance(prefill_seq_lens, list):
+    prefill_mask = [_create_mask(i, kv_cache_max_len) for i in prefill_seq_lens]
+  else:
+    prefill_mask = _create_mask(prefill_seq_lens, kv_cache_max_len)
+
+  export_config.prefill_mask = prefill_mask
+
+  decode_mask = torch.full(
+      (1, kv_cache_max_len), float('-inf'), dtype=torch.float32
+  )
+  decode_mask = torch.triu(decode_mask, diagonal=1).unsqueeze(0).unsqueeze(0)
+  export_config.decode_mask = decode_mask
+  export_config.kvcache_layout = kv_cache.KV_LAYOUT_TRANSPOSED
+  return export_config
+
+
+def main(_):
+  pytorch_model = _BUILDER[_MODEL_SIZE.value](
+      flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
+  )
+  converter.convert_to_tflite(
+      pytorch_model,
+      output_path=flags.FLAGS.output_path,
+      output_name_prefix=flags.FLAGS.output_name_prefix,
+      prefill_seq_len=flags.FLAGS.prefill_seq_lens,
+      quantize=flags.FLAGS.quantize,
+      lora_ranks=flags.FLAGS.lora_ranks,
+      export_config=_create_export_config(
+          flags.FLAGS.prefill_seq_lens, flags.FLAGS.kv_cache_max_len
+      )
+      if flags.FLAGS.transpose_kv_cache
+      else ExportConfig(),
+  )
+
+
+if __name__ == '__main__':
+  app.run(main)
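A worked example (not in the diff) of what _create_mask above produces: a (1, 1, mask_len, kv_cache_max_len) causal mask that is 0 at and below the diagonal and -inf strictly above it, presumably applied additively to the attention scores:

import torch

mask = torch.full((3, 5), float("-inf"), dtype=torch.float32)
mask = torch.triu(mask, diagonal=1).unsqueeze(0).unsqueeze(0)
print(mask.shape)  # torch.Size([1, 1, 3, 5])
print(mask[0, 0])
# tensor([[0., -inf, -inf, -inf, -inf],
#         [0., 0., -inf, -inf, -inf],
#         [0., 0., 0., -inf, -inf]])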
ai_edge_torch/generative/examples/hammer/hammer.py (new file)
@@ -0,0 +1,107 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Example of building Hammer 2.1 models."""
+
+import ai_edge_torch.generative.layers.model_config as cfg
+from ai_edge_torch.generative.utilities import model_builder
+from torch import nn
+
+TENSOR_NAMES = model_builder.TENSOR_NAMES
+
+
+class Hammer(model_builder.DecoderOnlyModel):
+  """A Hammer model built from the Edge Generative API layers."""
+  pass
+
+
+def get_1_5b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  """Returns the model config for a Hammer 2.1 1.5B model."""
+  attn_config = cfg.AttentionConfig(
+      num_heads=12,
+      head_dim=128,
+      num_query_groups=2,
+      rotary_base=1000000,
+      rotary_percentage=1.0,
+      qkv_use_bias=True,
+  )
+  ff_config = cfg.FeedForwardConfig(
+      type=cfg.FeedForwardType.GATED,
+      activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
+      intermediate_size=8960,
+  )
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM,
+      epsilon=1e-06,
+      enable_hlfb=True,
+  )
+  block_config = cfg.TransformerBlockConfig(
+      attn_config=attn_config,
+      ff_config=ff_config,
+      pre_attention_norm_config=norm_config,
+      post_attention_norm_config=norm_config,
+  )
+  config = cfg.ModelConfig(
+      vocab_size=151665,
+      num_layers=28,
+      max_seq_len=32768,
+      embedding_dim=1536,
+      kv_cache_max_len=kv_cache_max_len,
+      block_configs=block_config,
+      final_norm_config=norm_config,
+      enable_hlfb=True,
+  )
+  return config
+
+
+def get_0_5b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  """Returns the model config for a Hammer 2.1 0.5B model."""
+  config = get_1_5b_model_config(kv_cache_max_len)
+  # Hammer has only one block config.
+  block_config = config.block_config(0)
+  block_config.attn_config.num_heads = 14
+  block_config.attn_config.head_dim = 64
+  block_config.ff_config.intermediate_size = 4864
+  config.num_layers = 24
+  config.embedding_dim = 896
+  return config
+
+
+def get_fake_model_config(**kwargs) -> cfg.ModelConfig:
+  config = get_1_5b_model_config(**kwargs)
+  config.vocab_size = 128
+  config.num_layers = 2
+  config.embedding_dim = 16
+  # Hammer has only one block config.
+  config.block_config(0).ff_config.intermediate_size = 64
+  return config
+
+
+def build_1_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=get_1_5b_model_config(**kwargs),
+      tensor_names=TENSOR_NAMES,
+      model_class=Hammer,
+  )
+
+
+def build_0_5b_model(checkpoint_path: str, **kwargs) -> nn.Module:
+  return model_builder.build_decoder_only_model(
+      checkpoint_path=checkpoint_path,
+      config=get_0_5b_model_config(**kwargs),
+      tensor_names=TENSOR_NAMES,
+      model_class=Hammer,
+  )
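Usage sketch for the new builders, mirroring the call in convert_to_tflite.py above (the checkpoint path is illustrative; per verify.py below, weights come from the MadeAgents/Hammer2.1-* checkpoints):

from ai_edge_torch.generative.examples.hammer import hammer

# Build the reauthored 1.5B model from a local checkpoint directory.
model = hammer.build_1_5b_model(
    '/path/to/Hammer2.1-1.5b', kv_cache_max_len=1024
)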
ai_edge_torch/generative/examples/hammer/verify.py (new file)
@@ -0,0 +1,86 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Verifies the reauthored Hammer 2.1 0.5B and 1.5B models."""
+
+import logging
+import pathlib
+
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.hammer import hammer
+from ai_edge_torch.generative.utilities import transformers_verifier
+from ai_edge_torch.generative.utilities import verifier
+import transformers
+
+
+_MODEL_SIZE = flags.DEFINE_enum(
+    "model_size",
+    "0.5b",
+    ["0.5b", "1.5b"],
+    "The size of the model to verify.",
+)
+_PROMPTS = flags.DEFINE_multi_string(
+    "prompts",
+    "What is the meaning of life?",
+    "The input prompts to generate answers.",
+)
+_MAX_NEW_TOKENS = flags.DEFINE_integer(
+    "max_new_tokens",
+    30,
+    "The maximum size of the generated tokens.",
+)
+
+_CHECKPOINT = {
+    "0.5b": "MadeAgents/Hammer2.1-0.5b",
+    "1.5b": "MadeAgents/Hammer2.1-1.5b",
+}
+
+_BUILDER = {
+    "0.5b": hammer.build_0_5b_model,
+    "1.5b": hammer.build_1_5b_model,
+}
+
+
+def main(_):
+  checkpoint = _CHECKPOINT[_MODEL_SIZE.value]
+  logging.info("Loading the original model from: %s", checkpoint)
+  original_model = transformers.AutoModelForCausalLM.from_pretrained(checkpoint)
+
+  # Locate the cached dir.
+  cached_config_file = transformers.utils.cached_file(
+      checkpoint, transformers.utils.CONFIG_NAME
+  )
+  reauthored_checkpoint = pathlib.Path(cached_config_file).parent
+  logging.info("Building the reauthored model from: %s", reauthored_checkpoint)
+  reauthored_model = _BUILDER[_MODEL_SIZE.value](reauthored_checkpoint)
+
+  logging.info("Loading the tokenizer from: %s", checkpoint)
+  tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
+
+  verifier.verify_reauthored_model(
+      original_model=transformers_verifier.TransformersModelWrapper(
+          original_model
+      ),
+      reauthored_model=verifier.ReauthoredModelWrapper(reauthored_model),
+      tokenizer=verifier.TokenizerWrapper(tokenizer),
+      generate_prompts=_PROMPTS.value,
+      max_new_tokens=_MAX_NEW_TOKENS.value,
+      atol=1e-04,
+  )
+
+
+if __name__ == "__main__":
+  app.run(main)
ai_edge_torch/generative/examples/llama/convert_to_tflite.py
@@ -22,8 +22,6 @@ from ai_edge_torch.generative.utilities import export_config


 flags = converter.define_conversion_flags('llama')
-ExportConfig = export_config.ExportConfig
-

 _MODEL_SIZE = flags.DEFINE_enum(
     'model_size',
@@ -49,7 +47,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=export_config.get_from_flags(),
   )

ai_edge_torch/generative/examples/llama/llama.py
@@ -121,7 +121,9 @@ def get_1b_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU),
       intermediate_size=8192,
   )
-  norm_config = cfg.NormalizationConfig(
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True,
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py
@@ -21,7 +21,6 @@ from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config

 flags = converter.define_conversion_flags("phi3")
-ExportConfig = export_config.ExportConfig


 def main(_):
@@ -35,7 +34,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=export_config.get_from_flags(),
   )

ai_edge_torch/generative/examples/phi/convert_phi4_to_tflite.py
@@ -21,7 +21,6 @@ from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config

 flags = converter.define_conversion_flags("phi4")
-ExportConfig = export_config.ExportConfig


 def main(_):
@@ -35,7 +34,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=export_config.get_from_flags(),
   )

ai_edge_torch/generative/examples/phi/convert_to_tflite.py
@@ -22,7 +22,6 @@ from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config

 flags = converter.define_conversion_flags("phi2")
-ExportConfig = export_config.ExportConfig


 def main(_):
@@ -36,7 +35,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=export_config.get_from_flags(),
   )

ai_edge_torch/generative/examples/phi/phi2.py
@@ -65,7 +65,7 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       use_bias=True,
   )
   norm_config = cfg.NormalizationConfig(
-      type=cfg.NormalizationType.LAYER_NORM,
+      type=cfg.NormalizationType.LAYER_NORM, enable_hlfb=True
   )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
ai_edge_torch/generative/examples/phi/phi3.py
@@ -162,7 +162,9 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU_GLU),
       intermediate_size=8192,
   )
-  norm_config = cfg.NormalizationConfig(
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True,
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
ai_edge_torch/generative/examples/phi/phi4.py
@@ -112,7 +112,9 @@ def get_model_config(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
       activation=cfg.ActivationConfig(cfg.ActivationType.SILU_GLU),
       intermediate_size=8192,
   )
-  norm_config = cfg.NormalizationConfig(
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM, enable_hlfb=True
+  )
   block_config = cfg.TransformerBlockConfig(
       attn_config=attn_config,
       ff_config=ff_config,
ai_edge_torch/generative/examples/qwen/convert_to_tflite.py
@@ -21,8 +21,6 @@ from ai_edge_torch.generative.utilities import converter
 from ai_edge_torch.generative.utilities import export_config

 flags = converter.define_conversion_flags('qwen')
-ExportConfig = export_config.ExportConfig
-

 _MODEL_SIZE = flags.DEFINE_enum(
     'model_size',
@@ -37,6 +35,7 @@ _BUILDER = {
     '3b': qwen.build_3b_model,
 }

+
 def main(_):
   pytorch_model = _BUILDER[_MODEL_SIZE.value](
       flags.FLAGS.checkpoint_path, kv_cache_max_len=flags.FLAGS.kv_cache_max_len
@@ -48,7 +47,7 @@ def main(_):
       prefill_seq_len=flags.FLAGS.prefill_seq_lens,
       quantize=flags.FLAGS.quantize,
       lora_ranks=flags.FLAGS.lora_ranks,
-      export_config=
+      export_config=export_config.get_from_flags(),
   )
