ai-edge-torch-nightly 0.3.0.dev20240910__py3-none-any.whl → 0.3.0.dev20240914__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in one of the supported public registries. It is provided for informational purposes only.
- ai_edge_torch/_convert/conversion.py +2 -1
- ai_edge_torch/_convert/fx_passes/__init__.py +5 -41
- ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +3 -4
- ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +3 -4
- ai_edge_torch/_convert/fx_passes/inject_mlir_debuginfo_pass.py +3 -4
- ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/pass_body.py +4 -5
- ai_edge_torch/config.py +4 -1
- ai_edge_torch/fx_pass_base.py +101 -0
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +35 -16
- ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +29 -10
- ai_edge_torch/generative/examples/gemma/gemma.py +52 -32
- ai_edge_torch/generative/examples/gemma/gemma2.py +87 -60
- ai_edge_torch/generative/examples/{experimental/gemma → openelm}/convert_to_tflite.py +16 -18
- ai_edge_torch/generative/examples/openelm/openelm.py +237 -0
- ai_edge_torch/generative/examples/{experimental/phi → phi}/convert_to_tflite.py +15 -16
- ai_edge_torch/generative/examples/{experimental/phi → phi}/phi2.py +48 -45
- ai_edge_torch/generative/examples/{experimental/tiny_llama → smollm}/convert_to_tflite.py +16 -17
- ai_edge_torch/generative/examples/smollm/smollm.py +131 -0
- ai_edge_torch/generative/examples/stable_diffusion/clip.py +12 -6
- ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +1 -1
- ai_edge_torch/generative/examples/t5/convert_to_tflite.py +20 -20
- ai_edge_torch/generative/examples/t5/t5.py +43 -30
- ai_edge_torch/generative/examples/t5/t5_attention.py +18 -13
- ai_edge_torch/generative/examples/test_models/toy_model.py +15 -13
- ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +75 -34
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +29 -10
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +57 -36
- ai_edge_torch/generative/fx_passes/__init__.py +4 -4
- ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +3 -4
- ai_edge_torch/generative/layers/attention.py +84 -73
- ai_edge_torch/generative/layers/builder.py +38 -14
- ai_edge_torch/generative/layers/feed_forward.py +26 -8
- ai_edge_torch/generative/layers/kv_cache.py +163 -51
- ai_edge_torch/generative/layers/model_config.py +61 -33
- ai_edge_torch/generative/layers/normalization.py +158 -0
- ai_edge_torch/generative/layers/unet/blocks_2d.py +0 -2
- ai_edge_torch/generative/quantize/example.py +2 -2
- ai_edge_torch/generative/test/{test_experimental_ekv.py → test_kv_cache.py} +12 -24
- ai_edge_torch/generative/test/test_loader.py +1 -1
- ai_edge_torch/generative/test/test_model_conversion.py +77 -62
- ai_edge_torch/generative/test/test_model_conversion_large.py +61 -68
- ai_edge_torch/generative/test/test_quantize.py +5 -5
- ai_edge_torch/generative/test/utils.py +54 -0
- ai_edge_torch/generative/utilities/loader.py +28 -15
- ai_edge_torch/generative/utilities/t5_loader.py +21 -20
- ai_edge_torch/odml_torch/export.py +40 -0
- ai_edge_torch/odml_torch/lowerings/__init__.py +1 -0
- ai_edge_torch/odml_torch/lowerings/_basic.py +44 -0
- ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +0 -2
- ai_edge_torch/odml_torch/lowerings/_layer_norm.py +78 -0
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/RECORD +59 -63
- ai_edge_torch/_convert/fx_passes/_pass_base.py +0 -53
- ai_edge_torch/_convert/fx_passes/canonicalize_pass.py +0 -35
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +0 -219
- ai_edge_torch/generative/examples/experimental/tiny_llama/__init__.py +0 -14
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +0 -205
- ai_edge_torch/generative/examples/phi2/__init__.py +0 -14
- ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +0 -67
- ai_edge_torch/generative/examples/phi2/phi2.py +0 -189
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +0 -176
- /ai_edge_torch/generative/examples/{experimental → openelm}/__init__.py +0 -0
- /ai_edge_torch/generative/examples/{experimental/gemma → phi}/__init__.py +0 -0
- /ai_edge_torch/generative/examples/{experimental/phi → smollm}/__init__.py +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/top_level.txt +0 -0
ai_edge_torch/generative/utilities/loader.py
CHANGED
@@ -101,6 +101,8 @@ class ModelLoader:
     attn_value_proj: str = None
     attn_fused_qkv_proj: str = None
     attn_output_proj: str = None
+    attn_query_norm: str = None
+    attn_key_norm: str = None
 
     ff_up_proj: str = None
     ff_down_proj: str = None
@@ -221,7 +223,8 @@ class ModelLoader:
       converted_state: Dict[str, torch.Tensor],
   ):
     prefix = f"transformer_blocks.{idx}"
-
+    ff_config = config.block_config(idx).ff_config
+    if ff_config.type == model_config.FeedForwardType.SEQUENTIAL:
       ff_up_proj_name = self._names.ff_up_proj.format(idx)
       ff_down_proj_name = self._names.ff_down_proj.format(idx)
       converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
@@ -230,7 +233,7 @@ class ModelLoader:
       converted_state[f"{prefix}.ff.w2.weight"] = state.pop(
           f"{ff_down_proj_name}.weight"
       )
-      if
+      if ff_config.use_bias:
         converted_state[f"{prefix}.ff.w1.bias"] = state.pop(
             f"{ff_up_proj_name}.bias"
         )
@@ -250,7 +253,7 @@ class ModelLoader:
       converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
           f"{ff_gate_proj_name}.weight"
       )
-      if
+      if ff_config.use_bias:
        converted_state[f"{prefix}.ff.w3.bias"] = state.pop(
            f"{ff_up_proj_name}.bias"
        )
@@ -289,6 +292,7 @@ class ModelLoader:
      converted_state: Dict[str, torch.Tensor],
  ):
    prefix = f"transformer_blocks.{idx}"
+    attn_config = config.block_config(idx).attn_config
    if self._names.attn_fused_qkv_proj:
      fused_qkv_name = self._names.attn_fused_qkv_proj.format(idx)
      converted_state[f"{prefix}.atten_func.qkv_projection.weight"] = state.pop(
@@ -300,13 +304,13 @@ class ModelLoader:
      v_name = self._names.attn_value_proj.format(idx)
      converted_state[f"{prefix}.atten_func.qkv_projection.weight"] = (
          self._fuse_qkv(
-
+              attn_config,
              state.pop(f"{q_name}.weight"),
              state.pop(f"{k_name}.weight"),
              state.pop(f"{v_name}.weight"),
          )
      )
-    if
+    if attn_config.qkv_use_bias:
      if self._names.attn_fused_qkv_proj:
        converted_state[f"{prefix}.atten_func.qkv_projection.bias"] = state.pop(
            f"{fused_qkv_name}.bias"
@@ -314,18 +318,29 @@ class ModelLoader:
      else:
        converted_state[f"{prefix}.atten_func.qkv_projection.bias"] = (
            self._fuse_qkv(
-
+                attn_config,
                state.pop(f"{q_name}.bias"),
                state.pop(f"{k_name}.bias"),
                state.pop(f"{v_name}.bias"),
            )
        )
 
+    if self._names.attn_query_norm is not None:
+      attn_query_norm_name = self._names.attn_query_norm.format(idx)
+      converted_state[f"{prefix}.atten_func.query_norm.weight"] = state.pop(
+          f"{attn_query_norm_name}.weight"
+      )
+    if self._names.attn_key_norm is not None:
+      attn_key_norm_name = self._names.attn_key_norm.format(idx)
+      converted_state[f"{prefix}.atten_func.key_norm.weight"] = state.pop(
+          f"{attn_key_norm_name}.weight"
+      )
+
    o_name = self._names.attn_output_proj.format(idx)
    converted_state[f"{prefix}.atten_func.output_projection.weight"] = (
        state.pop(f"{o_name}.weight")
    )
-    if
+    if attn_config.output_proj_use_bias:
      converted_state[f"{prefix}.atten_func.output_projection.bias"] = (
          state.pop(f"{o_name}.bias")
      )
@@ -360,18 +375,16 @@ class ModelLoader:
 
   def _fuse_qkv(
       self,
-
+      attn_config: model_config.AttentionConfig,
       q: torch.Tensor,
       k: torch.Tensor,
       v: torch.Tensor,
   ) -> torch.Tensor:
-    if
-    q_per_kv =
-
-    )
-
-    ks = torch.split(k, config.attn_config.head_dim)
-    vs = torch.split(v, config.attn_config.head_dim)
+    if attn_config.qkv_fused_interleaved:
+      q_per_kv = attn_config.num_heads // attn_config.num_query_groups
+      qs = torch.split(q, attn_config.head_dim * q_per_kv)
+      ks = torch.split(k, attn_config.head_dim)
+      vs = torch.split(v, attn_config.head_dim)
       cycled = [t for group in zip(qs, ks, vs) for t in group]
       return torch.cat(cycled)
     else:
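With this change the loader reads per-block settings through `config.block_config(idx)` rather than a single model-wide config, and it can optionally map query/key normalization weights. Below is a minimal sketch of how a model definition might fill in the new `TensorNames` fields; the checkpoint tensor name patterns are hypothetical, not taken from this release.

from ai_edge_torch.generative.utilities import loader as loading_utils

# Hypothetical checkpoint layout; real models supply their own name patterns.
TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
    attn_query_proj="model.layers.{}.self_attn.q_proj",
    attn_key_proj="model.layers.{}.self_attn.k_proj",
    attn_value_proj="model.layers.{}.self_attn.v_proj",
    attn_output_proj="model.layers.{}.self_attn.o_proj",
    # New in 0.3.0.dev20240914: optional query/key norm weights per layer.
    attn_query_norm="model.layers.{}.self_attn.q_norm",
    attn_key_norm="model.layers.{}.self_attn.k_norm",
    ff_up_proj="model.layers.{}.mlp.up_proj",
    ff_down_proj="model.layers.{}.mlp.down_proj",
)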
ai_edge_torch/generative/utilities/t5_loader.py
CHANGED
@@ -279,7 +279,8 @@ class ModelLoader:
     prefix = additional_prefix + f"transformer_blocks.{idx}"
     if names.ff_up_proj is None or names.ff_down_proj is None:
       return
-
+    ff_config = config.block_config(idx).ff_config
+    if ff_config.type == model_config.FeedForwardType.SEQUENTIAL:
       ff_up_proj_name = names.ff_up_proj.format(idx)
       ff_down_proj_name = names.ff_down_proj.format(idx)
       converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
@@ -288,7 +289,7 @@ class ModelLoader:
       converted_state[f"{prefix}.ff.w2.weight"] = state.pop(
           f"{ff_down_proj_name}.weight"
       )
-      if
+      if ff_config.use_bias:
        converted_state[f"{prefix}.ff.w1.bias"] = state.pop(
            f"{ff_up_proj_name}.bias"
        )
@@ -309,7 +310,7 @@ class ModelLoader:
      converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
          f"{ff_gate_proj_name}.weight"
      )
-      if
+      if ff_config.use_bias:
        converted_state[f"{prefix}.ff.w3.bias"] = state.pop(
            f"{ff_up_proj_name}.bias"
        )
@@ -337,20 +338,21 @@ class ModelLoader:
     ):
       return
     prefix = additional_prefix + f"transformer_blocks.{idx}"
+    attn_config = config.block_config(idx).attn_config
     q_name = names.attn_query_proj.format(idx)
     k_name = names.attn_key_proj.format(idx)
     v_name = names.attn_value_proj.format(idx)
     # model.encoder.transformer_blocks[0].atten_func.q_projection.weight
     if fuse_attention:
       converted_state[f"{prefix}.atten_func.attn.weight"] = self._fuse_qkv(
-
+          attn_config,
           state.pop(f"{q_name}.weight"),
           state.pop(f"{k_name}.weight"),
           state.pop(f"{v_name}.weight"),
       )
-      if
+      if attn_config.qkv_use_bias:
        converted_state[f"{prefix}.atten_func.attn.bias"] = self._fuse_qkv(
-
+            attn_config,
            state.pop(f"{q_name}.bias"),
            state.pop(f"{k_name}.bias"),
            state.pop(f"{v_name}.bias"),
@@ -365,7 +367,7 @@ class ModelLoader:
      converted_state[f"{prefix}.atten_func.v_projection.weight"] = state.pop(
          f"{v_name}.weight"
      )
-      if
+      if attn_config.qkv_use_bias:
        converted_state[f"{prefix}.atten_func.q_projection.bias"] = state.pop(
            f"{q_name}.bias"
        )
@@ -380,7 +382,7 @@ class ModelLoader:
    converted_state[f"{prefix}.atten_func.output_projection.weight"] = (
        state.pop(f"{o_name}.weight")
    )
-    if
+    if attn_config.output_proj_use_bias:
      converted_state[f"{prefix}.atten_func.output_projection.bias"] = (
          state.pop(f"{o_name}.bias")
      )
@@ -402,6 +404,7 @@ class ModelLoader:
     ):
       return
     prefix = additional_prefix + f"transformer_blocks.{idx}"
+    attn_config = config.block_config(idx).attn_config
     q_name = names.cross_attn_query_proj.format(idx)
     k_name = names.cross_attn_key_proj.format(idx)
     v_name = names.cross_attn_value_proj.format(idx)
@@ -409,16 +412,16 @@ class ModelLoader:
     if fuse_attention:
       converted_state[f"{prefix}.cross_atten_func.attn.weight"] = (
           self._fuse_qkv(
-
+              attn_config,
              state.pop(f"{q_name}.weight"),
              state.pop(f"{k_name}.weight"),
              state.pop(f"{v_name}.weight"),
          )
      )
-      if
+      if attn_config.qkv_use_bias:
        converted_state[f"{prefix}.cross_atten_func.attn.bias"] = (
            self._fuse_qkv(
-
+                attn_config,
                state.pop(f"{q_name}.bias"),
                state.pop(f"{k_name}.bias"),
                state.pop(f"{v_name}.bias"),
@@ -434,7 +437,7 @@ class ModelLoader:
      converted_state[f"{prefix}.cross_atten_func.v_projection.weight"] = (
          state.pop(f"{v_name}.weight")
      )
-      if
+      if attn_config.qkv_use_bias:
        converted_state[f"{prefix}.cross_atten_func.q_projection.bias"] = (
            state.pop(f"{q_name}.bias")
        )
@@ -449,7 +452,7 @@ class ModelLoader:
    converted_state[f"{prefix}.cross_atten_func.output_projection.weight"] = (
        state.pop(f"{o_name}.weight")
    )
-    if
+    if attn_config.output_proj_use_bias:
      converted_state[f"{prefix}.cross_atten_func.output_projection.bias"] = (
          state.pop(f"{o_name}.bias")
      )
@@ -496,16 +499,14 @@ class ModelLoader:
 
   def _fuse_qkv(
       self,
-
+      attn_config: model_config.AttentionConfig,
       q: torch.Tensor,
       k: torch.Tensor,
       v: torch.Tensor,
   ) -> torch.Tensor:
-    q_per_kv =
-
-    )
-
-    ks = torch.split(k, config.attn_config.head_dim)
-    vs = torch.split(v, config.attn_config.head_dim)
+    q_per_kv = attn_config.num_heads // attn_config.num_query_groups
+    qs = torch.split(q, attn_config.head_dim * q_per_kv)
+    ks = torch.split(k, attn_config.head_dim)
+    vs = torch.split(v, attn_config.head_dim)
     cycled = [t for group in zip(qs, ks, vs) for t in group]
     return torch.cat(cycled)
ai_edge_torch/odml_torch/export.py
CHANGED
@@ -223,6 +223,41 @@ class MlirLowered:
     return tf_integration.mlir_to_flatbuffer(self)
 
 
+# TODO(b/331481564) Make this a ai_edge_torch FX pass.
+def _convert_i64_to_i32(exported_program: torch.export.ExportedProgram):
+  """Convert internal constant aten ops' output from int64 to int32.
+
+  Int32 generally has better performance and compatibility than int64 in
+  runtime. This pass converts aten op where the output(s) are int64 constant
+  tensors to return int32 constant tensors.
+
+  Args:
+    exported_program: The exported program to apply the pass.
+  """
+
+  def in_i32(x: int):
+    return -2147483648 <= x <= 2147483647
+
+  def rewrite_arange(node: torch.fx.Node):
+    tensor_meta = node.meta.get("tensor_meta", None)
+    if not tensor_meta:
+      return
+
+    start, end = node.args[:2]
+    if tensor_meta.dtype != torch.int64:
+      return
+    if not (in_i32(start) and in_i32(end)):
+      return
+    op = node.target
+    node.target = lambda *args, **kwargs: op(*args, **kwargs).type(torch.int32)
+
+  graph_module = exported_program.graph_module
+  for node in graph_module.graph.nodes:
+
+    if node.target == torch.ops.aten.arange.start_step:
+      rewrite_arange(node)
+
+
 def exported_program_to_mlir(
     exported_program: torch.export.ExportedProgram,
 ) -> MlirLowered:
@@ -231,6 +266,11 @@ def exported_program_to_mlir(
       lowerings.decompositions()
   )
 
+  _convert_i64_to_i32(exported_program)
+  exported_program = exported_program.run_decompositions(
+      lowerings.decompositions()
+  )
+
   with export_utils.create_ir_context() as context, ir.Location.unknown():
 
     module = ir.Module.create()
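The new `_convert_i64_to_i32` pass only rewrites `aten.arange.start_step` nodes whose outputs are int64 constants with int32-representable bounds. A small eager-mode illustration of the dtype behavior it targets (plain PyTorch, not code from this package):

import torch

# torch.arange with integer arguments yields int64 by default, which the
# exported graph would otherwise carry through to the runtime.
positions = torch.arange(0, 8)
assert positions.dtype == torch.int64

# The rewritten FX node wraps the original op and casts its result, roughly:
positions_i32 = torch.arange(0, 8).type(torch.int32)
assert positions_i32.dtype == torch.int32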
ai_edge_torch/odml_torch/lowerings/_basic.py
CHANGED
@@ -202,3 +202,47 @@ def _aten_div(mod, x, y, *, rounding_mode=None, out=None) -> ir.Value:
   x, y = utils.broadcast_args_if_needed(x, y)
 
   return stablehlo.divide(x, y)
+
+
+# Schema:
+# - aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt?
+#   start=None, SymInt? end=None, SymInt step=1) -> Tensor
+# Torch Reference:
+# - https://pytorch.org/docs/stable/generated/torch.slice_scatter.html
+# - https://github.com/pytorch/pytorch/blob/18f9331e5deb4c02ae5c206e133a9b4add49bd97/aten/src/ATen/native/TensorShape.cpp#L4002
+@lower(torch.ops.aten.slice_scatter)
+def _aten_slice_scatter(lctx, self, src, dim=0, start=None, end=None, step=1):
+  start = start or 0
+  end = end or self.type.shape[dim]
+  if start < 0:
+    start = self.type.shape[dim] + start
+  if end < 0:
+    end = self.type.shape[dim] + end
+
+  end = start + step * math.ceil((end - start) / step) - (step - 1)
+
+  padding_low = start
+  padding_high = self.type.shape[dim] - end
+
+  rank = len(self.type.shape)
+  src = stablehlo.pad(
+      src,
+      utils.splat(0, src.type.element_type, []),
+      edge_padding_low=[padding_low if i == dim else 0 for i in range(rank)],
+      edge_padding_high=[padding_high if i == dim else 0 for i in range(rank)],
+      interior_padding=[step - 1 if i == dim else 0 for i in range(rank)],
+  )
+  pred = np.ones(self.type.shape, dtype=np.bool_)
+  pred[*[
+      slice(start, end, step) if i == dim else slice(None, None, None)
+      for i in range(rank)
+  ]] = False
+  pred = stablehlo.constant(
+      ir.DenseElementsAttr.get(
+          np.packbits(pred, bitorder="little"),
+          type=ir.IntegerType.get_signless(1),
+          shape=pred.shape,
+      )
+  )
+  out = stablehlo.select(pred, self, src)
+  return out
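The new lowering has to reproduce `torch.slice_scatter` semantics with StableHLO pad and select ops. A small reference example of those semantics in eager PyTorch (illustrative only):

import torch

# slice_scatter writes `src` into the strided slice of the input selected by
# (dim, start, end, step) and leaves every other element unchanged.
x = torch.zeros(8)
src = torch.ones(2)
out = torch.slice_scatter(x, src, dim=0, start=2, end=6, step=2)
# Indices 2 and 4 take values from src; the rest keep x's values.
assert out.tolist() == [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]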
ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py
CHANGED
@@ -167,7 +167,6 @@ lower_by_torch_xla2(torch.ops.aten.mul.Scalar)
 lower_by_torch_xla2(torch.ops.aten.mul.Tensor)
 lower_by_torch_xla2(torch.ops.aten.native_batch_norm)
 lower_by_torch_xla2(torch.ops.aten.native_group_norm)
-lower_by_torch_xla2(torch.ops.aten.native_layer_norm)
 lower_by_torch_xla2(torch.ops.aten.native_layer_norm_backward)
 lower_by_torch_xla2(torch.ops.aten.ne)
 lower_by_torch_xla2(torch.ops.aten.neg)
@@ -204,7 +203,6 @@ lower_by_torch_xla2(torch.ops.aten.sin)
 lower_by_torch_xla2(torch.ops.aten.sinh)
 lower_by_torch_xla2(torch.ops.aten.slice)
 lower_by_torch_xla2(torch.ops.aten.slice_copy)
-lower_by_torch_xla2(torch.ops.aten.slice_scatter)
 lower_by_torch_xla2(torch.ops.aten.sort)
 lower_by_torch_xla2(torch.ops.aten.split)
 lower_by_torch_xla2(torch.ops.aten.split_copy)
ai_edge_torch/odml_torch/lowerings/_layer_norm.py
ADDED
@@ -0,0 +1,78 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides lowering for coreaten to stablehlo for LayerNorm."""
+
+import math
+from typing import Optional
+from ai_edge_torch.odml_torch.lowerings import registry
+from ai_edge_torch.odml_torch.lowerings import utils
+from jax._src.lib.mlir import ir
+from jax._src.lib.mlir.dialects import hlo as stablehlo
+import torch
+
+
+# native_layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight,
+#   Tensor? bias, float eps) -> (Tensor, Tensor, Tensor)
+@registry.lower(torch.ops.aten.native_layer_norm)
+def _aten_native_layer_norm(
+    lctx,
+    data: ir.Value,
+    normalized_shape: list[int],
+    weight: Optional[ir.Value],
+    bias: Optional[ir.Value],
+    eps: float,
+):
+  data_type: ir.RankedTensorType = data.type
+  unnormalized_count = math.prod(data_type.shape) // math.prod(normalized_shape)
+  dest_shape = [
+      1,
+      unnormalized_count,
+      math.prod(normalized_shape),
+  ]
+  dest_type = ir.RankedTensorType.get(dest_shape, data_type.element_type)
+
+  reshaped_data = stablehlo.reshape(dest_type, data)
+
+  one = utils.splat(1, data_type.element_type, [unnormalized_count])
+  zero = utils.splat(0, data_type.element_type, [unnormalized_count])
+  output, mean, var = stablehlo.batch_norm_training(
+      reshaped_data, one, zero, eps, 1
+  )
+  eps_splat = utils.splat(eps, var.type.element_type, var.type.shape)
+  rstd = stablehlo.rsqrt(stablehlo.add(var, eps_splat))
+
+  stats_shape = data_type.shape[: -1 * len(normalized_shape)] + [1] * len(
+      normalized_shape
+  )
+  stats_type = ir.RankedTensorType.get(stats_shape, data_type.element_type)
+  mean = stablehlo.reshape(stats_type, mean)
+  rstd = stablehlo.reshape(stats_type, rstd)
+
+  output = stablehlo.reshape(data_type, output)
+
+  data_rank = len(data_type.shape)
+  normalized_rank = len(normalized_shape)
+  if weight is not None:
+    weight = stablehlo.broadcast_in_dim(
+        data_type, weight, list(range(data_rank - normalized_rank, data_rank))
+    )
+    output = stablehlo.multiply(weight, output)
+  if bias is not None:
+    bias = stablehlo.broadcast_in_dim(
+        data_type, bias, list(range(data_rank - normalized_rank, data_rank))
+    )
+    output = stablehlo.add(bias, output)
+
+  return output, mean, rstd
{ai_edge_torch_nightly-0.3.0.dev20240910.dist-info → ai_edge_torch_nightly-0.3.0.dev20240914.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.3.0.dev20240910
+Version: 0.3.0.dev20240914
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI