PyPI - ai-edge-torch-nightly - Versions diffs - 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

ai_edge_torch/generative/utilities/converter.py CHANGED Viewed

@@ -15,13 +15,28 @@
 """Common utility functions for model conversion."""
-from typing import Union
+from functools import partial
+from typing import Any, Union
 from ai_edge_torch._convert import converter as converter_utils
 import ai_edge_torch.generative.layers.kv_cache as kv_utils
 import ai_edge_torch.generative.layers.model_config as cfg
 from ai_edge_torch.generative.quantize import quant_recipes
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 import torch
+import torch.nn as nn
+class ExportableModule(torch.nn.Module):
+  def __init__(self, module, **extra_kwargs):
+    super().__init__()
+    self.module = module
+    self.extra_kwargs = extra_kwargs
+  def forward(self, *export_args, **export_kwargs):
+    full_kwargs = {**export_kwargs, **self.extra_kwargs}
+    return self.module(*export_args, **full_kwargs)
 def convert_to_tflite(
@@ -31,6 +46,7 @@ def convert_to_tflite(
     pixel_values_size: torch.Size = None,
     quantize: bool = True,
     config: cfg.ModelConfig = None,
+    export_config: ExportConfig = None,
 ):
   """Converts a nn.Module model to multi-signature tflite model.
@@ -97,6 +113,11 @@ def convert_to_tflite(
   )
   quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
+  # For export, we create a module that captures any non-exportable
+  # arguments, e.g. the generation config object.
+  mod = ExportableModule(pytorch_model, export_config=export_config)
   converter = converter_utils.Converter()
   for i in range(len(prefill_seq_lens)):
     prefill_seq_len = prefill_seq_lens[i]
@@ -108,7 +129,7 @@ def convert_to_tflite(
       prefill_signature_name = f'prefill_{prefill_seq_len}'
     converter.add_signature(
         prefill_signature_name,
-        pytorch_model,
+        mod,
         sample_kwargs={
             'tokens': prefill_tokens,
             'input_pos': prefill_input_pos,
@@ -118,7 +139,7 @@ def convert_to_tflite(
     if prefill_pixel_values is not None:
       converter.add_signature(
           prefill_signature_name + '_pixel',
-          pytorch_model,
+          mod,
           sample_kwargs={
               'tokens': prefill_tokens,
               'input_pos': prefill_input_pos,
@@ -129,7 +150,7 @@ def convert_to_tflite(
   converter.add_signature(
       'decode',
-      pytorch_model,
+      mod,
       sample_kwargs={
           'tokens': decode_token,
           'input_pos': decode_input_pos,

ai_edge_torch/generative/utilities/model_builder.py CHANGED Viewed

@@ -16,7 +16,8 @@
 """Utilities to be used for re-authoring transformer models."""
 import copy
-from typing import Tuple
+from dataclasses import dataclass
+from typing import Optional, Tuple
 from ai_edge_torch.generative.layers import attention
 from ai_edge_torch.generative.layers import builder
@@ -45,6 +46,15 @@ TENSOR_NAMES_WITH_SEPARATE_LM_HEAD = copy.copy(TENSOR_NAMES)
 TENSOR_NAMES_WITH_SEPARATE_LM_HEAD.lm_head = "lm_head"
+@dataclass
+class ExportConfig:
+  """Model generating configuration settings."""
+  # On prefill signatures, should the model produce logit output?
+  # When False, only decode signatures will produce output.
+  output_logits_on_prefill: bool = False
 class DecoderOnlyModel(nn.Module):
   """A simple decoder-only transformer model built from the Edge Generative API.
@@ -93,6 +103,7 @@ class DecoderOnlyModel(nn.Module):
       tokens: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      export_config: Optional[ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
     _, seq_len = tokens.size()
     assert self.config.max_seq_len >= seq_len, (
@@ -108,7 +119,7 @@ class DecoderOnlyModel(nn.Module):
     mask = mask[:, :, :, : self.config.kv_cache_max]
     return self.forward_with_embeds(
-        input_embeds, rope, mask, input_pos, kv_cache
+        input_embeds, rope, mask, input_pos, kv_cache, export_config
     )
   def forward_with_embeds(
@@ -118,6 +129,7 @@ class DecoderOnlyModel(nn.Module):
       mask: torch.Tensor,
       input_pos: torch.Tensor,
       kv_cache: kv_utils.KVCache,
+      export_config: Optional[ExportConfig] = None,
   ) -> dict[torch.Tensor, kv_utils.KVCache]:
     """Forwards the model with input embeddings."""
     assert len(self.transformer_blocks) == len(kv_cache.caches), (
@@ -137,6 +149,13 @@ class DecoderOnlyModel(nn.Module):
         updated_kv_entires.append(kv_entry)
     updated_kv_cache = kv_utils.KVCache(tuple(updated_kv_entires))
+    if export_config is not None:
+      if (
+          torch.numel(input_pos) > 1
+          and not export_config.output_logits_on_prefill
+      ):
+        return {"kv_cache": updated_kv_cache}
     x = self.final_norm(x)
     logits = self.lm_head(x)  # (b, t, vocab_size)
     return {"logits": logits, "kv_cache": updated_kv_cache}
@@ -146,8 +165,9 @@ def build_decoder_only_model(
     checkpoint_path: str,
     config: cfg.ModelConfig,
     tensor_names: loading_utils.ModelLoader.TensorNames,
-) -> DecoderOnlyModel:
-  transformer = DecoderOnlyModel(config)
+    model_class: type[nn.Module] = DecoderOnlyModel,
+) -> nn.Module:
+  transformer = model_class(config)
   loader = loading_utils.ModelLoader(checkpoint_path, tensor_names)
   loader.load(
       transformer, strict=not config.lm_head_share_weight_with_embedding

ai_edge_torch/generative/utilities/verifier.py CHANGED Viewed

@@ -19,6 +19,7 @@ import logging
 from typing import List
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
+from ai_edge_torch.generative.utilities.model_builder import ExportConfig
 import torch
@@ -40,6 +41,7 @@ class ModelWrapper(torch.nn.Module):
     """
     super().__init__()
     self.model = model
+    self.export_config = ExportConfig(output_logits_on_prefill=True)
   def forward(
       self, tokens: torch.Tensor, pixel_values: torch.Tensor = None
@@ -103,13 +105,25 @@ class ReauthoredModelWrapper(ModelWrapper):
     Returns:
       The output logits and the updated KV cache.
     """
+    # Verification requires logit outputs on prefill for comparison.
+    if (
+        self.export_config is not None
+        and not self.export_config.output_logits_on_prefill
+    ):
+      raise ValueError("Verifier requires logit output on prefill.")
     # Since the reauthored model doesn't include keyword arguments, pass
     # pixel_values only when it is not None. Otherwise, it may raise an error.
     if pixel_values is None:
-      output = self.model.forward(tokens, input_pos, kv_cache)
+      output = self.model.forward(
+          tokens, input_pos, kv_cache, export_config=self.export_config
+      )
     else:
       output = self.model.forward(
-          tokens, input_pos, kv_cache, pixel_values=pixel_values
+          tokens,
+          input_pos,
+          kv_cache,
+          pixel_values=pixel_values,
+          export_config=self.export_config,
       )
     return output["logits"], output["kv_cache"]

ai_edge_torch/lowertools/_shim.py CHANGED Viewed

@@ -15,13 +15,15 @@
 from typing import Any, Optional
-from ai_edge_torch import config
+from ai_edge_torch import _config
 from ai_edge_torch._convert import signature
 from ai_edge_torch.quantize import quant_config as qcfg
 import torch
+config = _config.config
 # isort: off
-if config.Config.use_torch_xla:
+if config.use_torch_xla:
   from ai_edge_torch.lowertools import torch_xla_utils as utils
   from ai_edge_torch.lowertools.torch_xla_utils import exported_program_to_mlir_text
   from torch_xla.experimental.mark_pattern_utils import StableHLOCompositeBuilder

ai_edge_torch/lowertools/test_utils.py CHANGED Viewed

@@ -15,9 +15,11 @@
 import re
 from typing import Optional
-from ai_edge_torch import config
+from ai_edge_torch import _config
 from absl.testing import absltest as googletest
+config = _config.config
 def _extract_backend_configs(mlir):
   mlir = mlir.replace("\\22", '"')
@@ -38,7 +40,7 @@ def assert_string_count(
   if odml_torch_attr_counter is None:
     odml_torch_attr_counter = {}
-  if config.Config.use_torch_xla:
+  if config.use_torch_xla:
     for key in torch_xla_pattern_counter:
       test_case.assertEqual(
           mlir.count(key),

ai_edge_torch/odml_torch/lowerings/__init__.py CHANGED Viewed

@@ -21,6 +21,6 @@ from . import _quantized_decomposed
 from . import context
 from . import registry
 from . import utils
-from .registry import decompositions
+from .decomp import decompositions
 from .registry import lookup
 from .registry import lower

ai_edge_torch/odml_torch/lowerings/_basic.py CHANGED Viewed

@@ -276,11 +276,13 @@ def _aten_slice_scatter(lctx, self, src, dim=0, start=None, end=None, step=1):
           interior_padding if i == dim else 0 for i in range(rank)
       ],
   )
-  pred = np.ones(self.type.shape, dtype=np.bool_)
-  pred[*[
+  slices = [
       slice(start, end, step) if i == dim else slice(None, None, None)
       for i in range(rank)
-  ]] = False
+  ]
+  pred = np.ones(self.type.shape, dtype=np.bool_)
+  pred[np.index_exp[tuple(slices)]] = False
   pred = stablehlo.constant(
       ir.DenseElementsAttr.get(
           np.packbits(pred, bitorder="little"),

ai_edge_torch/odml_torch/lowerings/_convolution.py CHANGED Viewed

@@ -232,7 +232,9 @@ def _aten_convolution(
   if bias is not None:
     # broadcast [C] to [NCHW]
-    broadcasted_bias = stablehlo.broadcast_in_dim(output_type, bias, [1])
+    broadcasted_bias = stablehlo.broadcast_in_dim(
+        output_type, bias, ir.DenseI64ArrayAttr.get([1])
+    )
     res = stablehlo.add(
         lhs=res,
         rhs=broadcasted_bias,

ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py CHANGED Viewed

@@ -16,12 +16,15 @@ import functools
 import logging
 from ai_edge_torch.odml_torch import jax_bridge
+from ai_edge_torch.odml_torch.lowerings import context
+from ai_edge_torch.odml_torch.lowerings import registry
+import jax.numpy as jnp
+from jax._src.lib.mlir import ir
 import torch
 import torch_xla2.ops.jaten  # Import to load torch_xla2 ops
 import torch_xla2.ops.ops_registry  # Import to load torch_xla2 ops
-from . import registry
+LoweringContext = context.LoweringContext
 @functools.cache
 def _log_usage(op):
@@ -258,3 +261,26 @@ def _aten_copy(self, *args, **kwargs):
 @lower_by_jax(torch.ops.aten.copy, ir_input_names=["src"])
 def _aten_copy(self, src, **kwargs):
   return _TORCH_XLA2_IMPLS[torch.ops.aten.copy](self, src)
+# Schema:
+#   - aten::einsum(str equation, Tensor[] tensors, *, int[]? path=None)
+#       -> Tensor
+# Torch Reference:
+#   - https://pytorch.org/docs/stable/generated/torch.einsum.html
+#   - https://github.com/pytorch/pytorch/blob/1b3f8b75896720e88362cbec7db32abc52afa83e/aten/src/ATen/native/Linear.cpp#L255
+@registry.lower(torch.ops.aten.einsum.default)
+def _aten_einsum_default(
+    lctx: LoweringContext,
+    equation: str,
+    tensors: list[ir.Value],
+    path=None,
+):
+  _log_usage(torch.ops.aten.einsum.default)
+  @jax_bridge.wrap
+  def jax_lowering(operands):
+    # Ignore the input path and let JAX determine the path.
+    return jnp.einsum(equation, *operands, optimize="optimal")
+  return jax_lowering(lctx, tuple(tensors))

ai_edge_torch/odml_torch/lowerings/_layer_norm.py CHANGED Viewed

@@ -20,6 +20,7 @@ from ai_edge_torch.odml_torch.lowerings import registry
 from ai_edge_torch.odml_torch.lowerings import utils
 from jax._src.lib.mlir import ir
 from jax._src.lib.mlir.dialects import hlo as stablehlo
+import numpy as np
 import torch
@@ -66,12 +67,20 @@ def _aten_native_layer_norm(
   normalized_rank = len(normalized_shape)
   if weight is not None:
     weight = stablehlo.broadcast_in_dim(
-        data_type, weight, list(range(data_rank - normalized_rank, data_rank))
+        data_type,
+        weight,
+        ir.DenseI64ArrayAttr.get(
+            list(range(data_rank - normalized_rank, data_rank))
+        ),
     )
     output = stablehlo.multiply(weight, output)
   if bias is not None:
     bias = stablehlo.broadcast_in_dim(
-        data_type, bias, list(range(data_rank - normalized_rank, data_rank))
+        data_type,
+        bias,
+        ir.DenseI64ArrayAttr.get(
+            list(range(data_rank - normalized_rank, data_rank))
+        ),
     )
     output = stablehlo.add(bias, output)

ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Lowerings for PT2E torch.ops.quantized_decomposed ops."""
-from typing import Union, cast
+from typing import Optional, Union, cast
 from ai_edge_torch.odml_torch.lowerings import context
 from ai_edge_torch.odml_torch.lowerings import utils
@@ -30,15 +30,15 @@ LoweringContext = context.LoweringContext
 def _uniform_quantized_type(
-    stored_type: str | ir.Type,
-    expressed_type: str | ir.Type,
+    stored_type: Union[str, ir.Type],
+    expressed_type: Union[str, ir.Type],
     *,
-    scale=float | list[float] | tuple[float],
-    zero_point=float | list[float] | tuple[float],
-    storage_type_min: int | None = None,
-    storage_type_max: int | None = None,
-    channel_axis: int | None = None,
-    channel_axis_size: int | None = None,
+    scale=Union[float, list[float], tuple[float]],
+    zero_point=Union[float, list[float], tuple[float]],
+    storage_type_min: Optional[int] = None,
+    storage_type_max: Optional[int] = None,
+    channel_axis: Optional[int] = None,
+    channel_axis_size: Optional[int] = None,
 ):
   """Polyfill for quant.UniformQuantizedType."""
   if storage_type_min and storage_type_max:

ai_edge_torch/odml_torch/lowerings/decomp.py ADDED Viewed

@@ -0,0 +1,65 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Torch export decompositions to run before lowering."""
+import functools
+import torch
+@functools.cache
+def decompositions():
+  # Base: Core ATen decompositions
+  decompositions = torch._decomp.core_aten_decompositions()
+  decompositions.update(
+      torch._decomp.get_decompositions([
+          torch.ops.aten.upsample_nearest2d,
+          torch.ops.aten._native_batch_norm_legit.no_stats,
+          torch.ops.aten._native_batch_norm_legit_functional,
+          torch.ops.aten._adaptive_avg_pool2d,
+          torch.ops.aten._adaptive_avg_pool3d,
+          torch.ops.aten.grid_sampler_2d,
+          torch.ops.aten.native_group_norm,
+          torch.ops.aten.native_dropout,
+          torch.ops.aten.reflection_pad1d,
+          torch.ops.aten.reflection_pad2d,
+          torch.ops.aten.reflection_pad3d,
+          torch.ops.aten.replication_pad1d,
+          torch.ops.aten.replication_pad2d,
+          torch.ops.aten.replication_pad3d,
+          torch.ops.aten.addmm,
+      ])
+  )
+  torch._decomp.remove_decompositions(
+      decompositions,
+      [
+          torch.ops.aten.roll,
+          # Torch's default einsum impl/decompositions is less efficient and
+          # optimized through converter than JAX's impl. Disable einsum
+          # decomposition to use JAX bridge for a more efficient lowering.
+          torch.ops.aten.einsum.default,
+      ],
+  )
+  # Override _safe_softmax decompositions with regular softmax.
+  # _safe_softmax introduces additional check-select ops to guard extreme
+  # input values to softmax, which could make the converted model inefficient
+  # on-device.
+  if hasattr(torch.ops.aten, "_safe_softmax"):
+    decompositions[torch.ops.aten._safe_softmax.default] = torch.softmax
+  return decompositions

ai_edge_torch/odml_torch/lowerings/registry.py CHANGED Viewed

@@ -26,7 +26,6 @@ class LoweringRegistry:
   def __init__(self):
     self.registered_ops = {}
-    self.decompositions = {}
   def lookup(self, op_or_name):
     candidate = self._get_lowering(op_or_name)
@@ -52,33 +51,6 @@ class LoweringRegistry:
 global_registry = LoweringRegistry()
-global_registry.decompositions.update(torch._decomp.core_aten_decompositions())
-global_registry.decompositions.update(
-    torch._decomp.get_decompositions([
-        torch.ops.aten.upsample_nearest2d,
-        torch.ops.aten._native_batch_norm_legit.no_stats,
-        torch.ops.aten._native_batch_norm_legit_functional,
-        torch.ops.aten._adaptive_avg_pool2d,
-        torch.ops.aten._adaptive_avg_pool3d,
-        torch.ops.aten.grid_sampler_2d,
-        torch.ops.aten.native_group_norm,
-        torch.ops.aten.native_dropout,
-        torch.ops.aten.reflection_pad1d,
-        torch.ops.aten.reflection_pad2d,
-        torch.ops.aten.reflection_pad3d,
-        torch.ops.aten.replication_pad1d,
-        torch.ops.aten.replication_pad2d,
-        torch.ops.aten.replication_pad3d,
-        torch.ops.aten.addmm,
-    ])
-)
-torch._decomp.remove_decompositions(
-    global_registry.decompositions,
-    [
-        torch.ops.aten.roll,
-    ],
-)
 def lookup(op):
@@ -91,7 +63,3 @@ def lower(op):
     return lowering
   return inner
-def decompositions():
-  return global_registry.decompositions

ai_edge_torch/version.py CHANGED Viewed

@@ -13,4 +13,4 @@
 # limitations under the License.
 # ==============================================================================
-__version__ = "0.3.0.dev20241206"
+__version__ = "0.3.0.dev20241214"

{ai_edge_torch_nightly-0.3.0.dev20241206.dist-info → ai_edge_torch_nightly-0.3.0.dev20241214.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.3.0.dev20241206
+Version: 0.3.0.dev20241214
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI
@@ -11,7 +11,6 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering
@@ -20,7 +19,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development
 Classifier: Topic :: Software Development :: Libraries
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy
@@ -28,10 +27,13 @@ Requires-Dist: scipy
 Requires-Dist: safetensors
 Requires-Dist: tabulate
 Requires-Dist: torch>=2.4.0
-Requires-Dist: torch-xla>=2.4.0
-Requires-Dist: tf-nightly>=2.19.0.dev20241121
+Requires-Dist: tf-nightly>=2.19.0.dev20241201
 Requires-Dist: ai-edge-litert-nightly
 Requires-Dist: ai-edge-quantizer-nightly
+Requires-Dist: jax
+Requires-Dist: torch-xla2[odml]>=0.0.1.dev20241201
+Provides-Extra: torch-xla
+Requires-Dist: torch-xla>=2.4.0; extra == "torch-xla"
 Library that supports converting PyTorch models into a .tflite format, which can
 then be run with TensorFlow Lite and MediaPipe.  This enables applications for

ai-edge-torch-nightly 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl

ai-edge-torch-nightly 0.3.0.dev20241206__py3-none-any.whl → 0.3.0.dev20241214__py3-none-any.whl