PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

Files changed (169) hide show

ai_edge_torch/debug/culprit.py CHANGED Viewed

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""Culprit finder for AI Edge Torch conversion."""
 import contextlib
 import copy
@@ -20,17 +21,15 @@ import functools
 import io
 import operator
 import os
-import sys
 from typing import Any, Callable, Generator, List, Optional, Tuple, Union
-from functorch.compile import minifier as fx_minifier
+import ai_edge_torch
+from ai_edge_torch.debug import utils
 import torch
 from torch._functorch import aot_autograd
+from torch._functorch.fx_minifier import minifier as fx_minifier
 import torch.utils._pytree as pytree
-import ai_edge_torch
-from ai_edge_torch.debug import utils
 _torch_float_dtypes = {
     torch.float32,
     torch.float,
@@ -117,24 +116,32 @@ class Culprit(SearchResult):
       print_output: bool - If true, prints the code to stdout. Otherwise returns
         the code in a str.
     """
-    # TODO (b/321263453): Support Python code gen with sample arg tensor values.
+    # TODO: b/321263453 - Support Python code gen with sample arg tensor values.
     random_inputs = True
-    graph_module_code = self.graph_module.print_readable(print_output=False).rstrip()
+    graph_module_code = self.graph_module.print_readable(
+        print_output=False
+    ).rstrip()
     input_strs = []
     for value in self.inputs:
       if torch.is_tensor(value):
         if not random_inputs:
-          input_strs.append(f"# size={_get_shape_str(value)}, dtype={value.dtype}")
-          input_strs.append(f"torch.load(io.BytesIO({_tensor_to_buffer(value)})),")
+          input_strs.append(
+              f"# size={_get_shape_str(value)}, dtype={value.dtype}"
+          )
+          input_strs.append(
+              f"torch.load(io.BytesIO({_tensor_to_buffer(value)})),"
+          )
         else:
           input_strs.append(_tensor_to_random_tensor_call(value) + ",")
       else:
         input_strs.append(str(value) + ",")
     inputs_code = (
-        "_args = (\n" + "\n".join([" " * 4 + code for code in input_strs]) + "\n)"
+        "_args = (\n"
+        + "\n".join([" " * 4 + code for code in input_strs])
+        + "\n)"
     )
     code = graph_module_code + "\n\n" + inputs_code
@@ -145,6 +152,7 @@ class Culprit(SearchResult):
   def print_code(self, print_output=True):
     """Print the Python code for culprit graph module, sample args, and AI
     Edge Torch conversion that will fail with the error.
     Args:
@@ -157,7 +165,9 @@ class Culprit(SearchResult):
         + "from torch import device\n"
         + "import ai_edge_torch\n\n"
         + definitions
-        + f"\n\n_edge_model = ai_edge_torch.convert({_CULPRIT_GRAPH_MODULE_NAME}().eval(), _args)\n"
+        + "\n\n_edge_model ="
+        f" ai_edge_torch.convert({_CULPRIT_GRAPH_MODULE_NAME}().eval(),"
+        " _args)\n"
     )
     if self._runtime_errors:
       code += "_edge_model(*_args)\n"
@@ -179,8 +189,8 @@ class Culprit(SearchResult):
 def _normalize_getitem_nodes(fx_gm: torch.fx.GraphModule):
-  """
-  This function turns all operator getitem nodes in ExportedProgram FX graph to
+  """This function turns all operator getitem nodes in ExportedProgram FX graph to
   new nodes composed of "computation + getitem". The normalization duplicates
   some computations in the graph but would make the graph more friendly for
   partitioning in FX minifier.
@@ -212,7 +222,9 @@ def _normalize_getitem_nodes(fx_gm: torch.fx.GraphModule):
   return fx_gm
-def _erase_unused_inputs(fx_gm: torch.fx.GraphModule, inputs: Tuple[torch.Tensor]):
+def _erase_unused_inputs(
+    fx_gm: torch.fx.GraphModule, inputs: Tuple[torch.Tensor]
+):
   fx_gm = copy.deepcopy(fx_gm)
   inputs = tuple(inputs)
   args = fx_gm.graph.process_inputs(*inputs)
@@ -316,7 +328,9 @@ def _erase_sub_gm_from_gm(
   return fx_gm, fx_inputs
-def _normalize_minified_fx_gm(fx_gm: torch.fx.GraphModule, inputs: Tuple[torch.Tensor]):
+def _normalize_minified_fx_gm(
+    fx_gm: torch.fx.GraphModule, inputs: Tuple[torch.Tensor]
+):
   fx_gm, inputs = _erase_unused_inputs(fx_gm, inputs)
   fx_gm = _lift_dead_ops_to_outputs(fx_gm)
   fx_gm, _ = aot_autograd.aot_export_module(fx_gm, inputs, trace_joint=False)
@@ -357,16 +371,15 @@ def _search_model(
   """Finds subgraphs in the torch model that satisfy a certain predicate function provided by the users.
   Args:
-    predicate_f: a predicate function the users specify.
-      It takes a FX (sub)graph and the inputs to this graph,
-      return True if the graph satisfies the predicate,
-      return False otherwise.
+    predicate_f: a predicate function the users specify. It takes a FX
+      (sub)graph and the inputs to this graph, return True if the graph
+      satisfies the predicate, return False otherwise.
     model: model in which to search subgraph.
-    export_args: A set of args to trace the model with,
-      i.e. model(*args) must run.
-    max_granularity - FX minifier arg. The maximum granularity (number of nodes)
-      in the returned ATen FX subgraph of the culprit.
-    enable_fx_minifier_logging: If true, allows the underlying FX minifier to log the progress.
+    export_args: A set of args to trace the model with, i.e. model(*args) must
+      run.
+    max_granularity: FX minifier arg. The maximum granularity (number of
+      nodes) in the returned ATen FX subgraph of the culprit.
+    enable_fx_minifier_logging: If true, allows the underlying FX minifier to
+      log the progress.
   """
   if isinstance(model, torch.nn.Module):
@@ -374,7 +387,8 @@ def _search_model(
       ep = torch.export.export(model, export_args)
     except Exception as err:
       raise ValueError(
-          "Your model is not exportable by torch.export.export. Please modify your model to be torch-exportable first."
+          "Your model is not exportable by torch.export.export. Please modify"
+          " your model to be torch-exportable first."
       ) from err
   else:
     ep = model
@@ -382,26 +396,37 @@ def _search_model(
   fx_gm, fx_inputs = utils.exported_program_to_fx_graph_module_and_inputs(ep)
   fx_gm = _normalize_getitem_nodes(fx_gm)
-  # HACK: temporarily disable XLA_HLO_DEBUG so that fx_minifier won't dump
-  # intermediate stablehlo files to storage.
+  # HACK: temporarily disable XLA_HLO_DEBUG and create_minified_hlo_graph so that
+  # fx_minifier won't dump intermediate stablehlo files to storage.
   # https://github.com/pytorch/pytorch/blob/main/torch/_functorch/fx_minifier.py#L440
   @contextlib.contextmanager
-  def disable_xla_hlo_debug():
+  def disable_minifier_xla_debug():
     xla_hlo_debug_value = None
     if "XLA_HLO_DEBUG" in os.environ:
       xla_hlo_debug_value = os.environ["XLA_HLO_DEBUG"]
       del os.environ["XLA_HLO_DEBUG"]
+    create_minified_hlo_graph = (
+        torch._functorch.fx_minifier.create_minified_hlo_graph
+    )
+    torch._functorch.fx_minifier.create_minified_hlo_graph = (
+        lambda *args, **kwargs: None
+    )
     try:
-      yield None
+      yield
     finally:
       if xla_hlo_debug_value is not None:
         os.environ["XLA_HLO_DEBUG"] = xla_hlo_debug_value
+      torch._functorch.fx_minifier.create_minified_hlo_graph = (
+          create_minified_hlo_graph
+      )
   found_culprits_num = 0
   while True:
     try:
-      with disable_xla_hlo_debug(), open(os.devnull, "w") as devnull:
+      with disable_minifier_xla_debug(), open(os.devnull, "w") as devnull:
         with contextlib.nullcontext() if enable_fx_minifier_logging else utils.redirect_stdio(
             stdout=devnull,
             stderr=devnull,
@@ -413,7 +438,9 @@ def _search_model(
               max_granularity=max_granularity,
           )
-      min_fx_gm, min_inputs = _normalize_minified_fx_gm(raw_min_fx_gm, raw_min_inputs)
+      min_fx_gm, min_inputs = _normalize_minified_fx_gm(
+          raw_min_fx_gm, raw_min_inputs
+      )
       found_culprits_num += 1
       yield SearchResult(min_fx_gm, min_inputs)
@@ -422,7 +449,10 @@ def _search_model(
       )
     except RuntimeError as e:
-      if str(e) == "Input graph did not fail the tester" and found_culprits_num > 0:
+      if (
+          str(e) == "Input graph did not fail the tester"
+          and found_culprits_num > 0
+      ):
         break
       raise e
@@ -439,13 +469,13 @@ def find_culprits(
   Args:
     torch_model: model to export and save
-    args: A set of args to trace the model with, i.e.
-      torch_model(*args) must run
-    max_granularity - FX minifier arg. The maximum granularity (number of nodes)
-      in the returned ATen FX subgraph of the culprit.
-    runtime_errors: If true, find culprits for Python runtime errors
-      with converted model.
-    enable_fx_minifier_logging: If true, allows the underlying FX minifier to log the progress.
+    args: A set of args to trace the model with, i.e. torch_model(*args) must
+      run.
+    max_granularity: FX minifier arg. The maximum granularity (number of
+      nodes) in the returned ATen FX subgraph of the culprit.
+    runtime_errors: If true, find culprits for Python runtime errors with
+      converted model.
+    enable_fx_minifier_logging: If true, allows the underlying FX minifier to
+      log the progress.
   """
   fx_minifier_checker = functools.partial(
@@ -460,5 +490,7 @@ def find_culprits(
       enable_fx_minifier_logging=enable_fx_minifier_logging,
   ):
     yield Culprit(
-        search_result.graph_module, search_result.inputs, _runtime_errors=runtime_errors
+        search_result.graph_module,
+        search_result.inputs,
+        _runtime_errors=runtime_errors,
     )

ai_edge_torch/debug/test/test_culprit.py CHANGED Viewed

@@ -17,18 +17,20 @@
 import ast
 import io
 import sys
-import unittest
+from ai_edge_torch.debug import find_culprits
 import torch
-from ai_edge_torch.debug import find_culprits
+from absl.testing import absltest as googletest
 _test_culprit_lib = torch.library.Library("test_culprit", "DEF")
 _test_culprit_lib.define("non_lowerable_op(Tensor x) -> Tensor")
-@torch.library.impl(_test_culprit_lib, "non_lowerable_op", "CompositeExplicitAutograd")
+@torch.library.impl(
+    _test_culprit_lib, "non_lowerable_op", "CompositeExplicitAutograd"
+)
 def non_lowerable_op(x):
   if x.max() > 10.0:
     return x + 1.0
@@ -48,7 +50,7 @@ class BadModel(torch.nn.Module):
     return x
-class TestCulprit(unittest.TestCase):
+class TestCulprit(googletest.TestCase):
   def test_find_culprits(self):
     model = BadModel().eval()
@@ -130,4 +132,4 @@ class TestCulprit(unittest.TestCase):
 if __name__ == "__main__":
-  unittest.main()
+  googletest.main()

ai_edge_torch/debug/test/test_search_model.py CHANGED Viewed

@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""Tests for search_model."""
-import unittest
+from ai_edge_torch.debug import _search_model
 import torch
-from ai_edge_torch.debug import _search_model
+from absl.testing import absltest as googletest
-class TestSearchModel(unittest.TestCase):
+class TestSearchModel(googletest.TestCase):
   def test_search_model_with_ops(self):
     class MultipleOpsModel(torch.nn.Module):
@@ -43,8 +42,10 @@ class TestSearchModel(unittest.TestCase):
     results = list(_search_model(find_subgraph_with_sub, model, args))
     self.assertEqual(len(results), 2)
-    self.assertIn(torch.ops.aten.sub.Tensor, [n.target for n in results[0].graph.nodes])
+    self.assertIn(
+        torch.ops.aten.sub.Tensor, [n.target for n in results[0].graph.nodes]
+    )
 if __name__ == "__main__":
-  unittest.main()
+  googletest.main()

ai_edge_torch/debug/utils.py CHANGED Viewed

@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""Utils for debugging."""
 import contextlib
 import sys
 import torch
-from torch.export.graph_signature import InputKind
-import torch.fx._pytree as fx_pytree
 from torch.utils import _pytree as pytree
-def exported_program_to_fx_graph_module_and_inputs(ep: torch.export.ExportedProgram):
+def exported_program_to_fx_graph_module_and_inputs(
+    ep: torch.export.ExportedProgram,
+):
   fx_gm = ep.graph_module
   fx_inputs = pytree.tree_map(
       torch.tensor, ep._graph_module_flat_inputs(*ep.example_inputs)
@@ -31,6 +33,15 @@ def exported_program_to_fx_graph_module_and_inputs(ep: torch.export.ExportedProg
 @contextlib.contextmanager
 def redirect_stdio(stdout, stderr):
+  """Redirects stdout and stderr to the given file objects.
+  Args:
+    stdout: A file object to redirect stdout to.
+    stderr: A file object to redirect stderr to.
+  Yields:
+    The file objects that stdout and stderr were redirected to.
+  """
   old_stdout = sys.stdout
   old_stderr = sys.stderr

ai_edge_torch/fx_pass_base.py ADDED Viewed

@@ -0,0 +1,101 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import abc
+import collections
+from typing import Sequence, Union
+import torch
+from torch.fx.passes.infra.pass_base import PassBase
+from torch.fx.passes.infra.pass_base import PassResult
+from torch.fx.passes.infra.pass_manager import pass_result_wrapper
+import torch.utils._pytree as pytree
+FxPassBase = PassBase
+FxPassResult = PassResult
+ExportedProgramPassResult = collections.namedtuple(
+    "ExportedProgramPassResult", ["exported_program", "modified"]
+)
+class ExportedProgramPassBase(abc.ABC):
+  def __call__(
+      self, exported_program: torch.export.ExportedProgram
+  ) -> ExportedProgramPassResult:
+    self.requires(exported_program)
+    res = self.call(exported_program)
+    self.ensures(exported_program)
+    return res
+  @abc.abstractmethod
+  def call(
+      self, exported_program: torch.export.ExportedProgram
+  ) -> ExportedProgramPassResult:
+    pass
+  def requires(self, exported_program: torch.export.ExportedProgram) -> None:
+    pass
+  def ensures(self, exported_program: torch.export.ExportedProgram) -> None:
+    pass
+# TODO(cnchan): make a PassManager class.
+def run_passes(
+    exported_program: torch.export.ExportedProgram,
+    passes: Sequence[Union[ExportedProgramPassBase, FxPassBase]],
+) -> torch.export.ExportedProgram:
+  passes, _ = pytree.tree_flatten(passes)
+  for pass_ in passes:
+    if not isinstance(pass_, ExportedProgramPassBase):
+      pass_ = pass_result_wrapper(pass_)
+    if isinstance(pass_, ExportedProgramPassBase):
+      exported_program = pass_(exported_program).exported_program
+    else:
+      gm = exported_program.graph_module
+      gm, modified = pass_(gm)
+      if modified and gm is not exported_program.graph_module:
+        exported_program = torch.export.ExportedProgram(
+            root=gm,
+            graph=gm.graph,
+            graph_signature=exported_program.graph_signature,
+            state_dict=exported_program.state_dict,
+            range_constraints=exported_program.range_constraints,
+            module_call_graph=exported_program.module_call_graph,
+            example_inputs=exported_program.example_inputs,
+            verifier=exported_program.verifier,
+            constants=exported_program.constants,
+        )
+  return exported_program
+class CanonicalizePass(ExportedProgramPassBase):
+  # A dummy decomp table for running ExportedProgram.run_decompositions without
+  # any op decompositions but just aot_export_module. Due to the check in
+  # run_decompositions, if None or an empty dict is passed as decomp_table,
+  # it will run the default aten-coreaten decompositions. Therefore a non-empty
+  # dummy decomp table is needed.
+  # Ref: https://github.com/pytorch/pytorch/blob/db895ace1d36726e64781774f53b3d3098206116/torch/export/exported_program.py#L543
+  _DUMMY_DECOMP_TABLE = {
+      torch._ops.OperatorBase(): lambda: None,
+  }
+  def call(self, exported_program: torch.export.ExportedProgram):
+    exported_program = exported_program.run_decompositions(
+        self._DUMMY_DECOMP_TABLE
+    )
+    return ExportedProgramPassResult(exported_program, True)

ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of converting a Gemma1 model to multi-signature tflite model."""
+import os
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.gemma import gemma1
+from ai_edge_torch.generative.utilities import converter
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma-2b'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The tflite file path to export.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+def main(_):
+  pytorch_model = gemma1.build_2b_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
+  output_filename = f'gemma_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      quantize=_QUANTIZE.value,
+  )
+if __name__ == '__main__':
+  app.run(main)

ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Example of converting a Gemma2 model to multi-signature tflite model."""
+import os
+import pathlib
+from absl import app
+from absl import flags
+from ai_edge_torch.generative.examples.gemma import gemma2
+from ai_edge_torch.generative.utilities import converter
+_CHECKPOINT_PATH = flags.DEFINE_string(
+    'checkpoint_path',
+    os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma2-2b'),
+    'The path to the model checkpoint, or directory holding the checkpoint.',
+)
+_TFLITE_PATH = flags.DEFINE_string(
+    'tflite_path',
+    '/tmp/',
+    'The tflite file path to export.',
+)
+_PREFILL_SEQ_LEN = flags.DEFINE_integer(
+    'prefill_seq_len',
+    1024,
+    'The maximum size of prefill input tensor.',
+)
+_KV_CACHE_MAX_LEN = flags.DEFINE_integer(
+    'kv_cache_max_len',
+    1280,
+    'The maximum size of KV cache buffer, including both prefill and decode.',
+)
+_QUANTIZE = flags.DEFINE_bool(
+    'quantize',
+    True,
+    'Whether the model should be quantized.',
+)
+def main(_):
+  pytorch_model = gemma2.build_2b_model(
+      _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
+  )
+  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
+  output_filename = f'gemma2_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+  converter.convert_to_tflite(
+      pytorch_model,
+      tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
+      prefill_seq_len=_PREFILL_SEQ_LEN.value,
+      quantize=_QUANTIZE.value,
+  )
+if __name__ == '__main__':
+  app.run(main)

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl