PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240611__py3-none-any.whl → 0.2.0.dev20240619__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240611__py3-none-any.whl → 0.2.0.dev20240619__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ai-edge-torch-nightly might be problematic. Click here for more details.

Files changed (24) hide show

ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py CHANGED Viewed

@@ -25,6 +25,25 @@ from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layo
 from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_mark  # NOQA
+def can_partition(graph_module: torch.fx.GraphModule):
+  """Returns true if the input graph_module can be partitioned by min cut solver
+  in a reasonable time.
+  The min cut solver implements O(|V|^2|E|) Dinic's algorithm, which may
+  take a long time to complete for large graph module. This function determines
+  whether the graph module can be partitioned by the graph module size.
+  See go/pytorch-layout-transpose-optimization for more details.
+  """
+  graph = graph_module.graph
+  n_nodes = len(graph.nodes)
+  n_edges = sum(len(n.users) for n in graph.nodes)
+  # According to the experiments our model set, |V| < 2000 can
+  # be partitioned generally in a reasonable time.
+  return n_nodes**2 * n_edges < 2000**3
 class MinCutSolver:
   # A number that is large enough but can fit into int32 with all computations
   # in the maximum flow.

ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/pass_body.py CHANGED Viewed

@@ -261,10 +261,17 @@ class OptimizeLayoutTransposesPass(ExportedProgramPassBase):
     self.mark_const_nodes(exported_program)
     graph_module = exported_program.graph_module
-    if os.environ.get("AIEDGETORCH_LAYOUT_OPTIMIZE_USE_MINCUT_PARTITIONER"):
+    partitioner = os.environ.get("AIEDGETORCH_LAYOUT_OPTIMIZE_PARTITIONER", None)
+    if partitioner == "MINCUT":
       graph_module = layout_partitioners.min_cut.partition(graph_module)
-    else:
+    elif partitioner == "GREEDY":
       graph_module = layout_partitioners.greedy.partition(graph_module)
+    else:
+      # By default use min cut partitioner if possible
+      if layout_partitioners.min_cut.can_partition(graph_module):
+        graph_module = layout_partitioners.min_cut.partition(graph_module)
+      else:
+        graph_module = layout_partitioners.greedy.partition(graph_module)
     graph = graph_module.graph
     for node in list(graph.nodes):

ai_edge_torch/debug/__init__.py CHANGED Viewed

@@ -13,4 +13,5 @@
 # limitations under the License.
 # ==============================================================================
+from .culprit import _search_model
 from .culprit import find_culprits

ai_edge_torch/debug/culprit.py CHANGED Viewed

@@ -21,7 +21,7 @@ import io
 import operator
 import os
 import sys
-from typing import Any, Generator, List, Optional, Tuple
+from typing import Any, Callable, Generator, List, Optional, Tuple, Union
 from functorch.compile import minifier as fx_minifier
 import torch
@@ -85,10 +85,9 @@ def _tensor_to_buffer(t: torch.Tensor):
 @dataclasses.dataclass
-class Culprit:
+class SearchResult:
   graph_module: torch.fx.GraphModule
   inputs: Tuple[Any]
-  _runtime_errors: bool
   @property
   def graph(self) -> torch.fx.Graph:
@@ -98,6 +97,11 @@ class Culprit:
   def graph(self, fx_g: torch.fx.Graph):
     self.graph_module.graph = fx_g
+@dataclasses.dataclass
+class Culprit(SearchResult):
+  _runtime_errors: bool
   @property
   def stack_traces(self) -> List[str]:
     stack_traces = set()
@@ -342,42 +346,42 @@ def _fx_minifier_checker(fx_gm, inputs, runtime_errors=False):
   return False
-def find_culprits(
-    torch_model: torch.nn.Module,
-    args: Tuple[Any],
-    max_granularity: Optional[int] = None,
-    runtime_errors: bool = False,
+def _search_model(
+    predicate_f: Callable[[torch.fx.GraphModule, List[Any]], bool],
+    model: Union[torch.export.ExportedProgram, torch.nn.Module],
+    export_args: Tuple[Any] = None,
     *,
+    max_granularity: Optional[int] = None,
     enable_fx_minifier_logging: bool = False,
-) -> Generator[Culprit, None, None]:
-  """Finds culprits in the AI Edge Torch model conversion.
+) -> Generator[SearchResult, None, None]:
+  """Finds subgraphs in the torch model that satify a certain predicate function provided by the users.
   Args:
-    torch_model: model to export and save
-    args: A set of args to trace the model with, i.e.
-      torch_model(*args) must run
+    predicate_f: a predicate function the users specify.
+      It takes a FX (sub)graph and the inputs to this graph,
+      return True if the graph satisfies the predicate,
+      return False otherwise.
+    model: model in which to search subgraph.
+    export_args: A set of args to trace the model with,
+      i.e. model(*args) must run.
     max_granularity - FX minifier arg. The maximum granularity (number of nodes)
       in the returned ATen FX subgraph of the culprit.
-    runtime_errors: If true, find culprits for Python runtime errors
-      with converted model.
-    enable_fx_minifier_logging: If true, allows the underlying FX minifier to log
-      the progress.
+    enable_fx_minifier_logging: If true, allows the underlying FX minifier to log the progress.
   """
-  try:
-    ep = torch.export.export(torch_model, args)
-  except Exception as err:
-    raise ValueError(
-        "Your model is not exportable by torch.export.export. Please modify your model to be torch-exportable first."
-    ) from err
+  if isinstance(model, torch.nn.Module):
+    try:
+      ep = torch.export.export(model, export_args)
+    except Exception as err:
+      raise ValueError(
+          "Your model is not exportable by torch.export.export. Please modify your model to be torch-exportable first."
+      ) from err
+  else:
+    ep = model
   fx_gm, fx_inputs = utils.exported_program_to_fx_graph_module_and_inputs(ep)
   fx_gm = _normalize_getitem_nodes(fx_gm)
-  fx_minifier_checker = functools.partial(
-      _fx_minifier_checker, runtime_errors=runtime_errors
-  )
   # HACK: temporarily disable XLA_HLO_DEBUG so that fx_minifier won't dump
   # intermediate stablehlo files to storage.
   # https://github.com/pytorch/pytorch/blob/main/torch/_functorch/fx_minifier.py#L440
@@ -405,13 +409,13 @@ def find_culprits(
           raw_min_fx_gm, raw_min_inputs = fx_minifier(
               fx_gm,
               fx_inputs,
-              fx_minifier_checker,
+              predicate_f,
               max_granularity=max_granularity,
           )
       min_fx_gm, min_inputs = _normalize_minified_fx_gm(raw_min_fx_gm, raw_min_inputs)
       found_culprits_num += 1
-      yield Culprit(min_fx_gm, min_inputs, _runtime_errors=runtime_errors)
+      yield SearchResult(min_fx_gm, min_inputs)
       fx_gm, fx_inputs = _erase_sub_gm_from_gm(
           fx_gm, fx_inputs, raw_min_fx_gm, raw_min_inputs
@@ -421,3 +425,40 @@ def find_culprits(
       if str(e) == "Input graph did not fail the tester" and found_culprits_num > 0:
         break
       raise e
+def find_culprits(
+    torch_model: torch.nn.Module,
+    args: Tuple[Any],
+    max_granularity: Optional[int] = None,
+    runtime_errors: bool = False,
+    *,
+    enable_fx_minifier_logging: bool = False,
+) -> Generator[Culprit, None, None]:
+  """Finds culprits in the AI Edge Torch model conversion.
+  Args:
+    torch_model: model to export and save
+    args: A set of args to trace the model with, i.e.
+      torch_model(*args) must run
+    max_granularity - FX minifier arg. The maximum granularity (number of nodes)
+      in the returned ATen FX subgraph of the culprit.
+    runtime_errors: If true, find culprits for Python runtime errors
+      with converted model.
+    enable_fx_minifier_logging: If true, allows the underlying FX minifier to log the progress.
+  """
+  fx_minifier_checker = functools.partial(
+      _fx_minifier_checker, runtime_errors=runtime_errors
+  )
+  for search_result in _search_model(
+      fx_minifier_checker,
+      torch_model,
+      args,
+      max_granularity=max_granularity,
+      enable_fx_minifier_logging=enable_fx_minifier_logging,
+  ):
+    yield Culprit(
+        search_result.graph_module, search_result.inputs, _runtime_errors=runtime_errors
+    )

ai_edge_torch/debug/test/test_search_model.py ADDED Viewed

@@ -0,0 +1,50 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import unittest
+import torch
+from ai_edge_torch.debug import _search_model
+class TestSearchModel(unittest.TestCase):
+  def test_search_model_with_ops(self):
+    class MultipleOpsModel(torch.nn.Module):
+      def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        sub_0 = x - 1
+        add_0 = y + 1
+        mul_0 = x * y
+        add_1 = sub_0 + add_0
+        mul_1 = add_0 * mul_0
+        sub_1 = add_1 - mul_1
+        return sub_1
+    model = MultipleOpsModel().eval()
+    args = (torch.rand(10), torch.rand(10))
+    def find_subgraph_with_sub(fx_gm, inputs):
+      return torch.ops.aten.sub.Tensor in [n.target for n in fx_gm.graph.nodes]
+    results = list(_search_model(find_subgraph_with_sub, model, args))
+    self.assertEqual(len(results), 2)
+    self.assertIn(torch.ops.aten.sub.Tensor, [n.target for n in results[0].graph.nodes])
+if __name__ == "__main__":
+  unittest.main()

ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py CHANGED Viewed

@@ -21,11 +21,11 @@ import torch
 import ai_edge_torch
 import ai_edge_torch.generative.examples.stable_diffusion.clip as clip
 import ai_edge_torch.generative.examples.stable_diffusion.decoder as decoder
-from ai_edge_torch.generative.examples.stable_diffusion.diffusion import Diffusion  # NOQA
+import ai_edge_torch.generative.examples.stable_diffusion.diffusion as diffusion
 from ai_edge_torch.generative.examples.stable_diffusion.encoder import Encoder
 import ai_edge_torch.generative.examples.stable_diffusion.util as util
-import ai_edge_torch.generative.utilities.autoencoder_loader as autoencoder_loader
 import ai_edge_torch.generative.utilities.loader as loading_utils
+import ai_edge_torch.generative.utilities.stable_diffusion_loader as stable_diffusion_loader
 @torch.inference_mode
@@ -45,11 +45,14 @@ def convert_stable_diffusion_to_tflite(
   encoder = Encoder()
   encoder.load_state_dict(torch.load(encoder_ckpt_path))
-  diffusion = Diffusion()
-  diffusion.load_state_dict(torch.load(diffusion_ckpt_path))
+  diffusion_model = diffusion.Diffusion(diffusion.get_model_config(2))
+  diffusion_loader = stable_diffusion_loader.DiffusionModelLoader(
+      diffusion_ckpt_path, diffusion.TENSORS_NAMES
+  )
+  diffusion_loader.load(diffusion_model)
   decoder_model = decoder.Decoder(decoder.get_model_config())
-  decoder_loader = autoencoder_loader.AutoEncoderModelLoader(
+  decoder_loader = stable_diffusion_loader.AutoEncoderModelLoader(
       decoder_ckpt_path, decoder.TENSORS_NAMES
   )
   decoder_loader.load(decoder_model)
@@ -84,7 +87,7 @@ def convert_stable_diffusion_to_tflite(
   # Diffusion
   ai_edge_torch.signature(
       'diffusion',
-      diffusion,
+      diffusion_model,
       (torch.repeat_interleave(input_latents, 2, 0), context, time_embedding),
   ).convert().export('/tmp/stable_diffusion/diffusion.tflite')

ai_edge_torch/generative/examples/stable_diffusion/decoder.py CHANGED Viewed

@@ -20,20 +20,20 @@ import ai_edge_torch.generative.layers.builder as layers_builder
 import ai_edge_torch.generative.layers.model_config as layers_cfg
 import ai_edge_torch.generative.layers.unet.blocks_2d as blocks_2d
 import ai_edge_torch.generative.layers.unet.model_config as unet_cfg
-import ai_edge_torch.generative.utilities.autoencoder_loader as autoencoder_loader
+import ai_edge_torch.generative.utilities.stable_diffusion_loader as stable_diffusion_loader
-TENSORS_NAMES = autoencoder_loader.AutoEncoderModelLoader.TensorNames(
+TENSORS_NAMES = stable_diffusion_loader.AutoEncoderModelLoader.TensorNames(
     post_quant_conv="0",
     conv_in="1",
-    mid_block_tensor_names=autoencoder_loader.MidBlockTensorNames(
+    mid_block_tensor_names=stable_diffusion_loader.MidBlockTensorNames(
         residual_block_tensor_names=[
-            autoencoder_loader.ResidualBlockTensorNames(
+            stable_diffusion_loader.ResidualBlockTensorNames(
                 norm_1="2.groupnorm_1",
                 norm_2="2.groupnorm_2",
                 conv_1="2.conv_1",
                 conv_2="2.conv_2",
             ),
-            autoencoder_loader.ResidualBlockTensorNames(
+            stable_diffusion_loader.ResidualBlockTensorNames(
                 norm_1="4.groupnorm_1",
                 norm_2="4.groupnorm_2",
                 conv_1="4.conv_1",
@@ -41,7 +41,7 @@ TENSORS_NAMES = autoencoder_loader.AutoEncoderModelLoader.TensorNames(
             ),
         ],
         attention_block_tensor_names=[
-            autoencoder_loader.AttnetionBlockTensorNames(
+            stable_diffusion_loader.AttentionBlockTensorNames(
                 norm="3.groupnorm",
                 fused_qkv_proj="3.attention.in_proj",
                 output_proj="3.attention.out_proj",
@@ -49,21 +49,21 @@ TENSORS_NAMES = autoencoder_loader.AutoEncoderModelLoader.TensorNames(
         ],
     ),
     up_decoder_blocks_tensor_names=[
-        autoencoder_loader.UpDecoderBlockTensorNames(
+        stable_diffusion_loader.UpDecoderBlockTensorNames(
             residual_block_tensor_names=[
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="5.groupnorm_1",
                     norm_2="5.groupnorm_2",
                     conv_1="5.conv_1",
                     conv_2="5.conv_2",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="6.groupnorm_1",
                     norm_2="6.groupnorm_2",
                     conv_1="6.conv_1",
                     conv_2="6.conv_2",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="7.groupnorm_1",
                     norm_2="7.groupnorm_2",
                     conv_1="7.conv_1",
@@ -72,21 +72,21 @@ TENSORS_NAMES = autoencoder_loader.AutoEncoderModelLoader.TensorNames(
             ],
             upsample_conv="9",
         ),
-        autoencoder_loader.UpDecoderBlockTensorNames(
+        stable_diffusion_loader.UpDecoderBlockTensorNames(
             residual_block_tensor_names=[
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="10.groupnorm_1",
                     norm_2="10.groupnorm_2",
                     conv_1="10.conv_1",
                     conv_2="10.conv_2",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="11.groupnorm_1",
                     norm_2="11.groupnorm_2",
                     conv_1="11.conv_1",
                     conv_2="11.conv_2",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="12.groupnorm_1",
                     norm_2="12.groupnorm_2",
                     conv_1="12.conv_1",
@@ -95,22 +95,22 @@ TENSORS_NAMES = autoencoder_loader.AutoEncoderModelLoader.TensorNames(
             ],
             upsample_conv="14",
         ),
-        autoencoder_loader.UpDecoderBlockTensorNames(
+        stable_diffusion_loader.UpDecoderBlockTensorNames(
             residual_block_tensor_names=[
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="15.groupnorm_1",
                     norm_2="15.groupnorm_2",
                     conv_1="15.conv_1",
                     conv_2="15.conv_2",
                     residual_layer="15.residual_layer",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="16.groupnorm_1",
                     norm_2="16.groupnorm_2",
                     conv_1="16.conv_1",
                     conv_2="16.conv_2",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="17.groupnorm_1",
                     norm_2="17.groupnorm_2",
                     conv_1="17.conv_1",
@@ -119,22 +119,22 @@ TENSORS_NAMES = autoencoder_loader.AutoEncoderModelLoader.TensorNames(
             ],
             upsample_conv="19",
         ),
-        autoencoder_loader.UpDecoderBlockTensorNames(
+        stable_diffusion_loader.UpDecoderBlockTensorNames(
             residual_block_tensor_names=[
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="20.groupnorm_1",
                     norm_2="20.groupnorm_2",
                     conv_1="20.conv_1",
                     conv_2="20.conv_2",
                     residual_layer="20.residual_layer",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="21.groupnorm_1",
                     norm_2="21.groupnorm_2",
                     conv_1="21.conv_1",
                     conv_2="21.conv_2",
                 ),
-                autoencoder_loader.ResidualBlockTensorNames(
+                stable_diffusion_loader.ResidualBlockTensorNames(
                     norm_1="22.groupnorm_1",
                     norm_2="22.groupnorm_2",
                     conv_1="22.conv_1",
@@ -225,8 +225,8 @@ class Decoder(nn.Module):
                   num_layers=config.layers_per_block,
                   add_upsample=not_final_block,
                   upsample_conv=True,
-                  sampling_config=unet_cfg.SamplingConfig(
-                      2, unet_cfg.SamplingType.NEAREST
+                  sampling_config=unet_cfg.UpSamplingConfig(
+                      mode=unet_cfg.SamplingType.NEAREST, scale_factor=2
                   ),
               )
           )
@@ -245,6 +245,14 @@ class Decoder(nn.Module):
     )
   def forward(self, latents_tensor: torch.Tensor) -> torch.Tensor:
+    """Forward function of decoder model.
+    Args:
+        latents (torch.Tensor): latents space tensor.
+    Returns:
+        output decoded image tensor from decoder model.
+    """
     x = latents_tensor / self.config.scaling_factor
     x = self.post_quant_conv(x)
     x = self.conv_in(x)
@@ -271,7 +279,7 @@ def get_model_config() -> unet_cfg.AutoEncoderConfig:
   )
   att_config = unet_cfg.AttentionBlock2DConfig(
-      dims=block_out_channels[-1],
+      dim=block_out_channels[-1],
       normalization_config=norm_config,
       attention_config=layers_cfg.AttentionConfig(
           num_heads=1,

ai-edge-torch-nightly 0.2.0.dev20240611__py3-none-any.whl → 0.2.0.dev20240619__py3-none-any.whl

Potentially problematic release.

ai-edge-torch-nightly 0.2.0.dev20240611__py3-none-any.whl → 0.2.0.dev20240619__py3-none-any.whl