ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

Files changed (169)
  1. ai_edge_torch/__init__.py +5 -4
  2. ai_edge_torch/_convert/conversion.py +112 -0
  3. ai_edge_torch/_convert/conversion_utils.py +64 -0
  4. ai_edge_torch/{convert → _convert}/converter.py +94 -48
  5. ai_edge_torch/_convert/fx_passes/__init__.py +22 -0
  6. ai_edge_torch/{convert → _convert}/fx_passes/build_aten_composite_pass.py +107 -44
  7. ai_edge_torch/{convert → _convert}/fx_passes/build_interpolate_composite_pass.py +23 -20
  8. ai_edge_torch/{convert → _convert}/fx_passes/inject_mlir_debuginfo_pass.py +5 -6
  9. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/__init__.py +1 -1
  10. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_check.py +39 -9
  11. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_mark.py +2 -0
  12. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +1 -0
  13. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +17 -8
  14. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +9 -8
  15. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +31 -18
  16. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/op_func_registry.py +2 -2
  17. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/pass_body.py +34 -24
  18. ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/utils.py +2 -0
  19. ai_edge_torch/_convert/signature.py +66 -0
  20. ai_edge_torch/_convert/test/test_convert.py +495 -0
  21. ai_edge_torch/_convert/test/test_convert_composites.py +234 -0
  22. ai_edge_torch/_convert/test/test_convert_multisig.py +189 -0
  23. ai_edge_torch/{convert → _convert}/test/test_to_channel_last_io.py +5 -5
  24. ai_edge_torch/{convert → _convert}/to_channel_last_io.py +10 -3
  25. ai_edge_torch/config.py +27 -0
  26. ai_edge_torch/conftest.py +20 -0
  27. ai_edge_torch/debug/culprit.py +72 -40
  28. ai_edge_torch/debug/test/test_culprit.py +7 -5
  29. ai_edge_torch/debug/test/test_search_model.py +8 -7
  30. ai_edge_torch/debug/utils.py +14 -3
  31. ai_edge_torch/fx_pass_base.py +101 -0
  32. ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +68 -0
  33. ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +68 -0
  34. ai_edge_torch/generative/examples/gemma/{gemma.py → gemma1.py} +69 -55
  35. ai_edge_torch/generative/examples/gemma/gemma2.py +267 -0
  36. ai_edge_torch/generative/examples/gemma/verify_gemma1.py +56 -0
  37. ai_edge_torch/generative/examples/gemma/verify_gemma2.py +57 -0
  38. ai_edge_torch/generative/examples/gemma/verify_util.py +143 -0
  39. ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +68 -0
  40. ai_edge_torch/generative/examples/openelm/openelm.py +206 -0
  41. ai_edge_torch/generative/examples/openelm/verify.py +64 -0
  42. ai_edge_torch/generative/examples/phi/__init__.py +14 -0
  43. ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +68 -0
  44. ai_edge_torch/generative/examples/phi/convert_to_tflite.py +68 -0
  45. ai_edge_torch/generative/examples/{phi2 → phi}/phi2.py +70 -51
  46. ai_edge_torch/generative/examples/phi/phi3.py +286 -0
  47. ai_edge_torch/generative/examples/phi/verify.py +65 -0
  48. ai_edge_torch/generative/examples/phi/verify_phi3.py +70 -0
  49. ai_edge_torch/generative/examples/smollm/__init__.py +14 -0
  50. ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +68 -0
  51. ai_edge_torch/generative/examples/smollm/smollm.py +101 -0
  52. ai_edge_torch/generative/examples/smollm/verify.py +62 -0
  53. ai_edge_torch/generative/examples/stable_diffusion/attention.py +3 -1
  54. ai_edge_torch/generative/examples/stable_diffusion/clip.py +83 -13
  55. ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +27 -14
  56. ai_edge_torch/generative/examples/stable_diffusion/decoder.py +74 -9
  57. ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +179 -37
  58. ai_edge_torch/generative/examples/stable_diffusion/encoder.py +4 -3
  59. ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +83 -58
  60. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py +4 -3
  61. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py +4 -3
  62. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py +4 -3
  63. ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +1 -0
  64. ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py +4 -1
  65. ai_edge_torch/generative/examples/stable_diffusion/util.py +9 -3
  66. ai_edge_torch/generative/examples/t5/convert_to_tflite.py +28 -25
  67. ai_edge_torch/generative/examples/t5/t5.py +208 -159
  68. ai_edge_torch/generative/examples/t5/t5_attention.py +45 -30
  69. ai_edge_torch/generative/examples/test_models/convert_toy_model.py +105 -0
  70. ai_edge_torch/generative/examples/test_models/toy_model.py +69 -41
  71. ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +50 -64
  72. ai_edge_torch/generative/examples/tiny_llama/__init__.py +14 -0
  73. ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +41 -39
  74. ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +67 -54
  75. ai_edge_torch/generative/examples/tiny_llama/verify.py +64 -0
  76. ai_edge_torch/generative/fx_passes/__init__.py +4 -5
  77. ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +10 -7
  78. ai_edge_torch/generative/layers/attention.py +141 -102
  79. ai_edge_torch/generative/layers/attention_utils.py +53 -12
  80. ai_edge_torch/generative/layers/builder.py +37 -7
  81. ai_edge_torch/generative/layers/feed_forward.py +39 -14
  82. ai_edge_torch/generative/layers/kv_cache.py +162 -50
  83. ai_edge_torch/generative/layers/model_config.py +84 -30
  84. ai_edge_torch/generative/layers/normalization.py +185 -7
  85. ai_edge_torch/generative/layers/rotary_position_embedding.py +6 -4
  86. ai_edge_torch/generative/layers/scaled_dot_product_attention.py +48 -21
  87. ai_edge_torch/generative/layers/unet/blocks_2d.py +136 -77
  88. ai_edge_torch/generative/layers/unet/builder.py +7 -4
  89. ai_edge_torch/generative/layers/unet/model_config.py +17 -15
  90. ai_edge_torch/generative/quantize/example.py +7 -8
  91. ai_edge_torch/generative/quantize/quant_recipe.py +10 -7
  92. ai_edge_torch/generative/quantize/quant_recipe_utils.py +12 -1
  93. ai_edge_torch/generative/quantize/quant_recipes.py +8 -0
  94. ai_edge_torch/generative/test/test_kv_cache.py +120 -0
  95. ai_edge_torch/generative/test/{loader_test.py → test_loader.py} +9 -7
  96. ai_edge_torch/generative/test/test_model_conversion.py +124 -188
  97. ai_edge_torch/generative/test/test_model_conversion_large.py +251 -0
  98. ai_edge_torch/generative/test/test_quantize.py +76 -60
  99. ai_edge_torch/generative/test/utils.py +54 -0
  100. ai_edge_torch/generative/utilities/converter.py +82 -0
  101. ai_edge_torch/generative/utilities/loader.py +120 -57
  102. ai_edge_torch/generative/utilities/stable_diffusion_loader.py +165 -57
  103. ai_edge_torch/generative/utilities/t5_loader.py +110 -81
  104. ai_edge_torch/generative/utilities/verifier.py +247 -0
  105. ai_edge_torch/hlfb/__init__.py +1 -1
  106. ai_edge_torch/hlfb/mark_pattern/__init__.py +9 -7
  107. ai_edge_torch/hlfb/mark_pattern/passes.py +23 -3
  108. ai_edge_torch/hlfb/mark_pattern/pattern.py +39 -30
  109. ai_edge_torch/hlfb/test/test_mark_pattern.py +46 -20
  110. ai_edge_torch/hlfb/test/test_stablehlo_composite_builder.py +24 -11
  111. ai_edge_torch/lowertools/__init__.py +18 -0
  112. ai_edge_torch/lowertools/_shim.py +80 -0
  113. ai_edge_torch/lowertools/common_utils.py +142 -0
  114. ai_edge_torch/lowertools/odml_torch_utils.py +255 -0
  115. ai_edge_torch/lowertools/test_utils.py +60 -0
  116. ai_edge_torch/lowertools/torch_xla_utils.py +284 -0
  117. ai_edge_torch/{generative/quantize/ai_edge_quantizer_glue → lowertools}/translate_recipe.py +29 -14
  118. ai_edge_torch/model.py +53 -18
  119. ai_edge_torch/odml_torch/__init__.py +20 -0
  120. ai_edge_torch/odml_torch/_torch_future.py +61 -0
  121. ai_edge_torch/odml_torch/_torch_library.py +19 -0
  122. ai_edge_torch/odml_torch/composite/__init__.py +16 -0
  123. ai_edge_torch/odml_torch/composite/mark_tensor.py +120 -0
  124. ai_edge_torch/odml_torch/composite/stablehlo_composite_builder.py +106 -0
  125. ai_edge_torch/odml_torch/debuginfo/__init__.py +16 -0
  126. ai_edge_torch/odml_torch/debuginfo/_build.py +43 -0
  127. ai_edge_torch/odml_torch/debuginfo/_op_polyfill.py +55 -0
  128. ai_edge_torch/odml_torch/export.py +357 -0
  129. ai_edge_torch/odml_torch/export_utils.py +168 -0
  130. ai_edge_torch/odml_torch/jax_bridge/__init__.py +15 -0
  131. ai_edge_torch/odml_torch/jax_bridge/_wrap.py +150 -0
  132. ai_edge_torch/odml_torch/jax_bridge/utils.py +75 -0
  133. ai_edge_torch/odml_torch/lowerings/__init__.py +25 -0
  134. ai_edge_torch/odml_torch/lowerings/_basic.py +258 -0
  135. ai_edge_torch/odml_torch/lowerings/_batch_norm.py +65 -0
  136. ai_edge_torch/odml_torch/lowerings/_convolution.py +241 -0
  137. ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +252 -0
  138. ai_edge_torch/odml_torch/lowerings/_layer_norm.py +78 -0
  139. ai_edge_torch/odml_torch/lowerings/context.py +42 -0
  140. ai_edge_torch/odml_torch/lowerings/registry.py +96 -0
  141. ai_edge_torch/odml_torch/lowerings/utils.py +185 -0
  142. ai_edge_torch/odml_torch/passes/__init__.py +38 -0
  143. ai_edge_torch/odml_torch/tf_integration.py +194 -0
  144. ai_edge_torch/quantize/pt2e_quantizer.py +52 -24
  145. ai_edge_torch/quantize/pt2e_quantizer_utils.py +43 -23
  146. ai_edge_torch/quantize/quant_config.py +13 -9
  147. ai_edge_torch/testing/model_coverage/model_coverage.py +29 -16
  148. ai_edge_torch/version.py +16 -0
  149. {ai_edge_torch_nightly-0.2.0.dev20240714.dist-info → ai_edge_torch_nightly-0.3.0.dev20240926.dist-info}/METADATA +7 -3
  150. ai_edge_torch_nightly-0.3.0.dev20240926.dist-info/RECORD +177 -0
  151. {ai_edge_torch_nightly-0.2.0.dev20240714.dist-info → ai_edge_torch_nightly-0.3.0.dev20240926.dist-info}/WHEEL +1 -1
  152. ai_edge_torch/convert/conversion.py +0 -117
  153. ai_edge_torch/convert/conversion_utils.py +0 -400
  154. ai_edge_torch/convert/fx_passes/__init__.py +0 -59
  155. ai_edge_torch/convert/fx_passes/_pass_base.py +0 -49
  156. ai_edge_torch/convert/fx_passes/canonicalize_pass.py +0 -37
  157. ai_edge_torch/convert/test/test_convert.py +0 -311
  158. ai_edge_torch/convert/test/test_convert_composites.py +0 -192
  159. ai_edge_torch/convert/test/test_convert_multisig.py +0 -139
  160. ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +0 -66
  161. ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +0 -64
  162. ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +0 -161
  163. ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/__init__.py +0 -0
  164. ai_edge_torch_nightly-0.2.0.dev20240714.dist-info/RECORD +0 -121
  165. /ai_edge_torch/{convert → _convert}/__init__.py +0 -0
  166. /ai_edge_torch/{convert → _convert}/test/__init__.py +0 -0
  167. /ai_edge_torch/generative/examples/{phi2 → openelm}/__init__.py +0 -0
  168. {ai_edge_torch_nightly-0.2.0.dev20240714.dist-info → ai_edge_torch_nightly-0.3.0.dev20240926.dist-info}/LICENSE +0 -0
  169. {ai_edge_torch_nightly-0.2.0.dev20240714.dist-info → ai_edge_torch_nightly-0.3.0.dev20240926.dist-info}/top_level.txt +0 -0
ai_edge_torch/{convert → _convert}/fx_passes/build_aten_composite_pass.py +107 -44

@@ -13,27 +13,22 @@
  # limitations under the License.
  # ==============================================================================
 
- import copy
- import functools
  from typing import Any, Callable
-
+ from ai_edge_torch import fx_pass_base
+ from ai_edge_torch import lowertools
  import torch
- from torch.fx import GraphModule
- from torch.fx import Node
- from torch.fx.passes.infra.pass_base import PassBase
- from torch.fx.passes.infra.pass_base import PassResult
  import torch.utils._pytree as pytree
 
- from ai_edge_torch.hlfb import StableHLOCompositeBuilder
-
- _composite_builders: dict[Callable, Callable[[GraphModule, Node], None]] = {}
+ _composite_builders: dict[
+ Callable, Callable[[torch.fx.GraphModule, torch.fx.Node], None]
+ ] = {}
 
 
  def _register_composite_builder(op):
  def inner(func):
  if isinstance(op, torch._ops.OpOverloadPacket):
- for overload in v.overloads():
- _composite_builders[getattr(v, overload)] = func
+ for overload in op.overloads():
+ _composite_builders[getattr(op, overload)] = func
  else:
  _composite_builders[op] = func
  return func
@@ -41,7 +36,22 @@ def _register_composite_builder(op):
  return inner
 
 
- def _tree_map_to_composite_attr_values(values, *, stringify_incompatible_values=True):
+ def _tree_map_to_composite_attr_values(
+ values, *, stringify_incompatible_values=True
+ ):
+ """Convert a tree of values to a tree of composite attribute values.
+
+ This is used for pre-processing op attributes before passing them to
+ the composite op as attributes.
+
+ Args:
+ values: A tree of values.
+ stringify_incompatible_values: If True, stringify values that are not
+ compatible with composite attributes.
+
+ Returns:
+ A tree of composite attribute values.
+ """
 
  def convert(value):
  nonlocal stringify_incompatible_values
@@ -58,6 +68,11 @@ def _tree_map_to_composite_attr_values(values, *, stringify_incompatible_values=
 
 
  class TorchOpArgumentsMapper:
+ """A helper class to map op arguments to kwargs.
+
+ This is mainly used to extract the default values for op arguments and present
+ all arguments as kwargs.
+ """
 
  def __init__(self, op):
  if isinstance(op, torch._ops.OpOverloadPacket):
@@ -65,16 +80,26 @@ class TorchOpArgumentsMapper:
 
  assert hasattr(op, "_schema")
  self.op = op
- self.arg_specs = [(spec.name, spec.default_value) for spec in op._schema.arguments]
+ self.arg_specs = [
+ (spec.name, spec.default_value) for spec in op._schema.arguments
+ ]
 
  def get_full_kwargs(self, args, kwargs=None) -> dict[str, Any]:
- """Inspect the op's schema and extract all its args and kwargs
- into one single kwargs dict, with default values for those
- unspecified args and kwargs.
+ """Extracts all arguments of the op as kwargs.
+
+ Inspect the op's schema and extract all its args and kwargs into one single
+ kwargs dict, with default values for those unspecified args and kwargs.
+
+ Args:
+ args: The op's arguments.
+ kwargs: The op's kwargs.
+
+ Returns:
+ A kwargs dict with all args and kwargs.
  """
  full_kwargs = {**(kwargs or {})}
 
- for arg, (name, default_value) in zip(args, self.arg_specs):
+ for arg, (name, _) in zip(args, self.arg_specs):
  full_kwargs[name] = arg
 
  for name, default_value in self.arg_specs[len(args) :]:
@@ -85,12 +110,13 @@ class TorchOpArgumentsMapper:
 
 
  @_register_composite_builder(torch.ops.aten.hardswish.default)
- def _aten_hardswish(gm: GraphModule, node: Node):
+ def _aten_hardswish(_: torch.fx.GraphModule, node: torch.fx.Node):
+ """Build a composite for aten.hardswish.default."""
  op = node.target
 
  def hardswish(self: torch.Tensor):
  nonlocal op
- builder = StableHLOCompositeBuilder("aten.hardswish.default")
+ builder = lowertools.StableHLOCompositeBuilder("aten.hardswish.default")
  self = builder.mark_inputs(self)
  output = op(self)
  output = builder.mark_outputs(output)
@@ -100,7 +126,8 @@ def _aten_hardswish(gm: GraphModule, node: Node):
 
 
  @_register_composite_builder(torch.ops.aten.gelu.default)
- def _aten_gelu(gm: GraphModule, node: Node):
+ def _aten_gelu(_: torch.fx.GraphModule, node: torch.fx.Node):
+ """Build a composite for aten.gelu.default."""
  op = node.target
  args_mapper = TorchOpArgumentsMapper(op)
 
@@ -110,16 +137,17 @@ def _aten_gelu(gm: GraphModule, node: Node):
  full_kwargs = args_mapper.get_full_kwargs(args, kwargs)
 
  # TFLite supports exact and tanh approximate.
- if full_kwargs["approximate"] != "none" and full_kwargs["approximate"] != "tanh":
+ if (
+ full_kwargs["approximate"] != "none"
+ and full_kwargs["approximate"] != "tanh"
+ ):
  return op(*args, **kwargs)
 
- builder = StableHLOCompositeBuilder(
+ builder = lowertools.StableHLOCompositeBuilder(
  "aten.gelu.default",
- attr=_tree_map_to_composite_attr_values(
- {
- "approximate": full_kwargs["approximate"],
- }
- ),
+ attr=_tree_map_to_composite_attr_values({
+ "approximate": full_kwargs["approximate"],
+ }),
  )
  full_kwargs["self"] = builder.mark_inputs(full_kwargs["self"])
  output = op(full_kwargs["self"])
@@ -130,7 +158,8 @@ def _aten_gelu(gm: GraphModule, node: Node):
 
 
  @_register_composite_builder(torch.ops.aten.avg_pool2d.default)
- def _aten_avg_pool2d(gm: GraphModule, node: Node):
+ def _aten_avg_pool2d(_: torch.fx.GraphModule, node: torch.fx.Node):
+ """Build a composite for aten.avg_pool2d.default."""
  op = node.target
  args_mapper = TorchOpArgumentsMapper(op)
 
@@ -150,7 +179,10 @@ def _aten_avg_pool2d(gm: GraphModule, node: Node):
  ):
  dim_output_size = int((dim_input_size + dim_stride - 1) / dim_stride)
  padding_needed = max(
- 0, (dim_output_size - 1) * dim_stride + dim_kernel_size - dim_input_size
+ 0,
+ (dim_output_size - 1) * dim_stride
+ + dim_kernel_size
+ - dim_input_size,
  )
  if padding_needed % 2 != 0:
  return False
@@ -191,18 +223,16 @@ def _aten_avg_pool2d(gm: GraphModule, node: Node):
  ):
  return op(*args, **kwargs)
 
- builder = StableHLOCompositeBuilder(
+ builder = lowertools.StableHLOCompositeBuilder(
  "aten.avg_pool2d.default",
- attr=_tree_map_to_composite_attr_values(
- {
- "kernel_size": full_kwargs["kernel_size"],
- "stride": full_kwargs["stride"],
- "padding": full_kwargs["padding"],
- "ceil_mode": full_kwargs["ceil_mode"],
- "count_include_pad": full_kwargs["count_include_pad"],
- "divisor_override": full_kwargs["divisor_override"],
- }
- ),
+ attr=_tree_map_to_composite_attr_values({
+ "kernel_size": full_kwargs["kernel_size"],
+ "stride": full_kwargs["stride"],
+ "padding": full_kwargs["padding"],
+ "ceil_mode": full_kwargs["ceil_mode"],
+ "count_include_pad": full_kwargs["count_include_pad"],
+ "divisor_override": full_kwargs["divisor_override"],
+ }),
  )
 
  full_kwargs["self"] = builder.mark_inputs(full_kwargs["self"])
@@ -213,13 +243,46 @@ def _aten_avg_pool2d(gm: GraphModule, node: Node):
  node.target = avg_pool2d
 
 
- class BuildAtenCompositePass(PassBase):
+ @_register_composite_builder(torch.ops.aten.embedding.default)
+ def _aten_embedding(gm: torch.fx.GraphModule, node: torch.fx.Node):
+ op = node.target
+ args_mapper = TorchOpArgumentsMapper(op)
+
+ def embedding(*args, **kwargs):
+ nonlocal op, args_mapper
+ full_kwargs = args_mapper.get_full_kwargs(args, kwargs)
+ _, embedding_dim = full_kwargs["weight"].size()
+ idx = full_kwargs["indices"]
+
+ # Explicitly cast to INT32. This places the CastOp outside of the HLFB.
+ idx = idx.type(torch.int)
+ original_idx_shape = idx.size()
+
+ # Explicitly reshape to 1D. This places the ReshapeOp outside of the HLFB.
+ idx = torch.reshape(idx, (idx.numel(),))
+
+ builder = lowertools.StableHLOCompositeBuilder("odml.embedding_lookup")
+ full_kwargs["indices"], full_kwargs["weight"] = builder.mark_inputs(
+ idx,
+ full_kwargs["weight"],
+ )
+ output = op(**full_kwargs)
+ output = builder.mark_outputs(output)
+
+ # Explicitly reshape back to the original shape. This places the ReshapeOp outside of the HLFB.
+ output = torch.reshape(output, (*(original_idx_shape), embedding_dim))
+ return output
+
+ node.target = embedding
+
+
+ class BuildAtenCompositePass(fx_pass_base.PassBase):
 
- def call(self, graph_module: GraphModule):
+ def call(self, graph_module: torch.fx.GraphModule):
  for node in graph_module.graph.nodes:
  if node.target in _composite_builders:
  _composite_builders[node.target](graph_module, node)
 
  graph_module.graph.lint()
  graph_module.recompile()
- return PassResult(graph_module, True)
+ return fx_pass_base.PassResult(graph_module, True)
ai_edge_torch/{convert → _convert}/fx_passes/build_interpolate_composite_pass.py +23 -20

@@ -12,31 +12,30 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ """Build interpolate composite pass."""
 
  import functools
 
- import torch
-
- from ai_edge_torch.convert.fx_passes._pass_base import ExportedProgramPassBase
- from ai_edge_torch.convert.fx_passes._pass_base import ExportedProgramPassResult # NOQA
+ from ai_edge_torch import fx_pass_base
  from ai_edge_torch.hlfb import mark_pattern
+ from ai_edge_torch.hlfb.mark_pattern import pattern as pattern_module
+ import torch
 
  # For torch nightly released after mid June 2024,
  # torch.nn.functional.interpolate no longer gets exported into decomposed graph
- # but single aten op torch.ops.aten.upsample_nearest2d.vec/torch.ops.aten.upsample_bilinear2d.vec.
- # This behavior would our pattern matching based composite builder.
- # It requires the pattern and model graph to get decomposed first for backward compatibility.
- _INTERPOLATE_DECOMPOSITIONS = torch._decomp.get_decompositions(
- [
- torch.ops.aten.upsample_bilinear2d.vec,
- torch.ops.aten.upsample_nearest2d.vec,
- ]
- )
+ # but a single aten op:
+ # torch.ops.aten.upsample_nearest2d.vec/torch.ops.aten.upsample_bilinear2d.vec.
+ # This would interefere with our pattern matching based composite builder.
+ # Here we register the now missing decompositions first.
+ _INTERPOLATE_DECOMPOSITIONS = torch._decomp.get_decompositions([
+ torch.ops.aten.upsample_bilinear2d.vec,
+ torch.ops.aten.upsample_nearest2d.vec,
+ ])
 
 
  @functools.cache
  def _get_upsample_bilinear2d_pattern():
- pattern = mark_pattern.Pattern(
+ pattern = pattern_module.Pattern(
  "odml.upsample_bilinear2d",
  lambda x: torch.nn.functional.interpolate(
  x, scale_factor=2, mode="bilinear", align_corners=False
@@ -59,7 +58,7 @@ def _get_upsample_bilinear2d_pattern():
 
  @functools.cache
  def _get_upsample_bilinear2d_align_corners_pattern():
- pattern = mark_pattern.Pattern(
+ pattern = pattern_module.Pattern(
  "odml.upsample_bilinear2d",
  lambda x: torch.nn.functional.interpolate(
  x, scale_factor=2, mode="bilinear", align_corners=True
@@ -82,9 +81,11 @@ def _get_upsample_bilinear2d_align_corners_pattern():
 
  @functools.cache
  def _get_interpolate_nearest2d_pattern():
- pattern = mark_pattern.Pattern(
+ pattern = pattern_module.Pattern(
  "tfl.resize_nearest_neighbor",
- lambda x: torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest"),
+ lambda x: torch.nn.functional.interpolate(
+ x, scale_factor=2, mode="nearest"
+ ),
  export_args=(torch.rand(1, 3, 100, 100),),
  decomp_table=_INTERPOLATE_DECOMPOSITIONS,
  )
@@ -101,7 +102,7 @@ def _get_interpolate_nearest2d_pattern():
  return pattern
 
 
- class BuildInterpolateCompositePass(ExportedProgramPassBase):
+ class BuildInterpolateCompositePass(fx_pass_base.ExportedProgramPassBase):
 
  def __init__(self):
  super().__init__()
@@ -112,7 +113,9 @@ class BuildInterpolateCompositePass(ExportedProgramPassBase):
  ]
 
  def call(self, exported_program: torch.export.ExportedProgram):
- exported_program = exported_program.run_decompositions(_INTERPOLATE_DECOMPOSITIONS)
+ exported_program = exported_program.run_decompositions(
+ _INTERPOLATE_DECOMPOSITIONS
+ )
 
  graph_module = exported_program.graph_module
  for pattern in self._patterns:
@@ -120,4 +123,4 @@ class BuildInterpolateCompositePass(ExportedProgramPassBase):
 
  graph_module.graph.lint()
  graph_module.recompile()
- return ExportedProgramPassResult(exported_program, True)
+ return fx_pass_base.ExportedProgramPassResult(exported_program, True)
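The rewritten comment in this file explains the motivation: newer torch exports keep `upsample_nearest2d.vec` / `upsample_bilinear2d.vec` as single aten ops, which the pattern-matching composite builder does not expect, so the pass re-registers those decompositions and runs them first. A rough standalone sketch of that same `run_decompositions` step, outside the pass and using only public torch APIs (the model here is an assumption for illustration):

```python
import torch

class Upsample(torch.nn.Module):
  def forward(self, x):
    return torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest")

# Same decomposition table the pass builds as _INTERPOLATE_DECOMPOSITIONS.
decomps = torch._decomp.get_decompositions([
    torch.ops.aten.upsample_nearest2d.vec,
    torch.ops.aten.upsample_bilinear2d.vec,
])

ep = torch.export.export(Upsample(), (torch.rand(1, 3, 100, 100),))
# Recent torch keeps a single upsample_nearest2d.vec node here; decomposing it
# restores the smaller-op graph that the mark_pattern patterns were built against.
ep = ep.run_decompositions(decomps)
print(ep.graph_module.graph)
```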
ai_edge_torch/{convert → _convert}/fx_passes/inject_mlir_debuginfo_pass.py +5 -6

@@ -13,11 +13,10 @@
  # limitations under the License.
  # ==============================================================================
 
+ from ai_edge_torch import fx_pass_base
+ from ai_edge_torch import lowertools
  import torch
- from torch.fx.passes.infra.pass_base import PassBase
- from torch.fx.passes.infra.pass_base import PassResult
  import torch.utils._pytree as pytree
- import torch_xla.experimental.xla_mlir_debuginfo # Import required to register torch.ops.xla.write_mlir_debuginfo
 
 
  def _get_mlir_debuginfo(node: torch.fx.Node):
@@ -54,7 +53,7 @@ def _wrap_call_function_node_with_debuginfo_writer(node: torch.fx.GraphModule):
  outputs = target(*args, **kwargs)
  outputs = pytree.tree_map_only(
  torch.Tensor,
- lambda x: torch.ops.xla.write_mlir_debuginfo(x, debuginfo),
+ lambda x: lowertools.write_mlir_debuginfo_op(x, debuginfo),
  outputs,
  )
  return outputs
@@ -62,7 +61,7 @@ def _wrap_call_function_node_with_debuginfo_writer(node: torch.fx.GraphModule):
  node.target = debuginfo_writer
 
 
- class InjectMlirDebuginfoPass(PassBase):
+ class InjectMlirDebuginfoPass(fx_pass_base.PassBase):
 
  def call(self, graph_module: torch.fx.GraphModule):
  for node in graph_module.graph.nodes:
@@ -70,4 +69,4 @@ class InjectMlirDebuginfoPass(PassBase):
 
  graph_module.graph.lint()
  graph_module.recompile()
- return PassResult(graph_module, True)
+ return fx_pass_base.PassResult(graph_module, True)
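The debuginfo writer only touches the tensor leaves of a node's outputs, via `pytree.tree_map_only`. A tiny illustration of that mechanism, with a doubling lambda standing in for `lowertools.write_mlir_debuginfo_op`:

```python
import torch
import torch.utils._pytree as pytree

outputs = (torch.ones(2), "label", [torch.zeros(1)])
# Only torch.Tensor leaves are mapped; other leaves pass through untouched.
tagged = pytree.tree_map_only(torch.Tensor, lambda t: t * 2, outputs)
print(tagged)  # (tensor([2., 2.]), 'label', [tensor([0.])])
```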
ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/__init__.py +1 -1

@@ -13,4 +13,4 @@
  # limitations under the License.
  # ==============================================================================
 
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass.pass_body import OptimizeLayoutTransposesPass # NOQA
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass.pass_body import OptimizeLayoutTransposesPass # NOQA
ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_check.py +39 -9

@@ -12,17 +12,18 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ """Layout check for the optimized layout transposes pass."""
+
  import dataclasses
  import operator
 
+ from ai_edge_torch import lowertools
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_rewrite
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import utils
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass.op_func_registry import OpFuncRegistry
  import torch
  from torch.fx import Node
 
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_mark # NOQA
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_rewrite # NOQA
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import utils # NOQA
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass.op_func_registry import OpFuncRegistry # NOQA
-
  aten = torch.ops.aten
 
  __all__ = [
@@ -113,6 +114,10 @@ def is_4d(node: Node):
  val = node.meta.get("val")
  if val is None:
  return False
+
+ if isinstance(val, (list, tuple)) and val:
+ val = val[0]
+
  if not hasattr(val, "shape"):
  return False
 
@@ -145,8 +150,11 @@ def _qdq_layout_sensitive_inputs_getter(node: Node):
  # ==== Ops must be NHWC if possible
 
 
+ @layout_sensitive_inputs_getters.register(aten.conv2d)
  @layout_sensitive_inputs_getters.register(aten.convolution)
- @layout_sensitive_inputs_getters.register(aten._native_batch_norm_legit_no_training)
+ @layout_sensitive_inputs_getters.register(
+ aten._native_batch_norm_legit_no_training
+ )
  @layout_sensitive_inputs_getters.register(aten.native_group_norm)
  def _first_arg_getter(node):
  return [node.args[0]]
@@ -161,6 +169,7 @@ def _first_arg_getter(node):
  @nhwcable_node_checkers.register(aten.upsample_bilinear2d)
  @nhwcable_node_checkers.register(aten.upsample_nearest2d)
  @nhwcable_node_checkers.register(aten._adaptive_avg_pool2d)
+ @nhwcable_node_checkers.register(aten.conv2d)
  @nhwcable_node_checkers.register(aten.convolution)
  def _all_layout_sensitive_inputs_are_4d_checker(node: Node):
  can_be = all_layout_sensitive_inputs_are_4d(node)
@@ -168,10 +177,31 @@ def _all_layout_sensitive_inputs_are_4d_checker(node: Node):
 
 
  @nhwcable_node_checkers.register(aten._native_batch_norm_legit_no_training)
- @nhwcable_node_checkers.register(aten.native_group_norm)
  def _aten_norm_checker(node):
  val = node.meta.get("val")
- if not isinstance(val, (list, tuple)) or not val or not hasattr(val[0], "shape"):
+ if (
+ not isinstance(val, (list, tuple))
+ or not val
+ or not hasattr(val[0], "shape")
+ ):
+ return NHWCable(can_be=False, must_be=False)
+ return NHWCable(can_be=len(val[0].shape) == 4, must_be=False)
+
+
+ @nhwcable_node_checkers.register(aten.native_group_norm)
+ def _aten_native_group_norm_checker(node):
+ val = node.meta.get("val")
+ if (
+ not isinstance(val, (list, tuple))
+ or not val
+ or not hasattr(val[0], "shape")
+ ):
+ return NHWCable(can_be=False, must_be=False)
+ if len(node.args) >= 3 and (
+ node.args[1] is not None or node.args[2] is not None
+ ):
+ # Disable NHWC rewriter due to precision issue with weight and bias.
+ # TODO(b/354780253): Re-enable NHWC rewriter with proper lowering.
  return NHWCable(can_be=False, must_be=False)
  return NHWCable(can_be=len(val[0].shape) == 4, must_be=False)
 
@@ -179,7 +209,7 @@ def _aten_norm_checker(node):
  # ==== Ops must be NCHW
 
 
- @nhwcable_node_checkers.register(torch.ops.xla.mark_tensor)
+ @nhwcable_node_checkers.register(lowertools.mark_tensor_op)
  @nhwcable_node_checkers.register(utils.tensor_to_nchw)
  @nhwcable_node_checkers.register(utils.tensor_to_nhwc)
  @nhwcable_node_checkers.register("output")
ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_mark.py +2 -0

@@ -12,6 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ """Layout mark for the optimized layout transposes pass."""
+
  import torch
 
  # Tag which is added to a node's meta to indicate that is is part of the NHWC
ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +1 -0

@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ """Layout partitioners."""
 
  from . import greedy
  from . import min_cut
ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +17 -8

@@ -12,24 +12,31 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ """Greedy partitioning algorithm."""
 
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_check
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_mark
  import torch
 
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_check # NOQA
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_mark # NOQA
-
 
  def partition(graph_module: torch.fx.GraphModule):
- """Partition the graph module into NHWC and non-NHWC subgraphs, and mark
- nodes in the NHWC partitions.
+ """Partition the graph module into NHWC and non-NHWC subgraphs.
+
+ Partition the graph module into NHWC and non-NHWC subgraphs and mark nodes in
+ the NHWC partitions.
 
  Implements O(|V|) greedy partitioning algorithm.
- See go/pytorch-layout-transpose-optimization for more details.
+
+ Args:
+ graph_module: The graph module to be partitioned.
+
+ Returns:
+ The partitioned graph module.
  """
  graph = graph_module.graph
 
  for node in list(graph.nodes):
- if len(node.all_input_nodes) == 0:
+ if not node.all_input_nodes:
  # This node has no inputs so we don't need to change anything
  continue
@@ -45,7 +52,9 @@ def partition(graph_module: torch.fx.GraphModule):
 
  layout_sensitive_inputs = layout_check.get_layout_sensitive_inputs(node)
 
- should_be_nhwc = any(map(layout_mark.is_nhwc_node, layout_sensitive_inputs))
+ should_be_nhwc = any(
+ map(layout_mark.is_nhwc_node, layout_sensitive_inputs)
+ )
  for input_node in layout_sensitive_inputs:
  if not layout_mark.is_nhwc_node(input_node) and not layout_check.is_4d(
  input_node
ai_edge_torch/{convert → _convert}/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +9 -8

@@ -12,28 +12,26 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ """Min cut solver for partitioning the graph module into NHWC and non-NHWC subgraphs."""
 
  import collections
  import dataclasses
- import itertools
 
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_check # NOQA
+ from ai_edge_torch._convert.fx_passes.optimize_layout_transposes_pass import layout_mark # NOQA
  import numpy as np
  import scipy
  import torch
 
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_check # NOQA
- from ai_edge_torch.convert.fx_passes.optimize_layout_transposes_pass import layout_mark # NOQA
-
 
  def can_partition(graph_module: torch.fx.GraphModule):
  """Returns true if the input graph_module can be partitioned by min cut solver
+
  in a reasonable time.
 
  The min cut solver implements O(|V|^2|E|) Dinic's algorithm, which may
  take a long time to complete for large graph module. This function determines
  whether the graph module can be partitioned by the graph module size.
-
- See go/pytorch-layout-transpose-optimization for more details.
  """
  graph = graph_module.graph
  n_nodes = len(graph.nodes)
@@ -83,7 +81,10 @@ class MinCutSolver:
  def graph(self):
  edges = np.array(self.edges)
  return scipy.sparse.csr_matrix(
- (np.minimum(edges[:, 2], MinCutSolver.INF_COST), (edges[:, 0], edges[:, 1])),
+ (
+ np.minimum(edges[:, 2], MinCutSolver.INF_COST),
+ (edges[:, 0], edges[:, 1]),
+ ),
  shape=(self._nodes_cnt, self._nodes_cnt),
  dtype=np.int32,
  )
@@ -135,10 +136,10 @@ class MultiUsersDummyNode:
 
  def partition(graph_module: torch.fx.GraphModule):
  """Partition the graph module into NHWC and non-NHWC subgraphs, and mark
+
  nodes in the NHWC partitions.
 
  Implements O(|V|^2|E|) min-cut (optimal) partitioning algorithm.
- See go/pytorch-layout-transpose-optimization for more details.
  """
  graph = graph_module.graph
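For context on the last two hunks: `MinCutSolver.graph` packs edge capacities into a SciPy CSR matrix, and the NHWC/non-NHWC partition then corresponds to a minimum cut between a source and a sink. The solver itself implements Dinic's algorithm, per the docstring above; the toy example below only illustrates the max-flow/min-cut formulation on such a capacity matrix, using SciPy's bundled solver rather than the package's own:

```python
import numpy as np
import scipy.sparse
from scipy.sparse.csgraph import maximum_flow

# Toy 4-node graph: node 0 is the source, node 3 the sink.
# Each row is (u, v, capacity), analogous to MinCutSolver.edges.
edges = np.array([[0, 1, 5], [0, 2, 3], [1, 3, 4], [2, 3, 2]])
capacity = scipy.sparse.csr_matrix(
    (edges[:, 2], (edges[:, 0], edges[:, 1])),
    shape=(4, 4),
    dtype=np.int32,
)
# Max-flow value equals the min-cut capacity (6 here: 4 via node 1, 2 via node 2).
print(maximum_flow(capacity, 0, 3).flow_value)
```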