onnx-diagnostic 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +412 -12
  3. onnx_diagnostic/export/api.py +111 -8
  4. onnx_diagnostic/export/control_flow.py +48 -345
  5. onnx_diagnostic/export/control_flow_onnx.py +528 -0
  6. onnx_diagnostic/export/control_flow_research.py +12 -7
  7. onnx_diagnostic/export/onnx_plug.py +531 -0
  8. onnx_diagnostic/ext_test_case.py +163 -48
  9. onnx_diagnostic/helpers/cache_helper.py +1 -1
  10. onnx_diagnostic/helpers/dot_helper.py +222 -0
  11. onnx_diagnostic/helpers/helper.py +108 -37
  12. onnx_diagnostic/helpers/mini_onnx_builder.py +3 -1
  13. onnx_diagnostic/helpers/model_builder_helper.py +27 -0
  14. onnx_diagnostic/helpers/onnx_helper.py +531 -6
  15. onnx_diagnostic/helpers/ort_session.py +45 -19
  16. onnx_diagnostic/helpers/torch_fx_graph_helper.py +164 -0
  17. onnx_diagnostic/helpers/torch_helper.py +131 -8
  18. onnx_diagnostic/reference/ort_evaluator.py +228 -46
  19. onnx_diagnostic/tasks/feature_extraction.py +15 -14
  20. onnx_diagnostic/tasks/summarization.py +72 -137
  21. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_attention.py +236 -0
  22. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_cache_utils.py +50 -0
  23. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_causal_mask.py +89 -0
  24. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_dynamic_cache.py +177 -0
  25. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_gemma3.py +54 -0
  26. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_generation_mixin.py +486 -0
  27. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_idefics.py +156 -0
  28. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +173 -0
  29. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2.py +99 -0
  30. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +735 -0
  31. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen3.py +106 -0
  32. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +412 -0
  33. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_sam_mask_decoder.py +132 -0
  34. onnx_diagnostic/torch_export_patches/patches/patch_helper.py +28 -0
  35. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +64 -2608
  36. onnx_diagnostic/torch_models/code_sample.py +2 -1
  37. onnx_diagnostic/torch_models/hghub/model_inputs.py +34 -7
  38. onnx_diagnostic/torch_models/validate.py +64 -2
  39. onnx_diagnostic/torch_onnx/runtime_info.py +1 -24
  40. onnx_diagnostic/torch_onnx/sbs.py +969 -312
  41. onnx_diagnostic/torch_onnx/sbs_dataclasses.py +535 -0
  42. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/METADATA +1 -1
  43. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/RECORD +46 -27
  44. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/WHEEL +0 -0
  45. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/licenses/LICENSE.txt +0 -0
  46. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/top_level.txt +0 -0
@@ -1,158 +1,586 @@
-from typing import Any, Dict, Iterator, Optional, Tuple, Union
+import inspect
+import time
+from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Sequence, Tuple, Union
 import onnx
+import onnx.helper as oh
+import numpy as np
 import torch
-from ..helpers import string_type, string_diff, max_diff
-from ..helpers.onnx_helper import to_array_extended
-from ..helpers.torch_helper import to_numpy
+from ..helpers import string_type, string_diff, max_diff, flatten_object
+from ..helpers.onnx_helper import pretty_onnx
+from ..helpers.torch_helper import (
+    to_numpy,
+    from_numpy,
+    to_tensor,
+    torch_dtype_to_onnx_dtype,
+)
+from ..helpers.torch_fx_graph_helper import prepare_args_kwargs, run_fx_node
+from ..reference.ort_evaluator import OnnxList, OnnxruntimeEvaluator
+from .sbs_dataclasses import (
+    ReplayConfiguration,
+    RunAlignedRecord,
+    StatusRunAligned,
+    make_torch_inputs,
+)
 
 
-def validate_fx_tensor(
-    node: torch.fx.Node, tensor: torch.Tensor, expected_shape: Tuple[Any, ...]
-) -> None:
-    """
-    Validates the shape of tensor is expected.
+def _check_tensor_(use_tensor, name, obj, flip_type=False):
+    if flip_type:
+        if use_tensor:
+            if isinstance(obj, np.ndarray):
+                obj = from_numpy(obj)
+        else:
+            if isinstance(obj, torch.Tensor):
+                obj = to_numpy(obj)
 
-    :param node: node
-    :param tensor: tensor
-    :param expected_shape: expected shape
-    """
-    assert len(tensor.shape) == len(expected_shape), (
-        f"Shape mismatch, got {tensor.shape} expected {expected_shape}, "
-        f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-        f"node.args={node.args}, node.kwargs={node.kwargs}, "
-        f"node.meta={node.meta}"
+    assert not use_tensor or isinstance(obj, (torch.Tensor, OnnxList)), (
+        f"Unexpected type {type(obj)} for {name!r}. "
+        f"use_tensor is True so torch.Tensor is expected."
+    )
+    assert use_tensor or isinstance(obj, (np.ndarray, OnnxList)), (
+        f"Unexpected type {type(obj)} for {name!r}. "
+        f"use_tensor is False so np.array is expected."
     )
-    for a, b in zip(tensor.shape, expected_shape):
-        assert not isinstance(b, int) or a == b or {a, b} == {0, 1}, (
-            f"Dimension mismatch, got {tensor.shape} expected {expected_shape}, "
-            f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-            f"node.args={node.args}, node.kwargs={node.kwargs}, "
-            f"node.meta={node.meta}"
+    return obj
+
+
+def _make_node_from_initializer(proto: onnx.TensorProto) -> onnx.NodeProto:
+    return oh.make_node("Constant", [], [proto.name], value=proto)
+
+
+def _loop_cmp(
+    mapping_onnx_to_torch: Dict[str, str],
+    torch_results: Dict[str, torch.Tensor],
+    onnx_results: Dict[str, Any],
+    onnx_name: str,
+    onnx_result: torch.Tensor,
+    second_onnx_result: torch.Tensor,
+    verbose: int,
+    atol: Optional[float],
+    rtol: Optional[float],
+    i_torch: int,
+    i_onnx: int,
+    str_kws: Dict[str, bool],
+    exc: bool,
+    use_tensor: bool,
+) -> Optional[RunAlignedRecord]:
+    onnx_results[onnx_name] = _check_tensor_(use_tensor, onnx_name, onnx_result)
+    if verbose > 1:
+        print(f"[run_aligned-nx] +res: {onnx_name}={string_type(onnx_result, **str_kws)}")
+
+    to = mapping_onnx_to_torch.get(onnx_name, onnx_name)
+    if to in torch_results:
+        d = max_diff(torch_results[to], onnx_result, hist=[0.1, 0.01])
+        if verbose > 1:
+            if onnx_name == to:
+                print(f"[run_aligned-==] cmp {to}: {string_diff(d)}")
+            else:
+                print(f"[run_aligned-~~] cmp {to}/{onnx_name}: {string_diff(d)}")
+        if not (atol is None or rtol is None or (d["abs"] <= atol and d["rel"] <= rtol)):
+            if exc:
+                raise ValueError(
+                    f"discrepancies detected for results [{to}/{onnx_name}]: "
+                    f"{string_diff(d)}"
+                    f"\n-- torch_results: {string_type(torch_results[to], **str_kws)}"
+                    f"\n-- onnx_results: {string_type(onnx_result, **str_kws)}"
+                    f"\n-- torch\n{torch_results[to]}"
+                )
+            else:
+                print(
+                    f"[run_aligned-dx] discrepancies {string_diff(d)} - [{to}/{onnx_name}]"
+                )
+        r = RunAlignedRecord(
+            ep_id_node=i_torch,
+            onnx_id_node=i_onnx,
+            ep_name=to,
+            onnx_name=onnx_name,
+            ep_shape_type=string_type(torch_results[to], **str_kws),
+            onnx_shape_type=string_type(onnx_result, **str_kws),
         )
+        r.set_diff(d)
+        if second_onnx_result is not None:
+            d2 = max_diff(torch_results[to], second_onnx_result, hist=[0.1, 0.01])
+            r.set_diff2(d2)
+        mapping_onnx_to_torch[onnx_name] = to
+        return r
+    return None
 
 
-def validate_fx_outputs(node: torch.fx.Node, outputs: Tuple[Any, ...]) -> None:
-    """
-    Validates the outputs of a node using metadata stored in the node.
+def _duplicated_values(d):
+    rev = {}
+    for k, v in d.items():
+        if v in rev:
+            rev[v].append(k)
+        else:
+            rev[v] = [k]
+    res = {k: v for k, v in rev.items() if len(v) > 1}
+    final = set()
+    for v in res.values():
+        final |= set(v)
+    return final
 
-    :param node: node
-    :param outputs: outputs
-    """
-    if "val" not in node.meta:
-        return
-    if isinstance(outputs, torch.Tensor):
-        validate_fx_tensor(node, outputs, node.meta["val"].shape)
-        return
-    if isinstance(outputs, (tuple, list)):
-        assert isinstance(node.meta["val"], (list, tuple)), (
-            f"Unexpected type {string_type(node.meta['val'])} for node.meta['val'], "
-            f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-            f"node.args={node.args}, node.kwargs={node.kwargs}, "
-            f"node.meta={node.meta}"
+
+def _validation_nn_functional(
+    node: onnx.NodeProto, new_feeds: Dict[str, torch.Tensor], expected: List[torch.Tensor]
+) -> Optional[str]:
+    if node.op_type == "Gemm" and len(node.input) == 3:
+        atts = {}
+        for att in node.attribute:
+            if att.name in ("alpha", "beta"):
+                atts[att.name] = att.f
+            elif att.name in ("transA", "transB"):
+                atts[att.name] = att.i
+        if atts == {"transB": 1}:
+            res = torch.nn.functional.linear(*[new_feeds[i] for i in node.input])
+            diff = max_diff(res, expected[0])
+            return f"function.linear:{string_diff(diff)}"
+    return None
+
+
+def _loop_onnx_node(
+    onx: onnx.ModelProto,
+    ep_graph_nodes: List[torch.fx.Node],
+    onnx_results: Dict[str, Any],
+    mapping_onnx_to_torch: Dict[str, str],
+    torch_results: Dict[str, torch.Tensor],
+    ep_durations,
+    use_tensor: bool,
+    i_torch: int,
+    i_onnx: int,
+    name_to_ep_node: Dict[str, int],
+    run_cls_kwargs: Dict[str, Any],
+    str_kws: Dict[str, bool],
+    status: StatusRunAligned,
+    already_run_onnx: Set[int],
+    torch_names_to_onnx_names: Dict[str, str],
+    verbose: int,
+    exc: bool,
+    atol: float,
+    rtol: float,
+    reset_names: Set[str],
+    replay_configuration: Optional[ReplayConfiguration],
+    has_cuda: bool,
+    run_cls: type,
+    loop: Any,
+    run_onnx_with_torch_inputs: bool,
+) -> Iterator[Optional[RunAlignedRecord]]:
+
+    if i_onnx in already_run_onnx:
+        yield None
+    node = onx.graph.node[i_onnx]
+    if verbose > 1:
+        print(
+            f"[run_aligned] run onx.graph.node[{i_onnx}]: "
+            f"{node.op_type}({', '.join(node.input)}) -> {', '.join(node.output)}"
         )
-        assert len(outputs) == len(node.meta["val"]), (
-            f"Length mismatch, got {len(outputs)} expected {len(node.meta['val'])}, "
-            f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-            f"node.args={node.args}, node.kwargs={node.kwargs}, "
-            f"node.meta={node.meta}"
+    elif verbose == 1:
+        loop.set_description(
+            f"ep {i_torch}/{len(ep_graph_nodes)} nx {i_onnx}/{len(onx.graph.node)} "
+            f"{status.to_str()}"
         )
-        for a, b in zip(outputs, node.meta["val"]):
-            validate_fx_tensor(node, a, b.shape)
-        return
-    if isinstance(outputs, int):
-        assert (
-            isinstance(node.meta["val"], (torch.SymInt, torch.SymBool, torch.SymFloat))
-            or outputs == node.meta["val"]
-        ), (
-            f"Int mismatch, got {outputs} expected {node.meta['val']}, "
-            f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-            f"node.args={node.args}, node.kwargs={node.kwargs}, "
-            f"node.meta={node.meta}"
+    loop.update(min(1, 1 + i_torch + i_onnx))
+
+    ref = run_cls(node, **run_cls_kwargs)
183
+ # We need to clone because the runtime maybe using dlpack to create OrtValue
184
+    hidden_inputs = OnnxruntimeEvaluator._get_hidden_node_inputs(node)
+    all_inputs = [*node.input, *hidden_inputs] if hidden_inputs else node.input
+    feeds = (
+        {k: onnx_results[k].clone() for k in all_inputs if k}
+        if use_tensor
+        else {k: onnx_results[k].copy() for k in all_inputs if k}
+    )
+    assert "" not in feeds, f"Unexpected feeds={string_type(feeds, **str_kws)}"
+    if verbose > 1:
+        print(f"[run_aligned] feeds={string_type(feeds, **str_kws)}")
+    begin = time.perf_counter()
+    try:
+        res = ref.run(None, feeds)  # type: ignore[attr-defined]
+    except Exception as e:
+        raise RuntimeError(
+            f"Unable to run node {node.op_type}, domain={node.domain} "
+            f"with inputs={node.input}, feeds={string_type(feeds, **str_kws)}"
+        ) from e
+    duration = time.perf_counter() - begin
+    if verbose > 1:
+        print(f"[run_aligned] res={string_type(res, **str_kws)}")
+    assert (
+        not has_cuda
+        or not any(t is not None and t.is_cuda for t in feeds.values())
+        or any(t is not None and t.is_cuda for t in res)
+        or node.op_type in {"Shape", "Size"}  # on CPU no matter what
+        or node.op_type
+        in {
+            "Add",
+            "Concat",
+            "Div",
+            "Gather",
+            "Mul",
+            "Range",
+            "Squeeze",
+            "Sub",
+            "Unsqueeze",
+        }  # not sure, could be about shapes
+    ), (
+        f"One input is on cuda but there is no float output on cuda, "
+        f"feeds={string_type(feeds, with_device=True, with_shape=True)}, "
+        f"res={string_type(res, with_device=True, with_shape=True)}, "
+        f"node is {pretty_onnx(node)}"
+    )
+
+    comment = None
+    cross = None
+    if run_onnx_with_torch_inputs:
+        # Let's run the operator with torch results if they are available
+        new_feeds, removed = make_torch_inputs(
+            node.input,
+            {
+                **{v: k for k, v in torch_names_to_onnx_names.items()},
+                **mapping_onnx_to_torch,
+            },
+            onnx_results,
+            torch_results,
+            submodel=None,
         )
-        return
-    if outputs is None:
-        assert node.meta["val"] is None, (
-            f"None mismatch, got {outputs} expected {node.meta['val']}, "
-            f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-            f"node.args={node.args}, node.kwargs={node.kwargs}, "
-            f"node.meta={node.meta}"
+        if not removed:
+            if verbose > 1:
+                print(
+                    f"[run_aligned] feeds for second run="
+                    f"{string_type(new_feeds, **str_kws)}"
+                )
+            cross = ref.run(None, new_feeds)
+            if verbose > 1:
+                print(f"[run_aligned] got for second run={string_type(cross, **str_kws)}")
+            # Gemm = torch.nn.functional.linear; in that case, we just run it as well
+            to = mapping_onnx_to_torch.get(node.output[0], node.output[0])
+            if to in torch_results:
+                comment = _validation_nn_functional(node, new_feeds, [torch_results[to]])
+        elif verbose > 1:
+            print(f"[run_aligned] second run not possible because of missing {removed}")
+
+    if cross is None:
+        cross = [None for _ in res]
+
+    list_node_output = list(node.output)
+    node_output = [o for o in list_node_output if o]
+    for o, r, r2 in zip(node_output, res, cross):
+        if r is None or not o:
+            continue
+        tmp = _loop_cmp(
+            mapping_onnx_to_torch,
+            torch_results,
+            onnx_results,
+            o,
+            r,
+            r2,
+            verbose,
+            atol,
+            rtol,
+            i_torch,
+            i_onnx,
+            str_kws,
+            exc,
+            use_tensor,
         )
-        return
-    raise NotImplementedError(
-        f"Validation for output type {type(outputs)} is not implemented, "
-        f"node.name={node.name!r}, node.target={getattr(node, 'target', None)}, "
-        f"node.args={node.args}, node.kwargs={node.kwargs}, "
-        f"node.meta={node.meta}"
-    )
+        if tmp is not None:
+            if tmp.ep_name in name_to_ep_node:
+                tmp.ep_id_node = name_to_ep_node[tmp.ep_name]
+                tmp.ep_target = str(ep_graph_nodes[tmp.ep_id_node].target)
+                tmp.ep_time_run = ep_durations[tmp.ep_id_node]
+            else:
+                tmp.ep_id_node = None
+                tmp.ep_target = None
+                tmp.ep_name = None
+            tmp.onnx_op_type = onx.graph.node[tmp.onnx_id_node].op_type
+            tmp.onnx_id_output = list_node_output.index(o)
+            tmp.onnx_time_run = duration
+            status.yielded_nodes += 1
+            if tmp.err_abs is not None:
+                status.update(tmp.err_abs)
+            tmp.comment = comment
+            yield tmp
 
+            # do we need to dump pieces of the graph the user can replay?
+            if replay_configuration:
+                if replay_configuration.select(
+                    name=tmp.onnx_name, op_type=tmp.onnx_op_type, err_abs=tmp.err_abs
+                ):
+                    replay_configuration.dump(
+                        name=tmp.onnx_name,
+                        onnx_id_node=tmp.onnx_id_node,
+                        model=onx,
+                        onnx_results=onnx_results,
+                        torch_results=torch_results,
+                        onnx_name_to_ep_name={
+                            **{v: k for k, v in torch_names_to_onnx_names.items()},
+                            **mapping_onnx_to_torch,
+                        },
+                        verbose=max(verbose - 1, 0),
+                    )
+                    status.last_replay = tmp.onnx_name
 
-def run_fx_node(
-    node: torch.fx.Node, args: Tuple[Any, ...], kwargs: Optional[Dict[str, Any]] = None
-) -> Tuple[Any, ...]:
-    """
-    Executes a node
+            # reset_names: replaces onnx_results by torch_results to see
+            # if that fixes the discrepancies problem
+            if reset_names and tmp.ep_name in reset_names:
+                assert (
+                    tmp.ep_name in torch_results
+                ), f"name {tmp.ep_name!r} set to be reset is missing in torch_results."
+                assert (
+                    tmp.onnx_name in onnx_results
+                ), f"name {tmp.onnx_name!r} set to be reset is missing in onnx_results."
+                onnx_results[tmp.onnx_name] = torch_results[tmp.ep_name]
+                tmp = _loop_cmp(
+                    mapping_onnx_to_torch,
+                    torch_results,
+                    onnx_results,
+                    o,
+                    torch_results[tmp.ep_name],
+                    None,
+                    verbose,
+                    atol,
+                    rtol,
+                    i_torch,
+                    i_onnx,
+                    str_kws,
+                    exc,
+                    use_tensor,
+                )
+                assert tmp.err_abs == 0, f"Reset did not happen, tmp={tmp}"
+                if tmp is not None:
+                    tmp.onnx_op_type = "reset"
+                    tmp.onnx_id_output = list_node_output.index(o)
+                    status.yielded_nodes += 1
+                    yield tmp
+    already_run_onnx.add(i_onnx)
 
-    :param node: runs a node
-    :param args: unnamed inputs to the node
-    :param kwargs: named inputs to the node
-    :return: results
-    """
-    if node.op == "output":
-        assert len(args) == 1 and not kwargs, (
-            f"Unexpected inputs: args={string_type(args, limit=20)} "
-            f"kwargs={string_type(kwargs, limit=20)}"
+
+def _preparation_with_fx_graph(
+    ep_graph_nodes: List[torch.fx.Node],
+    name_to_ep_node: Dict[str, int],
+    torch_input_names: List[str],
+    onx: onnx.ModelProto,
+    torch_names_to_onnx_names: Dict[str, str],
+    skip_mapping_torch_onnx,
+    torch_results: Dict[str, torch.Tensor],
+    placeholders: Dict[str, torch.Tensor],
+    placeholders_to_state_dict: Dict[str, str],
+    ep_state_dict: Dict[str, torch.Tensor],
+    positions: Dict[str, Dict[str, int]],
+) -> List[str]:
+    torch_output_names = None
+    for i, node in enumerate(ep_graph_nodes):
+        if isinstance(node.name, str):
+            positions[node.name] = dict(fx=i)
+        else:
+            for n in node.name:
+                positions[n] = dict(fx=i)
+        if node.op == "placeholder":
+            if node.name in placeholders_to_state_dict:
+                # This is a weight.
+                placeholders[node.name] = ep_state_dict[placeholders_to_state_dict[node.name]]
+                torch_results[node.name] = placeholders[node.name]
+                assert isinstance(torch_results[node.name], torch.Tensor), (
+                    f"torch_results[{node.name}] not a tensor but "
+                    f"{type(torch_results[node.name])}"
+                )
+            else:
+                # This is an input
+                assert len(torch_input_names) < len(onx.graph.input), (
+                    f"torch_input_names={torch_input_names!r}, "
+                    f"onnx_input_names={[n.name for n in onx.graph.input]}, "
+                    f"node.name={node.name!r} cannot be an input, "
+                    f"placeholders_to_state_dict={sorted(placeholders_to_state_dict)}"
+                )
+                assert node.name not in skip_mapping_torch_onnx, (
+                    f"{node.name!r} is ambiguous, cannot be mapped due to "
+                    f"{skip_mapping_torch_onnx}"
+                )
+                torch_names_to_onnx_names[node.name] = onx.graph.input[
+                    len(torch_input_names)
+                ].name
+                torch_input_names.append(node.name)
+        elif node.op == "output":
+            torch_output_names = [n.name for n in node.args[0]]
+        assert isinstance(node.name, str), (
+            f"Unexpected type {type(node.name)} for node={node} (target={node.target}), "
+            f"args={node.args}"
         )
-        return args
-    if node.op == "call_function":
-        assert callable(node.target), f"{node.target!r} not callable in node {node!r}"
-        outputs = node.target(*args, **(kwargs or {}))
-        validate_fx_outputs(node, outputs)
-        return outputs
-    raise NotImplementedError(
-        f"node.op={node.op!r} is not implemented, node.name={node.name!r}"
+        name_to_ep_node[node.name] = i
+    assert torch_output_names is not None, "No output node was found in the graph."
+    return torch_output_names
+
+
+def _preparation_with_onnx_model(
+    default_device,
+    use_tensor: bool,
+    onx: onnx.ModelProto,
+    already_yielded: Dict[str, Any],
+    str_kws: Dict[str, bool],
+    positions: Dict[str, Dict[str, int]],
+    torch_names_to_onnx_names: Dict[str, str],
+    torch_output_names: List[str],
+    torch_results: Dict[str, torch.Tensor],
+    skip_mapping_torch_onnx: Set[str],
+    verbose: int,
+    args: Sequence[Any],
+    kwargs: Dict[str, Any],
+) -> Tuple[Dict[str, str], Dict[str, torch.Tensor], float, float, List[RunAlignedRecord]]:
+    for inp in onx.graph.input:
+        n = inp.name
+        if n in positions:
+            positions[n]["onnx"] = -1
+        else:
+            positions[n] = dict(onnx=-1)
+    for inp in onx.graph.initializer:
+        n = inp.name
+        if n in positions:
+            positions[n]["onnx"] = -1
+        else:
+            positions[n] = dict(onnx=-1)
+    for i, node in enumerate(onx.graph.node):
+        for n in node.output:
+            if n in positions:
+                positions[n]["onnx"] = i
+            else:
+                positions[n] = dict(onnx=i)
+
+    onnx_outputs_names = [o.name for o in onx.graph.output]
+    assert torch_output_names is not None and len(torch_output_names) == len(
+        onnx_outputs_names
+    ), (
+        f"Unexpected number of outputs, torch_output_names={torch_output_names}, "
+        f"onnx_outputs_names={onnx_outputs_names}"
     )
+    mapping_onnx_to_torch = dict(zip(onnx_outputs_names, torch_output_names))
 
+    onnx_args = list(args) if args else []
+    if kwargs:
+        onnx_args.extend(flatten_object(kwargs, drop_keys=True))
+    if verbose:
+        print(f"[run_aligned] args: {string_type(args, **str_kws)}")
+        print(f"[run_aligned] kwargs: {string_type(kwargs, **str_kws)}")
+        print(f"[run_aligned] onnx: {string_type(onnx_args, **str_kws)}")
+        print(f"[run_aligned] nx: walks through {len(onx.graph.input)} onnx inputs")
+    onnx_results: Dict[str, Any] = {}
+    for inp, v in zip(onx.graph.input, onnx_args):
+        onnx_results[inp.name] = _check_tensor_(
+            use_tensor, inp.name, v if use_tensor else to_numpy(v)
+        )
+        if verbose:
+            print(f"[run_aligned-nx] +inp: {inp.name}: {string_type(v, **str_kws)}")
 
-def _pick_result(torch_results: Dict[str, Any], ref: Any) -> Any:
-    "See :func:`prepare_args_kwargs`."
-    if isinstance(ref, torch.fx.Node):
-        return torch_results[ref.name]
-    if isinstance(ref, list):
-        return [_pick_result(torch_results, n) for n in ref]
-    if isinstance(ref, tuple):
-        return tuple(_pick_result(torch_results, n) for n in ref)
-    if isinstance(ref, dict):
-        return {k: _pick_result(torch_results, v) for k, v in ref.items()}
-    if isinstance(ref, (bool, int, float, str, torch.device, torch.dtype)):
-        return ref
-    if ref is None:
-        return None
-    raise NotImplementedError(f"Unable to process args type {type(ref)}")
-
-
-def prepare_args_kwargs(
-    torch_results: Dict[str, Any], node: torch.fx.Node
-) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
-    """
-    Prepares args and kwargs before executing a fx node.
+    # alias for initializers
+    skip_onnx_name = set()
+    init_aliases: Dict[str, str] = {}
+    for init in onx.graph.initializer:
+        new_names = {
+            n
+            for n in [
+                f"p_{init.name.replace('.', '_')}",
+                f"p_{init.name.split('::')[0].split('--')[-1].replace('.', '_')}",
+                f"{init.name.split('::')[0].split('--')[-1].replace('.', '_')}",
+            ]
+            if n != init.name
+        }
+        drop = False
+        for new_name in new_names:
+            if new_name in skip_onnx_name:
+                drop = True
+                break
+        if drop:
+            skip_onnx_name |= new_names | {init.name}
+            for new_name in new_names:
+                if new_name in init_aliases:
+                    del init_aliases[new_name]
+        else:
+            for new_name in new_names:
+                init_aliases[new_name] = init.name
+    rev_init_aliases: Dict[str, Set[str]] = {}
+    for k, v in init_aliases.items():
+        if v in rev_init_aliases:
+            rev_init_aliases[v].add(k)
+        else:
+            rev_init_aliases[v] = {k}
 
-    :param torch_results: existing results
-    :param node: node to execute
-    :return: new args and kwargs
-    """
-    new_args = _pick_result(torch_results, node.args)
-    new_kwargs = _pick_result(torch_results, node.kwargs)
-    return new_args, new_kwargs
+    # initializers
+    if verbose:
+        print(f"[run_aligned] nx: handles {len(onx.graph.initializer)} initializers from onnx")
+    memory_cpu = 0
+    memory_cuda = 0
+    records_to_yield = []
+    for init in onx.graph.initializer:  # type: ignore
+        t = None
+        if init.name in torch_results:
+            if init.name not in skip_mapping_torch_onnx:
+                t = torch_results[init.name]
+                torch_names_to_onnx_names[init.name] = init.name
+        elif init.name not in skip_onnx_name and init.name in rev_init_aliases:
+            new_names = [
+                k
+                for k in rev_init_aliases[init.name]
+                if k in torch_results and k not in skip_mapping_torch_onnx
+            ]
+            if new_names and len(new_names) == 1:
+                new_name = new_names[0]  # type: ignore[assignment, index]
+                t = torch_results[new_name]
+                if (
+                    len(set(t.shape)) == len(t.shape)  # not repeated dimension
+                    and t.shape == tuple(init.dims)
+                    and torch_dtype_to_onnx_dtype(t.dtype) == init.data_type
+                ):
+                    torch_names_to_onnx_names[new_name] = init.name
+                else:
+                    t = None
+
+        # We should check tensors and proto are the same.
+        if t is None:
+            t = to_tensor(init)
+            if default_device and t.numel() >= 1024:
+                # Let's force its way to cuda (should check the device as well).
+                t = t.to(default_device)
+            records_to_yield.append(
+                RunAlignedRecord(
+                    onnx_id_node=-1,
+                    onnx_name=init.name,
+                    onnx_op_type="initializer",
+                    onnx_shape_type=string_type(t, **str_kws),
+                ).check(already_yielded)
+            )
+
+        size = t.element_size() * t.numel()
+        if t.is_cuda:
+            memory_cuda += size
+        else:
+            memory_cpu += size
+        if init.name not in onnx_results:
+            # otherwise, it is an input with a default value
+            onnx_results[init.name] = _check_tensor_(use_tensor, init.name, t, flip_type=True)
+    return mapping_onnx_to_torch, onnx_results, memory_cpu, memory_cuda, records_to_yield
 
 
 def run_aligned(
     ep: torch.export.ExportedProgram,
     onx: Union[onnx.ModelProto, onnx.FunctionProto],
-    args: Tuple[torch.Tensor, ...],
-    check_conversion_cls: Union[Dict[str, Any], type],
+    run_cls: Callable[
+        [
+            Union[
+                onnx.ModelProto,
+                onnx.FunctionProto,
+                onnx.GraphProto,
+                onnx.NodeProto,
+            ]
+        ],
+        List[Union[np.ndarray, torch.Tensor]],
+    ],
+    args: Optional[Tuple[torch.Tensor, ...]] = None,
     kwargs: Optional[Dict[str, Any]] = None,
+    use_tensor: bool = False,
+    atol: Optional[float] = None,
+    rtol: Optional[float] = None,
     verbose: int = 0,
-) -> Iterator[Tuple[Any, ...]]:
+    exc: bool = True,
+    reset_names: Optional[List[str]] = None,
+    replay_configuration: Optional[ReplayConfiguration] = None,
+    run_onnx_with_torch_inputs: bool = False,
+) -> Iterator[RunAlignedRecord]:
     """
     Runs in parallel both the exported program
     and the onnx proto and looks for discrepancies.
@@ -162,11 +590,25 @@ def run_aligned(
 
     :param ep: exported program
     :param onx: model or function proto
+    :param run_cls: defines the runtime to use for this task
     :param args: input args
-    :param check_conversion_cls: defines the runtime to use for this task
     :param kwargs: input kwargs
+    :param use_tensor: use torch tensors instead of numpy arrays
+        for the onnx runtime
+    :param atol: absolute tolerance
+    :param rtol: relative tolerance
     :param verbose: verbosity level
-    :return: a list of tuples containing the results, they come in tuple,
+    :param exc: if True, raises an exception as soon as discrepancies are detected
+    :param reset_names: list of names, the onnx execution takes the torch outputs instead
+        of its own result if the name falls into that set
+    :param replay_configuration: configuration letting the user dump any problematic
+        piece of the onnx graph they want to replay and investigate later,
+        see :class:`ReplayConfiguration
+        <onnx_diagnostic.torch_onnx.sbs.ReplayConfiguration>`
+    :param run_onnx_with_torch_inputs: run an onnx operator with torch results
+        if they are available
+    :return: a list of :class:`RunAlignedRecord
+        <onnx_diagnostic.torch_onnx.sbs_dataclasses.RunAlignedRecord>`
 
     Example:
 
@@ -174,11 +616,10 @@ def run_aligned(
         :showcode:
         :warningout: UserWarning
 
-        import pprint
         import pandas
         import torch
         from onnx_diagnostic.reference import (
-            # This can be replace by any runtime taking NodeProto as an input.
+            # This can be replaced by any runtime taking NodeProto as an input.
            ExtendedReferenceEvaluator as ReferenceEvaluator,
        )
        from onnx_diagnostic.torch_onnx.sbs import run_aligned
@@ -193,13 +634,6 @@ def run_aligned(
             return ru
 
 
-        def post_process(obs):
-            dobs = dict(zip(["ep_id_node", "onnx_id_node", "ep_name", "onnx_name"], obs))
-            dobs["err_abs"] = obs[-1]["abs"]
-            dobs["err_rel"] = obs[-1]["rel"]
-            return dobs
-
-
        x = torch.randn((5, 4))
        Model()(x)  # to make sure the model is running
        ep = torch.export.export(
@@ -209,131 +643,341 @@ def run_aligned(
            Model(), (x,), dynamic_shapes=({0: torch.export.Dim("batch")},)
        ).model_proto
        results = list(
-            map(
-                post_process,
-                run_aligned(
-                    ep,
-                    onx,
-                    (x,),
-                    check_conversion_cls=dict(cls=ReferenceEvaluator, atol=1e-5, rtol=1e-5),
-                    verbose=1,
-                ),
-            ),
+            run_aligned(ep, onx, ReferenceEvaluator, (x,), atol=1e-5, rtol=1e-5, verbose=1)
        )
        print("------------")
        print("final results")
        df = pandas.DataFrame(results)
+        df = df.apply(lambda col: col.fillna("") if col.dtype == "object" else col)
        print(df)
-    """
-    assert not kwargs, f"Not implemented when kwargs={string_type(kwargs,with_shape=True)}"
-    cls, atol, rtol = (
-        (
-            check_conversion_cls["cls"],
-            check_conversion_cls["atol"],
-            check_conversion_cls["rtol"],
+
+    This example uses :class:`onnx.reference.ReferenceEvaluator` to run the onnx model
+    but onnxruntime can also be used through
+    :class:`onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch`.
+    It relies on :epkg:`onnxruntime` and selects CPU or CUDA depending
+    on the device where the inputs are located.
+
+    The :class:`torch.export.ExportedProgram` can be saved on disk
+    with ``ep.save("<filename>.pt")`` and restored with
+    ``torch.export.load("<filename>.pt")``. That leaves only the inputs to save.
+    We can decouple the export from the alignment.
+
+    .. runpython::
+        :showcode:
+        :warningout: UserWarning
+
+        import onnx
+        import torch
+        from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
+
+
+        class Model(torch.nn.Module):
+            def forward(self, x):
+                ry = x.abs()
+                rz = ry.exp()
+                rw = rz + 1
+                ru = rw.log() + rw
+                return ru
+
+
+        x = torch.randn((5, 4))
+        dynamic_shapes = ({0: "batch"},)
+        Model()(x)  # to make sure the model is running
+        ep = torch.export.export(
+            Model(), (x,), dynamic_shapes=use_dyn_not_str(dynamic_shapes)
        )
-        if isinstance(check_conversion_cls, dict)
-        else (check_conversion_cls, None, None)
-    )
+        onx = torch.onnx.export(
+            Model(), (x,), dynamic_shapes=dynamic_shapes
+        ).model_proto
 
-    # retrieve the positions
-    positions: Dict[str, Any] = {}
-    for i, node in enumerate(ep.graph.nodes):
-        if isinstance(node.name, str):
-            positions[node.name] = dict(fx=i)
-        else:
-            for n in node.name:
-                positions[n] = dict(fx=i)
+        torch.export.save(ep, "test_doc_sbs_example.pt2")
+        onnx.save(onx, "test_doc_sbs_example.onnx")
+        torch.save((x,), "test_doc_sbs_example.pt")
 
-    for i, node in enumerate(onx.graph.node):
-        for n in node.output:
-            if n in positions:
-                positions[n]["onnx"] = i
-            else:
-                positions[n] = dict(onnx=i)
+    Then we can restore all of them and run the alignment.
 
-    onnx_results: Dict[str, Any] = {}
-    for init in onx.graph.initializer:  # type: ignore
-        positions[init.name] = -1
-        onnx_results[init.name] = to_array_extended(init)
-        param_name = f"p_{init.name.replace('.', '_')}"
-        if param_name == init.name:
-            continue
-        assert param_name not in onnx_results, (
-            f"Some confusion may happen because {init.name!r} -> {param_name!r} "
-            f"and onnx_results has {sorted(onnx_results)}"
+    .. runpython::
+        :showcode:
+        :warningout: UserWarning
+
+        import pandas
+        import onnx
+        import torch
+        from onnx_diagnostic.torch_onnx.sbs import run_aligned
+        from onnx_diagnostic.reference import OnnxruntimeEvaluator
+
+
+        ep = torch.export.load("test_doc_sbs_example.pt2")
+        onx = onnx.load("test_doc_sbs_example.onnx")
+        inputs = torch.load("test_doc_sbs_example.pt")
+
+
+        results = list(
+            run_aligned(
+                ep,
+                onx,
+                OnnxruntimeEvaluator,
+                inputs,
+                atol=1e-5,
+                rtol=1e-5,
+                verbose=1,
+                use_tensor=True,
+            )
        )
-        onnx_results[param_name] = onnx_results[init.name]
+        print("------------")
+        print("final results")
+        df = pandas.DataFrame(results)
+        df = df.apply(lambda col: col.fillna("") if col.dtype == "object" else col)
+        print(df)
+
+    A command line can also be run:
+
+    .. code-block:: bash
 
-    torch_results: Dict[str, Any] = {
-        k: torch.from_numpy(v.copy())
-        for k, v in onnx_results.items()
-        if not k.startswith("init")
+        python -m onnx_diagnostic sbs -i <tensors>.input.pt \\
+            --ep <exported_program>.pt2 \\
+            -m <model>.onnx \\
+            -o results.xlsx \\
+            -v 1 --atol=0.1 --rtol=1
+    """
+    assert callable(run_cls), f"run_cls={run_cls} not a callable"
+    already_yielded = {}
+    reset_names = set(reset_names) if reset_names else set()
+    str_kws = dict(with_shape=True, with_device=True)
+    has_cuda = any(
+        (isinstance(t, torch.Tensor) and t.is_cuda)
+        for t in flatten_object([args, kwargs], drop_keys=True)
+    )
+    default_device = None
+    if has_cuda:
+        for t in flatten_object([args, kwargs], drop_keys=True):
+            if t is not None and t.is_cuda:
+                default_device = t.device
+                break
+    run_cls_kwargs = {
+        "ir_version": onx.ir_version,
+        "opsets": {d.domain: d.version for d in onx.opset_import},
+        "verbose": max(verbose - 1, 0),
+        "providers": (
+            ["CUDAExecutionProvider", "CPUExecutionProvider"]
+            if has_cuda
+            else ["CPUExecutionProvider"]
+        ),
    }
-    last_position = 0
+    run_cls_kwargs = {
+        k: v
+        for k, v in run_cls_kwargs.items()
+        if k in set(inspect.signature(run_cls).parameters)
+    }
+    if verbose:
+        print(f"[run_aligned] run_cls={run_cls}")
+        print(f"[run_aligned] run_cls_kwargs={run_cls_kwargs}")
+    if replay_configuration:
+        print(f"[run_aligned] replay={replay_configuration}")
+
+    # preparation with ep.graph.nodes
+    ep_state_dict = {**ep.state_dict, **dict(ep.named_buffers(), **ep.tensor_constants)}
+    placeholders_to_state_dict = {
+        **{f"p_{name.replace('.', '_').lower()}": name for name in ep.state_dict},
+        **{f"b_{name.replace('.', '_').lower()}": name for name, _ in ep.named_buffers()},
+        **{f"c_{name.replace('.', '_').lower()}": name for name in ep.tensor_constants},
+    }
+    skip_mapping_torch_onnx = _duplicated_values(placeholders_to_state_dict)
+    placeholders = {}
+    if verbose:
+        print(f"[run_aligned] ep: model has {len(ep_state_dict)} torch constants or weights.")
+
+    if verbose:
+        print(f"[run_aligned] ep: walks through {len(ep.graph.nodes)} nodes from torch")
+
+    # dictionary mapping result names and their position in both graphs.
+    positions: Dict[str, Dict[str, int]] = {}
+    ep_graph_nodes = list(ep.graph.nodes)
+    torch_results: Dict[str, Any] = {}
     torch_output_names = None
-    for node in ep.graph.nodes:
-        if node.op == "output":
-            torch_output_names = [n.name for n in node.args[0]]
-    onnx_outputs_names = [o.name for o in onx.graph.output]
-    assert torch_output_names is not None and len(torch_output_names) == len(
-        onnx_outputs_names
-    ), (
-        f"Unexpected number of outputs, torch_output_names={torch_output_names}, "
-        f"onnx_outputs_names={onnx_outputs_names}"
+    torch_input_names: List[str] = []
+    name_to_ep_node = {}
+    torch_names_to_onnx_names = {}
+    torch_output_names = _preparation_with_fx_graph(
+        ep_graph_nodes,
+        name_to_ep_node,
+        torch_input_names,
+        onx,
+        torch_names_to_onnx_names,
+        skip_mapping_torch_onnx,
+        torch_results,
+        placeholders,
+        placeholders_to_state_dict,
+        ep_state_dict,
+        positions,
    )
-    mapping_onnx_to_torch = dict(zip(onnx_outputs_names, torch_output_names))
 
+    # preparation for onnx
     if verbose:
-        for k, v in torch_results.items():
-            print(
-                f"[run_aligned] +torch-cst: {k}: "
-                f"{string_type(v, with_shape=True, with_min_max=True)}"
-            )
-        for k, v in onnx_results.items():
-            print(
-                f"[run_aligned] +onnx-init: {k}: "
-                f"{string_type(v, with_shape=True, with_min_max=True)}"
-            )
+        print(f"[run_aligned] ep: found {len(torch_results)} torch constants or weights.")
+        print(f"[run_aligned] ep: found inputs {torch_input_names}")
+        print(f"[run_aligned] ep: found outputs {torch_output_names}")
+        print(f"[run_aligned] nx: walks through {len(onx.graph.node)} nodes from onnx")
 
-    for inp, v in zip(onx.graph.input, args):
-        onnx_results[inp.name] = to_numpy(v)
-        if verbose:
-            print(
-                f"[run_aligned] +onnx-input: {inp.name}: "
-                f"{string_type(v, with_shape=True, with_min_max=True)}"
-            )
+    mapping_onnx_to_torch, onnx_results, memory_cpu, memory_cuda, records_to_yield = (
+        _preparation_with_onnx_model(
+            default_device,
+            use_tensor,
+            onx,
+            already_yielded,
+            str_kws,
+            positions,
+            torch_names_to_onnx_names,
+            torch_output_names,
+            torch_results,
+            skip_mapping_torch_onnx,
+            verbose,
+            args,
+            kwargs,
+        )
+    )
+    for record in records_to_yield:
+        yield record
 
-    for i, node in enumerate(ep.graph.nodes):
-        if verbose:
+    if verbose:
+        print(f"[run_aligned] nx: handled {len(onnx_results)} initializers from onnx")
+        print(f"[run_aligned] nx: memory cpu {memory_cpu / 2**20:.3f} Mb")
+        print(f"[run_aligned] nx: memory cuda {memory_cuda / 2**20:.3f} Mb")
+        print(f"[run_aligned] nx: {len(onnx_results)} constants")
+        print(f"[run_aligned] nx: {len(onx.graph.input)} inputs")
+        print(f"[run_aligned] nx: {len(onx.graph.output)} outputs")
+        print(f"[run_aligned] bo: {len(mapping_onnx_to_torch)} outputs")
+        print(f"[run_aligned] run_cls_kwargs={run_cls_kwargs}")
+    if verbose > 1:
+        for k, v in torch_results.items():
+            print(f"[run_aligned-ep] +cst: {k}: {string_type(v, **str_kws)}")
+        for k, v in onnx_results.items():
+            print(f"[run_aligned-nx] +ini: {k}: {string_type(v, **str_kws)}")
+
+    # starts the side-by-side
+    if verbose:
+        print(
+            f"[run_aligned] ep: starts side-by-side with {len(ep_graph_nodes)} "
+            f"fx nodes and {len(onx.graph.node)} onnx nodes"
+        )
+    if verbose == 1:
+        import tqdm
+
+        loop = tqdm.tqdm(total=len(ep_graph_nodes) + len(onx.graph.node))
+    else:
+        loop = None
+
+    already_run: Set[int] = set()
+    ep_durations = {}
+    status = StatusRunAligned()
+    last_position = 0
+    for i_torch, node in enumerate(ep_graph_nodes):
+        if verbose > 1:
            if node.op == "call_function":
                print(
-                    f"[run_aligned] run ep.graph.nodes[{i}]: "
+                    f"[run_aligned] run ep.graph.nodes[{i_torch}]: "
                    f"{node.op}[{node.target}] -> {node.name!r}"
                )
            else:
-                print(f"[run_aligned] run ep.graph.nodes[{i}]: {node.op} -> {node.name!r}")
+                print(
+                    f"[run_aligned] run ep.graph.nodes[{i_torch}]: {node.op} -> {node.name!r}"
+                )
+        elif verbose == 1:
+            loop.set_description(
+                f"ep {i_torch}/{len(ep_graph_nodes)} nx {last_position}/{len(onx.graph.node)} "
+                f"{status.to_str()}"
+            )
+            loop.update(min(1, 1 + i_torch + last_position))
 
        if node.op == "placeholder":
-            if node.name in onnx_results:
-                torch_results[node.name] = torch.from_numpy(onnx_results[node.name].copy())
-                if verbose:
-                    t = torch_results[node.name]
-                    print(
-                        f"[run_aligned] +torch {node.name}="
-                        f"{string_type(t, with_shape=True, with_min_max=True)}"
+            is_input = node.name not in placeholders
+            if is_input:
+                torch_results[node.name] = (
+                    onnx_results[torch_names_to_onnx_names[node.name]]
+                    if use_tensor
+                    else from_numpy(onnx_results[torch_names_to_onnx_names[node.name]])
+                )
+                assert isinstance(torch_results[node.name], torch.Tensor), (
+                    f"torch_results[{node.name}] not a tensor but "
+                    f"{type(torch_results[node.name])}, use_tensor={use_tensor}"
+                )
+                t = torch_results[node.name]
+                if verbose > 1:
+                    print(f"[run_aligned-ep] =ags: {node.name}={string_type(t, **str_kws)}")
+                # Otherwise, it is an input.
+                record = RunAlignedRecord(
+                    ep_id_node=i_torch,
+                    onnx_id_node=-1,
+                    ep_name=node.name,
+                    onnx_name=torch_names_to_onnx_names[node.name],
+                    ep_target="input",
+                    onnx_op_type="input",
+                    ep_shape_type=string_type(t, **str_kws),
+                    onnx_shape_type=string_type(
+                        onnx_results[torch_names_to_onnx_names[node.name]], **str_kws
+                    ),
+                )
+                yield record.check(already_yielded)
+            else:
+                assert node.name in placeholders_to_state_dict, (
+                    f"Unable to find placeholder {node.name!r} (node.op={node.op!r}), "
+                    f"existing: {sorted(placeholders_to_state_dict)}"
+                )
+                assert node.name in torch_results, (
+                    f"placeholder {node.name!r} (node.op={node.op!r}), "
+                    f"should have been added to torch_results: {sorted(torch_results)}"
+                )
+                t = torch_results[node.name]
+                if (
+                    node.name in torch_names_to_onnx_names
+                    and node.name not in skip_mapping_torch_onnx
+                ):
+                    if verbose > 1:
+                        print(
+                            f"[run_aligned-ep] =plh: "
+                            f"{node.name}={string_type(t, **str_kws)}"
+                        )
+                    record = RunAlignedRecord(
+                        ep_id_node=i_torch,
+                        onnx_id_node=-1,
+                        ep_name=node.name,
+                        onnx_name=torch_names_to_onnx_names[node.name],
+                        ep_target="placeholder",
+                        onnx_op_type="initializer",
+                        ep_shape_type=string_type(t, **str_kws),
+                        onnx_shape_type=string_type(
+                            onnx_results[torch_names_to_onnx_names[node.name]], **str_kws
+                        ),
                    )
-                continue
-            raise AssertionError(
-                f"unable to process node {node.op} -> {node.name!r} "
-                f"not in {sorted(onnx_results)}, len(args)={len(args)}, "
-                f"onx.graph.input={[i.name for i in onx.graph.input]}"
-            )
+                    if not is_input:
+                        record.set_diff(
+                            max_diff(
+                                t,
+                                onnx_results[torch_names_to_onnx_names[node.name]],
+                                hist=[0.1, 0.01],
+                            )
+                        )
+                    yield record.check(already_yielded)
+                else:
+                    if verbose > 1:
+                        print(
+                            f"[run_aligned-ep] +plh: {node.name}={string_type(t, **str_kws)}"
+                        )
+                    yield RunAlignedRecord(
+                        ep_id_node=i_torch,
+                        ep_name=node.name,
+                        ep_target="placeholder",
+                        ep_shape_type=string_type(t, **str_kws),
+                    ).check(already_yielded)
+            continue
 
        outputs = [node.name] if isinstance(node.name, str) else list(node.name)
        args, kwargs = prepare_args_kwargs(torch_results, node)
+        begin = time.perf_counter()
        new_outputs = run_fx_node(node, args, kwargs)
-        if isinstance(new_outputs, (torch.Tensor, int, float, list)):
+        duration = time.perf_counter() - begin
+        ep_durations[i_torch] = duration
+        if isinstance(new_outputs, (torch.Tensor, int, float, list, tuple)):
            new_outputs = (new_outputs,)
 
        if new_outputs is None:
@@ -342,99 +986,112 @@ def run_aligned(
 
        for k, v in zip(outputs, new_outputs):
            torch_results[k] = v
-        if verbose:
+        if verbose > 1:
            for k, v in zip(outputs, new_outputs):
-                print(
-                    f"[run_aligned] +torch {k}="
-                    f"{string_type(v, with_shape=True, with_min_max=True)}"
-                )
+                print(f"[run_aligned-ep] +res: {k}={string_type(v, **str_kws)}")
 
        max_pos = -2
        for n in outputs:
-            if n in positions and "onnx" in positions[n]:
-                max_pos = max(max_pos, positions[n]["onnx"])
+            if n in positions:
+                if "onnx" in positions[n]:
+                    max_pos = max(max_pos, positions[n]["onnx"])
+                if "fx" in positions[n]:
+                    if positions[n]["fx"] > i_torch:
+                        max_pos = -2
+                        break
        if max_pos == -2:
            # we skip.
            continue
 
+        next_to_visit = last_position
        for i_onnx in range(last_position, max_pos + 1):
+            if i_onnx in already_run:
+                continue
+            # The onnx node may produce more than one output, in that
+            # case, we need to check the exported program is not behind.
            node = onx.graph.node[i_onnx]
-            if verbose:
-                print(
-                    f"[run_aligned] run onx.graph.node[{i_onnx}]: "
-                    f"{node.op_type}({', '.join(node.input)}) -> {', '.join(node.output)}"
-                )
-            ref = cls(node)
-            feeds = {k: onnx_results[k] for k in node.input}
-            res = ref.run(None, feeds)
-            for o, r in zip(node.output, res):
-                onnx_results[o] = r
-                if verbose:
-                    print(
-                        f"[run_aligned] +onnx {o}="
-                        f"{string_type(r, with_shape=True, with_min_max=True)}"
-                    )
+            ep_behind = False
+            for iname in node.output:
+                if iname in positions and "fx" in positions[iname]:
+                    if positions[iname]["fx"] > i_torch:
+                        ep_behind = True
+                        break
+            if ep_behind:
+                break
 
-                to = mapping_onnx_to_torch.get(o, o)
-                if to in torch_results:
-                    d = max_diff(torch_results[to], r)
-                    if verbose:
-                        if o == to:
-                            print(f"[run_aligned] =common results {to}: {string_diff(d)}")
-                        else:
-                            print(f"[run_aligned] =common results {to}/{o}: {string_diff(d)}")
-                    if not (
-                        atol is None
-                        or rtol is None
-                        or (d["abs"] <= atol and d["rel"] <= rtol)
-                    ):
-                        skw = dict(with_shape=True, with_min_max=True)
-                        raise ValueError(
-                            f"discrepancies detected for results [{to}/{o}]: "
-                            f"{string_diff(d)}"
-                            f"\n-- torch_results: {string_type(torch_results[to], **skw)}"
-                            f"\n-- onnx_results: {string_type(r, **skw)}"
-                            f"\n-- torch\n{torch_results[to]}\n-- onnx\n{r}"
-                        )
-                    yield (i, i_onnx, o, to, d)
+            for r in _loop_onnx_node(
+                onx,
+                ep_graph_nodes,
+                onnx_results,
+                mapping_onnx_to_torch,
+                torch_results,
+                ep_durations,
+                use_tensor,
+                i_torch,
+                i_onnx,
+                name_to_ep_node,
+                run_cls_kwargs,
+                str_kws,
+                status,
+                already_run,
+                torch_names_to_onnx_names,
+                verbose,
+                exc,
+                atol,
+                rtol,
+                reset_names,
+                replay_configuration,
+                has_cuda,
+                run_cls,
+                loop,
+                run_onnx_with_torch_inputs,
+            ):
+                if r:
+                    yield r.check(already_yielded)
+            next_to_visit = i_onnx + 1
 
-        last_position = max_pos + 1
+        last_position = next_to_visit
 
    # complete the execution of the onnx graph
+    if verbose > 1:
+        print(
+            f"[run_aligned] complete execution of onnx graph from pos={last_position} "
+            f"to {len(onx.graph.node)}"
+        )
    for i_onnx in range(last_position, len(onx.graph.node)):
-        node = onx.graph.node[i_onnx]
-        if verbose:
-            print(
-                f"[run_aligned] run onx.graph.node[{i_onnx}]: "
-                f"{node.op_type}({', '.join(node.input)}) -> {', '.join(node.output)}"
-            )
-        ref = cls(node)
-        feeds = {k: onnx_results[k] for k in node.input}
-        res = ref.run(None, feeds)
-        for o, r in zip(node.output, res):
-            onnx_results[o] = r
-            if verbose:
-                print(
-                    f"[run_aligned] +onnx {o}="
-                    f"{string_type(r, with_shape=True, with_min_max=True)}"
-                )
+        if i_onnx in already_run:
+            continue
+        for r in _loop_onnx_node(
+            onx,
+            ep_graph_nodes,
+            onnx_results,
+            mapping_onnx_to_torch,
+            torch_results,
+            ep_durations,
+            use_tensor,
+            i_torch,
+            i_onnx,
+            name_to_ep_node,
+            run_cls_kwargs,
+            str_kws,
+            status,
+            already_run,
+            torch_names_to_onnx_names,
+            verbose,
+            exc,
+            atol,
+            rtol,
+            reset_names,
+            replay_configuration,
+            has_cuda,
+            run_cls,
+            loop,
+            run_onnx_with_torch_inputs,
+        ):
+            if r:
+                yield r.check(already_yielded)
 
-            to = mapping_onnx_to_torch.get(o, o)
-            if to in torch_results:
-                d = max_diff(torch_results[to], r)
-                if verbose:
-                    if o == to:
-                        print(f"[run_aligned] =common results* {to}: {string_diff(d)}")
-                    else:
-                        print(f"[run_aligned] =common results* {to}/{o}: {string_diff(d)}")
-                if not (
-                    atol is None or rtol is None or (d["abs"] <= atol and d["rel"] <= rtol)
-                ):
-                    skw = dict(with_shape=True, with_min_max=True)
-                    raise ValueError(
-                        f"discrepancies detected for results* [{to}/{o}]: {string_diff(d)}"
-                        f"\n-- torch_results: {string_type(torch_results[to], **skw)}"
-                        f"\n-- onnx_results: {string_type(r, **skw)}"
-                        f"\n-- torch\n{torch_results[to]}\n-- onnx\n{r}"
-                    )
-                yield (i, i_onnx, o, to, d)
+    if loop is not None:
+        loop.close()
+    if verbose:
+        print(f"[run_aligned] done with status={status.to_str()}")