onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +387 -12
  3. onnx_diagnostic/export/api.py +118 -5
  4. onnx_diagnostic/export/control_flow.py +214 -0
  5. onnx_diagnostic/export/control_flow_onnx.py +528 -0
  6. onnx_diagnostic/export/control_flow_research.py +135 -0
  7. onnx_diagnostic/export/onnx_plug.py +396 -0
  8. onnx_diagnostic/ext_test_case.py +118 -25
  9. onnx_diagnostic/helpers/cache_helper.py +218 -204
  10. onnx_diagnostic/helpers/dot_helper.py +210 -0
  11. onnx_diagnostic/helpers/helper.py +92 -26
  12. onnx_diagnostic/helpers/log_helper.py +26 -4
  13. onnx_diagnostic/helpers/mini_onnx_builder.py +57 -3
  14. onnx_diagnostic/helpers/model_builder_helper.py +27 -0
  15. onnx_diagnostic/helpers/onnx_helper.py +115 -16
  16. onnx_diagnostic/helpers/ort_session.py +37 -11
  17. onnx_diagnostic/helpers/rt_helper.py +547 -0
  18. onnx_diagnostic/helpers/torch_fx_graph_helper.py +164 -0
  19. onnx_diagnostic/helpers/torch_helper.py +108 -6
  20. onnx_diagnostic/reference/ort_evaluator.py +233 -28
  21. onnx_diagnostic/tasks/feature_extraction.py +15 -14
  22. onnx_diagnostic/tasks/image_text_to_text.py +5 -1
  23. onnx_diagnostic/tasks/summarization.py +72 -137
  24. onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
  25. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
  26. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
  27. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_attention.py +235 -0
  28. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_cache_utils.py +50 -0
  29. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_causal_mask.py +89 -0
  30. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_dynamic_cache.py +177 -0
  31. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_gemma3.py +54 -0
  32. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_generation_mixin.py +486 -0
  33. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_idefics.py +156 -0
  34. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +173 -0
  35. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2.py +99 -0
  36. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +680 -0
  37. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen3.py +106 -0
  38. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +412 -0
  39. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_sam_mask_decoder.py +132 -0
  40. onnx_diagnostic/torch_export_patches/patches/patch_helper.py +28 -0
  41. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +65 -2107
  42. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
  43. onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
  44. onnx_diagnostic/torch_models/validate.py +50 -1
  45. onnx_diagnostic/torch_onnx/sbs.py +963 -312
  46. onnx_diagnostic/torch_onnx/sbs_dataclasses.py +491 -0
  47. {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/METADATA +1 -1
  48. {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/RECORD +51 -30
  49. {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/WHEEL +0 -0
  50. {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/licenses/LICENSE.txt +0 -0
  51. {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
  Functions, classes to dig into a model when this one is right, slow, wrong...
  """
 
- __version__ = "0.8.1"
+ __version__ = "0.8.3"
 
  __author__ = "Xavier Dupré"
onnx_diagnostic/_command_lines_parser.py
@@ -1,14 +1,87 @@
  import argparse
+ import contextlib
  import json
  import os
  import re
  import sys
  import textwrap
+ import time
  import onnx
  from typing import Any, Dict, List, Optional, Union
  from argparse import ArgumentParser, RawTextHelpFormatter, BooleanOptionalAction
 
 
+ def get_parser_dot() -> ArgumentParser:
+     parser = ArgumentParser(
+         prog="dot",
+         description=textwrap.dedent(
+             """
+             Converts an onnx model into a dot file that dot can render as a graph.
+             """
+         ),
+     )
+     parser.add_argument("input", type=str, help="onnx model to convert")
+     parser.add_argument(
+         "-o",
+         "--output",
+         default="",
+         type=str,
+         required=False,
+         help="dot file to output, or empty to print out the result",
+     )
+     parser.add_argument(
+         "-v",
+         "--verbose",
+         type=int,
+         default=0,
+         required=False,
+         help="verbosity",
+     )
+     parser.add_argument(
+         "-r",
+         "--run",
+         default="",
+         required=False,
+         help="run dot; in that case, a format must be given (svg, png)",
+     )
+     return parser
+
+
+ def _cmd_dot(argv: List[Any]):
+     import subprocess
+     from .helpers.dot_helper import to_dot
+
+     parser = get_parser_dot()
+     args = parser.parse_args(argv[1:])
+     if args.verbose:
+         print(f"-- loads {args.input!r}")
+     onx = onnx.load(args.input, load_external_data=False)
+     if args.verbose:
+         print("-- converts into dot")
+     dot = to_dot(onx)
+     if args.output:
+         if args.verbose:
+             print(f"-- saves into {args.output}")
+         with open(args.output, "w") as f:
+             f.write(dot)
+     else:
+         print(dot)
+     if args.run:
+         assert args.output, "Cannot run dot without an output file."
+         cmds = ["dot", f"-T{args.run}", args.output, "-o", f"{args.output}.{args.run}"]
+         if args.verbose:
+             print(f"-- run {' '.join(cmds)}")
+         p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+         out, err = p.communicate()
+         if out:
+             print("--")
+             print(out)
+         if err:
+             print("--")
+             print(err)
+
+
  def get_parser_lighten() -> ArgumentParser:
      parser = ArgumentParser(
          prog="lighten",
@@ -624,6 +697,18 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
          ),
          action=_ParseDict,
      )
+     parser.add_argument(
+         "--save-ep",
+         default="",
+         help=textwrap.dedent(
+             """
+             saves the exported program with torch.export.save
+             and the input sets with torch.save,
+             so that the command line sbs can then be used to look for discrepancies
+             """
+         ),
+     )
+
      return parser
 
 
@@ -690,6 +775,7 @@ def _cmd_validate(argv: List[Any]):
              None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
          ),
          exporter_options=args.expop,
+         save_ep=args.save_ep,
      )
      print("")
      print("-- summary --")
@@ -1104,6 +1190,287 @@ def _cmd_agg(argv: List[Any]):
      print(f"Wrote {args.output!r}")
 
 
+ def get_parser_sbs() -> ArgumentParser:
+     parser = ArgumentParser(
+         prog="side-by-side (sbs)",
+         description=textwrap.dedent(
+             """
+             Compares the intermediate outputs of the exported program and
+             the exported onnx model. It assumes some names are common.
+             The exported program and the onnx model are executed in parallel.
+             The device is the one used to store the model and the inputs.
+             Where do discrepancies start? This command tries to answer that question.
+             """
+         ),
+         epilog=textwrap.dedent(
+             """
+             The command line expects the following files, saved with
+             the following functions; inputs is a dictionary of the model inputs.
+
+             - torch.export.save(ep: torch.export.ExportedProgram)
+             - torch.save(**inputs)
+             - onnx.save(...)
+
+             The Replay functionality is just a way to investigate a part of a model.
+             It saves the torch and onnx inputs, the torch outputs, and the minimal onnx model
+             which shares its inputs with the exported program.
+             This is used to investigate the discrepancies between the torch
+             model (through the exported program) and its onnx conversion.
+             This functionality dumps everything it can to disk
+             so that it can be replayed in a separate process.
+             """
+         ),
+     )
+     parser.add_argument(
+         "-i",
+         "--inputs",
+         type=str,
+         required=True,
+         help="model inputs saved with torch.save",
+     )
+     parser.add_argument(
+         "-e",
+         "--ep",
+         type=str,
+         required=True,
+         help=textwrap.dedent(
+             """
+             exported program saved with torch.export.save
+             """
+         ),
+     )
+     parser.add_argument(
+         "-m",
+         "--onnx",
+         type=str,
+         required=True,
+         help="exported model in onnx format",
+     )
+     parser.add_argument(
+         "-o",
+         "--output",
+         type=str,
+         required=True,
+         help="output file to store what the command line produces, "
+         "it should be an excel file",
+     )
+     parser.add_argument(
+         "--atol",
+         default=1e-5,
+         required=False,
+         help="absolute tolerance",
+     )
+     parser.add_argument(
+         "--rtol",
+         default=1e-5,
+         required=False,
+         help="relative tolerance",
+     )
+     parser.add_argument(
+         "-v",
+         "--verbose",
+         default=0,
+         required=False,
+         help="verbosity",
+     )
+     parser.add_argument(
+         "-r",
+         "--ratio",
+         default=100,
+         required=False,
+         help="Saves the result in an excel file every <ratio> nodes, default is 100.",
+     )
+     parser.add_argument(
+         "--first",
+         action=BooleanOptionalAction,
+         default=False,
+         help="First runs the whole model.",
+     )
+     parser.add_argument(
+         "-2",
+         "--second-run",
+         action=BooleanOptionalAction,
+         default=False,
+         help=textwrap.dedent(
+             """
+             Tries to run all onnx nodes with the torch results produced by the exported
+             program, then measures the discrepancies again. It can be used
+             to distinguish kernels introducing discrepancies from those just propagating them.
+             """
+         ),
+     )
+     parser.add_argument(
+         "--reset",
+         required=False,
+         default="",
+         help=textwrap.dedent(
+             """
+             List of result names separated by a comma. For those results,
+             the side-by-side will take torch results instead of onnx results
+             to compute the rest of the onnx model.
+             """
+         ),
+     )
+     parser.add_argument(
+         "-s",
+         "--replay-threshold",
+         type=float,
+         required=False,
+         default=1e18,
+         help="Triggers the replay if the discrepancies are higher than this value.",
+     )
+     parser.add_argument(
+         "-n",
+         "--replay-names",
+         required=False,
+         default="",
+         help="Triggers the replay if a result name is in this set of values (comma separated)",
+     )
+     parser.add_argument(
+         "-t",
+         "--replay-op-types",
+         required=False,
+         default="",
+         help="Triggers the replay if an onnx type is in this set of values (comma separated)",
+     )
+     parser.add_argument(
+         "-f",
+         "--replay-folder",
+         required=False,
+         default="replay",
+         help="If the replay is triggered, this defines the folder where everything is dumped.",
+     )
+
+     return parser
+
+
+ def _cmd_sbs(argv: List[Any]):
+     import pandas
+     import torch
+     from .helpers import flatten_object, max_diff, string_diff, string_type
+     from .torch_onnx.sbs import run_aligned
+     from .torch_onnx.sbs_dataclasses import ReplayConfiguration
+     from .reference import OnnxruntimeEvaluator
+
+     parser = get_parser_sbs()
+     args = parser.parse_args(argv[1:])
+
+     def _size(name):
+         s = os.stat(name).st_size
+         return f"{s / 2**20:1.3f} Mb"
+
+     print("-- side by side")
+     print(f"-- ep: {_size(args.ep)}: {args.ep}")
+     print(f"-- inputs: {_size(args.inputs)}: {args.inputs}")
+     print(f"-- onnx: {_size(args.onnx)}: {args.onnx}")
+     print(f"-- output: {args.output}")
+
+     print(f"-- load inputs {args.inputs!r}")
+     begin = time.perf_counter()
+     inputs = torch.load(args.inputs)
+     s = string_type(inputs, with_shape=True, with_device=True)
+     print(f"-- done in {time.perf_counter() - begin:1.1f}s - {s}")
+
+     if isinstance(inputs, dict) and len(inputs) == 2 and set(inputs) == {"args", "kwargs"}:
+         margs = inputs["args"]
+         mkwargs = inputs["kwargs"]
+     elif isinstance(inputs, tuple):
+         margs = inputs
+         mkwargs = {}
+     elif isinstance(inputs, dict):
+         margs = tuple()
+         mkwargs = inputs
+     else:
+         raise ValueError(
+             f"Unable to infer args, kwargs from inputs {string_type(inputs, with_shape=True)}"
+         )
+
+     print("-- import transformers.modeling_outputs to register serialization functions")
+     with contextlib.suppress(ImportError):
+         import transformers.modeling_outputs  # noqa: F401
+     print(f"-- load ep {args.ep!r}")
+     begin = time.perf_counter()
+     # We need to load the plugs.
+     from .torch_export_patches.patches.patch_transformers import get_transformers_plugs
+
+     plugs = get_transformers_plugs()
+     assert plugs, "Missing PLUGS for Qwen2.5"
+     ep = torch.export.load(args.ep)
+     print(f"-- done in {time.perf_counter() - begin:1.1f}s")
+
+     if args.first:
+         print("-- compare first, run ep")
+         print(f"-- args: {string_type(margs, with_shape=True, with_device=True)}")
+         print(f"-- mkwargs: {string_type(mkwargs, with_shape=True, with_device=True)}")
+         expected = ep.module()(*margs, **mkwargs)
+         print(f"-- expected: {string_type(expected, with_shape=True, with_device=True)}")
+         sess = OnnxruntimeEvaluator(args.onnx, whole=True)
+         onx_inputs = flatten_object([margs, mkwargs], drop_keys=True)
+         feeds = dict(zip(sess.input_names, onx_inputs))
+         print(f"-- feeds: {string_type(feeds, with_shape=True, with_device=True)}")
+         got = sess.run(None, feeds)
+         print(f"-- got: {string_type(got, with_shape=True, with_device=True)}")
+         diff = max_diff(expected, got, hist=[0.1])
+         print(f"-- diff: {string_diff(diff)}")
+         print("-- done")
+         del sess
+
+     print(f"-- load onnx {args.onnx!r}")
+     begin = time.perf_counter()
+     onx = onnx.load(args.onnx)
+     print(f"-- done in {time.perf_counter() - begin:1.1f}s")
+
+     replay_configuration = None
+     if args.replay_threshold < 1e6 or args.replay_names or args.replay_op_types:
+         replay_configuration = ReplayConfiguration(
+             threshold=args.replay_threshold,
+             selected_names=set(args.replay_names.split(",")) if args.replay_names else None,
+             selected_op_types=(
+                 set(args.replay_op_types.split(",")) if args.replay_op_types else None
+             ),
+             dump_folder=args.replay_folder,
+         )
+
+     print("-- starts side-by-side")
+     ratio = int(args.ratio)
+     data = []
+     for obs in run_aligned(
+         ep,
+         onx,
+         run_cls=OnnxruntimeEvaluator,  # type: ignore[arg-type]
+         atol=float(args.atol),
+         rtol=float(args.rtol),
+         verbose=int(args.verbose),
+         args=margs,
+         kwargs=mkwargs,
+         use_tensor=True,
+         reset_names=args.reset.split(","),
+         exc=False,
+         replay_configuration=replay_configuration,
+         run_onnx_with_torch_inputs=args.second_run,
+     ):
+         data.append(obs)
+         if (
+             obs.onnx_op_type != "initializer"
+             and obs.ep_target != "placeholder"
+             and len(data) % ratio == 0
+         ):
+             df = pandas.DataFrame(data).apply(
+                 lambda col: col.fillna("") if col.dtype == "object" else col
+             )
+             df.to_excel(args.output)
+     print(f"-- final saves into {args.output!r}")
+     df = (
+         pandas.DataFrame(data)
+         .apply(lambda col: col.fillna("") if col.dtype == "object" else col)
+         .dropna(axis=1, how="all")
+     )
+     df.to_excel(args.output, index=False)
+     print("-- done")
+
+
  def get_main_parser() -> ArgumentParser:
      parser = ArgumentParser(
          prog="onnx_diagnostic",
@@ -1116,10 +1483,12 @@ def get_main_parser() -> ArgumentParser:
 
          agg - aggregates statistics from multiple files
          config - prints a configuration for a model id
+         dot - converts an onnx model into dot format
          exportsample - produces code to export a model
          find - finds the node consuming or producing a result
          lighten - makes an onnx model lighter by removing the weights
          print - prints the model on standard output
+         sbs - compares an exported program and an onnx model
          stats - produces statistics on a model
          unlighten - restores an onnx model produced by the previous command
          validate - validates a model
@@ -1131,10 +1500,12 @@ def get_main_parser() -> ArgumentParser:
          choices=[
              "agg",
              "config",
+             "dot",
              "exportsample",
              "find",
              "lighten",
              "print",
+             "sbs",
              "stats",
              "unlighten",
              "validate",
@@ -1146,15 +1517,17 @@
 
  def main(argv: Optional[List[Any]] = None):
      fcts = dict(
+         agg=_cmd_agg,
+         config=_cmd_config,
+         dot=_cmd_dot,
+         exportsample=_cmd_export_sample,
+         find=_cmd_find,
          lighten=_cmd_lighten,
-         unlighten=_cmd_unlighten,
          print=_cmd_print,
-         find=_cmd_find,
-         config=_cmd_config,
-         validate=_cmd_validate,
+         sbs=_cmd_sbs,
          stats=_cmd_stats,
-         agg=_cmd_agg,
-         exportsample=_cmd_export_sample,
+         unlighten=_cmd_unlighten,
+         validate=_cmd_validate,
      )
 
      if argv is None:
@@ -1169,15 +1542,17 @@ def main(argv: Optional[List[Any]] = None):
          parser.parse_args(argv)
      else:
          parsers = dict(
+             agg=get_parser_agg,
+             config=get_parser_config,
+             dot=get_parser_dot,
+             exportsample=lambda: get_parser_validate("exportsample"),  # type: ignore[operator]
+             find=get_parser_find,
              lighten=get_parser_lighten,
-             unlighten=get_parser_unlighten,
              print=get_parser_print,
-             find=get_parser_find,
-             config=get_parser_config,
-             validate=get_parser_validate,
+             sbs=get_parser_sbs,
              stats=get_parser_stats,
-             agg=get_parser_agg,
-             exportsample=lambda: get_parser_validate("exportsample"),  # type: ignore[operator]
+             unlighten=get_parser_unlighten,
+             validate=get_parser_validate,
          )
          cmd = argv[0]
          if cmd not in parsers:
onnx_diagnostic/export/api.py
@@ -1,5 +1,6 @@
- from typing import Any, Dict, List, Sequence, Optional, Tuple, Union
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
  import torch
+ from .onnx_plug import EagerDirectReplacementWithOnnx
 
 
  def to_onnx(
@@ -14,6 +15,12 @@ def to_onnx(
      output_names: Optional[List[str]] = None,
      output_dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
      exporter: str = "onnx-dynamo",
+     exporter_kwargs: Optional[Dict[str, Any]] = None,
+     save_ep: Optional[str] = None,
+     optimize: bool = True,
+     use_control_flow_dispatcher: bool = False,
+     onnx_plugs: Optional[List[EagerDirectReplacementWithOnnx]] = None,
+     inline: bool = True,
  ) -> Any:
      """
      Common API for exporters. By default, the models are optimized to use the
@@ -32,6 +39,13 @@ def to_onnx(
      :param output_names: to change the output of the onnx model
      :param output_dynamic_shapes: to overwrite the dynamic shapes names
      :param exporter: exporter to use (``onnx-dynamo``, ``modelbuilder``, ``custom``)
+     :param exporter_kwargs: additional parameters sent to the exporter
+     :param save_ep: saves the exported program
+     :param optimize: optimizes the model
+     :param use_control_flow_dispatcher: uses the dispatcher created to support
+         custom loops (see :func:`onnx_diagnostic.export.control_flow_onnx.loop_for_onnx`)
+     :param onnx_plugs: parts of the code rewritten to be replaced by their onnx translation
+     :param inline: inlines local functions
      :return: the output of the selected exporter, usually a structure including
          an onnx model
 
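A usage sketch of the extended signature (toy module, illustrative file names; the comments restate the parameter documentation above):

    import torch
    from onnx_diagnostic.export.api import to_onnx

    class Tiny(torch.nn.Module):
        def forward(self, x):
            return x.sigmoid() * x

    epo = to_onnx(
        Tiny(),
        (torch.randn(4, 8),),
        filename="tiny.onnx",
        exporter="onnx-dynamo",
        save_ep="tiny_ep",   # the exported program is also written to tiny_ep.pt2
        optimize=True,       # run the onnxruntime fusions on the exported model
        inline=True,         # inline local functions before optimizing
    )
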
@@ -46,11 +60,74 @@ def to_onnx(
              exporter=exporter,
              filename=filename,
          )
+
+     Some examples using control flow are available in
+     :func:`onnx_diagnostic.export.control_flow_onnx.loop_for_onnx` or
+     :class:`onnx_diagnostic.export.onnx_plug.EagerDirectReplacementWithOnnx`.
      """
+     if exporter_kwargs and "inline" in exporter_kwargs:
+         assert (
+             inline == exporter_kwargs["inline"]
+         ), f"Mismatch between inline={inline} and exporter_kwargs={exporter_kwargs}"
+         exporter_kwargs.pop("inline")
      if exporter == "custom":
-         from experimental_experiment.torch_interpreter import to_onnx as _to_onnx
+         from experimental_experiment.torch_interpreter import (
+             to_onnx as _to_onnx,
+             ExportOptions,
+         )
          from experimental_experiment.xbuilder import OptimizationOptions
 
+         options = None
+         if exporter_kwargs is not None:
+             options = exporter_kwargs.pop("options", None)
+         if options is None:
+             options = OptimizationOptions(patterns="default+onnxruntime")
+         if onnx_plugs or use_control_flow_dispatcher:
+             from experimental_experiment.torch_interpreter import Dispatcher
+
+             if use_control_flow_dispatcher:
+                 from .control_flow_onnx import create_global_dispatcher
+
+                 control_flow_dispatcher = create_global_dispatcher()
+             else:
+                 control_flow_dispatcher = None
+
+             class MainDispatcher(Dispatcher):
+                 def __init__(self, previous_dispatcher=None):
+                     super().__init__({})
+                     self.previous_dispatcher = previous_dispatcher
+
+                 @property
+                 def supported(self):
+                     if self.previous_dispatcher:
+                         return (
+                             set(self.registered_functions) | self.previous_dispatcher.supported
+                         )
+                     return set(self.registered_functions)
+
+                 def find_function(self, name: Any):
+                     if self.previous_dispatcher:
+                         find = self.previous_dispatcher.find_function(name)
+                         if find:
+                             return find
+                     return Dispatcher.find_function(self, name)
+
+                 def find_method(self, name: Any):
+                     if self.previous_dispatcher:
+                         find = self.previous_dispatcher.find_method(name)
+                         if find:
+                             return find
+                     return Dispatcher.find_method(self, name)
+
+             main_dispatcher = MainDispatcher(control_flow_dispatcher)
+             if onnx_plugs:
+                 for plug in onnx_plugs:
+                     main_dispatcher.registered_functions[plug.target_name] = (
+                         plug.custom_converter()
+                     )
+         else:
+             main_dispatcher = None
+
          return _to_onnx(
              mod,
              args=args,
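
For clarity, the lookup order implemented by MainDispatcher (ask the previous dispatcher first, fall back to the local registry) in a standalone sketch, independent of experimental_experiment:

    from typing import Callable, Dict, Optional

    class ChainedLookup:
        # Mimics MainDispatcher.find_function: the previous lookup wins,
        # the local table is the fallback.
        def __init__(self, table: Dict[str, Callable], previous: Optional["ChainedLookup"] = None):
            self.table = table
            self.previous = previous

        def find(self, name: str) -> Optional[Callable]:
            if self.previous:
                found = self.previous.find(name)
                if found:
                    return found
            return self.table.get(name)

    base = ChainedLookup({"loop_for_onnx": lambda: "control-flow converter"})
    chained = ChainedLookup({"my_plug": lambda: "plug converter"}, previous=base)
    assert chained.find("loop_for_onnx")() == "control-flow converter"
    assert chained.find("my_plug")() == "plug converter"
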
@@ -63,14 +140,24 @@
              dynamic_shapes=dynamic_shapes,
              large_model=True,
              output_dynamic_shapes=output_dynamic_shapes,
-             options=OptimizationOptions(patterns="default+onnxruntime"),
+             export_options=ExportOptions(save_ep=save_ep),
+             options=options,
+             inline=inline,
+             dispatcher=main_dispatcher,
+             **(exporter_kwargs or {}),
          )
+
      if exporter in ("dynamo", "onnx-dynamo"):
+         import os
          import onnxscript.rewriter.ort_fusions as ort_fusions
 
          assert (
              not output_dynamic_shapes
          ), f"output_dynamic_shapes not supported for exporter={exporter!r}"
+         custom_translation_table = {}
+         if onnx_plugs:
+             for plug in onnx_plugs:
+                 custom_translation_table[plug.torch_op] = plug.onnx_dynamo_converter()
          epo = torch.onnx.export(
              mod,
              args=args or tuple(),
@@ -80,9 +167,34 @@
              opset_version=target_opset,
              dynamic_shapes=dynamic_shapes,
              dynamo=True,
+             verbose=verbose,
+             dump_exported_program=bool(save_ep),
+             artifacts_dir=os.path.dirname(filename) if filename else ".",
+             custom_translation_table=custom_translation_table,
+             **(exporter_kwargs or {}),
          )
-         ort_fusions.optimize_for_ort(epo.model)
-         epo.save(filename)
+         if not inline and optimize:
+             ort_fusions.optimize_for_ort(epo.model)
+
+         if onnx_plugs:
+             import onnx_ir as ir
+             import onnx_ir.passes.common as common_passes
+
+             irfunctions = [ir.from_proto(plug.function_proto) for plug in onnx_plugs]
+             for func in irfunctions:
+                 epo.model.functions[func.identifier()] = func
+             if inline:
+                 common_passes.InlinePass()(epo.model)
+                 common_passes.RemoveUnusedOpsetsPass()(epo.model)
+
+         if inline and optimize:
+             ort_fusions.optimize_for_ort(epo.model)
+         if filename:
+             epo.save(filename, external_data=True)
+         if save_ep:
+             if isinstance(save_ep, tuple):
+                 save_ep = save_ep[0]
+             torch.export.save(epo.exported_program, f"{save_ep}.pt2")
          return epo
 
      if exporter == "modelbuilder":
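
The plug handling in the branch above registers each plug's FunctionProto on the model and inlines it. A standalone sketch of those same onnx_ir calls on a toy function (the Double function and custom_domain are purely illustrative):

    import onnx
    import onnx.helper as oh
    import onnx_ir as ir
    import onnx_ir.passes.common as common_passes

    # A toy FunctionProto standing in for plug.function_proto.
    double = oh.make_function(
        "custom_domain", "Double", ["x"], ["y"],
        [oh.make_node("Add", ["x", "x"], ["y"])],
        opset_imports=[oh.make_opsetid("", 18)],
    )
    graph = oh.make_graph(
        [oh.make_node("Double", ["x"], ["y"], domain="custom_domain")],
        "g",
        [oh.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [2])],
        [oh.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [2])],
    )
    model = ir.from_proto(oh.make_model(
        graph,
        opset_imports=[oh.make_opsetid("", 18), oh.make_opsetid("custom_domain", 1)],
    ))

    # Same steps as above: register the function, inline it, drop the unused opset.
    func = ir.from_proto(double)
    model.functions[func.identifier()] = func
    common_passes.InlinePass()(model)
    common_passes.RemoveUnusedOpsetsPass()(model)
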
@@ -117,6 +229,7 @@
              precision=str(first_float[0].dtype).split(".")[-1],
              execution_provider="cuda" if first.is_cuda else "cpu",
              cache_dir=os.path.dirname(filename),
+             **(exporter_kwargs or {}),
          )
          save_model_builder(onx, os.path.dirname(filename))
          return onx