onnx-diagnostic 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
  Functions, classes to dig into a model when this one is right, slow, wrong...
  """

- __version__ = "0.8.4"
+ __version__ = "0.8.6"
  __author__ = "Xavier Dupré"
@@ -198,15 +198,19 @@ def get_parser_print() -> ArgumentParser:
  )
  parser.add_argument(
  "fmt",
- choices=["pretty", "raw", "text", "printer"],
+ choices=["dot", "pretty", "printer", "raw", "shape", "text"],
  default="pretty",
  help=textwrap.dedent(
  """
  Prints out a model on the standard output.
- raw - just prints the model with print(...)
- printer - onnx.printer.to_text(...)
+
+ dot - converts the graph into dot
  pretty - an improved rendering
+ printer - onnx.printer.to_text(...)
+ raw - just prints the model with print(...)
+ shape - prints every node with input and output shapes
  text - uses GraphRendering
+
  """.strip(
  "\n"
  )
@@ -232,6 +236,14 @@ def _cmd_print(argv: List[Any]):
  from .helpers.graph_helper import GraphRendering

  print(GraphRendering(onx).text_rendering())
+ elif args.fmt == "shape":
+ from experimental_experiment.xbuilder import GraphBuilder
+
+ print(GraphBuilder(onx).pretty_text())
+ elif args.fmt == "dot":
+ from .helpers.dot_helper import to_dot
+
+ print(to_dot(onx))
  else:
  raise ValueError(f"Unexpected value fmt={args.fmt!r}")

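The new branches reuse the dispatch pattern of the existing formats. As a rough sketch of how the extended command could be exercised, assuming the module path below (the diff does not name the file) and a model positional argument whose exact name this hunk does not show:

    # module path is an assumption; the diff does not name the file
    from onnx_diagnostic._command_lines_parser import main

    main(["print", "shape", "model.onnx"])  # new: every node with input and output shapes
    main(["print", "dot", "model.onnx"])    # new: dot rendering of the graph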
@@ -517,12 +529,12 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
  nargs="*",
  help=textwrap.dedent(
  """
- Applies patches before exporting, it can be a boolean
- to enable to disable the patches or be more finetuned
- (default is True). It is possible to disable patch for torch
- by adding:
- --patch "patch_sympy=False" --patch "patch_torch=False"
- """.strip(
+ Applies patches before exporting. It can be a boolean
+ to enable or disable the patches, or be more fine-tuned
+ (default is True). It is possible to disable the patches for torch
+ by adding:
+ --patch "patch_sympy=False" --patch "patch_torch=False"
+ """.strip(
  "\n"
  )
  ),
@@ -1496,6 +1508,50 @@ def _cmd_sbs(argv: List[Any]):
  print("-- done")


+ def get_parser_compare() -> ArgumentParser:
+ parser = ArgumentParser(
+ prog="compare",
+ description=textwrap.dedent(
+ """
+ Compares two onnx models by aligning the nodes between both models.
+ This is done through an edit distance.
+ """
+ ),
+ epilog=textwrap.dedent(
+ """
+ Each element (initializer, input, node, output) of the model
+ is converted into an observation. Then it defines a distance between
+ two elements. And finally, it finds the best alignment with
+ an edit distance.
+ """
+ ),
+ )
+ parser.add_argument("model1", type=str, help="first model to compare")
+ parser.add_argument("model2", type=str, help="second model to compare")
+ return parser
+
+
+ def _cmd_compare(argv: List[Any]):
+ import onnx
+ from .torch_onnx.compare import ObsCompare, ObsComparePair
+
+ parser = get_parser_compare()
+ args = parser.parse_args(argv[1:])
+ print(f"-- loading {args.model1!r}")
+ seq1 = ObsCompare.obs_sequence_from_model(onnx.load(args.model1, load_external_data=False))
+ print(f"-- loading {args.model2!r}")
+ seq2 = ObsCompare.obs_sequence_from_model(onnx.load(args.model2, load_external_data=False))
+ print("-- starts comparison")
+ dist, _path, pair_cmp = ObsComparePair.distance_sequence(seq1, seq2)
+ print(f"-- done with distance {dist}")
+ print(ObsComparePair.to_str(pair_cmp))
+
+
+ #############
+ # main parser
+ #############
+
+
  def get_main_parser() -> ArgumentParser:
  parser = ArgumentParser(
  prog="onnx_diagnostic",
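The epilog above describes a classic sequence alignment. A minimal sketch of the underlying dynamic program, not the actual ObsComparePair implementation; obs_distance and the gap cost below are placeholder assumptions:

    from typing import Any, Callable, List

    def align_cost(
        seq1: List[Any],
        seq2: List[Any],
        obs_distance: Callable[[Any, Any], float],
        gap: float = 1.0,
    ) -> float:
        # d[i][j] = cost of the best alignment of seq1[:i] against seq2[:j]
        n, m = len(seq1), len(seq2)
        d = [[0.0] * (m + 1) for _ in range(n + 1)]
        for i in range(1, n + 1):
            d[i][0] = i * gap
        for j in range(1, m + 1):
            d[0][j] = j * gap
        for i in range(1, n + 1):
            for j in range(1, m + 1):
                d[i][j] = min(
                    d[i - 1][j] + gap,  # element of seq1 left unmatched
                    d[i][j - 1] + gap,  # element of seq2 left unmatched
                    d[i - 1][j - 1] + obs_distance(seq1[i - 1], seq2[j - 1]),
                )
        return d[n][m]

The _path returned by ObsComparePair.distance_sequence is presumably the traceback over such a table, pairing the aligned elements.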
@@ -1543,6 +1599,7 @@ def get_main_parser() -> ArgumentParser:
  def main(argv: Optional[List[Any]] = None):
  fcts = dict(
  agg=_cmd_agg,
+ compare=_cmd_compare,
  config=_cmd_config,
  dot=_cmd_dot,
  exportsample=_cmd_export_sample,
@@ -1568,6 +1625,7 @@ def main(argv: Optional[List[Any]] = None):
  else:
  parsers = dict(
  agg=get_parser_agg,
+ compare=get_parser_compare,
  config=get_parser_config,
  dot=get_parser_dot,
  exportsample=lambda: get_parser_validate("exportsample"),  # type: ignore[operator]
File without changes
@@ -0,0 +1,430 @@
+ import datetime
+ import os
+ import time
+ import subprocess
+ from argparse import ArgumentParser, BooleanOptionalAction
+ from typing import Any, Dict, List, Tuple
+ import onnx
+
+
+ def get_versions():
+ """
+ Returns the versions of the packages currently used.
+ The output is a dictionary.
+ Imports are delayed to keep the command line fast at startup.
+ """
+ import onnx
+ import onnx_diagnostic
+ import onnxruntime
+ import torch
+ import transformers
+
+ return {
+ "transformers": transformers.__version__,
+ "onnxruntime": onnxruntime.__version__,
+ "onnx": onnx.__version__,
+ "onnx-diagnostic": onnx_diagnostic.__version__,
+ "torch": torch.__version__,
+ }
+
+
+ def get_torch_dtype_from_command_line_args(dtype: str) -> "torch.dtype":  # noqa: F821
+ """
+ Returns the torch dtype based on the argument provided on the command line.
+
+ Imports are delayed to be faster when running the help of the command line.
+ """
+ import torch
+
+ torch_dtype = {
+ "float16": torch.float16,
+ "bfloat16": torch.bfloat16,
+ "float32": torch.float32,
+ "fp16": torch.float16,
+ "bf16": torch.bfloat16,
+ "fp32": torch.float32,
+ }
+ assert (
+ dtype in torch_dtype
+ ), f"Unexpected dtype {dtype!r}, not found in {set(torch_dtype)}."
+ return torch_dtype[dtype]
+
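Under the mapping above, the short and long spellings of each precision resolve to the same torch dtype; a quick check, assuming the function defined above is in scope:

    import torch

    assert get_torch_dtype_from_command_line_args("bf16") is torch.bfloat16
    assert get_torch_dtype_from_command_line_args("bfloat16") is torch.bfloat16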
+
+ def get_parser(name: str) -> ArgumentParser:
+ """Creates a default parser for many models."""
+ parser = ArgumentParser(
+ prog=name, description=f"""Export command line for model {name!r}."""
+ )
+ parser.add_argument(
+ "-m",
+ "--mid",
+ type=str,
+ default="Qwen/Qwen2.5-VL-7B-Instruct",
+ help="model id, default is Qwen/Qwen2.5-VL-7B-Instruct",
+ )
+ parser.add_argument("-d", "--device", default="cpu", help="Device, cpu (default) or cuda.")
+ parser.add_argument(
+ "-t", "--dtype", default="float32", help="dtype, float32 (default) or float16"
+ )
+ parser.add_argument(
+ "-e", "--exporter", default="onnx-dynamo", help="exporter, default is onnx-dynamo"
+ )
+ parser.add_argument(
+ "--pretrained",
+ default=True,
+ help="use a pretrained model or a random model",
+ action=BooleanOptionalAction,
+ )
+ parser.add_argument(
+ "--second-input",
+ default=True,
+ help="check discrepancies with other inputs",
+ action=BooleanOptionalAction,
+ )
+ parser.add_argument(
+ "--zip",
+ default=False,
+ help="Creates a .zip file with the onnx file and its data file.",
+ action=BooleanOptionalAction,
+ )
+ parser.add_argument(
+ "-o",
+ "--output-folder",
+ default="dump_models",
+ help="Folder where to put the results.",
+ )
+ parser.add_argument(
+ "-x",
+ "--existing-onnx",
+ default="",
+ help="If an onnx file exists, only measures the discrepancies.",
+ )
+ parser.add_argument(
+ "-p",
+ "--part",
+ default="visual",
+ help="part of the model to export",
+ )
+ parser.add_argument(
+ "-a",
+ "--atol",
+ type=float,
+ default=1.0,
+ help="fails if the maximum discrepancy is above that threshold",
+ )
+ parser.add_argument(
+ "--mismatch01",
+ type=float,
+ default=0.1,
+ help="fails if the ratio of mismatches at level 0.1 is above that threshold",
+ )
+ parser.add_argument(
+ "--profile-exporter",
+ default=False,
+ help="Profiles the exporter and outputs an html document from pyinstrument",
+ action=BooleanOptionalAction,
+ )
+ return parser
+
+
+ def remove_inplace_body_last_input_output_type_for_loop_because_they_might_be_sequences(
+ filename: str,
+ ):
+ """
+ Modifies an onnx file in place. It wipes out the shapes provided
+ in ``model.graph.value_info`` because they are wrong when a Loop outputs
+ a sequence. It also removes the types of the last input and output of the
+ 'body' attribute of a Loop operator because they may be declared as tensors
+ when sequences are expected.
+ This should not be needed in the future.
+ """
+ model = onnx.load(filename, load_external_data=False)
+ for node in model.graph.node:
+ if node.op_type == "Loop":
+ g = node.attribute[0].g
+ g.input[-1].type.CopyFrom(onnx.TypeProto())
+ g.output[-1].type.CopyFrom(onnx.TypeProto())
+ del model.graph.value_info[:]
+ model = onnx.shape_inference.infer_shapes(model)
+ onnx.save(model, filename, save_as_external_data=False)
+
+
+ def simplify_model_id_for_a_filename(model_id: str) -> str:
+ """Changes a model id so that it can be used in a filename."""
+ return model_id.lower().replace("/", ".")
+
+
+ def compute_expected_outputs(
+ output_filename: str, model_to_export: "torch.nn.Module", input_filename: str  # noqa: F821
+ ) -> Tuple[Any, List[Any], List[float]]:
+ """
+ Computes the expected outputs for a model.
+
+ It caches the expected outputs in a file. They are restored if the file exists
+ or computed and saved if not.
+
+ Imports are delayed to be faster when running the help of the command line.
+ """
+ import tqdm
+ import torch
+ from ..helpers import string_type
+
+ inputs = torch.load(input_filename, weights_only=False)
+ export_inputs = inputs["export_inputs"]
+ other_inputs = inputs["other_inputs"]
+
+ if os.path.exists(output_filename):
+ print(f"-- restore expected outputs from {output_filename!r}")
+ expected = torch.load(output_filename, weights_only=False)
+ export_expected = expected["export_expected"]
+ other_expected = expected["other_expected"]
+ durations = expected["durations"]
+ else:
+ print(
+ f"-- compute with inputs: "
+ f"{string_type(export_inputs, with_shape=True, with_device=True)}"
+ )
+ export_expected = model_to_export(**export_inputs)
+ print(f"-- got: {string_type(export_expected, with_shape=True)}")
+ print(
+ f"-- compute with inputs: "
+ f"{string_type(other_inputs, with_shape=True, with_device=True)}"
+ )
+ other_expected = []
+ durations = []
+ for other in tqdm.tqdm(other_inputs):
+ begin = time.perf_counter()
+ expected = model_to_export(**other)
+ other_expected.append(expected)
+ durations.append(time.perf_counter() - begin)
+ print(f"-- got: {string_type(other_expected, with_shape=True, with_device=True)}")
+
+ expected = dict(
+ export_expected=export_expected,
+ other_expected=other_expected,
+ durations=durations,
+ )
+ print(f"-- dump expected outputs into {output_filename!r}")
+ torch.save(expected, output_filename)
+ print(f"-- computation took {sum(durations)}")
+ print(
+ f"-- export_expected={string_type(export_expected, with_shape=True, with_device=True)}"
+ )
+ print(
+ f"-- other_expected={string_type(other_expected, with_shape=True, with_device=True)}"
+ )
+ return export_expected, other_expected, durations
+
+
+ def check_for_discrepancies_and_log_everything_into_a_json_file(
+ agg_stat_file: str,
+ stat_file: str,
+ export_duration: float,
+ device: str,
+ model_file: str,
+ cached_inputs: str,
+ cached_expected_outputs: str,
+ main_info: Dict[str, Any],
+ atol: float,
+ mismatch01: float,
+ ):
+ """
+ Checks discrepancies for a specific model.
+
+ Imports are delayed to be faster when running the help of the command line.
+
+ :param agg_stat_file: a file where the discrepancies are collected, this is used to
+ produce a table to make it easier to compare across types, devices, ...
+ :param stat_file: discrepancy results are dumped into that file
+ :param export_duration: export duration
+ :param device: targeted device (to select the onnxruntime provider)
+ :param model_file: onnx model file
+ :param cached_inputs: inputs saved with :func:`torch.save` and
+ restored with :func:`torch.load`,
+ needs to contain `export_inputs` (to check the model is valid),
+ and `other_inputs`, other sets of inputs to measure the discrepancies
+ and the speed-up (rough estimation)
+ :param cached_expected_outputs: expected outputs saved with :func:`torch.save`
+ and restored with :func:`torch.load`,
+ needs to contain `export_expected` (to check the model is valid),
+ and `other_expected`, other sets of outputs to measure the discrepancies
+ and the speed-up (rough estimation)
+ :param main_info: a dictionary with values used to tell which version, device, ...
+ :param atol: assert if the tolerance is above this
+ :param mismatch01: assert if the ratio of mismatches is above that threshold
+ """
+ import tqdm
+ import onnxruntime
+ import torch
+ from ..helpers import flatten_object, max_diff, string_type, string_diff
+
+ cached = (
+ torch.load(cached_inputs, weights_only=False),
+ torch.load(cached_expected_outputs, weights_only=False),
+ )
+ durations = cached[0].get("durations", [])
+ export_inputs = cached[0]["export_inputs"]
+ other_inputs = cached[0]["other_inputs"]
+ export_expected = cached[1]["export_expected"]
+ other_expected = cached[1]["other_expected"]
+
+ onx = onnx.load(model_file, load_external_data=False)
+ opsets = [d for d in onx.opset_import if d.domain == ""]
+ assert (
+ opsets
+ ), f"Unable to find standard opset in file {model_file!r}, opsets={onx.opset_import}"
+ opset = opsets[0].version
+
+ with open(stat_file, "w") as f:
+
+ def fprint(s):
+ print(s)
+ f.write(f"{s}\n")
+
+ fprint(f"-- export duration: {export_duration}")
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+ if device == "cpu":
+ providers = providers[1:]
+ fprint(f"-- checking discrepancies with providers={providers!r}")
+ fprint(f"-- model_file={model_file!r}")
+ sess = onnxruntime.InferenceSession(model_file, providers=providers)
+
+ fprint(
+ f"-- export_inputs {string_type(export_inputs, with_shape=True, with_device=True)}"
+ )
+ fprint(
+ f"-- export_expected "
+ f"{string_type(export_expected, with_shape=True, with_device=True)}"
+ )
+ feeds = dict(
+ zip(
+ [i.name for i in sess.get_inputs()],
+ [
+ v.detach().cpu().numpy()
+ for v in flatten_object(export_inputs, drop_keys=True)
+ ],
+ )
+ )
+ small = sess.run(None, feeds)
+ flat_export_expected = flatten_object(export_expected, drop_keys=True)
+ diff = max_diff(flat_export_expected, small, hist=[0.1, 0.01])
+ fprint(f"-- discrepancies={diff}")
+ assert diff["abs"] <= atol and diff["rep"][">0.1"] / diff["n"] <= mismatch01, (
+ f"absolute tolerance is above {atol} or the number of mismatches is above "
+ f"{mismatch01}, discrepancies={string_diff(diff)}"
+ )
+
+ if other_inputs and other_expected:
+ feeds = [
+ dict(
+ zip(
+ [i.name for i in sess.get_inputs()],
+ [
+ v.detach().cpu().numpy()
+ for v in flatten_object(inputs, drop_keys=True)
+ ],
+ )
+ )
+ for inputs in other_inputs
+ ]
+ fprint("")
+ fprint(f"-- inputs {string_type(feeds, with_shape=True, with_device=True)}")
+ fprint(
+ f"-- expected {string_type(other_expected, with_shape=True, with_device=True)}"
+ )
+ begin = time.perf_counter()
+ gots = []
+ for feed in tqdm.tqdm(feeds):
+ gots.append(sess.run(None, feed))
+ oduration = time.perf_counter() - begin
+ fprint(
+ f"-- torch duration={sum(durations[:len(gots)])}, onnx duration={oduration}, "
+ f"speedup={sum(durations[:len(gots)])/oduration} n={len(gots)}"
+ )
+
+ info = {
+ **main_info,
+ "timestamp": datetime.datetime.now().isoformat(),
+ "export_duration": export_duration,
+ "latency_torch": sum(durations[: len(gots)]),
+ "latency_ort": oduration,
+ "speedup": sum(durations[: len(gots)]) / oduration,
+ "latency_ort_n": len(gots),
+ "opset": opset,
+ **get_versions(),
+ }
+ with open(agg_stat_file, "a") as fs:
+ for fe, e, b in zip(feeds, other_expected, gots):
+ flat_e = flatten_object(e, drop_keys=True)
+ se = string_type(fe, with_shape=True)
+ diff = max_diff(flat_e, b, hist=[0.1, 0.01])
+ assert (
+ diff["abs"] <= atol and diff["rep"][">0.1"] / diff["n"] <= mismatch01
+ ), (
+ f"absolute tolerance is above {atol} or the number of mismatches is "
+ f"above {mismatch01}, discrepancies={string_diff(diff)}"
+ )
+ js = string_diff(diff, js=True, ratio=True, inputs=se, **info)
+ fs.write(js)
+ fs.write("\n")
+ fprint(f"-- inputs={se} -- {js}")
+
+ if os.path.exists(agg_stat_file):
+ print(f"-- statistics from {agg_stat_file!r}")
+ import pandas
+
+ df = pandas.read_json(agg_stat_file, lines=True)
+ first = [
+ "timestamp",
+ "model_id",
+ "pretrained",
+ "part",
+ "device",
+ "dtype",
+ "attention",
+ "opset",
+ ]
+ index = [*first[1:], "exporter"]
+ df = df[[*first, *[c for c in df.columns if c not in set(first)]]]
+ df.to_excel(agg_stat_file + ".xlsx")
+
+ values = [
+ "abs",
+ "%>0.1",
+ "%>0.01",
+ "export_duration",
+ "speedup",
+ "latency_torch",
+ "latency_ort_n",
+ ]
+ agg = {
+ **{c: "max" for c in values if c != "speedup"},
+ "speedup": "min",
+ }
+ stat = df[[*index, *values]].groupby(index, dropna=False).agg(agg)
+ stat.to_excel(agg_stat_file + ".agg.xlsx")
+ stat = (
+ df[df.exporter != "custom"][[*index, *values]]
+ .groupby(index, dropna=False)
+ .agg(agg)
+ )
+ stat.to_excel(agg_stat_file + ".agg.onnx-dynamo.xlsx")
+
+
+ def zip_model_and_data_into_a_single_file(zip_file: str, model_file: str):
+ """
+ Zips an onnx model and its data into a single file.
+
+ :param zip_file: zip file to create
+ :param model_file: onnx file
+ """
+ print()
+ print(f"-- make file {zip_file!r}")
+ cmd = ["zip", "-v", "-1", zip_file]
+ for name in [model_file, f"{model_file}.data"]:
+ print(f"-- add {name!r}")
+ cmd.append(name)
+ print(f"-- cmd: {' '.join(cmd)}")
+ subprocess.run(cmd, check=True)
+ print("-- done.")