onnx-diagnostic 0.7.11__py3-none-any.whl → 0.7.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +5 -2
  3. onnx_diagnostic/export/dynamic_shapes.py +11 -2
  4. onnx_diagnostic/helpers/helper.py +11 -5
  5. onnx_diagnostic/helpers/log_helper.py +65 -12
  6. onnx_diagnostic/helpers/mini_onnx_builder.py +17 -0
  7. onnx_diagnostic/helpers/model_builder_helper.py +1 -0
  8. onnx_diagnostic/helpers/rt_helper.py +55 -37
  9. onnx_diagnostic/helpers/torch_helper.py +31 -7
  10. onnx_diagnostic/reference/torch_evaluator.py +2 -2
  11. onnx_diagnostic/tasks/data/__init__.py +13 -0
  12. onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx +0 -0
  13. onnx_diagnostic/tasks/image_text_to_text.py +256 -141
  14. onnx_diagnostic/tasks/text_generation.py +15 -0
  15. onnx_diagnostic/torch_export_patches/eval/__init__.py +177 -150
  16. onnx_diagnostic/torch_export_patches/eval/model_cases.py +19 -1
  17. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +40 -14
  18. onnx_diagnostic/torch_export_patches/patch_inputs.py +10 -6
  19. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +116 -10
  20. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +269 -4
  21. onnx_diagnostic/torch_models/hghub/hub_api.py +4 -10
  22. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +36 -0
  23. onnx_diagnostic/torch_models/hghub/model_inputs.py +32 -4
  24. onnx_diagnostic/torch_models/validate.py +337 -113
  25. onnx_diagnostic/torch_onnx/sbs.py +2 -1
  26. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/METADATA +11 -31
  27. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/RECORD +30 -28
  28. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/WHEEL +0 -0
  29. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/licenses/LICENSE.txt +0 -0
  30. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,10 @@
1
+ import gc
1
2
  import datetime
2
3
  import inspect
3
4
  import os
4
5
  import pprint
5
6
  import sys
6
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union
7
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
7
8
  import time
8
9
  import numpy as np
9
10
  import onnx
@@ -11,7 +12,7 @@ import torch
11
12
  from ..export import CoupleInputsDynamicShapes
12
13
  from ..helpers import max_diff, string_type, string_diff
13
14
  from ..helpers.helper import flatten_object
14
- from ..helpers.rt_helper import make_feeds
15
+ from ..helpers.rt_helper import make_feeds, reorder_modelbuilder_cache_to_torch
15
16
  from ..helpers.torch_helper import to_any, torch_deepcopy
16
17
  from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
17
18
  from ..tasks import random_input_kwargs
@@ -112,6 +113,9 @@ def _make_folder_name(
112
113
  device: Optional[Union[str, torch.device]] = None,
113
114
  subfolder: Optional[str] = None,
114
115
  opset: Optional[int] = None,
116
+ drop_inputs: Optional[List[str]] = None,
117
+ same_as_pretrained: bool = False,
118
+ use_pretrained: bool = False,
115
119
  ) -> str:
116
120
  "Creates a filename unique based on the given options."
117
121
  els = [model_id.replace("/", "_")]
@@ -137,6 +141,13 @@ def _make_folder_name(
137
141
  els.append(sdev)
138
142
  if opset is not None:
139
143
  els.append(f"op{opset}")
144
+ if drop_inputs:
145
+ ii = "-".join(f"{s[0]}{s[-1]}" for s in drop_inputs)
146
+ els.append(f"I-{ii.upper()}")
147
+ if use_pretrained:
148
+ els.append("TRAINED")
149
+ elif same_as_pretrained:
150
+ els.append("SAMESIZE")
140
151
  return "-".join(els)
141
152
 
142
153
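
Note: the three new options now contribute to the dump-folder name. A minimal standalone sketch of the naming rule added above (hypothetical helper written for illustration, not the package function itself):

    def folder_suffix(drop_inputs, same_as_pretrained, use_pretrained):
        els = []
        if drop_inputs:
            # first and last character of every dropped input name
            ii = "-".join(f"{s[0]}{s[-1]}" for s in drop_inputs)
            els.append(f"I-{ii.upper()}")
        if use_pretrained:
            els.append("TRAINED")
        elif same_as_pretrained:
            els.append("SAMESIZE")
        return "-".join(els)

    folder_suffix(["position_ids", "token_type_ids"], True, False)
    # -> 'I-PS-TS-SAMESIZE'
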
 
@@ -233,21 +244,35 @@ def _quiet_or_not_quiet(
233
244
  summary[f"{suffix}_output"] = string_type(res, with_shape=True, with_min_max=True)
234
245
  summary[f"{suffix}_warmup"] = warmup
235
246
  summary[f"{suffix}_repeat"] = repeat
236
- for _w in range(max(0, warmup - 1)):
247
+ last_ = None
248
+ end_w = max(0, warmup - 1)
249
+ for _w in range(end_w):
237
250
  t = fct()
238
- summary[f"io_{suffix}_{_w+1}"] = string_type(t, with_shape=True, with_min_max=True)
251
+ _ = string_type(t, with_shape=True, with_min_max=True)
252
+ if _ != last_ or _w == end_w - 1:
253
+ summary[f"io_{suffix}_{_w+1}"] = _
254
+ last_ = _
239
255
  summary[f"time_{suffix}_warmup"] = time.perf_counter() - begin
240
256
  times = []
241
257
  for _r in range(repeat):
242
258
  begin = time.perf_counter()
243
259
  t = fct()
244
260
  times.append(time.perf_counter() - begin)
245
- a = np.array(times)
261
+ a = np.array(times, dtype=np.float64)
262
+ a.sort()
263
+ i5 = max(1, a.shape[0] * 5 // 100)
264
+ i2 = max(1, a.shape[0] * 2 // 100)
246
265
  summary[f"time_{suffix}_latency"] = a.mean()
247
266
  summary[f"time_{suffix}_latency_std"] = a.std()
248
267
  summary[f"time_{suffix}_latency_min"] = a.min()
249
- summary[f"time_{suffix}_latency_min"] = a.max()
268
+ summary[f"time_{suffix}_latency_max"] = a.max()
269
+ summary[f"time_{suffix}_latency_098"] = a[-i2]
270
+ summary[f"time_{suffix}_latency_095"] = a[-i5]
271
+ summary[f"time_{suffix}_latency_005"] = a[i5]
272
+ summary[f"time_{suffix}_latency_002"] = a[i2]
250
273
  summary[f"time_{suffix}_n"] = len(a)
274
+ summary[f"time_{suffix}_latency_m98"] = a[i2:-i2].mean()
275
+
251
276
  return res
252
277
 
253
278
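
Note: warmup outputs are now only recorded when they change, and the timing loop reports trimmed percentiles in addition to mean/std/min/max (the old code also overwrote the min with the max, fixed above). A self-contained sketch of the new index arithmetic, using fake timings for illustration:

    import numpy as np

    a = np.abs(np.random.default_rng(0).normal(0.01, 0.002, 100))  # fake runtimes (s)
    a.sort()
    i5 = max(1, a.shape[0] * 5 // 100)   # ~5% tail index
    i2 = max(1, a.shape[0] * 2 // 100)   # ~2% tail index
    stats = {
        "latency": a.mean(),
        "latency_098": a[-i2],           # ~98th percentile
        "latency_095": a[-i5],           # ~95th percentile
        "latency_005": a[i5],
        "latency_002": a[i2],
        "latency_m98": a[i2:-i2].mean(), # mean with both 2% tails trimmed
    }
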
 
@@ -264,14 +289,18 @@ def shrink_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
264
289
  return new_cfg
265
290
 
266
291
 
267
- def _preprocess_model_id(model_id, subfolder):
292
+ def _preprocess_model_id(
293
+ model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
294
+ ) -> Tuple[str, Optional[str], bool, bool]:
268
295
  if subfolder or "//" not in model_id:
269
- return model_id, subfolder
296
+ return model_id, subfolder, same_as_pretrained, use_pretrained
270
297
  spl = model_id.split("//")
298
+ if spl[-1] == "pretrained":
299
+ return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
271
300
  if spl[-1] in {"transformer", "vae"}:
272
301
  # known subfolder
273
- return "//".join(spl[:-1]), spl[-1]
274
- return model_id, subfolder
302
+ return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
303
+ return model_id, subfolder, same_as_pretrained, use_pretrained
275
304
 
276
305
 
277
306
  def validate_model(
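
Note: a model id may carry a `//` suffix; `//transformer` and `//vae` still select a subfolder, and the new `//pretrained` suffix strips the suffix and switches both `same_as_pretrained` and `use_pretrained` on. Expected behaviour sketched from the hunk above, with example ids and the private helper imported only for illustration:

    from onnx_diagnostic.torch_models.validate import _preprocess_model_id

    _preprocess_model_id("some-org/some-model//pretrained", None, False, False)
    # -> ('some-org/some-model', '', True, True)

    _preprocess_model_id("some-org/some-diffuser//vae", None, False, False)
    # -> ('some-org/some-diffuser', 'vae', False, False)
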
@@ -351,9 +380,10 @@ def validate_model(
351
380
  ``orteval10``, ``ref`` only if `do_run` is true
352
381
  :param repeat: number of time to measure the model
353
382
  :param warmup: warmup the model first
354
- :param inputs2: checks that the second set of inputs is reunning as well,
383
+ :param inputs2: checks that other sets of inputs are running as well,
355
384
  this ensures that the model does support dynamism, the value is used
356
- as an increment to the first set of values (added to dimensions)
385
+ as an increment to the first set of values (added to dimensions),
386
+ or an empty cache for example
357
387
  :param output_names: output names the onnx exporter should use
358
388
  :param ort_logs: increases onnxruntime verbosity when creating the session
359
389
  :return: two dictionaries, one with some metrics,
@@ -383,14 +413,23 @@ def validate_model(
383
413
  :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
384
414
  if ``runtime == 'ref'``,
385
415
  ``orteval10`` increases the verbosity.
416
+
417
+ .. versionchanged:: 0.7.13
418
+ *inputs2* not only means a second set of inputs but many
419
+ such as ``input_empty_cache``
420
+ which refers to a set of inputs using an empty cache.
386
421
  """
387
- model_id, subfolder = _preprocess_model_id(model_id, subfolder)
422
+ validation_begin = time.perf_counter()
423
+ model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
424
+ model_id,
425
+ subfolder,
426
+ same_as_pretrained=same_as_pretrained,
427
+ use_pretrained=use_pretrained,
428
+ )
429
+ time_preprocess_model_id = time.perf_counter() - validation_begin
430
+ default_patch = dict(patch_transformers=True, patch_diffusers=True, patch=True)
388
431
  if isinstance(patch, bool):
389
- patch_kwargs = (
390
- dict(patch_transformers=True, patch_diffusers=True, patch=True)
391
- if patch
392
- else dict(patch=False)
393
- )
432
+ patch_kwargs = default_patch if patch else dict(patch=False)
394
433
  elif isinstance(patch, str):
395
434
  patch_kwargs = {"patch": True, **{p: True for p in patch.split(",")}} # noqa: C420
396
435
  else:
@@ -399,11 +438,13 @@ def validate_model(
399
438
  if "patch" not in patch_kwargs:
400
439
  if any(patch_kwargs.values()):
401
440
  patch_kwargs["patch"] = True
441
+ elif len(patch) == 1 and patch.get("patch", False):
442
+ patch_kwargs.update(default_patch)
402
443
 
403
444
  assert not rewrite or patch_kwargs.get("patch", False), (
404
445
  f"rewrite={rewrite}, patch={patch}, patch_kwargs={patch_kwargs} "
405
446
  f"patch must be True to enable rewriting, "
406
- f"if --no-patch was specified on the command line, --no-rewrite must be added."
447
+ f"if --patch=0 was specified on the command line, rewrites are disabled."
407
448
  )
408
449
  summary = version_summary()
409
450
  summary.update(
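
Note: `patch` accepts a bool, a comma-separated string, or a dict; the refactoring above factors the defaults into `default_patch` and, for a dict reduced to `{"patch": True}`, expands it to the full default. A hedged standalone sketch of the normalization (the dict branch is simplified, not the package code verbatim):

    DEFAULT_PATCH = dict(patch_transformers=True, patch_diffusers=True, patch=True)

    def normalize_patch(patch):
        if isinstance(patch, bool):
            return dict(DEFAULT_PATCH) if patch else dict(patch=False)
        if isinstance(patch, str):
            # e.g. "patch_transformers,patch_diffusers"
            return {"patch": True, **{p: True for p in patch.split(",")}}
        kwargs = dict(patch)
        if "patch" not in kwargs and any(kwargs.values()):
            kwargs["patch"] = True
        elif len(kwargs) == 1 and kwargs.get("patch", False):
            kwargs.update(DEFAULT_PATCH)
        return kwargs

    normalize_patch("patch_transformers")   # {'patch': True, 'patch_transformers': True}
    normalize_patch({"patch": True})        # expands to DEFAULT_PATCH
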
@@ -426,6 +467,7 @@ def validate_model(
426
467
  version_exporter=exporter or "",
427
468
  version_runtime=runtime,
428
469
  version_inputs2=inputs2,
470
+ time_preprocess_model_id=time_preprocess_model_id,
429
471
  )
430
472
  )
431
473
  if opset:
@@ -441,6 +483,9 @@ def validate_model(
441
483
  device=device,
442
484
  subfolder=subfolder,
443
485
  opset=opset,
486
+ drop_inputs=drop_inputs,
487
+ use_pretrained=use_pretrained,
488
+ same_as_pretrained=same_as_pretrained,
444
489
  )
445
490
  dump_folder = os.path.join(dump_folder, folder_name)
446
491
  if not os.path.exists(dump_folder):
@@ -473,7 +518,7 @@ def validate_model(
473
518
  mop = model_options or {}
474
519
  data = _quiet_or_not_quiet(
475
520
  quiet,
476
- "create",
521
+ "create_torch_model",
477
522
  summary,
478
523
  None,
479
524
  (
@@ -492,10 +537,9 @@ def validate_model(
492
537
  )
493
538
  ),
494
539
  )
495
- assert not inputs2 or "inputs2" in data, (
496
- f"inputs2 is True but second set is missing in data for "
497
- f"model id {model_id!r}: {sorted(data)}"
498
- )
540
+
541
+ second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
542
+
499
543
  if dump_folder:
500
544
  with open(os.path.join(dump_folder, "model_config.txt"), "w") as f:
501
545
  f.write(f"model_id: {model_id}\n------\n")
@@ -536,6 +580,11 @@ def validate_model(
536
580
  if verbose:
537
581
  print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}")
538
582
 
583
+ # modelbuilder needs different treatments sometimes, so
584
+ # we mark it for later usage.
585
+ # for example, it has different past_kv ordering than
586
+ # flattened CacheObject
587
+ data["exporter"] = exporter
539
588
  data["input_options"] = iop
540
589
  data["model_options"] = mop
541
590
  data["model_dump_folder"] = dump_folder
@@ -583,16 +632,14 @@ def validate_model(
583
632
  if verbose:
584
633
  print(f"[validate_model] new inputs: {string_type(data['inputs'])}")
585
634
  print(f"[validate_model] new dynamic_hapes: {string_type(data['dynamic_shapes'])}")
586
- if inputs2:
587
- assert (
588
- "inputs2" in data
589
- ), "Cannot test a second set of inputs as it was not defined."
590
- data["inputs2"], _ = filter_inputs(
591
- data["inputs2"],
592
- drop_names=drop_inputs,
593
- model=data["model"],
594
- dynamic_shapes=data["dynamic_shapes"],
595
- )
635
+ if second_input_keys:
636
+ for k in second_input_keys:
637
+ data[k], _ = filter_inputs(
638
+ data[k],
639
+ drop_names=drop_inputs,
640
+ model=data["model"],
641
+ dynamic_shapes=data["dynamic_shapes"],
642
+ )
596
643
 
597
644
  if not empty(dtype):
598
645
  if isinstance(dtype, str):
@@ -602,8 +649,9 @@ def validate_model(
602
649
  data["model"] = to_any(data["model"], dtype) # type: ignore
603
650
  data["inputs"] = to_any(data["inputs"], dtype) # type: ignore
604
651
  summary["model_dtype"] = str(dtype)
605
- if "inputs2" in data:
606
- data["inputs2"] = to_any(data["inputs2"], dtype) # type: ignore
652
+ if second_input_keys:
653
+ for k in second_input_keys:
654
+ data[k] = to_any(data[k], dtype) # type: ignore
607
655
 
608
656
  if not empty(device):
609
657
  if verbose:
@@ -611,11 +659,13 @@ def validate_model(
611
659
  data["model"] = to_any(data["model"], device) # type: ignore
612
660
  data["inputs"] = to_any(data["inputs"], device) # type: ignore
613
661
  summary["model_device"] = str(device)
614
- if "inputs2" in data:
615
- data["inputs2"] = to_any(data["inputs2"], device) # type: ignore
662
+ if second_input_keys:
663
+ for k in second_input_keys:
664
+ data[k] = to_any(data[k], device) # type: ignore
616
665
 
617
666
  for k in ["task", "size", "n_weights"]:
618
667
  summary[f"model_{k.replace('_','')}"] = data[k]
668
+ summary["second_input_keys"] = ",".join(second_input_keys)
619
669
  summary["model_inputs_options"] = str(input_options or "")
620
670
  summary["model_inputs"] = string_type(data["inputs"], with_shape=True)
621
671
  summary["model_shapes"] = string_type(data["dynamic_shapes"])
@@ -642,22 +692,37 @@ def validate_model(
642
692
  print(f"[validate_model] +INPUT {k}={string_type(v, with_shape=True)}")
643
693
  for k, v in data["dynamic_shapes"].items():
644
694
  print(f"[validate_model] +SHAPE {k}={string_type(v)}")
695
+ print(f"[validate_model] second_input_keys={second_input_keys}")
645
696
  print("[validate_model] --")
646
697
 
647
698
  if do_run:
699
+ validation_begin = time.perf_counter()
700
+
648
701
  _validate_do_run_model(
649
702
  data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet
650
703
  )
651
- if inputs2:
652
- _validate_do_run_model(
653
- data, summary, "inputs2", "run2", "run_expected2", verbose, 1, 0, quiet
654
- )
704
+ if second_input_keys:
705
+ for k in second_input_keys:
706
+ _validate_do_run_model(
707
+ data,
708
+ summary,
709
+ k,
710
+ f"run2{k[6:]}",
711
+ f"run_expected2{k[6:]}",
712
+ verbose,
713
+ 1,
714
+ 0,
715
+ quiet,
716
+ )
717
+
718
+ summary["time_total_validation_torch"] = time.perf_counter() - validation_begin
655
719
 
656
720
  if exporter:
657
721
  print(
658
722
  f"[validate_model] -- export the model with {exporter!r}, "
659
723
  f"optimization={optimization!r}"
660
724
  )
725
+ exporter_begin = time.perf_counter()
661
726
  if patch_kwargs:
662
727
  if verbose:
663
728
  print(
@@ -700,7 +765,9 @@ def validate_model(
700
765
  dump_folder=dump_folder,
701
766
  output_names=output_names,
702
767
  )
768
+
703
769
  summary.update(summary_export)
770
+ summary["time_total_exporter"] = time.perf_counter() - exporter_begin
704
771
 
705
772
  dump_stats = None
706
773
  if dump_folder:
@@ -741,6 +808,8 @@ def validate_model(
741
808
  data["onnx_filename"] = onnx_filename
742
809
  summary["time_onnx_save"] = duration
743
810
  summary.update(compute_statistics(onnx_filename))
811
+ del epo
812
+
744
813
  if verbose:
745
814
  print(f"[validate_model] dumps statistics in {dump_folder!r}...")
746
815
  dump_stats = os.path.join(dump_folder, f"{folder_name}.stats")
@@ -763,6 +832,20 @@ def validate_model(
763
832
  return summary, data
764
833
 
765
834
  if do_run:
835
+ # Let's move the model to CPU to make sure it frees GPU memory.
836
+ if verbose:
837
+ # It does not really work for the time being and the model
838
+ # gets loaded twice, one by torch, one by onnxruntime
839
+ print("[validation_model] -- delete the model")
840
+ for key in ["model", "onnx_program", "config"]:
841
+ if key in data:
842
+ del data[key]
843
+ if device is not None and "cuda" in str(device).lower():
844
+ torch.cuda.empty_cache()
845
+ gc.collect()
846
+ print("[validation_model] -- done")
847
+
848
+ validation_begin = time.perf_counter()
766
849
  summary_valid, data = validate_onnx_model(
767
850
  data=data,
768
851
  quiet=quiet,
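
Note: before the ONNX validation starts, the torch model is released so the weights are not held twice (once by torch, once by onnxruntime). The pattern from the block above, condensed into a hypothetical helper:

    import gc
    import torch

    def release_torch_model(data: dict, device) -> None:
        # Drop the references that keep the torch model alive, then reclaim memory
        # before onnxruntime loads its own copy of the weights.
        for key in ("model", "onnx_program", "config"):
            data.pop(key, None)
        if device is not None and "cuda" in str(device).lower():
            torch.cuda.empty_cache()
        gc.collect()
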
@@ -770,10 +853,11 @@ def validate_model(
770
853
  runtime=runtime,
771
854
  repeat=repeat,
772
855
  warmup=warmup,
773
- inputs2=inputs2,
856
+ second_input_keys=second_input_keys,
774
857
  ort_logs=ort_logs,
775
858
  )
776
859
  summary.update(summary_valid)
860
+ summary["time_total_validation_onnx"] = time.perf_counter() - validation_begin
777
861
 
778
862
  if ortfusiontype and "onnx_filename" in data:
779
863
  assert (
@@ -832,13 +916,17 @@ def validate_model(
832
916
  runtime=runtime,
833
917
  repeat=repeat,
834
918
  warmup=warmup,
835
- inputs2=inputs2,
919
+ second_input_keys=second_input_keys,
836
920
  )
837
921
  summary.update(summary_valid)
838
922
 
923
+ _compute_final_statistics(summary)
924
+ summary["time_total"] = time.perf_counter() - validation_begin
925
+
839
926
  if verbose:
840
927
  print("[validate_model] -- done (final)")
841
928
  if dump_stats:
929
+ # Dumps again the statistics.
842
930
  with open(dump_stats, "w") as f:
843
931
  for k, v in sorted(summary.items()):
844
932
  f.write(f":{k}:{v};\n")
@@ -848,15 +936,24 @@ def validate_model(
848
936
  def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
849
937
  """Computes some statistics on the model itself."""
850
938
  onx = onnx.load(onnx_filename, load_external_data=False)
939
+ cache_functions = {(f.domain, f.name): f for f in onx.functions}
940
+ local_domains = set(f.domain for f in onx.functions)
851
941
 
852
942
  def node_iter(proto):
853
943
  if isinstance(proto, onnx.ModelProto):
854
- yield from node_iter(proto.graph)
855
944
  for f in proto.functions:
856
945
  yield from node_iter(f)
946
+ yield from node_iter(proto.graph)
857
947
  elif isinstance(proto, (onnx.FunctionProto, onnx.GraphProto)):
858
948
  for node in proto.node:
859
949
  yield node
950
+
951
+ # Let's inline the function
952
+ key = node.domain, node.op_type
953
+ if key in cache_functions:
954
+ yield from node_iter(cache_functions[key])
955
+
956
+ # Let's continue
860
957
  for att in node.attribute:
861
958
  if att.type == onnx.AttributeProto.GRAPH:
862
959
  yield from node_iter(att.g)
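
Note: `compute_statistics` now also walks local functions and counts a function body again each time a node calls it, so the node counts reflect the "inlined" size of the model. A standalone sketch of that traversal, assuming only the standard onnx protos:

    import onnx

    def iter_nodes(model: onnx.ModelProto):
        # Walk local functions, the main graph, subgraphs, and "inline" a function
        # body every time a node calls it (sketch of the logic in the hunk above).
        functions = {(f.domain, f.name): f for f in model.functions}

        def _walk(proto):
            for node in proto.node:
                yield node
                key = (node.domain, node.op_type)
                if key in functions:
                    yield from _walk(functions[key])
                for att in node.attribute:
                    if att.type == onnx.AttributeProto.GRAPH:
                        yield from _walk(att.g)

        for f in model.functions:
            yield from _walk(f)
        yield from _walk(model.graph)
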
@@ -874,6 +971,11 @@ def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
874
971
  n_nodes += 1
875
972
  if proto.op_type != "Constant":
876
973
  n_nodes_nocst += 1
974
+ if proto.domain in local_domains:
975
+ key = "n_node_local_function"
976
+ if key not in counts:
977
+ counts[key] = 0
978
+ counts[key] += 1
877
979
  else:
878
980
  key = f"n_node_initializer_{proto.data_type}"
879
981
 
@@ -960,6 +1062,26 @@ def _validate_do_run_exported_program(data, summary, verbose, quiet):
960
1062
  )
961
1063
 
962
1064
 
1065
+ _cache_export_times = []
1066
+ _main_export_function = torch.export.export
1067
+
1068
+
1069
+ def _torch_export_export(*args, _export=_main_export_function, **kwargs):
1070
+ begin = time.perf_counter()
1071
+ res = _export(*args, **kwargs)
1072
+ duration = time.perf_counter() - begin
1073
+ _cache_export_times.append(duration)
1074
+ return res
1075
+
1076
+
1077
+ def _restore_torch_export_export(summary):
1078
+ torch.export.export = _main_export_function
1079
+ if _cache_export_times:
1080
+ summary["time_torch_export_export"] = sum(_cache_export_times)
1081
+ summary["time_torch_export_export_n"] = len(_cache_export_times)
1082
+ _cache_export_times.clear()
1083
+
1084
+
963
1085
  def call_exporter(
964
1086
  data: Dict[str, Any],
965
1087
  exporter: str,
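
Note: `call_exporter` temporarily replaces `torch.export.export` with a timed wrapper so the summary can report how much time is spent inside `torch.export.export` and how many times it is called. The pattern, reduced to its core with hypothetical names:

    import time
    import torch

    _export_times = []
    _original_export = torch.export.export

    def _timed_export(*args, _export=_original_export, **kwargs):
        begin = time.perf_counter()
        try:
            return _export(*args, **kwargs)
        finally:
            _export_times.append(time.perf_counter() - begin)

    torch.export.export = _timed_export
    try:
        pass  # run the exporter here; it may call torch.export.export several times
    finally:
        torch.export.export = _original_export
        total_export_time, n_calls = sum(_export_times), len(_export_times)
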
@@ -985,6 +1107,9 @@ def call_exporter(
985
1107
  :return: two dictionaries, one with some metrics,
986
1108
  another one with whatever the function produces
987
1109
  """
1110
+ _cache_export_times.clear()
1111
+ torch.export.export = _torch_export_export
1112
+
988
1113
  if exporter == "export" or exporter.startswith("export-"):
989
1114
  # torch export
990
1115
  summary, data = call_torch_export_export(
@@ -995,6 +1120,7 @@ def call_exporter(
995
1120
  optimization=optimization,
996
1121
  do_run=do_run,
997
1122
  )
1123
+ _restore_torch_export_export(summary)
998
1124
  return summary, data
999
1125
  if exporter.startswith("onnx-"):
1000
1126
  # torch export
@@ -1006,6 +1132,7 @@ def call_exporter(
1006
1132
  optimization=optimization,
1007
1133
  output_names=output_names,
1008
1134
  )
1135
+ _restore_torch_export_export(summary)
1009
1136
  return summary, data
1010
1137
  if exporter == "custom" or exporter.startswith("custom"):
1011
1138
  # torch export
@@ -1018,6 +1145,7 @@ def call_exporter(
1018
1145
  dump_folder=dump_folder,
1019
1146
  output_names=output_names,
1020
1147
  )
1148
+ _restore_torch_export_export(summary)
1021
1149
  return summary, data
1022
1150
  if exporter == "modelbuilder":
1023
1151
  # torch export
@@ -1029,6 +1157,7 @@ def call_exporter(
1029
1157
  optimization=optimization,
1030
1158
  output_names=output_names,
1031
1159
  )
1160
+ _restore_torch_export_export(summary)
1032
1161
  return summary, data
1033
1162
  raise NotImplementedError(
1034
1163
  f"export with {exporter!r} and optimization={optimization!r} not implemented yet, "
@@ -1171,7 +1300,7 @@ def validate_onnx_model(
1171
1300
  runtime: str = "onnxruntime",
1172
1301
  repeat: int = 1,
1173
1302
  warmup: int = 0,
1174
- inputs2: int = 1,
1303
+ second_input_keys: Optional[List[str]] = None,
1175
1304
  ort_logs: bool = False,
1176
1305
  ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
1177
1306
  """
@@ -1188,7 +1317,7 @@ def validate_onnx_model(
1188
1317
  :param runtime: onnx runtime to use, onnxruntime, torch, orteval, ref
1189
1318
  :param repeat: run that number of times the model
1190
1319
  :param warmup: warmup the model
1191
- :param inputs2: to validate the model on the second input set
1320
+ :param second_input_keys: to validate the model on other input sets
1192
1321
  to make sure the exported model supports dynamism, the value is
1193
1322
  used as an increment added to the first set of inputs (added to dimensions)
1194
1323
  :param ort_logs: triggers the logs for onnxruntime
@@ -1313,16 +1442,24 @@ def validate_onnx_model(
1313
1442
  print(f"[validate_onnx_model] done (ort_session) flavour={flavour!r}")
1314
1443
 
1315
1444
  keys = [("inputs", "run_expected", "")]
1316
- if inputs2:
1317
- keys.append(("inputs2", "run_expected2", "2"))
1445
+ if second_input_keys:
1446
+ keys.extend([(k, f"run_expected2{k[6:]}", f"2{k[6:]}") for k in second_input_keys])
1318
1447
  for k_input, k_expected, suffix in keys:
1319
1448
  # make_feeds
1449
+ assert k_input in data, f"Unable to find {k_input!r} in {sorted(data)}"
1450
+ assert k_expected in data, f"Unable to find {k_expected!r} in {sorted(data)}"
1320
1451
  if verbose:
1321
1452
  print(f"[validate_onnx_model] -- make_feeds for {k_input!r}...")
1322
1453
  print(
1323
1454
  f"[validate_onnx_model] inputs={string_type(data[k_input], with_shape=True)}"
1324
1455
  )
1325
- feeds = make_feeds(sess, data[k_input], use_numpy=True, check_flatten=False)
1456
+ feeds = make_feeds(
1457
+ sess,
1458
+ data[k_input],
1459
+ use_numpy=True,
1460
+ check_flatten=False,
1461
+ is_modelbuilder=data["exporter"] == "modelbuilder",
1462
+ )
1326
1463
  if verbose:
1327
1464
  print(f"[validate_onnx_model] ort inputs={string_type(feeds, with_shape=True)}")
1328
1465
  summary[_mk(f"onnx_ort_inputs{suffix}")] = string_type(feeds, with_shape=True)
@@ -1342,6 +1479,13 @@ def validate_onnx_model(
1342
1479
  repeat=repeat,
1343
1480
  warmup=warmup,
1344
1481
  )
1482
+ # NOTE: modelbuilder has different order on past_kv outputs
1483
+ if data["exporter"] == "modelbuilder":
1484
+ logits = got[:1]
1485
+ past_key_values = got[1:]
1486
+ reorder_past_key_values = reorder_modelbuilder_cache_to_torch(past_key_values)
1487
+ got = logits + reorder_past_key_values
1488
+
1345
1489
  if f"ERR_{_mk(f'time_onnx_ort_run{suffix}')}" in summary:
1346
1490
  return summary, data
1347
1491
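
Note: ModelBuilder models emit the past key/value outputs in a different order than the flattened torch cache, so the outputs are reordered before the discrepancies are computed; the actual mapping lives in `onnx_diagnostic.helpers.rt_helper.reorder_modelbuilder_cache_to_torch` (added in this release). A hedged sketch only, assuming for illustration that one side interleaves key/value per layer while the other groups all keys before all values:

    def reorder_interleaved_to_grouped(past):
        # (key_0, value_0, key_1, value_1, ...) -> (key_0, key_1, ..., value_0, value_1, ...)
        keys, values = past[0::2], past[1::2]
        return list(keys) + list(values)

    reorder_interleaved_to_grouped(["k0", "v0", "k1", "v1"])
    # -> ['k0', 'k1', 'v0', 'v1']
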
 
@@ -1382,7 +1526,7 @@ def call_torch_export_onnx(
1382
1526
  :return: two dictionaries, one with some metrics,
1383
1527
  another one with whatever the function produces
1384
1528
  """
1385
- available = {None, "", "ir", "os_ort"}
1529
+ available = {None, "", "ir", "os_ort", "ir+default"}
1386
1530
  assert (
1387
1531
  optimization in available
1388
1532
  ), f"unexpected value for optimization={optimization}, available={available}"
@@ -1472,11 +1616,31 @@ def call_torch_export_onnx(
1472
1616
  print(epo)
1473
1617
  print("[call_torch_export_onnx] -- End of ONNXProgram")
1474
1618
 
1475
- if optimization in {"ir", "os_ort"}:
1619
+ if optimization in {"ir", "os_ort", "ir+default"}:
1476
1620
  if verbose:
1477
1621
  print(f"[call_torch_export_onnx] starts optimization={optimization!r}...")
1478
1622
  if optimization == "ir":
1479
1623
  label, f_optim = "export_onnx_opt_ir", (lambda epo=epo: epo.optimize())
1624
+ elif optimization == "ir+default":
1625
+ import onnxscript
1626
+ from experimental_experiment.xbuilder import GraphBuilder, OptimizationOptions
1627
+
1628
+ def _ir_default_opt(epo):
1629
+ onnxscript.optimizer.optimize_ir(epo.model)
1630
+ onx = epo.model_proto
1631
+ # not very efficient
1632
+ gr = GraphBuilder(
1633
+ onx,
1634
+ infer_shapes_options=True,
1635
+ optimization_options=OptimizationOptions(patterns="default"),
1636
+ )
1637
+ cont = gr.to_onnx(large_model=True)
1638
+ epo.model = cont.to_ir()
1639
+
1640
+ label, f_optim = "export_onnx_opt_ir_default", (
1641
+ lambda epo=epo: _ir_default_opt(epo)
1642
+ )
1643
+
1480
1644
  else:
1481
1645
  import onnxscript
1482
1646
  import onnxscript.rewriter.ort_fusions as ort_fusions
@@ -1567,6 +1731,98 @@ def call_torch_export_model_builder(
1567
1731
  return summary, data
1568
1732
 
1569
1733
 
1734
+ def process_statistics(data: Sequence[Dict[str, float]]) -> Dict[str, Any]:
1735
+ """
1736
+ Processes statistics coming from the exporters.
1737
+ It takes a sequence of dictionaries (like a data frame)
1738
+ and extracts some metrics.
1739
+ """
1740
+
1741
+ def _simplify(p):
1742
+ for s in [
1743
+ "remove_unused",
1744
+ "constant_folding",
1745
+ "remove_identity",
1746
+ "remove_duplicated_initializer",
1747
+ "dynamic_dimension_naming",
1748
+ "inline",
1749
+ "check",
1750
+ "build_graph_for_pattern",
1751
+ "pattern_optimization",
1752
+ "topological_sort",
1753
+ ]:
1754
+ if s in p or s.replace("_", "-") in p:
1755
+ return s
1756
+ if p.startswith(("apply_", "match_")):
1757
+ return p
1758
+ return "other"
1759
+
1760
+ def _add(d, a, v, use_max=False):
1761
+ if v:
1762
+ if a not in d:
1763
+ d[a] = v
1764
+ elif use_max:
1765
+ d[a] = max(d[a], v)
1766
+ else:
1767
+ d[a] += v
1768
+
1769
+ counts: Dict[str, Any] = {}
1770
+ applied_pattern_time: Dict[str, Any] = {}
1771
+ applied_pattern_n: Dict[str, Any] = {}
1772
+ matching_pattern_time: Dict[str, Any] = {}
1773
+ matching_pattern_n: Dict[str, Any] = {}
1774
+
1775
+ for obs in data:
1776
+ pattern = _simplify(obs["pattern"])
1777
+ _add(counts, "opt_nodes_added", obs.get("added", 0))
1778
+ _add(counts, "opt_nodes_removed", obs.get("removed", 0))
1779
+ _add(counts, "opt_time_steps", obs.get("time_in", 0))
1780
+ _add(counts, "opt_n_steps", 1)
1781
+ _add(
1782
+ counts,
1783
+ "opt_n_iteration",
1784
+ max(counts.get("opt_n_iteration", 0), obs.get("iteration", 0)),
1785
+ use_max=True,
1786
+ )
1787
+
1788
+ if pattern.startswith("apply_"):
1789
+ _add(counts, "opt_n_applied_patterns", 1)
1790
+ _add(counts, "opt_time_applied_patterns", obs.get("time_in", 0))
1791
+ _add(applied_pattern_time, pattern, obs.get("time_in", 0))
1792
+ _add(applied_pattern_n, pattern, 1)
1793
+ elif pattern.startswith("match_"):
1794
+ _add(counts, "opt_n_matching_patterns", 1)
1795
+ _add(counts, "opt_time_matching_patterns", obs.get("time_in", 0))
1796
+ _add(matching_pattern_time, pattern, obs.get("time_in", 0))
1797
+ _add(matching_pattern_n, pattern, 1)
1798
+ else:
1799
+ _add(counts, f"opt_time_{pattern}", obs.get("time_in", 0))
1800
+ _add(counts, f"opt_n_{pattern}", 1)
1801
+ _add(counts, f"opt_nodes_added_{pattern}", obs.get("added", 0))
1802
+ _add(counts, f"opt_nodes_removed_{pattern}", obs.get("removed", 0))
1803
+
1804
+ if applied_pattern_time:
1805
+ longest = max((v, k) for k, v in applied_pattern_time.items())
1806
+ counts["opt_top_time_applied_pattern"], counts["opt_top_time_applied_pattern_arg"] = (
1807
+ longest
1808
+ )
1809
+ longest = max((v, k) for k, v in applied_pattern_n.items())
1810
+ counts["opt_top_n_applied_pattern"], counts["opt_top_n_applied_pattern_arg"] = longest
1811
+
1812
+ if matching_pattern_time:
1813
+ longest = max((v, k) for k, v in matching_pattern_time.items())
1814
+ (
1815
+ counts["opt_top_time_matching_pattern"],
1816
+ counts["opt_top_time_matching_pattern_arg"],
1817
+ ) = longest
1818
+ longest = max((v, k) for k, v in matching_pattern_n.items())
1819
+ counts["opt_top_n_matching_pattern"], counts["opt_top_n_matching_pattern_arg"] = (
1820
+ longest
1821
+ )
1822
+ counts["onnx_opt_optimized"] = 1
1823
+ return counts
1824
+
1825
+
1570
1826
  def call_torch_export_custom(
1571
1827
  data: Dict[str, Any],
1572
1828
  exporter: str,
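
Note: the new `process_statistics` replaces the ad-hoc aggregation that used to live in `call_torch_export_custom` (removed further below). Fed a small optimization log, made-up entries for illustration, it aggregates times and node counts per kind of step:

    from onnx_diagnostic.torch_models.validate import process_statistics

    stats = process_statistics(
        [
            {"pattern": "remove_identity", "time_in": 0.004, "removed": 3, "iteration": 0},
            {"pattern": "match_CastPattern", "time_in": 0.002, "iteration": 1},
            {"pattern": "apply_CastPattern", "time_in": 0.003, "added": 1, "removed": 2, "iteration": 1},
        ]
    )
    # e.g. stats["opt_nodes_removed"] == 5, stats["opt_n_iteration"] == 1,
    #      stats["opt_top_time_applied_pattern_arg"] == "apply_CastPattern"
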
@@ -1619,6 +1875,8 @@ def call_torch_export_custom(
1619
1875
  "custom-nostrict-noinline",
1620
1876
  "custom-nostrict-default-noinline",
1621
1877
  "custom-nostrict-all-noinline",
1878
+ "custom-dec",
1879
+ "custom-decall",
1622
1880
  }
1623
1881
  assert exporter in available, f"Unexpected value for exporter={exporter!r} in {available}"
1624
1882
  assert "model" in data, f"model is missing from data: {sorted(data)}"
@@ -1655,7 +1913,9 @@ def call_torch_export_custom(
1655
1913
  export_options = ExportOptions(
1656
1914
  strict=strict,
1657
1915
  decomposition_table=(
1658
- "default" if "-default" in exporter else ("all" if "-all" in exporter else None)
1916
+ "default"
1917
+ if ("-default" in exporter or "-dec" in exporter)
1918
+ else ("all" if ("-all" in exporter or "-decall" in exporter) else None)
1659
1919
  ),
1660
1920
  save_ep=(os.path.join(dump_folder, f"{exporter}.ep") if dump_folder else None),
1661
1921
  )
@@ -1696,67 +1956,10 @@ def call_torch_export_custom(
1696
1956
  if "ERR_export_onnx_c" in summary:
1697
1957
  return summary, data
1698
1958
 
1699
- new_stat = {}
1959
+ new_stat: Dict[str, Any] = {k: v for k, v in opt_stats.items() if k.startswith("time_")}
1960
+ new_stat.update({k[5:]: v for k, v in opt_stats.items() if k.startswith("stat_time_")})
1700
1961
  if "optimization" in opt_stats:
1701
- added, removed, time_in = 0, 0, 0.0
1702
- max_iter = 0
1703
- applied = {}
1704
- matched = set()
1705
- n_applied = 0
1706
- by_pattern = {}
1707
- by_pattern_n = {}
1708
- by_iter = {}
1709
- cst_added, cst_removed, cst_time_in = 0, 0, 0.0
1710
-
1711
- for obs in opt_stats["optimization"]:
1712
- pattern = obs["pattern"]
1713
- if pattern == "constant_folding":
1714
- cst_added += obs.get("added", 0)
1715
- cst_removed += obs.get("removed", 0)
1716
- cst_time_in += obs.get("time_in", 0)
1717
- if pattern not in by_pattern:
1718
- by_pattern[pattern] = 0
1719
- by_pattern_n[pattern] = 0
1720
- by_iter[pattern] = 0
1721
- time_in += obs.get("time_in", 0)
1722
- added += obs.get("added", 0)
1723
- removed += obs.get("removed", 0)
1724
- max_iter = max(max_iter, obs.get("iteration", 0))
1725
- by_pattern[pattern] += obs.get("time_in", 0)
1726
- by_pattern_n[pattern] += obs.get("added", 0) - obs.get("removed", 0)
1727
- if not pattern.startswith("match"):
1728
- by_iter[pattern] = max(by_iter[pattern], obs.get("iteration", 0))
1729
- p = obs["pattern"]
1730
- if p.startswith("match_"):
1731
- matched.add(p)
1732
- elif p.startswith("apply_"):
1733
- key = f"op_opt_{p}"
1734
- key2 = f"op_opt_maxiter_{p}"
1735
- if key not in applied:
1736
- applied[key] = 1
1737
- applied[key2] = obs["iteration"]
1738
- else:
1739
- applied[key] += 1
1740
- applied[key2] = max(obs["iteration"], applied[key2])
1741
- n_applied += 1
1742
-
1743
- new_stat.update(
1744
- dict(
1745
- onnx_opt_optimized=1,
1746
- op_opt_all_time_in=time_in,
1747
- op_opt_all_added=added,
1748
- op_opt_all_removed=removed,
1749
- op_opt_max_iter=max_iter,
1750
- op_opt_unique_matched=len(matched),
1751
- op_opt_unique_applied=len(applied),
1752
- op_opt_n_applied=n_applied,
1753
- time_export_optimization=time_in,
1754
- op_opt_export_optimization=time_in,
1755
- op_opt_cst_time_in=cst_time_in,
1756
- op_opt_cst_added=cst_added,
1757
- op_opt_cst_removed=cst_removed,
1758
- )
1759
- )
1962
+ new_stat.update(process_statistics(opt_stats["optimization"]))
1760
1963
 
1761
1964
  summary.update(new_stat)
1762
1965
  assert epo is not None, "no onnx export was found"
@@ -1875,3 +2078,24 @@ def run_ort_fusion(
1875
2078
  f"opt_ort_{model_type}_duration": duration,
1876
2079
  f"opt_ort_{model_type}_duration_save": d,
1877
2080
  }, {f"opt_ort_{model_type}": output_path}
2081
+
2082
+
2083
+ def _compute_final_statistics(summary: Dict[str, Any]):
2084
+ """
2085
+ Updates inline the list of statistics. It adds:
2086
+
2087
+ - speedup
2088
+ """
2089
+ stats = {}
2090
+ if (
2091
+ "time_run_latency" in summary
2092
+ and "time_run_onnx_ort_latency" in summary
2093
+ and summary["time_run_onnx_ort_latency"] > 0
2094
+ ):
2095
+ stats["stat_estimated_speedup_ort"] = (
2096
+ summary["time_run_latency"] / summary["time_run_onnx_ort_latency"]
2097
+ )
2098
+ stats["stat_estimated_speedup_ort_m98"] = (
2099
+ summary["time_run_latency_m98"] / summary["time_run_onnx_ort_latency_m98"]
2100
+ )
2101
+ summary.update(stats)
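
Note: the final statistics derive an estimated speedup from latencies already present in the summary. With made-up numbers for illustration:

    summary = {"time_run_latency": 0.012, "time_run_onnx_ort_latency": 0.008,
               "time_run_latency_m98": 0.011, "time_run_onnx_ort_latency_m98": 0.008}
    # stat_estimated_speedup_ort     = 0.012 / 0.008 = 1.5
    # stat_estimated_speedup_ort_m98 = 0.011 / 0.008 = 1.375
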