onnx-diagnostic 0.7.11__py3-none-any.whl → 0.7.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +5 -2
- onnx_diagnostic/helpers/log_helper.py +65 -12
- onnx_diagnostic/helpers/rt_helper.py +53 -36
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +11 -0
- onnx_diagnostic/torch_models/hghub/hub_api.py +4 -10
- onnx_diagnostic/torch_models/hghub/model_inputs.py +1 -1
- onnx_diagnostic/torch_models/validate.py +223 -77
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/RECORD +13 -13
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED
onnx_diagnostic/_command_lines_parser.py
CHANGED

@@ -581,6 +581,7 @@ def _cmd_validate(argv: List[Any]):
     ):
         print(f"validate - unsupported args: export={args.export!r}, opt={args.opt!r}")
         return
+    patch_dict = args.patch if isinstance(args.patch, dict) else {"patch": args.patch}
     summary, _data = validate_model(
         model_id=args.mid,
         task=args.task,
@@ -591,8 +592,8 @@ def _cmd_validate(argv: List[Any]):
         use_pretrained=args.trained,
         dtype=args.dtype,
         device=args.device,
-        patch=args.patch,
-        rewrite=args.rewrite,
+        patch=patch_dict,
+        rewrite=args.rewrite and patch_dict.get("patch", True),
         stop_if_static=args.stop_if_static,
         optimization=args.opt,
         exporter=args.export,
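With this change, the patch option accepts either a boolean-like value or a dictionary of individual patch switches, and rewriting is disabled whenever patching is turned off. A minimal sketch of the normalization (the value assigned to `args_patch` below is hypothetical):

    # args.patch may be a plain flag or a dict of patch switches
    args_patch = {"patch_transformers": True, "patch": True}
    patch_dict = args_patch if isinstance(args_patch, dict) else {"patch": args_patch}
    rewrite = True and patch_dict.get("patch", True)  # becomes False once patching is off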
@@ -827,6 +828,8 @@ def get_parser_agg() -> ArgumentParser:
         "n_model_running,n_model_acc01,n_model_acc001,n_model_dynamic,"
         "n_model_pass,n_model_faster,"
         "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
+        "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
+        "n_node_layer_normalization,n_node_layer_normalization23,"
         "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
         "n_node_constant,n_node_shape,n_node_expand,"
         "n_node_function,n_node_initializer,n_node_scatter,"
onnx_diagnostic/helpers/log_helper.py
CHANGED

@@ -285,7 +285,8 @@ class CubePlot:
         nn = df.shape[1] // n_cols
         nn += int(df.shape[1] % n_cols != 0)
         ratio = float(os.environ.get("FIGSIZEH", "1"))
-        fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * (2.5 + df.shape[0] / 15) * ratio))
+        figsize = (6 * n_cols, nn * (2.5 + df.shape[0] / 15) * ratio)
+        fig, axs = plt.subplots(nn, n_cols, figsize=figsize)
         pos = 0
         imgs = []
         for c in self._make_loop(df.columns, verbose):
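Both plotting paths now compute the figure size the same way and honor the FIGSIZEH environment variable as a vertical scale factor, e.g.:

    import os
    os.environ["FIGSIZEH"] = "1.5"  # stretch CubePlot figures vertically by 50%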
@@ -332,10 +333,12 @@ class CubePlot:
         n_cols = len(groups)

         title_suffix = f"\n{title_suffix}" if title_suffix else ""
+        ratio = float(os.environ.get("FIGSIZEH", "1"))
+        figsize = (5 * n_cols, max(len(g) for g in groups) * (2 + df.shape[1] / 2) * ratio)
         fig, axs = plt.subplots(
             df.shape[1],
             n_cols,
-            figsize=(5 * n_cols, max(len(g) for g in groups) * (2 + df.shape[1] / 2)),
+            figsize=figsize,
             sharex=True,
             sharey="row" if n_cols > 1 else False,
         )
@@ -877,7 +880,11 @@ class CubeLogs:
             print(f"[CubeLogs.view] key_columns={key_columns}")
         g = data[[*key_index, *key_columns]].copy()
         g["count"] = 1
-        r = g.groupby([*key_index, *key_columns], dropna=False).sum()
+        r = (
+            g.copy()
+            if not key_index and not key_columns
+            else g.groupby([*key_index, *key_columns], dropna=False).sum()
+        )
         not_unique = r[r["count"] > 1]
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
@@ -1505,6 +1512,11 @@ class CubeLogsPerformance(CubeLogs):
             "n_model_faster3x",
             "n_model_faster4x",
             "n_node_attention",
+            "n_node_attention23",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
             "n_node_control_flow",
             "n_node_scatter",
             "n_node_function",
@@ -1568,7 +1580,9 @@ class CubeLogsPerformance(CubeLogs):

         def gdf(df, cname, default_value=np.nan):
             if cname in df.columns:
-                return df[cname].fillna(default_value)
+                if np.isnan(default_value):
+                    return df[cname]
+                return df[cname].fillna(default_value)
             return pandas.Series(default_value, index=df.index)

         def ghas_value(df, cname):
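The fix returns the column untouched when the default is NaN instead of calling `fillna` with NaN. Since `gdf` is local to the method, the following is illustrative only:

    import numpy as np
    import pandas

    df = pandas.DataFrame({"a": [1.0, np.nan]})
    gdf(df, "a")       # column returned unchanged, NaN preserved
    gdf(df, "a", 0.0)  # -> [1.0, 0.0]
    gdf(df, "b", 0.0)  # missing column -> Series of 0.0 aligned on df.index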
@@ -1676,15 +1690,54 @@ class CubeLogsPerformance(CubeLogs):
                 "time_latency",
                 gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
             ),
+            n_node_attention23=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__Attention")
+            ),
+            n_node_rotary_embedding23=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__RotaryEmbedding")
+            ),
+            n_node_layer_normalization23=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__LayerNormalization", 0)
+                + gdf(df, "op_onnx__RMSNormalization", 0)
+                + gdf(df, "op_onnx__BatchNormlization", 0)
+                + gdf(df, "op_onnx__InstanceNormlization", 0)
+                + gdf(df, "op_onnx__GroupNormalization", 0),
+            ),
             n_node_attention=lambda df: gpreserve(
                 df,
-                "…",
-                gdf(df, "op_onnx_com.microsoft_Attention")
-                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention")
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_Attention", 0)
+                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PagedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_LongformerAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedSelfAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
+            ),
+            n_node_layer_normalization=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_EmbedLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SkipLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_LayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SkipSimplifiedLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SimplifiedLayerNormalization", 0),
+            ),
+            n_node_rotary_embedding=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_GemmaRotaryEmbedding", 0)
+                + gdf(df, "op_onnx_com.microsoft_RotaryEmbedding", 0),
             ),
             n_node_control_flow=lambda df: gpreserve(
                 df,
-                "…",
+                "time_latency_eager",
                 (
                     gdf(df, "op_onnx__If", 0)
                     + gdf(df, "op_onnx__Scan", 0)
@@ -1693,7 +1746,7 @@ class CubeLogsPerformance(CubeLogs):
             ),
             n_node_scatter=lambda df: gpreserve(
                 df,
-                "…",
+                "time_latency_eager",
                 gdf(df, "op_onnx__ScatterND", 0) + gdf(df, "op_onnx__ScatterElements", 0),
             ),
             n_node_function=lambda df: gpreserve(
@@ -1706,13 +1759,13 @@ class CubeLogsPerformance(CubeLogs):
                 df, "onnx_n_initializer", gdf(df, "onnx_n_initializer")
             ),
             n_node_constant=lambda df: gpreserve(
-                df, "…", gdf(df, "op_onnx__Constant")
+                df, "time_latency_eager", gdf(df, "op_onnx__Constant")
             ),
             n_node_shape=lambda df: gpreserve(
-                df, "…", gdf(df, "op_onnx__Shape")
+                df, "time_latency_eager", gdf(df, "op_onnx__Shape")
             ),
             n_node_expand=lambda df: gpreserve(
-                df, "…", gdf(df, "op_onnx__Expand")
+                df, "time_latency_eager", gdf(df, "op_onnx__Expand")
             ),
         )
         assert (
onnx_diagnostic/helpers/rt_helper.py
CHANGED

@@ -3,7 +3,6 @@ import numpy as np
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .onnx_helper import dtype_to_tensor_dtype
 from .cache_helper import is_cache_dynamic_registered


@@ -23,6 +22,7 @@ def make_feeds(
     use_numpy: bool = False,
     copy: bool = False,
     check_flatten: bool = True,
+    is_modelbuilder: bool = False,
 ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
     """
     Serializes the inputs to produce feeds expected
@@ -35,10 +35,15 @@ def make_feeds(
         by ``OrtValue``
     :param check_flatten: if True, checks the ``torch.utils._pytree.tree_flatten``
         returns the same number of outputs
+    :param is_modelbuilder: if True, the exporter is ModelBuilder, and we need to reorder
+        the past_key_values inputs to match the expected order, and get rid of position_ids.
     :return: feeds dictionary
     """
-    # position_ids is a special case because ModelBuilder does not usually use it
-    # …
+    # NOTE: position_ids is a special case because ModelBuilder does not usually use it,
+    # because it's fused into the rotary embedding in GQA.
+    if is_modelbuilder and isinstance(inputs, dict):
+        inputs.pop("position_ids", None)  # drop 'position_ids' if present, no error otherwise
+
     flat = flatten_object(inputs, drop_keys=True)
     assert (
         not check_flatten
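A hedged usage sketch matching how `validate_model` now builds ONNX Runtime feeds for a ModelBuilder export; the model path and the `inputs` dictionary are placeholders:

    import onnxruntime

    sess = onnxruntime.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    feeds = make_feeds(sess, inputs, use_numpy=True, check_flatten=False, is_modelbuilder=True)
    got = sess.run(None, feeds)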
@@ -76,39 +81,6 @@ def make_feeds(
         f"\n-- inputs={string_type(inputs, with_shape=True)}"
         f"\n-- names={names}"
     )
-    if len(names) < len(flat) and (
-        isinstance(proto, onnx.ModelProto) or hasattr(proto, "get_inputs")
-    ):
-
-        typed_names = (
-            [(i.name, i.type.tensor_type.elem_type) for i in proto.graph.input]
-            if isinstance(proto, onnx.ModelProto)
-            else [(i.name, name_type_to_onnx_dtype(i.type)) for i in proto.get_inputs()]
-        )
-
-        new_flat = []
-        pos = 0
-        for _name, dtype in typed_names:
-            assert isinstance(
-                dtype, int
-            ), f"Unexpected value for dtype={dtype!r}, type(proto)={type(proto)}"
-            itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            while dtype != itype:
-                pos += 1
-                if pos >= len(flat):
-                    break
-                itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            if pos >= len(flat):
-                break
-            new_flat.append(flat[pos])
-            pos += 1
-        assert len(new_flat) == len(names), (
-            f"Unable to align expected input {names} with the given input, "
-            f"type(proto)={type(proto)}"
-            f"\n-- inputs: {string_type(inputs, with_shape=True)}"
-            f"\n-- typed_names: {typed_names}"
-        )
-        flat = new_flat

     if copy:
         flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
@@ -122,4 +94,49 @@ def make_feeds(
         elif isinstance(i, float):
             i = np.array(i, dtype=np.float32)
         new_flat.append(i)
+
+    # NOTE: model builder has a different order for past_key_values,
+    # we need to reorder them to match the expected order.
+    if is_modelbuilder:
+        # We assume "past_key_values" appears in the input names when the
+        # exporter is ModelBuilder.
+        non_past_kv_input_names = [n for n in names if "past_key_values" not in n]
+        past_kv_names = [n for n in names if "past_key_values" in n]
+        reorder_past_kv_names = reorder_modelbuilder_cache_to_torch(past_kv_names)
+        names = non_past_kv_input_names + reorder_past_kv_names
     return dict(zip(names, new_flat))
+
+
+def reorder_modelbuilder_cache_to_torch(past_kv: List[Any]) -> List[Any]:
+    """
+    Reorders the past_kvs for ModelBuilder to match the expected order
+    by PyTorch exported models.
+
+    .. note::
+        This function can take either the names or the actual tensors
+        as long as they are in a list.
+
+    Conceptually,
+
+    From::
+
+        [past_key_values.0.key, past_key_values.0.value,
+         past_key_values.1.key, past_key_values.1.value, ...]
+
+    To::
+
+        [past_key_values.0.key, past_key_values.1.key,
+         ..., past_key_values.0.value, past_key_values.1.value, ...]
+
+    :param past_kv: list of flattened inputs
+    :return: reordered list of flattened inputs
+    """
+    total_len = len(past_kv)
+    if total_len % 2 != 0:
+        raise ValueError("The length of past_key_values should be even.")
+    keys = []
+    values = []
+    for i in range(0, total_len, 2):
+        keys.append(past_kv[i])
+        values.append(past_kv[i + 1])
+    return keys + values
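A quick illustration of the new helper on input names; the expected result follows from the docstring:

    names = [
        "past_key_values.0.key", "past_key_values.0.value",
        "past_key_values.1.key", "past_key_values.1.value",
    ]
    reorder_modelbuilder_cache_to_torch(names)
    # ['past_key_values.0.key', 'past_key_values.1.key',
    #  'past_key_values.0.value', 'past_key_values.1.value']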
onnx_diagnostic/torch_export_patches/onnx_export_errors.py
CHANGED

@@ -254,6 +254,17 @@ def torch_export_patches(
     may appear ``AssertionError: Mutating module attribute _seen_tokens during export.``.
     It can be avoided by setting ``strict=False`` when calling :func:`torch.export.export`.
     """
+    if verbose:
+        print(f"[torch_export_patches] patch_sympy={patch_sympy!r}")
+        print(f"  . patch_torch={patch_torch!r}")
+        print(f"  . patch_transformers={patch_transformers!r}")
+        print(f"  . patch_diffusers={patch_diffusers!r}")
+        print(f"  . catch_constraints={catch_constraints!r}")
+        print(f"  . stop_if_static={stop_if_static!r}")
+        print(f"  . patch={patch!r}")
+        print(f"  . custom_patches={custom_patches!r}")
+        print(f"[torch_export_patches] dump_rewriting={dump_rewriting!r}")
+
     if rewrite:
         from .patch_module import torch_export_rewrite

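A hedged usage sketch: `torch_export_patches` is applied as a context manager around the export, and `model`, `args`, and `ds` are placeholders here:

    import torch
    from onnx_diagnostic.torch_export_patches import torch_export_patches

    with torch_export_patches(patch_transformers=True, verbose=1):
        ep = torch.export.export(model, args, dynamic_shapes=ds)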
onnx_diagnostic/torch_models/hghub/hub_api.py
CHANGED

@@ -289,21 +289,17 @@ def task_from_tags(tags: Union[str, List[str]]) -> str:

 def enumerate_model_list(
     n: int = 50,
-    …
-    library: Optional[str] = None,
-    tags: Optional[Union[str, List[str]]] = None,
+    pipeline_tag: Optional[str] = None,
     search: Optional[str] = None,
     dump: Optional[str] = None,
-    filter: Optional[str] = None,
+    filter: Optional[Union[str, List[str]]] = None,
     verbose: int = 0,
 ):
     """
     Enumerates models coming from :epkg:`huggingface_hub`.

     :param n: number of models to retrieve (-1 for all)
-    :param …
-    :param tags: see :meth:`huggingface_hub.HfApi.list_models`
-    :param library: see :meth:`huggingface_hub.HfApi.list_models`
+    :param pipeline_tag: see :meth:`huggingface_hub.HfApi.list_models`
     :param search: see :meth:`huggingface_hub.HfApi.list_models`
     :param filter: see :meth:`huggingface_hub.HfApi.list_models`
     :param dump: dumps the result in this csv file
@@ -311,9 +307,7 @@ def enumerate_model_list(
     """
     api = HfApi()
     models = api.list_models(
-        …
-        library=library,
-        tags=tags,
+        pipeline_tag=pipeline_tag,
         search=search,
         full=True,
         filter=filter,
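Hedged usage with the new signature; the argument values are illustrative:

    for info in enumerate_model_list(n=10, pipeline_tag="text-generation", search="llama", verbose=1):
        print(info)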
onnx_diagnostic/torch_models/hghub/model_inputs.py
CHANGED

@@ -189,7 +189,7 @@ def get_untrained_model_with_inputs(
             f"subfolder={subfolder!r}"
         )
         model = transformers.AutoModel.from_pretrained(
-            model_id, subfolder=subfolder, trust_remote_code=True, **mkwargs
+            model_id, subfolder=subfolder or "", trust_remote_code=True, **mkwargs
         )
         if verbose:
             print(
onnx_diagnostic/torch_models/validate.py
CHANGED

@@ -3,7 +3,7 @@ import inspect
 import os
 import pprint
 import sys
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 import time
 import numpy as np
 import onnx
@@ -11,7 +11,7 @@ import torch
 from ..export import CoupleInputsDynamicShapes
 from ..helpers import max_diff, string_type, string_diff
 from ..helpers.helper import flatten_object
-from ..helpers.rt_helper import make_feeds
+from ..helpers.rt_helper import make_feeds, reorder_modelbuilder_cache_to_torch
 from ..helpers.torch_helper import to_any, torch_deepcopy
 from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 from ..tasks import random_input_kwargs
@@ -112,6 +112,7 @@ def _make_folder_name(
     device: Optional[Union[str, torch.device]] = None,
     subfolder: Optional[str] = None,
     opset: Optional[int] = None,
+    drop_inputs: Optional[List[str]] = None,
 ) -> str:
     "Creates a filename unique based on the given options."
     els = [model_id.replace("/", "_")]
@@ -137,6 +138,9 @@ def _make_folder_name(
         els.append(sdev)
     if opset is not None:
         els.append(f"op{opset}")
+    if drop_inputs:
+        ii = "-".join(f"{s[0]}{s[-1]}" for s in drop_inputs)
+        els.append(f"I-{ii.upper()}")
     return "-".join(els)
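Each dropped input contributes its first and last characters to the folder name, for instance:

    ii = "-".join(f"{s[0]}{s[-1]}" for s in ["position_ids", "attention_mask"])
    f"I-{ii.upper()}"  # -> 'I-PS-AK'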
@@ -264,14 +268,18 @@ def shrink_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
     return new_cfg


-def _preprocess_model_id(…):
+def _preprocess_model_id(
+    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
+) -> Tuple[str, Optional[str], bool, bool]:
     if subfolder or "//" not in model_id:
-        return model_id, subfolder
+        return model_id, subfolder, same_as_pretrained, use_pretrained
     spl = model_id.split("//")
+    if spl[-1] == "pretrained":
+        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
     if spl[-1] in {"transformer", "vae"}:
         # known subfolder
-        return "//".join(spl[:-1]), spl[-1]
-    return model_id, subfolder
+        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
+    return model_id, subfolder, same_as_pretrained, use_pretrained


 def validate_model(
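The `//pretrained` suffix is new: besides stripping the suffix it forces `same_as_pretrained` and `use_pretrained`. Illustrative calls (the model ids are hypothetical):

    _preprocess_model_id("some-org/some-diffuser//transformer", None, False, False)
    # -> ("some-org/some-diffuser", "transformer", False, False)
    _preprocess_model_id("some-org/some-llm//pretrained", None, False, False)
    # -> ("some-org/some-llm", "", True, True)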
@@ -384,13 +392,15 @@ def validate_model(
         if ``runtime == 'ref'``,
         ``orteval10`` increases the verbosity.
     """
-    model_id, subfolder = _preprocess_model_id(model_id, subfolder)
+    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+        model_id,
+        subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+    )
+    default_patch = dict(patch_transformers=True, patch_diffusers=True, patch=True)
     if isinstance(patch, bool):
-        patch_kwargs = (
-            dict(patch_transformers=True, patch_diffusers=True, patch=True)
-            if patch
-            else dict(patch=False)
-        )
+        patch_kwargs = default_patch if patch else dict(patch=False)
     elif isinstance(patch, str):
         patch_kwargs = {"patch": True, **{p: True for p in patch.split(",")}}  # noqa: C420
     else:
@@ -399,11 +409,13 @@ def validate_model(
     if "patch" not in patch_kwargs:
         if any(patch_kwargs.values()):
             patch_kwargs["patch"] = True
+    elif len(patch) == 1 and patch.get("patch", False):
+        patch_kwargs.update(default_patch)

     assert not rewrite or patch_kwargs.get("patch", False), (
         f"rewrite={rewrite}, patch={patch}, patch_kwargs={patch_kwargs} "
         f"patch must be True to enable rewriting, "
-        f"if --…"
+        f"if --patch=0 was specified on the command line, rewrites are disabled."
     )
     summary = version_summary()
     summary.update(
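Putting the two hunks together, the `patch` argument is normalized roughly as follows; this is a standalone re-implementation for illustration, not the library API:

    def _normalize_patch(patch):
        default_patch = dict(patch_transformers=True, patch_diffusers=True, patch=True)
        if isinstance(patch, bool):
            return default_patch if patch else dict(patch=False)
        if isinstance(patch, str):
            return {"patch": True, **{p: True for p in patch.split(",")}}
        patch_kwargs = dict(patch)
        if "patch" not in patch_kwargs:
            if any(patch_kwargs.values()):
                patch_kwargs["patch"] = True
        elif len(patch) == 1 and patch.get("patch", False):
            patch_kwargs.update(default_patch)
        return patch_kwargs

    # A dict holding only {"patch": True} now expands to the full defaults:
    assert _normalize_patch({"patch": True}) == dict(
        patch_transformers=True, patch_diffusers=True, patch=True
    )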
@@ -441,6 +453,7 @@ def validate_model(
         device=device,
         subfolder=subfolder,
         opset=opset,
+        drop_inputs=drop_inputs,
     )
     dump_folder = os.path.join(dump_folder, folder_name)
     if not os.path.exists(dump_folder):
@@ -536,6 +549,11 @@ def validate_model(
     if verbose:
         print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}")

+    # ModelBuilder sometimes needs different treatment, so we record
+    # the exporter for later use.
+    # For example, its past_kv ordering differs from a
+    # flattened cache object.
+    data["exporter"] = exporter
     data["input_options"] = iop
     data["model_options"] = mop
     data["model_dump_folder"] = dump_folder
@@ -836,6 +854,8 @@ def validate_model(
     )
     summary.update(summary_valid)

+    _compute_final_statistics(summary)
+
     if verbose:
         print("[validate_model] -- done (final)")
     if dump_stats:
@@ -848,15 +868,24 @@
 def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
     """Computes some statistics on the model itself."""
     onx = onnx.load(onnx_filename, load_external_data=False)
+    cache_functions = {(f.domain, f.name): f for f in onx.functions}
+    local_domains = set(f.domain for f in onx.functions)

     def node_iter(proto):
         if isinstance(proto, onnx.ModelProto):
-            yield from node_iter(proto.graph)
             for f in proto.functions:
                 yield from node_iter(f)
+            yield from node_iter(proto.graph)
         elif isinstance(proto, (onnx.FunctionProto, onnx.GraphProto)):
             for node in proto.node:
                 yield node
+
+                # Let's inline the function
+                key = node.domain, node.op_type
+                if key in cache_functions:
+                    yield from node_iter(cache_functions[key])
+
+                # Let's continue
                 for att in node.attribute:
                     if att.type == onnx.AttributeProto.GRAPH:
                         yield from node_iter(att.g)
@@ -874,6 +903,11 @@ def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
             n_nodes += 1
             if proto.op_type != "Constant":
                 n_nodes_nocst += 1
+            if proto.domain in local_domains:
+                key = "n_node_local_function"
+                if key not in counts:
+                    counts[key] = 0
+                counts[key] += 1
         else:
             key = f"n_node_initializer_{proto.data_type}"

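A standalone sketch of the same counting idea on a loaded model; unlike the code above, it only walks the top-level graph and does not recurse into subgraphs or inlined functions (the model path is hypothetical):

    import onnx

    onx = onnx.load("model.onnx", load_external_data=False)
    local_domains = {f.domain for f in onx.functions}
    n_local = sum(1 for node in onx.graph.node if node.domain in local_domains)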
@@ -960,6 +994,26 @@ def _validate_do_run_exported_program(data, summary, verbose, quiet):
     )


+_cache_export_times = []
+_main_export_function = torch.export.export
+
+
+def _torch_export_export(*args, _export=_main_export_function, **kwargs):
+    begin = time.perf_counter()
+    res = _export(*args, **kwargs)
+    duration = time.perf_counter() - begin
+    _cache_export_times.append(duration)
+    return res
+
+
+def _restore_torch_export_export(summary):
+    torch.export.export = _main_export_function
+    if _cache_export_times:
+        summary["time_torch_export_export"] = sum(_cache_export_times)
+        summary["time_torch_export_export_n"] = len(_cache_export_times)
+    _cache_export_times.clear()
+
+
 def call_exporter(
     data: Dict[str, Any],
     exporter: str,
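The wrapper is installed at the top of `call_exporter` (next hunk) and removed after each branch returns, so every nested `torch.export.export` call is timed. The pattern in isolation:

    torch.export.export = _torch_export_export  # monkey-patch: every call is timed
    try:
        ...  # run an exporter that may invoke torch.export.export several times
    finally:
        summary = {}
        _restore_torch_export_export(summary)  # restores the original function
        # summary now holds time_torch_export_export / time_torch_export_export_n
        # whenever at least one export happened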
@@ -985,6 +1039,9 @@ def call_exporter(
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
+    _cache_export_times.clear()
+    torch.export.export = _torch_export_export
+
     if exporter == "export" or exporter.startswith("export-"):
         # torch export
         summary, data = call_torch_export_export(
@@ -995,6 +1052,7 @@ def call_exporter(
             optimization=optimization,
             do_run=do_run,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     if exporter.startswith("onnx-"):
         # torch export
@@ -1006,6 +1064,7 @@ def call_exporter(
             optimization=optimization,
             output_names=output_names,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     if exporter == "custom" or exporter.startswith("custom"):
         # torch export
@@ -1018,6 +1077,7 @@ def call_exporter(
             dump_folder=dump_folder,
             output_names=output_names,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     if exporter == "modelbuilder":
         # torch export
@@ -1029,6 +1089,7 @@ def call_exporter(
             optimization=optimization,
             output_names=output_names,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     raise NotImplementedError(
         f"export with {exporter!r} and optimization={optimization!r} not implemented yet, "
@@ -1322,7 +1383,13 @@ def validate_onnx_model(
         print(
             f"[validate_onnx_model] inputs={string_type(data[k_input], with_shape=True)}"
         )
-    feeds = make_feeds(sess, data[k_input], use_numpy=True, check_flatten=False)
+    feeds = make_feeds(
+        sess,
+        data[k_input],
+        use_numpy=True,
+        check_flatten=False,
+        is_modelbuilder=data["exporter"] == "modelbuilder",
+    )
     if verbose:
         print(f"[validate_onnx_model] ort inputs={string_type(feeds, with_shape=True)}")
     summary[_mk(f"onnx_ort_inputs{suffix}")] = string_type(feeds, with_shape=True)
@@ -1342,6 +1409,13 @@ def validate_onnx_model(
         repeat=repeat,
         warmup=warmup,
     )
+    # NOTE: modelbuilder has a different order on past_kv outputs
+    if data["exporter"] == "modelbuilder":
+        logits = got[:1]
+        past_key_values = got[1:]
+        reorder_past_key_values = reorder_modelbuilder_cache_to_torch(past_key_values)
+        got = logits + reorder_past_key_values
+
     if f"ERR_{_mk(f'time_onnx_ort_run{suffix}')}" in summary:
         return summary, data

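ModelBuilder emits outputs as [logits, k0, v0, k1, v1, ...] while torch-exported models use [logits, k0, k1, ..., v0, v1, ...]; the same helper realigns them, e.g.:

    got = list(sess.run(None, feeds))
    got = got[:1] + reorder_modelbuilder_cache_to_torch(got[1:])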
@@ -1382,7 +1456,7 @@ def call_torch_export_onnx(
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
-    available = {None, "", "ir", "os_ort"}
+    available = {None, "", "ir", "os_ort", "ir+default"}
     assert (
         optimization in available
     ), f"unexpected value for optimization={optimization}, available={available}"
@@ -1472,11 +1546,31 @@ def call_torch_export_onnx(
         print(epo)
         print("[call_torch_export_onnx] -- End of ONNXProgram")

-    if optimization in {"ir", "os_ort"}:
+    if optimization in {"ir", "os_ort", "ir+default"}:
         if verbose:
             print(f"[call_torch_export_onnx] starts optimization={optimization!r}...")
         if optimization == "ir":
             label, f_optim = "export_onnx_opt_ir", (lambda epo=epo: epo.optimize())
+        elif optimization == "ir+default":
+            import onnxscript
+            from experimental_experiment.xbuilder import GraphBuilder, OptimizationOptions
+
+            def _ir_default_opt(epo):
+                onnxscript.optimizer.optimize_ir(epo.model)
+                onx = epo.model_proto
+                # not very efficient
+                gr = GraphBuilder(
+                    onx,
+                    infer_shapes_options=True,
+                    optimization_options=OptimizationOptions(patterns="default"),
+                )
+                cont = gr.to_onnx(large_model=True)
+                epo.model = cont.to_ir()
+
+            label, f_optim = "export_onnx_opt_ir_default", (
+                lambda epo=epo: _ir_default_opt(epo)
+            )
+
         else:
             import onnxscript
             import onnxscript.rewriter.ort_fusions as ort_fusions
@@ -1567,6 +1661,97 @@ def call_torch_export_model_builder(
     return summary, data


+def process_statistics(data: Sequence[Dict[str, float]]) -> Dict[str, Any]:
+    """
+    Processes statistics coming from the exporters.
+    It takes a sequence of dictionaries (like a data frame)
+    and extracts some metrics.
+    """
+
+    def _simplify(p):
+        for s in [
+            "remove_unused",
+            "constant_folding",
+            "remove_identity",
+            "remove_duplicated_initializer",
+            "dynamic_dimension_naming",
+            "inline",
+            "check",
+            "build_graph_for_pattern",
+            "pattern_optimization",
+        ]:
+            if s in p or s.replace("_", "-") in p:
+                return s
+        if p.startswith(("apply_", "match_")):
+            return p
+        return "other"
+
+    def _add(d, a, v, use_max=False):
+        if v:
+            if a not in d:
+                d[a] = v
+            elif use_max:
+                d[a] = max(d[a], v)
+            else:
+                d[a] += v
+
+    counts: Dict[str, Any] = {}
+    applied_pattern_time: Dict[str, Any] = {}
+    applied_pattern_n: Dict[str, Any] = {}
+    matching_pattern_time: Dict[str, Any] = {}
+    matching_pattern_n: Dict[str, Any] = {}
+
+    for obs in data:
+        pattern = _simplify(obs["pattern"])
+        _add(counts, "opt_nodes_added", obs.get("added", 0))
+        _add(counts, "opt_nodes_removed", obs.get("removed", 0))
+        _add(counts, "opt_time_steps", obs.get("time_in", 0))
+        _add(counts, "opt_n_steps", 1)
+        _add(
+            counts,
+            "opt_n_iteration",
+            max(counts.get("opt_n_iteration", 0), obs.get("iteration", 0)),
+            use_max=True,
+        )
+
+        if pattern.startswith("apply_"):
+            _add(counts, "opt_n_applied_patterns", 1)
+            _add(counts, "opt_time_applied_patterns", obs.get("time_in", 0))
+            _add(applied_pattern_time, pattern, obs.get("time_in", 0))
+            _add(applied_pattern_n, pattern, 1)
+        elif pattern.startswith("match_"):
+            _add(counts, "opt_n_matching_patterns", 1)
+            _add(counts, "opt_time_matching_patterns", obs.get("time_in", 0))
+            _add(matching_pattern_time, pattern, obs.get("time_in", 0))
+            _add(matching_pattern_n, pattern, 1)
+        else:
+            _add(counts, f"opt_time_{pattern}", obs.get("time_in", 0))
+            _add(counts, f"opt_n_{pattern}", 1)
+            _add(counts, f"opt_nodes_added_{pattern}", obs.get("added", 0))
+            _add(counts, f"opt_nodes_removed_{pattern}", obs.get("removed", 0))
+
+    if applied_pattern_time:
+        longest = max((v, k) for k, v in applied_pattern_time.items())
+        counts["opt_top_time_applied_pattern"], counts["opt_top_time_applied_pattern_arg"] = (
+            longest
+        )
+        longest = max((v, k) for k, v in applied_pattern_n.items())
+        counts["opt_top_n_applied_pattern"], counts["opt_top_n_applied_pattern_arg"] = longest
+
+    if matching_pattern_time:
+        longest = max((v, k) for k, v in matching_pattern_time.items())
+        (
+            counts["opt_top_time_matching_pattern"],
+            counts["opt_top_time_matching_pattern_arg"],
+        ) = longest
+        longest = max((v, k) for k, v in matching_pattern_n.items())
+        counts["opt_top_n_matching_pattern"], counts["opt_top_n_matching_pattern_arg"] = (
+            longest
+        )
+    counts["onnx_opt_optimized"] = 1
+    return counts
+
+
 def call_torch_export_custom(
     data: Dict[str, Any],
     exporter: str,
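A hedged example of the kind of per-step records `process_statistics` consumes and what it derives; the input rows are made up:

    stats = process_statistics([
        {"pattern": "apply_AttentionPattern", "time_in": 0.010, "added": 1, "removed": 3, "iteration": 2},
        {"pattern": "match_AttentionPattern", "time_in": 0.002, "iteration": 2},
        {"pattern": "remove_unused", "time_in": 0.005, "removed": 7, "iteration": 2},
    ])
    assert stats["opt_nodes_removed"] == 10
    assert stats["opt_top_n_applied_pattern_arg"] == "apply_AttentionPattern"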
@@ -1696,67 +1881,10 @@ def call_torch_export_custom(
     if "ERR_export_onnx_c" in summary:
         return summary, data

-    new_stat = {}
+    new_stat: Dict[str, Any] = {k: v for k, v in opt_stats.items() if k.startswith("time_")}
+    new_stat.update({k[5:]: v for k, v in opt_stats.items() if k.startswith("stat_time_")})
     if "optimization" in opt_stats:
-        …
-        max_iter = 0
-        applied = {}
-        matched = set()
-        n_applied = 0
-        by_pattern = {}
-        by_pattern_n = {}
-        by_iter = {}
-        cst_added, cst_removed, cst_time_in = 0, 0, 0.0
-
-        for obs in opt_stats["optimization"]:
-            pattern = obs["pattern"]
-            if pattern == "constant_folding":
-                cst_added += obs.get("added", 0)
-                cst_removed += obs.get("removed", 0)
-                cst_time_in += obs.get("time_in", 0)
-            if pattern not in by_pattern:
-                by_pattern[pattern] = 0
-                by_pattern_n[pattern] = 0
-                by_iter[pattern] = 0
-            time_in += obs.get("time_in", 0)
-            added += obs.get("added", 0)
-            removed += obs.get("removed", 0)
-            max_iter = max(max_iter, obs.get("iteration", 0))
-            by_pattern[pattern] += obs.get("time_in", 0)
-            by_pattern_n[pattern] += obs.get("added", 0) - obs.get("removed", 0)
-            if not pattern.startswith("match"):
-                by_iter[pattern] = max(by_iter[pattern], obs.get("iteration", 0))
-            p = obs["pattern"]
-            if p.startswith("match_"):
-                matched.add(p)
-            elif p.startswith("apply_"):
-                key = f"op_opt_{p}"
-                key2 = f"op_opt_maxiter_{p}"
-                if key not in applied:
-                    applied[key] = 1
-                    applied[key2] = obs["iteration"]
-                else:
-                    applied[key] += 1
-                    applied[key2] = max(obs["iteration"], applied[key2])
-                n_applied += 1
-
-        new_stat.update(
-            dict(
-                onnx_opt_optimized=1,
-                op_opt_all_time_in=time_in,
-                op_opt_all_added=added,
-                op_opt_all_removed=removed,
-                op_opt_max_iter=max_iter,
-                op_opt_unique_matched=len(matched),
-                op_opt_unique_applied=len(applied),
-                op_opt_n_applied=n_applied,
-                time_export_optimization=time_in,
-                op_opt_export_optimization=time_in,
-                op_opt_cst_time_in=cst_time_in,
-                op_opt_cst_added=cst_added,
-                op_opt_cst_removed=cst_removed,
-            )
-        )
+        new_stat.update(process_statistics(opt_stats["optimization"]))

     summary.update(new_stat)
     assert epo is not None, "no onnx export was found"
@@ -1875,3 +2003,21 @@ def run_ort_fusion(
         f"opt_ort_{model_type}_duration": duration,
         f"opt_ort_{model_type}_duration_save": d,
     }, {f"opt_ort_{model_type}": output_path}
+
+
+def _compute_final_statistics(summary: Dict[str, Any]):
+    """
+    Updates the statistics in place. It adds:
+
+    - speedup
+    """
+    stats = {}
+    if (
+        "time_run_latency" in summary
+        and "time_run_onnx_ort_latency" in summary
+        and summary["time_run_onnx_ort_latency"] > 0
+    ):
+        stats["stat_estimated_speedup_ort"] = (
+            summary["time_run_latency"] / summary["time_run_onnx_ort_latency"]
+        )
+    summary.update(stats)
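For example, a model whose eager latency is twice the ONNX Runtime latency gets a speedup of 2:

    summary = {"time_run_latency": 0.084, "time_run_onnx_ort_latency": 0.042}
    _compute_final_statistics(summary)
    # summary["stat_estimated_speedup_ort"] == 2.0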
{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: onnx-diagnostic
-Version: 0.7.11
+Version: 0.7.12
 Summary: Investigate ONNX models
 Home-page: https://github.com/sdpython/onnx-diagnostic
 Author: Xavier Dupré
@@ -25,7 +25,7 @@ Description-Content-Type: text/x-rst
 License-File: LICENSE.txt
 Requires-Dist: numpy
 Requires-Dist: onnx>=1.16.0
-Requires-Dist: onnxruntime>=1.…
+Requires-Dist: onnxruntime>=1.23
 Requires-Dist: optree
 Requires-Dist: torch>=2.8
 Requires-Dist: torch_geometric
{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-onnx_diagnostic/__init__.py,sha256=…
+onnx_diagnostic/__init__.py,sha256=dcCB9tAfK6HWFqGTvBN7m6WdJ5DFFu0P3gcwcKdA7MI,174
 onnx_diagnostic/__main__.py,sha256=YmyV_Aq_ianDlHyKLHMa6h8YK3ZmFPpLVHLKjM91aCk,79
-onnx_diagnostic/_command_lines_parser.py,sha256=…
+onnx_diagnostic/_command_lines_parser.py,sha256=wleBwnoCDyAWRYRREUSGkwAJKw2YI4Td_7ydxmdOXfI,33457
 onnx_diagnostic/api.py,sha256=BhCl_yCd78N7TlVtPOHjeYv1QBEy39TjZ647rcHqLh0,345
 onnx_diagnostic/doc.py,sha256=t3RELgfooYnVMAi0JSpggWkQEgUsREz8NmRvn0TnLI8,2829
 onnx_diagnostic/ext_test_case.py,sha256=emfQGiQSz5FVDhyJ1Acsv_Tast7tWl426TjtpNqxDBU,43558
@@ -17,13 +17,13 @@ onnx_diagnostic/helpers/config_helper.py,sha256=H2mOcMXfrcolFnt8EuqmRFkpQ3YdNRDf
 onnx_diagnostic/helpers/doc_helper.py,sha256=pl5MZd3_FaE8BqQnqoBuSBxoNCFcd2OJd3eITUSku5c,5897
 onnx_diagnostic/helpers/graph_helper.py,sha256=hevQT5a7_QuriVPQcbT5qe18n99Doyl5h3-qshx1-uk,14093
 onnx_diagnostic/helpers/helper.py,sha256=mRQ-wo9P30m0Z0_v3EfEDwK_dZFTUhIVKo-5ut9DPW8,63194
-onnx_diagnostic/helpers/log_helper.py,sha256=…
+onnx_diagnostic/helpers/log_helper.py,sha256=SKzxJ6DdP9uq4e2feA2nqd2Rreq4G-ujKZFUELfycP0,85674
 onnx_diagnostic/helpers/memory_peak.py,sha256=OT6mz0muBbBZY0pjgW2_eCk_lOtFRo-5w4jFo2Z6Kok,6380
 onnx_diagnostic/helpers/mini_onnx_builder.py,sha256=FgK-Kws1WpSYdYJCPyONwQYY3AjbgUHimZlaYyiNUfE,21286
 onnx_diagnostic/helpers/model_builder_helper.py,sha256=tJi4VkP0TS2yyDSxQPNu9WRoSnPCAjr6L0J49X2LdXk,12810
 onnx_diagnostic/helpers/onnx_helper.py,sha256=oxl3x0EQowGP9kfz8aKDqnJZcvYY8FeZLsfoLJDiSUg,39826
 onnx_diagnostic/helpers/ort_session.py,sha256=UgUUeUslDxEFBc6w6f3HMq_a7bn4TBlItmojqWquSj4,29281
-onnx_diagnostic/helpers/rt_helper.py,sha256=…
+onnx_diagnostic/helpers/rt_helper.py,sha256=E9fQ76lcLJqcOCNsAeZBdxmmEO_FH0oSIlFRU2gnQ6U,5229
 onnx_diagnostic/helpers/torch_helper.py,sha256=e0KkSTdoZthc5Yuf9e8XVGAx-lqOYy4DeRRe-N4QUYQ,33478
 onnx_diagnostic/reference/__init__.py,sha256=rLZsxOlnb7-81F2CzepGnZLejaROg4JvgFaGR9FwVQA,208
 onnx_diagnostic/reference/evaluator.py,sha256=RzNzjFDeMe-4X51Tb22N6aagazY5ktNq-mRmPcfY5EU,8848
@@ -90,7 +90,7 @@ onnx_diagnostic/tasks/text_generation.py,sha256=hV-oK1bWjtepxkA491Va_0CWrELZbfP4
 onnx_diagnostic/tasks/text_to_image.py,sha256=mOS3Ruosi3hzRMxXLDN7ZkAbi7NnQb7MWwQP_okGVHs,2962
 onnx_diagnostic/tasks/zero_shot_image_classification.py,sha256=jJCMWuOqGv5ahCfjrcqxuYCJFhTgHV5KUf2yyv2yxYA,4624
 onnx_diagnostic/torch_export_patches/__init__.py,sha256=0SaZedwznm1hQUCvXZsGZORV5vby954wEExr5faepGg,720
-onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=…
+onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=TUDY6sRf2Si-t7rK_hdKiFqSP2gjJbPpIGgnW2Mt5eA,28686
 onnx_diagnostic/torch_export_patches/onnx_export_serialization.py,sha256=klvqiMjccwGhiRnLRVbwTi5WWkMfvtnOV5ycirPcAdA,11354
 onnx_diagnostic/torch_export_patches/patch_expressions.py,sha256=vr4tt61cbDnaaaduzMj4UBZ8OUtr6GfDpIWwOYqjWzs,3213
 onnx_diagnostic/torch_export_patches/patch_inputs.py,sha256=3ySY1nAzINSS1hAzTycwfdbPas8G5CDL2MjnaAHBkMU,7825
@@ -106,12 +106,12 @@ onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py,sha256=drq3
 onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py,sha256=mcmZGekzQlLgE_o3SdKlRgCx4ewwyyAuNWZ9CaN_zrI,9317
 onnx_diagnostic/torch_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_models/llms.py,sha256=soyg4yC87ptGoeulJhKqw5opGmuLvH1pn_ZDXZ4Jr8E,90
-onnx_diagnostic/torch_models/validate.py,sha256=…
+onnx_diagnostic/torch_models/validate.py,sha256=oDPnZDFpiPx7s0we4usaD4pQpJEgqnKYjW-L-TM8Bsw,76395
 onnx_diagnostic/torch_models/hghub/__init__.py,sha256=vi1Q7YHdddj1soiBN42MSvJdFqe2_KUoWafHISjwOu8,58
-onnx_diagnostic/torch_models/hghub/hub_api.py,sha256=…
+onnx_diagnostic/torch_models/hghub/hub_api.py,sha256=rFbiPNLET-KdBpnv-p0nKgwHX6d7C_Z0s9zZ86_92kQ,14307
 onnx_diagnostic/torch_models/hghub/hub_data.py,sha256=8V_pAgACPLPsLRYUododg7MSL6str-T3tBEGY4OaeYQ,8724
 onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py,sha256=3yH1pQbCYNDmRMNUCwMFf5ELnAa35ubTKD2JRF5y9Ls,287515
-onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=…
+onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=NgKFt3fwM5PYUOWwApKphiAWfQyJk3rjGXHr4kkSRiE,13707
 onnx_diagnostic/torch_models/hghub/model_specific.py,sha256=j50Nu7wddJMoqmD4QzMbNdFDUUgUmSBKRzPDH55TlUQ,2498
 onnx_diagnostic/torch_models/untrained/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_models/untrained/llm_phi2.py,sha256=JbGZmW41MPJcQgqaJc9R2G00nI79nI-lABN-ffA1lmY,4037
@@ -119,8 +119,8 @@ onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py,sha256=QXw_Bs2SzfeiQMf-tm
 onnx_diagnostic/torch_onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_onnx/runtime_info.py,sha256=1g9F_Jf9AAgYQU4stbsrFXwQl-30mWlQrFbQ7val8Ps,9268
 onnx_diagnostic/torch_onnx/sbs.py,sha256=fN799L_G1c2RKEuNcKt_MnQri5dwD4OzeCkBBFFoUBI,16865
-onnx_diagnostic-0.7.11.dist-info/licenses/LICENSE.txt,sha256=…
-onnx_diagnostic-0.7.11.dist-info/METADATA,sha256=…
-onnx_diagnostic-0.7.11.dist-info/WHEEL,sha256=…
-onnx_diagnostic-0.7.11.dist-info/top_level.txt,sha256=…
-onnx_diagnostic-0.7.11.dist-info/RECORD,,
+onnx_diagnostic-0.7.12.dist-info/licenses/LICENSE.txt,sha256=Vv6TXglX6Rc0d-f8aREhayhT-6PMQXEyOmI2NKlUCMc,1045
+onnx_diagnostic-0.7.12.dist-info/METADATA,sha256=aQ02curD3P5PXXiaUBlf6pLkpoqMR_F6721HDpsxhLE,7435
+onnx_diagnostic-0.7.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+onnx_diagnostic-0.7.12.dist-info/top_level.txt,sha256=KwNkXewmcobM3ZT1DJLVWH6ebJzA5qKg7cWqKfpGNT4,16
+onnx_diagnostic-0.7.12.dist-info/RECORD,,

{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/WHEEL
File without changes

{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/licenses/LICENSE.txt
File without changes

{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/top_level.txt
File without changes