onnx-diagnostic 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +78 -22
- onnx_diagnostic/export/api.py +35 -5
- onnx_diagnostic/export/control_flow.py +511 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/ext_test_case.py +33 -9
- onnx_diagnostic/helpers/cache_helper.py +217 -203
- onnx_diagnostic/helpers/helper.py +6 -2
- onnx_diagnostic/helpers/log_helper.py +39 -5
- onnx_diagnostic/helpers/memory_peak.py +2 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +55 -3
- onnx_diagnostic/helpers/onnx_helper.py +13 -16
- onnx_diagnostic/helpers/rt_helper.py +579 -15
- onnx_diagnostic/helpers/torch_helper.py +5 -0
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/tasks/text2text_generation.py +1 -0
- onnx_diagnostic/tasks/text_generation.py +84 -54
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +563 -61
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- onnx_diagnostic/torch_models/validate.py +620 -213
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +30 -28
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0

@@ -1,5 +1,6 @@
 import json
 import os
+import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 import numpy as np
 import onnx
@@ -10,13 +11,9 @@ from .ort_session import InferenceSessionForTorch


 def name_type_to_onnx_dtype(name: str) -> int:
-    if name == "tensor(int64)":
-        return onnx.TensorProto.INT64
-    if name == "tensor(float)":
-        return onnx.TensorProto.FLOAT
-    if name == "tensor(float16)":
-        return onnx.TensorProto.FLOAT16
-    raise AssertionError(f"Unexpected value {name!r}")
+    assert name.startswith("tensor(") and name.endswith(")"), f"Invalid value name={name!r}"
+    look = name[7:-1]
+    return getattr(onnx.TensorProto, look.upper())


 def make_feeds(
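
`name_type_to_onnx_dtype` now derives the dtype generically from the `tensor(...)` string instead of enumerating a few cases, so any name matching an `onnx.TensorProto` attribute resolves. A quick sketch of the new behavior (enum values checked against onnx):

```python
import onnx

# Same logic as the 0.8.2 version: strip "tensor(" and ")",
# then look the remainder up on onnx.TensorProto.
def name_type_to_onnx_dtype(name: str) -> int:
    assert name.startswith("tensor(") and name.endswith(")"), f"Invalid value name={name!r}"
    return getattr(onnx.TensorProto, name[7:-1].upper())

print(name_type_to_onnx_dtype("tensor(float)"))     # 1  == onnx.TensorProto.FLOAT
print(name_type_to_onnx_dtype("tensor(bfloat16)"))  # 16 == onnx.TensorProto.BFLOAT16
```
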
@@ -153,7 +150,7 @@ def make_empty_cache(
 def generate_and_validate(
     model,
     input_ids: torch.Tensor,
-    eos_token_id: int,
+    eos_token_id: int = 2,
     max_new_tokens: int = 100,
     session: Optional[Union[InferenceSessionForTorch, onnx.ModelProto, str]] = None,
     atol: float = 0.1,
@@ -262,10 +259,10 @@ def generate_and_validate(
 def onnx_generate(
     model_or_path: Union[onnx.ModelProto, str, InferenceSessionForTorch],
     input_ids: torch.Tensor,
-    eos_token_id: int,
+    eos_token_id: int = 2,
     max_new_tokens=100,
     return_session: bool = False,
-) -> Union[torch.Tensor, Tuple[torch.Tensor, InferenceSessionForTorch]]:
+) -> Union[torch.Tensor, Tuple[torch.Tensor, InferenceSessionForTorch, Dict[str, Any]]]:
     """
     Implements a simple method ``generate`` for an ONNX model.
     The function does not expect any ``position_ids`` as input.
@@ -277,7 +274,7 @@ def onnx_generate(
     :param return_session: returns the instance of class
         :class:`InferenceSessionForTorch
         <onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch>`
-        created if necessary
+        created if necessary, the function returns the feeds for the next iteration
     :return: input tokens concatenated with new tokens

     .. runpython::
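
With `return_session=True`, `onnx_generate` now returns a third element: the feeds prepared for the next decoding iteration. A hedged sketch of a caller, where `"model.onnx"` is a placeholder path to an exported decoder:

```python
import torch
from onnx_diagnostic.helpers.rt_helper import onnx_generate

# "model.onnx" is a placeholder; any text-generation export with
# input_ids/attention_mask/past_key_values inputs should fit.
prompt = torch.randint(0, 1000, (1, 8), dtype=torch.int64)
tokens, session, feeds = onnx_generate(
    "model.onnx", prompt, eos_token_id=2, max_new_tokens=4, return_session=True
)
print(tokens.shape)   # the prompt plus the generated tokens
print(sorted(feeds))  # inputs ready for one more decoding step
```
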
@@ -353,12 +350,19 @@ def onnx_generate(
     input_shapes = session.input_shapes
     input_names = session.input_names
     input_types = session.input_types
+    has_position_ids = "position_ids" in session.input_names

     assert (
         len(input_names) > 2
         and input_names[:2] == ["input_ids", "attention_mask"]
-        and input_names[2].startswith("past_key_values")
-    ),
+        and input_names[3 if has_position_ids else 2].startswith("past_key_values")
+    ), (
+        f"Only text generation is supported but input_names == {input_names}, "
+        f"has_position_ids={has_position_ids}"
+    )
+    assert (
+        not has_position_ids or input_names[2] == "position_ids"
+    ), f"position_ids must the third input but input_names={input_names}"

     # First call: prefill
     feeds = dict(
@@ -370,6 +374,10 @@ def onnx_generate(
             input_ids.shape[0], input_names[2:], input_shapes[2:], input_types[2:]
         ),
     )
+    if has_position_ids:
+        feeds["position_ids"] = torch.unsqueeze(
+            torch.arange(input_ids.shape[1], dtype=torch.int64, device=input_ids.device), 0
+        )

     outputs = session.run(None, feeds)

@@ -393,11 +401,21 @@ def onnx_generate(
                 input_ids.shape, dtype=input_ids.dtype, device=input_ids.device
             ),
         )
-        feeds.update(dict(zip(input_names[2:], outputs[1:])))
+        if has_position_ids:
+            feeds["position_ids"] = torch.unsqueeze(
+                torch.arange(
+                    input_ids.shape[1],
+                    input_ids.shape[1] + 1,
+                    dtype=torch.int64,
+                    device=input_ids.device,
+                ),
+                0,
+            )
+        feeds.update(dict(zip(input_names[3 if has_position_ids else 2 :], outputs[1:])))
         outputs = session.run(None, feeds)

     if return_session:
-        return input_ids, session
+        return input_ids, session, feeds
     return input_ids

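
The two `position_ids` additions mirror each other: prefill feeds one position per prompt token, while each decode step feeds the single index of the token being generated. A small sketch of both tensors:

```python
import torch

# Prefill: positions 0..n-1 for an n-token prompt.
input_ids = torch.randint(0, 1000, (1, 4), dtype=torch.int64)
prefill = torch.unsqueeze(torch.arange(input_ids.shape[1], dtype=torch.int64), 0)
print(prefill)  # tensor([[0, 1, 2, 3]])

# Decode: a single position, the current sequence length.
seq_len = input_ids.shape[1]
decode = torch.unsqueeze(torch.arange(seq_len, seq_len + 1, dtype=torch.int64), 0)
print(decode)  # tensor([[4]])
```
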
|
@@ -474,3 +492,549 @@ def onnx_generate_with_genai(
     if return_session:
         return input_ids, session
     return input_ids
+
+
+_mapping_types = {
+    "float": "F",
+    "double": "D",
+    "float16": "H",
+    "uint8": "U8",
+    "uint16": "U16",
+    "uint32": "U32",
+    "uint64": "U64",
+    "int8": "I8",
+    "int16": "I16",
+    "int32": "I32",
+    "int64": "I64",
+}
+
+
+def _process_shape(shape_df):
+    if isinstance(shape_df, float) or len(shape_df) == 0:
+        return ""
+    values = []
+    for val in shape_df:
+        if len(val) != 1:
+            raise ValueError(f"Unable to process shape {val!r} from {values!r}.")
+        for _k, _v in val.items():
+            k, v = _k, _v
+            break
+        if v:
+            vs = "x".join(map(str, v))
+            values.append(f"{_mapping_types.get(k,k)}[{vs}]")
+        else:
+            values.append(f"{_mapping_types.get(k,k)}")
+    return "+".join(values)
+
+
+def post_process_df_profile(
+    df: "pandas.DataFrame",  # noqa: F821
+    first_it_out: bool = False,
+    agg: bool = False,
+    agg_op_name: bool = True,
+    with_shape: bool = False,
+) -> "pandas.DataFrame":  # noqa: F821
+    """
+    Post-processes a dataframe obtained after profiling onnxruntime.
+    It adds a column for a more explicit event name and adds
+    a column for the iteration number.
+
+    :param agg: aggregate the result
+    :param first_it_out: leave the first iteration
+        out of the aggregation
+    :param agg_op_name: aggregate on operator name or operator index
+    :param with_shape: keep the shape to aggregate
+    :return: DataFrame
+    """
+    events = {"kernel_time", "fence_after", "fence_before"}
+
+    def sep_event(s):
+        for e in events:
+            if s.endswith(e):
+                return e
+        return s
+
+    df = df.copy()
+    df["event_name"] = df["name"].apply(sep_event)
+    df["iteration"] = -1
+    current = -1
+    for i in range(df.shape[0]):
+        if df.loc[i, "name"] == "SequentialExecutor::Execute":
+            current += 1
+        df.loc[i, "iteration"] = current
+
+    if not agg:
+        if with_shape:
+            df["args_input_type_shape"] = df["args_input_type_shape"].apply(_process_shape)
+            df["args_output_type_shape"] = df["args_output_type_shape"].apply(_process_shape)
+        else:
+            df = df.drop(["args_input_type_shape", "args_output_type_shape"], axis=1)
+        if first_it_out:
+            df["it==0"] = (df["iteration"] <= 0).astype(int)
+        return df
+
+    agg_cols = ["cat", "args_node_index", "args_op_name", "args_provider", "event_name"]
+    if with_shape:
+        agg_cols.append("args_input_type_shape")
+        df["args_input_type_shape"] = df["args_input_type_shape"].apply(_process_shape)
+        df["args_output_type_shape"] = df["args_output_type_shape"].apply(_process_shape)
+    else:
+        df = df.drop(["args_input_type_shape", "args_output_type_shape"], axis=1)
+
+    if first_it_out:
+        df["it==0"] = (df["iteration"] <= 0).astype(int)
+        agg_cols.insert(0, "it==0")
+    if agg_op_name:
+        del agg_cols[agg_cols.index("args_node_index")]
+    for c in agg_cols:
+        df[c] = df[c].fillna("")
+    df["dur"] = df["dur"].fillna(0)
+    agg = df[[*agg_cols, "dur"]].groupby(agg_cols).sum()
+    return agg
+
+
+def js_profile_to_dataframe(
+    filename: str,
+    as_df: bool = True,
+    first_it_out: bool = False,
+    agg: bool = False,
+    agg_op_name: bool = False,
+    with_shape: bool = False,
+) -> Union[List, "pandas.DataFrame"]:  # noqa: F821
+    """
+    Profiles the execution of an onnx graph with onnxruntime.
+
+    :param filename: filename holding the profiling stored in json format
+    :param as_df: returns the results as a DataFrame rather than a list of rows
+    :param first_it_out: if aggregated, leaves the first iteration out
+    :param agg: aggregate by event
+    :param agg_op_name: aggregate on operator name or operator index
+    :param with_shape: keep the shape before aggregating
+    :return: DataFrame or dictionary
+    """
+    with open(filename, "r") as f:
+        content = f.read()
+    js = json.loads(content)
+
+    suffixes = ["_kernel_time", "_fence_before", "_fence_after"]
+    rows = []
+    for row in js:
+        if "args" in row and isinstance(row["args"], dict):
+            for k, v in row["args"].items():
+                row[f"args_{k}"] = v
+            del row["args"]
+        name = row["name"]
+        for suf in suffixes:
+            if name.endswith(suf):
+                changed = name[: -len(suf)]
+                row["op_name"] = changed
+                break
+        rows.append(row)
+    if as_df:
+        import pandas
+
+        return post_process_df_profile(
+            pandas.DataFrame(rows),
+            first_it_out=first_it_out,
+            agg=agg,
+            agg_op_name=agg_op_name,
+            with_shape=with_shape,
+        )
+    return rows
+
+
+def _preprocess_graph1(df):
+    df = df.copy()
+    df["args_provider"] = df["args_provider"].apply(
+        lambda s: s.replace("ExecutionProvider", "") if isinstance(s, str) else s
+    )
+    agg_cols = ["dur", "args_op_name", "args_provider"]
+    for c in ["it==0", "args_input_type_shape"]:
+        if c in df.columns:
+            agg_cols.append(c)
+    if "it==0" in df.columns:
+        vs = ["t>=1", "t=0"]
+        df["it==0"] = df["it==0"].apply(lambda v: vs[v])
+    gr_dur = df[agg_cols].groupby(agg_cols[1:]).sum().sort_values("dur")
+    gr_n = df[agg_cols].groupby(agg_cols[1:]).count()
+    gr_n = gr_n.loc[gr_dur.index, :]
+    gr_n.columns = ["count"]
+    gr = gr_dur.merge(gr_n, left_index=True, right_index=True, how="outer")
+    gr["ratio"] = gr["dur"] / gr["dur"].sum()
+    return gr_dur, gr_n, gr
+
+
+def _preprocess_graph2(df):
+    df = df.reset_index(drop=False).copy()
+    df["args_node_index"] = df["args_node_index"].apply(
+        lambda i: int(i) if i not in {None, ""} else -1
+    )
+    df["args_provider"] = df["args_provider"].apply(
+        lambda s: s.replace("ExecutionProvider", "") if isinstance(s, str) else s
+    )
+    df = df[(df["cat"] == "Node") & (df["event_name"] == "kernel_time")]
+    agg_cols = ["dur", "args_node_index", "args_op_name", "args_provider"]
+    for c in ["it==0", "args_input_type_shape"]:
+        if c in df.columns:
+            agg_cols.append(c)
+    if "it==0" in df.columns:
+        vs = ["t>=1", "t=0"]
+        df["it==0"] = df["it==0"].apply(lambda v: vs[v])
+    df = df[agg_cols].groupby(agg_cols[1:]).sum()
+    df = df.sort_index(ascending=False)
+    df["ratio"] = df["dur"] / df["dur"].sum()
+    return df
+
+
+def plot_ort_profile(
+    df: "pandas.DataFrame",  # noqa: F821
+    ax0: Optional["matplotlib.axes.Axes"] = None,  # noqa: F821
+    ax1: Optional["matplotlib.axes.Axes"] = None,  # noqa: F821
+    title: Optional[str] = None,
+) -> "matplotlib.axes.Axes":  # noqa: F821
+    """
+    Plots time spent in computation based on a dataframe
+    produced by function :func:`js_profile_to_dataframe`.
+
+    :param df: dataframe
+    :param ax0: first axis to draw time
+    :param ax1: second axis to draw occurrences
+    :param title: graph title
+    :return: the graph
+
+    .. plot::
+        :include-source:
+
+        import numpy as np
+        from onnx import TensorProto
+        import onnx.helper as oh
+        from onnx.checker import check_model
+        from onnx.numpy_helper import from_array
+        import matplotlib.pyplot as plt
+        from onnxruntime import InferenceSession, SessionOptions
+        from onnx_diagnostic.helpers.rt_helper import js_profile_to_dataframe, plot_ort_profile
+
+
+        def get_model():
+            model_def0 = oh.make_model(
+                oh.make_graph(
+                    [
+                        oh.make_node("Add", ["X", "init1"], ["X1"]),
+                        oh.make_node("Abs", ["X"], ["X2"]),
+                        oh.make_node("Add", ["X", "init3"], ["inter"]),
+                        oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
+                        oh.make_node("Sub", ["X2", "Xm"], ["final"]),
+                    ],
+                    "test",
+                    [oh.make_tensor_value_info("X", TensorProto.FLOAT, [None])],
+                    [oh.make_tensor_value_info("final", TensorProto.FLOAT, [None])],
+                    [
+                        from_array(np.array([1], dtype=np.float32), name="init1"),
+                        from_array(np.array([3], dtype=np.float32), name="init3"),
+                    ],
+                ),
+                opset_imports=[oh.make_opsetid("", 18)],
+                ir_version=9,
+            )
+            check_model(model_def0)
+            return model_def0
+
+
+        sess_options = SessionOptions()
+        sess_options.enable_profiling = True
+        sess = InferenceSession(
+            get_model().SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
+        )
+        for _ in range(11):
+            sess.run(None, dict(X=np.arange(10).astype(np.float32)))
+        prof = sess.end_profiling()
+
+        df = js_profile_to_dataframe(prof, first_it_out=True)
+        print(df.head())
+
+        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
+        plot_ort_profile(df, ax[0], ax[1], "test_title")
+        fig.tight_layout()
+
+    With ``agg=True``:
+
+    .. plot::
+        :include-source:
+
+        import numpy as np
+        from onnx import TensorProto
+        import onnx.helper as oh
+        from onnx.checker import check_model
+        from onnx.numpy_helper import from_array
+        import matplotlib.pyplot as plt
+        from onnxruntime import InferenceSession, SessionOptions
+        from onnx_diagnostic.helpers.rt_helper import js_profile_to_dataframe, plot_ort_profile
+
+
+        def get_model():
+            model_def0 = oh.make_model(
+                oh.make_graph(
+                    [
+                        oh.make_node("Add", ["X", "init1"], ["X1"]),
+                        oh.make_node("Abs", ["X"], ["X2"]),
+                        oh.make_node("Add", ["X", "init3"], ["inter"]),
+                        oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
+                        oh.make_node("Sub", ["X2", "Xm"], ["final"]),
+                    ],
+                    "test",
+                    [oh.make_tensor_value_info("X", TensorProto.FLOAT, [None])],
+                    [oh.make_tensor_value_info("final", TensorProto.FLOAT, [None])],
+                    [
+                        from_array(np.array([1], dtype=np.float32), name="init1"),
+                        from_array(np.array([3], dtype=np.float32), name="init3"),
+                    ],
+                ),
+                opset_imports=[oh.make_opsetid("", 18)],
+                ir_version=9,
+            )
+            check_model(model_def0)
+            return model_def0
+
+
+        sess_options = SessionOptions()
+        sess_options.enable_profiling = True
+        sess = InferenceSession(
+            get_model().SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
+        )
+        for _ in range(11):
+            sess.run(None, dict(X=np.arange(10).astype(np.float32)))
+        prof = sess.end_profiling()
+
+        df = js_profile_to_dataframe(prof, first_it_out=True, agg=True)
+        print(df.head())
+
+        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
+        plot_ort_profile(df, ax[0], ax[1], "test_title")
+        fig.tight_layout()
+    """
+    fontsize = 10
+    if ax0 is None:
+        import matplotlib.pyplot as plt
+
+        ax0 = plt.gca()
+
+    if "args_provider" in df.columns:
+        # Aggregation by operator
+        gr_dur, gr_n, _ = _preprocess_graph1(df)
+        gr_dur.plot.barh(ax=ax0)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=fontsize)
+            ax0.get_yaxis().set_label_text("")
+            ax0.set_yticklabels(
+                ax0.get_yticklabels(), rotation=45, ha="right", fontsize=fontsize
+            )
+        if title is not None:
+            ax0.set_title(title)
+        if ax1 is not None:
+            gr_n.plot.barh(ax=ax1)
+            ax1.set_title("n occurrences")
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=fontsize)
+                ax1.get_yaxis().set_label_text("")
+                ax1.set_yticklabels(
+                    ax1.get_yticklabels(), rotation=45, ha="right", fontsize=fontsize
+                )
+        return ax0
+
+    df = _preprocess_graph2(df)
+    df[["dur"]].plot.barh(ax=ax0)
+    if title is not None:
+        ax0.set_title(title)
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=fontsize)
+        ax0.get_yaxis().set_label_text("")
+        ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=fontsize)
+    return ax0
+
+
+def plot_ort_profile_timeline(
+    df: "pandas.DataFrame",  # noqa: F821
+    ax: Optional["matplotlib.axes.Axes"] = None,  # noqa: F821
+    iteration: int = -2,
+    title: Optional[str] = None,
+    quantile: float = 0.5,
+    fontsize: int = 12,
+) -> "matplotlib.axes.Axes":  # noqa: F821
+    """
+    Creates a timeline based on a dataframe
+    produced by function :func:`js_profile_to_dataframe`.
+
+    :param df: dataframe
+    :param ax: first axis to draw time
+    :param iteration: iteration to plot, negative value to start from the end
+    :param title: graph title
+    :param quantile: draw the 10% less consuming operators in a different color
+    :param fontsize: font size
+    :return: the graph
+
+    .. plot::
+        :include-source:
+
+        import numpy as np
+        from onnx import TensorProto
+        import onnx.helper as oh
+        from onnx.checker import check_model
+        from onnx.numpy_helper import from_array
+        import matplotlib.pyplot as plt
+        from onnxruntime import InferenceSession, SessionOptions
+        from onnx_diagnostic.helpers.rt_helper import (
+            js_profile_to_dataframe,
+            plot_ort_profile_timeline,
+        )
+
+
+        def get_model():
+            model_def0 = oh.make_model(
+                oh.make_graph(
+                    [
+                        oh.make_node("Add", ["X", "init1"], ["X1"]),
+                        oh.make_node("Abs", ["X"], ["X2"]),
+                        oh.make_node("Add", ["X", "init3"], ["inter"]),
+                        oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
+                        oh.make_node("Sub", ["X2", "Xm"], ["final"]),
+                    ],
+                    "test",
+                    [oh.make_tensor_value_info("X", TensorProto.FLOAT, [None])],
+                    [oh.make_tensor_value_info("final", TensorProto.FLOAT, [None])],
+                    [
+                        from_array(np.array([1], dtype=np.float32), name="init1"),
+                        from_array(np.array([3], dtype=np.float32), name="init3"),
+                    ],
+                ),
+                opset_imports=[oh.make_opsetid("", 18)],
+                ir_version=9,
+            )
+            check_model(model_def0)
+            return model_def0
+
+
+        sess_options = SessionOptions()
+        sess_options.enable_profiling = True
+        sess = InferenceSession(
+            get_model().SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
+        )
+        for _ in range(11):
+            sess.run(None, dict(X=np.arange(10).astype(np.float32)))
+        prof = sess.end_profiling()
+
+        df = js_profile_to_dataframe(prof, first_it_out=True)
+        print(df.head())
+
+        fig, ax = plt.subplots(1, 1, figsize=(10, 5))
+        plot_ort_profile_timeline(df, ax, title="test_timeline", quantile=0.5)
+        fig.tight_layout()
+    """
+    if ax is None:
+        import matplotlib.pyplot as plt
+
+        ax = plt.gca()
+
+    df = df.copy()
+    df["iteration"] = df["iteration"].astype(int)
+    iterations = set(df["iteration"])
+    n_iter = iteration if iteration >= 0 else max(iterations) + 1 + iteration
+    dfi = df[df["iteration"] == n_iter]
+    assert dfi.shape[0] > 0, f"Iteration {iteration} cannot be found in {iterations}."
+
+    if "fence_before" in set(dfi["event_name"]):
+        started = {}
+        data = []
+        for irow in dfi.iterrows():
+            assert isinstance(irow, tuple), f"pandas has changed its api, type is {type(irow)}"
+            assert len(irow) == 2, f"pandas has changed its api, row is {irow}"
+            row = irow[1]
+            it = row["iteration"]
+            op_type = row["args_op_name"]
+            op_name = row["op_name"]
+            event_name = row["event_name"]
+            provider = row["args_provider"]
+            ts = float(row["ts"])
+            dur = float(row["dur"])
+            if event_name == "fence_before":
+                started[op_type, op_name, it] = dict(
+                    op_name=op_name, op_type=op_type, begin=ts
+                )
+            elif event_name == "kernel_time":
+                obs = started[op_type, op_name, it]
+                obs["duration"] = dur
+                obs["begin_kernel"] = ts
+                obs["provider"] = provider
+            elif event_name == "fence_after":
+                obs = started[op_type, op_name, it]
+                obs["end"] = ts
+                data.append(obs)
+                del started[op_type, op_name, it]
+            else:
+                assert event_name in {
+                    "SequentialExecutor::Execute",
+                    "model_run",
+                }, f"Unexpected event_name={event_name!r}, row={row}"
+    else:
+        # New format
+        data = []
+        for irow in dfi.iterrows():
+            row = irow[1]
+            if row["event_name"] != "kernel_time":
+                continue
+            obs = dict(
+                duration=float(row["dur"]),
+                op_name=row["op_name"],
+                op_type=row["args_op_name"],
+                provider=row["args_provider"],
+                begin=float(row["ts"]),
+                end=float(row["ts"]) + float(row["dur"]),
+                begin_kernel=float(row["ts"]),
+            )
+            data.append(obs)
+
+    # durations
+    data_dur = list(sorted(d["duration"] for d in data))
+    threshold = data_dur[int(quantile * len(data_dur))]
+    origin = dfi["ts"].min()
+
+    colors = ["blue", "green", "red", "orange"]
+
+    import matplotlib.patches as mpatches
+
+    cs = [0, 0]
+    for i, obs in enumerate(data):
+        dur = obs["duration"]
+        cat = int(dur >= threshold)
+
+        # color
+        color = colors[cat * 2 + cs[cat] % 2]
+        cs[cat] += 1
+
+        # rectangle
+        t1 = obs["begin"] - origin
+        t2 = obs["end"] - origin
+        shape = mpatches.Rectangle((0, t1), 1, t2 - t1, ec="none", color=color)
+        ax.add_artist(shape)
+        tk1 = obs["begin_kernel"] - origin
+        tk2 = (obs["begin_kernel"] + obs["duration"]) - origin
+        ax.plot([0, 1], [tk1, tk1], "b--")
+        ax.plot([0, 1], [tk2, tk2], "b--")
+        if i == 0:
+            ax.plot([0, 2], [tk1, tk1], "b")
+        elif i == len(data) - 1:
+            ax.plot([0, 2], [tk2, tk2], "b")
+
+        # text
+        y = (tk1 + tk2) / 2
+        text = obs["op_type"]
+        prov = obs["provider"].replace("ExecutionProvider", "")
+        name = obs["op_name"]
+        if len(name) >= 10:
+            name = name[:5] + "..." + name[5:]
+        ax.text(1, y, f"{i}:{prov}:{text}-{name}", fontsize=fontsize, va="center")

+    ax.invert_yaxis()
+    return ax
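
The new profiling helpers condense onnxruntime's `args_input_type_shape` entries into compact strings via `_mapping_types`. A sketch of the encoding, assuming the private helper stays importable from the module in 0.8.2:

```python
from onnx_diagnostic.helpers.rt_helper import _process_shape

# Each entry is a list of {dtype: dims} dicts as onnxruntime emits them;
# dtypes abbreviate ("float" -> "F"), dims join with "x", entries with "+".
print(_process_shape([{"float": [1, 8]}, {"int64": [1]}, {"float16": []}]))
# F[1x8]+I64[1]+H
```
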
@@ -450,6 +450,11 @@ def fake_torchdynamo_exporting():
     """
     memorize = torch.compiler._is_exporting_flag
     torch.compiler._is_exporting_flag = True
+    assert torch.compiler.is_exporting(), (
+        f"Changes not detected "
+        f"torch.compiler._is_exporting_flag={torch.compiler._is_exporting_flag} "
+        f"and torch.compiler.is_exporting()={torch.compiler.is_exporting()}"
+    )
     try:
         yield
     finally:
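
The added assertion makes `fake_torchdynamo_exporting` fail fast if flipping the private flag is no longer visible through `torch.compiler.is_exporting()`. A hedged usage sketch; the import path assumes the helper lives in `torch_helper.py`, the only file in the list above with a matching +5/-0:

```python
import torch
from onnx_diagnostic.helpers.torch_helper import fake_torchdynamo_exporting

# Inside the context, code branching on torch.compiler.is_exporting()
# takes its export-time path without running a real export.
with fake_torchdynamo_exporting():
    print(torch.compiler.is_exporting())  # True
print(torch.compiler.is_exporting())      # back to the previous value
```
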
@@ -311,7 +311,11 @@ def get_inputs_default(
         attention_mask=torch.cat(
             [
                 torch.ones((batch_size, sequence_length), dtype=torch.int64),
-
+                (
+                    torch.ones(input_ids.shape)
+                    if pad_token_id is None
+                    else input_ids.ne(pad_token_id)
+                ).to(torch.int64),
             ],
             axis=-1,
         ),
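
The attention mask's second segment now comes from the tokens themselves: all ones when `pad_token_id` is unset, otherwise zeros at padding positions. A small sketch of that expression:

```python
import torch

pad_token_id = 0
input_ids = torch.tensor([[5, 7, 0, 0]])
mask = (
    torch.ones(input_ids.shape) if pad_token_id is None else input_ids.ne(pad_token_id)
).to(torch.int64)
print(mask)  # tensor([[1, 1, 0, 0]]) -- padding positions masked out
```
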
@@ -151,6 +151,7 @@ def get_inputs(
     assert (
         add_second_input > 0
     ), f"Not implemented for add_second_input={add_second_input}."
+    res["inputs_prompt"] = dict(input_ids=torch.randint(1000, 30000, (1, 11)))
     res["inputs2"] = get_inputs(
         model=model,
         config=config,
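
The new `inputs_prompt` entry is a bare prompt, token ids only, with no attention mask or cache. A sketch of what the added line produces:

```python
import torch

inputs_prompt = dict(input_ids=torch.randint(1000, 30000, (1, 11)))
print(inputs_prompt["input_ids"].shape)  # torch.Size([1, 11]), int64 ids in [1000, 30000)
```
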