PyPI - onnx-diagnostic - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl - Mend

onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

onnx_diagnostic/helpers/rt_helper.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
+import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 import numpy as np
 import onnx
@@ -491,3 +492,549 @@ def onnx_generate_with_genai(
     if return_session:
         return input_ids, session
     return input_ids
+# Short codes used to abbreviate element types when shapes are rendered as text.
+# A type missing from this table is kept verbatim by _process_shape.
+_mapping_types = {
+    "float": "F",
+    "double": "D",
+    "float16": "H",
+    "uint8": "U8",
+    "uint16": "U16",
+    "uint32": "U32",
+    "uint64": "U64",
+    "int8": "I8",
+    "int16": "I16",
+    "int32": "I32",
+    "int64": "I64",
+}
+def _process_shape(shape_df):
+    """
+    Converts the shape description of a profiling event into
+    a compact string such as ``F[2x3]+I64``.
+    :param shape_df: sequence of single-entry dictionaries ``{type: dims}``,
+        a float (presumably the NaN pandas stores for a missing value),
+        ``None`` or an empty sequence means there is nothing to render
+    :return: one item per entry joined with ``+``, the type code is
+        followed by ``[d1xd2...]`` when the dimensions are not empty,
+        an empty string if there is nothing to render
+    :raises ValueError: if an entry does not hold exactly one type
+    """
+    if shape_df is None or isinstance(shape_df, float) or len(shape_df) == 0:
+        return ""
+    values = []
+    for val in shape_df:
+        if len(val) != 1:
+            # reports the whole input, not the partially built output
+            raise ValueError(f"Unable to process shape {val!r} from {shape_df!r}.")
+        ((k, v),) = val.items()
+        code = _mapping_types.get(k, k)
+        if v:
+            vs = "x".join(map(str, v))
+            values.append(f"{code}[{vs}]")
+        else:
+            values.append(f"{code}")
+    return "+".join(values)
+def post_process_df_profile(
+    df: "pandas.DataFrame",  # noqa: F821
+    first_it_out: bool = False,
+    agg: bool = False,
+    agg_op_name: bool = True,
+    with_shape: bool = False,
+) -> "pandas.DataFrame":  # noqa: F821
+    """
+    Post-processes a dataframe obtained after profiling onnxruntime.
+    It adds a column ``event_name`` for a more explicit event name and
+    a column ``iteration`` for the iteration number.
+    :param df: one row per profiling event, it must contain columns
+        ``name``, ``args_input_type_shape``, ``args_output_type_shape`` and,
+        when *agg* is True, ``cat``, ``args_op_name``, ``args_provider``,
+        ``dur`` (and ``args_node_index`` if *agg_op_name* is False)
+    :param first_it_out: adds column ``it==0`` equal to 1 for the events
+        recorded until the end of the first iteration, it is used as
+        an aggregation key so that the first iteration is left out
+        of the other figures
+    :param agg: aggregate the result (sum of ``dur``)
+    :param agg_op_name: aggregate on operator name only (True)
+        or on operator index as well (False)
+    :param with_shape: keep the shapes (as strings), the input shape
+        becomes an aggregation key
+    :return: DataFrame, the input dataframe is left unchanged
+    """
+    events = ("kernel_time", "fence_after", "fence_before")
+    def sep_event(s):
+        for e in events:
+            if s.endswith(e):
+                return e
+        return s
+    df = df.copy()
+    df["event_name"] = df["name"].apply(sep_event)
+    # Every "SequentialExecutor::Execute" event starts a new iteration, the events
+    # seen before the first one get -1. A cumulative sum gives the same numbers
+    # as a row by row loop but does not depend on the index labels.
+    df["iteration"] = (df["name"] == "SequentialExecutor::Execute").cumsum() - 1
+    shape_cols = ["args_input_type_shape", "args_output_type_shape"]
+    if with_shape:
+        for c in shape_cols:
+            df[c] = df[c].apply(_process_shape)
+    else:
+        df = df.drop(shape_cols, axis=1)
+    if first_it_out:
+        df["it==0"] = (df["iteration"] <= 0).astype(int)
+    if not agg:
+        return df
+    agg_cols = ["cat", "args_node_index", "args_op_name", "args_provider", "event_name"]
+    if with_shape:
+        agg_cols.append("args_input_type_shape")
+    if first_it_out:
+        agg_cols.insert(0, "it==0")
+    if agg_op_name:
+        agg_cols.remove("args_node_index")
+    # groupby drops the rows with a missing key by default: missing keys
+    # are replaced by an empty string to keep these rows
+    for c in agg_cols:
+        df[c] = df[c].fillna("")
+    df["dur"] = df["dur"].fillna(0)
+    return df[[*agg_cols, "dur"]].groupby(agg_cols).sum()
+def js_profile_to_dataframe(
+    filename: str,
+    as_df: bool = True,
+    first_it_out: bool = False,
+    agg: bool = False,
+    agg_op_name: bool = False,
+    with_shape: bool = False,
+) -> Union[List, "pandas.DataFrame"]:  # noqa: F821
+    """
+    Loads the profiling data onnxruntime stored in a json file.
+    Every item of dictionary ``args`` of an event is promoted to a key
+    ``args_<name>`` of the event itself and key ``op_name`` is added
+    when the event name ends with ``_kernel_time``, ``_fence_before``
+    or ``_fence_after`` (the name without this suffix).
+    :param filename: filename holding the profiling stored in json format
+    :param as_df: returns the events as a DataFrame processed by
+        :func:`post_process_df_profile`, otherwise as a list of dictionaries
+    :param first_it_out: if aggregated, leaves the first iteration out
+    :param agg: aggregate by event
+    :param agg_op_name: aggregate on operator name or operator index
+    :param with_shape: keep the shape before aggregating
+    :return: DataFrame or list of dictionaries
+    """
+    # json documents are utf-8, the default encoding of the platform is not reliable
+    with open(filename, "r", encoding="utf-8") as f:
+        js = json.load(f)
+    suffixes = ("_kernel_time", "_fence_before", "_fence_after")
+    rows = []
+    for row in js:
+        args = row.get("args")
+        if isinstance(args, dict):
+            # flattens the nested dictionary to get one column per argument
+            del row["args"]
+            for k, v in args.items():
+                row[f"args_{k}"] = v
+        name = row["name"]
+        for suf in suffixes:
+            if name.endswith(suf):
+                row["op_name"] = name[: -len(suf)]
+                break
+        rows.append(row)
+    if not as_df:
+        return rows
+    import pandas
+    return post_process_df_profile(
+        pandas.DataFrame(rows),
+        first_it_out=first_it_out,
+        agg=agg,
+        agg_op_name=agg_op_name,
+        with_shape=with_shape,
+    )
+def _preprocess_graph1(df):
+    """
+    Computes the figures drawn by :func:`plot_ort_profile`
+    when the profile is not aggregated.
+    :param df: dataframe with columns ``dur``, ``args_op_name``,
+        ``args_provider`` and optionally ``it==0``, ``args_input_type_shape``
+        which then become grouping keys as well
+    :return: three dataframes indexed by the grouping keys,
+        the total duration sorted by increasing value (column ``dur``),
+        the number of non-null durations in the same order (column ``count``),
+        both merged with an additional column ``ratio``
+        (share of the total duration)
+    """
+    def _short_provider(provider):
+        if isinstance(provider, str):
+            return provider.replace("ExecutionProvider", "")
+        return provider
+    def _iteration_label(flag):
+        return ["t>=1", "t=0"][flag]
+    data = df.copy()
+    data["args_provider"] = data["args_provider"].apply(_short_provider)
+    keys = ["args_op_name", "args_provider"]
+    keys.extend(c for c in ("it==0", "args_input_type_shape") if c in data.columns)
+    if "it==0" in data.columns:
+        data["it==0"] = data["it==0"].apply(_iteration_label)
+    subset = data[["dur", *keys]]
+    gr_dur = subset.groupby(keys).sum().sort_values("dur")
+    # same row order as the durations
+    gr_n = subset.groupby(keys).count().loc[gr_dur.index, :]
+    gr_n.columns = ["count"]
+    gr = gr_dur.merge(gr_n, left_index=True, right_index=True, how="outer")
+    gr["ratio"] = gr["dur"] / gr["dur"].sum()
+    return gr_dur, gr_n, gr
+def _preprocess_graph2(df):
+    """
+    Computes the figures drawn by :func:`plot_ort_profile`
+    when the profile is aggregated (the keys are stored in the index).
+    Only the rows with ``cat == "Node"`` and ``event_name == "kernel_time"``
+    are kept.
+    :param df: dataframe whose index or columns contain ``cat``,
+        ``event_name``, ``args_op_name``, ``args_provider``, ``dur`` and
+        optionally ``args_node_index``, ``it==0``, ``args_input_type_shape``
+    :return: dataframe with the total duration (column ``dur``) and its
+        share of the total (column ``ratio``) per key,
+        sorted by decreasing key
+    """
+    def _node_index(value):
+        # None, "" and NaN all mean the event is not attached to a node
+        if value is None or value == "" or value != value:
+            return -1
+        return int(value)
+    def _short_provider(provider):
+        if isinstance(provider, str):
+            return provider.replace("ExecutionProvider", "")
+        return provider
+    df = df.reset_index(drop=False).copy()
+    # the node index is missing when the aggregation was done on the operator name
+    has_node_index = "args_node_index" in df.columns
+    if has_node_index:
+        df["args_node_index"] = df["args_node_index"].apply(_node_index)
+    df["args_provider"] = df["args_provider"].apply(_short_provider)
+    # copy: a column of the filtered dataframe may be replaced below
+    df = df[(df["cat"] == "Node") & (df["event_name"] == "kernel_time")].copy()
+    agg_cols = ["dur", "args_op_name", "args_provider"]
+    if has_node_index:
+        agg_cols.insert(1, "args_node_index")
+    for c in ["it==0", "args_input_type_shape"]:
+        if c in df.columns:
+            agg_cols.append(c)
+    if "it==0" in df.columns:
+        vs = ["t>=1", "t=0"]
+        df["it==0"] = df["it==0"].apply(lambda v: vs[v])
+    df = df[agg_cols].groupby(agg_cols[1:]).sum()
+    df = df.sort_index(ascending=False)
+    df["ratio"] = df["dur"] / df["dur"].sum()
+    return df
+def plot_ort_profile(
+    df: "pandas.DataFrame",  # noqa: F821
+    ax0: Optional["matplotlib.axes.Axes"] = None,  # noqa: F821
+    ax1: Optional["matplotlib.axes.Axes"] = None,  # noqa: F821
+    title: Optional[str] = None,
+) -> "matplotlib.axes.Axes":  # noqa: F821
+    """
+    Draws the time spent in every operator as horizontal bars from
+    a dataframe produced by function :func:`js_profile_to_dataframe`.
+    :param df: dataframe, aggregated or not, the aggregated case is
+        detected by the absence of column ``args_provider``
+    :param ax0: axis receiving the durations, the current axis if None
+    :param ax1: axis receiving the number of occurrences, it is only
+        used when the dataframe is not aggregated
+    :param title: graph title
+    :return: *ax0*
+    .. plot::
+        :include-source:
+        import numpy as np
+        from onnx import TensorProto
+        import onnx.helper as oh
+        from onnx.checker import check_model
+        from onnx.numpy_helper import from_array
+        import matplotlib.pyplot as plt
+        from onnxruntime import InferenceSession, SessionOptions
+        from onnx_diagnostic.helpers.rt_helper import js_profile_to_dataframe, plot_ort_profile
+        def get_model():
+            model_def0 = oh.make_model(
+                oh.make_graph(
+                    [
+                        oh.make_node("Add", ["X", "init1"], ["X1"]),
+                        oh.make_node("Abs", ["X"], ["X2"]),
+                        oh.make_node("Add", ["X", "init3"], ["inter"]),
+                        oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
+                        oh.make_node("Sub", ["X2", "Xm"], ["final"]),
+                    ],
+                    "test",
+                    [oh.make_tensor_value_info("X", TensorProto.FLOAT, [None])],
+                    [oh.make_tensor_value_info("final", TensorProto.FLOAT, [None])],
+                    [
+                        from_array(np.array([1], dtype=np.float32), name="init1"),
+                        from_array(np.array([3], dtype=np.float32), name="init3"),
+                    ],
+                ),
+                opset_imports=[oh.make_opsetid("", 18)],
+                ir_version=9,
+            )
+            check_model(model_def0)
+            return model_def0
+        sess_options = SessionOptions()
+        sess_options.enable_profiling = True
+        sess = InferenceSession(
+            get_model().SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
+        )
+        for _ in range(11):
+            sess.run(None, dict(X=np.arange(10).astype(np.float32)))
+        prof = sess.end_profiling()
+        df = js_profile_to_dataframe(prof, first_it_out=True)
+        print(df.head())
+        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
+        plot_ort_profile(df, ax[0], ax[1], "test_title")
+        fig.tight_layout()
+    With ``agg=True``:
+    .. plot::
+        :include-source:
+        import numpy as np
+        from onnx import TensorProto
+        import onnx.helper as oh
+        from onnx.checker import check_model
+        from onnx.numpy_helper import from_array
+        import matplotlib.pyplot as plt
+        from onnxruntime import InferenceSession, SessionOptions
+        from onnx_diagnostic.helpers.rt_helper import js_profile_to_dataframe, plot_ort_profile
+        def get_model():
+            model_def0 = oh.make_model(
+                oh.make_graph(
+                    [
+                        oh.make_node("Add", ["X", "init1"], ["X1"]),
+                        oh.make_node("Abs", ["X"], ["X2"]),
+                        oh.make_node("Add", ["X", "init3"], ["inter"]),
+                        oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
+                        oh.make_node("Sub", ["X2", "Xm"], ["final"]),
+                    ],
+                    "test",
+                    [oh.make_tensor_value_info("X", TensorProto.FLOAT, [None])],
+                    [oh.make_tensor_value_info("final", TensorProto.FLOAT, [None])],
+                    [
+                        from_array(np.array([1], dtype=np.float32), name="init1"),
+                        from_array(np.array([3], dtype=np.float32), name="init3"),
+                    ],
+                ),
+                opset_imports=[oh.make_opsetid("", 18)],
+                ir_version=9,
+            )
+            check_model(model_def0)
+            return model_def0
+        sess_options = SessionOptions()
+        sess_options.enable_profiling = True
+        sess = InferenceSession(
+            get_model().SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
+        )
+        for _ in range(11):
+            sess.run(None, dict(X=np.arange(10).astype(np.float32)))
+        prof = sess.end_profiling()
+        df = js_profile_to_dataframe(prof, first_it_out=True, agg=True)
+        print(df.head())
+        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
+        plot_ort_profile(df, ax[0], ax[1], "test_title")
+        fig.tight_layout()
+    """
+    fontsize = 10
+    def _restyle_ticks(axis, ytick_options):
+        # Reassigning the existing tick labels is only a way to change
+        # their properties, the warnings it triggers are silenced.
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            axis.set_xticklabels(axis.get_xticklabels(), fontsize=fontsize)
+            axis.get_yaxis().set_label_text("")
+            axis.set_yticklabels(axis.get_yticklabels(), **ytick_options)
+    if ax0 is None:
+        import matplotlib.pyplot as plt
+        ax0 = plt.gca()
+    if "args_provider" not in df.columns:
+        # Aggregated profile: one bar per node
+        per_node = _preprocess_graph2(df)
+        per_node[["dur"]].plot.barh(ax=ax0)
+        if title is not None:
+            ax0.set_title(title)
+        _restyle_ticks(ax0, dict(fontsize=fontsize))
+        return ax0
+    # Aggregation by operator
+    rotated = dict(rotation=45, ha="right", fontsize=fontsize)
+    gr_dur, gr_n, _ = _preprocess_graph1(df)
+    gr_dur.plot.barh(ax=ax0)
+    _restyle_ticks(ax0, rotated)
+    if title is not None:
+        ax0.set_title(title)
+    if ax1 is not None:
+        gr_n.plot.barh(ax=ax1)
+        ax1.set_title("n occurrences")
+        _restyle_ticks(ax1, rotated)
+    return ax0
+def plot_ort_profile_timeline(
+    df: "pandas.DataFrame",  # noqa: F821
+    ax: Optional["matplotlib.axes.Axes"] = None,  # noqa: F821
+    iteration: int = -2,
+    title: Optional[str] = None,
+    quantile: float = 0.5,
+    fontsize: int = 12,
+) -> "matplotlib.axes.Axes":  # noqa: F821
+    """
+    Creates a timeline based on a dataframe
+    produced by function :func:`js_profile_to_dataframe`.
+    :param df: dataframe, not aggregated
+    :param ax: axis to draw the timeline, the current axis if None
+    :param iteration: iteration to plot, negative value to start from the end
+        (-1 is the last iteration, -2 the one before)
+    :param title: graph title
+    :param quantile: the operators whose duration is below this quantile
+        of the durations are drawn with different colors, the value
+        should be in ``[0, 1]``
+    :param fontsize: font size
+    :return: the graph
+    .. plot::
+        :include-source:
+        import numpy as np
+        from onnx import TensorProto
+        import onnx.helper as oh
+        from onnx.checker import check_model
+        from onnx.numpy_helper import from_array
+        import matplotlib.pyplot as plt
+        from onnxruntime import InferenceSession, SessionOptions
+        from onnx_diagnostic.helpers.rt_helper import (
+            js_profile_to_dataframe,
+            plot_ort_profile_timeline,
+        )
+        def get_model():
+            model_def0 = oh.make_model(
+                oh.make_graph(
+                    [
+                        oh.make_node("Add", ["X", "init1"], ["X1"]),
+                        oh.make_node("Abs", ["X"], ["X2"]),
+                        oh.make_node("Add", ["X", "init3"], ["inter"]),
+                        oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
+                        oh.make_node("Sub", ["X2", "Xm"], ["final"]),
+                    ],
+                    "test",
+                    [oh.make_tensor_value_info("X", TensorProto.FLOAT, [None])],
+                    [oh.make_tensor_value_info("final", TensorProto.FLOAT, [None])],
+                    [
+                        from_array(np.array([1], dtype=np.float32), name="init1"),
+                        from_array(np.array([3], dtype=np.float32), name="init3"),
+                    ],
+                ),
+                opset_imports=[oh.make_opsetid("", 18)],
+                ir_version=9,
+            )
+            check_model(model_def0)
+            return model_def0
+        sess_options = SessionOptions()
+        sess_options.enable_profiling = True
+        sess = InferenceSession(
+            get_model().SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
+        )
+        for _ in range(11):
+            sess.run(None, dict(X=np.arange(10).astype(np.float32)))
+        prof = sess.end_profiling()
+        df = js_profile_to_dataframe(prof, first_it_out=True)
+        print(df.head())
+        fig, ax = plt.subplots(1, 1, figsize=(10, 5))
+        plot_ort_profile_timeline(df, ax, title="test_timeline", quantile=0.5)
+        fig.tight_layout()
+    """
+    if ax is None:
+        import matplotlib.pyplot as plt
+        ax = plt.gca()
+    df = df.copy()
+    df["iteration"] = df["iteration"].astype(int)
+    iterations = set(df["iteration"])
+    n_iter = iteration if iteration >= 0 else max(iterations) + 1 + iteration
+    dfi = df[df["iteration"] == n_iter]
+    assert dfi.shape[0] > 0, f"Iteration {iteration} cannot be found in {iterations}."
+    data = []
+    if "fence_before" in set(dfi["event_name"]):
+        # Format with three events per node (fence_before, kernel_time, fence_after):
+        # an observation is opened by the first one, completed by the second one
+        # and stored when the last one is met.
+        started = {}
+        for _, row in dfi.iterrows():
+            it = row["iteration"]
+            op_type = row["args_op_name"]
+            op_name = row["op_name"]
+            event_name = row["event_name"]
+            provider = row["args_provider"]
+            ts = float(row["ts"])
+            dur = float(row["dur"])
+            if event_name == "fence_before":
+                started[op_type, op_name, it] = dict(
+                    op_name=op_name, op_type=op_type, begin=ts
+                )
+            elif event_name == "kernel_time":
+                obs = started[op_type, op_name, it]
+                obs["duration"] = dur
+                obs["begin_kernel"] = ts
+                obs["provider"] = provider
+            elif event_name == "fence_after":
+                obs = started[op_type, op_name, it]
+                obs["end"] = ts
+                data.append(obs)
+                del started[op_type, op_name, it]
+            else:
+                assert event_name in {
+                    "SequentialExecutor::Execute",
+                    "model_run",
+                }, f"Unexpected event_name={event_name!r}, row={row}"
+    else:
+        # New format: only event kernel_time describes a node
+        for _, row in dfi.iterrows():
+            if row["event_name"] != "kernel_time":
+                continue
+            obs = dict(
+                duration=float(row["dur"]),
+                op_name=row["op_name"],
+                op_type=row["args_op_name"],
+                provider=row["args_provider"],
+                begin=float(row["ts"]),
+                end=float(row["ts"]) + float(row["dur"]),
+                begin_kernel=float(row["ts"]),
+            )
+            data.append(obs)
+    # durations
+    data_dur = sorted(d["duration"] for d in data)
+    if data_dur:
+        # the position is clamped: quantile=1 would otherwise point after the last item
+        pos = min(max(int(quantile * len(data_dur)), 0), len(data_dur) - 1)
+        threshold = data_dur[pos]
+    else:
+        # nothing to draw, the loop below does not run
+        threshold = 0.0
+    origin = dfi["ts"].min()
+    # two alternating colors below the threshold, two others above
+    colors = ["blue", "green", "red", "orange"]
+    import matplotlib.patches as mpatches
+    cs = [0, 0]
+    for i, obs in enumerate(data):
+        dur = obs["duration"]
+        cat = int(dur >= threshold)
+        # color
+        color = colors[cat * 2 + cs[cat] % 2]
+        cs[cat] += 1
+        # rectangle
+        t1 = obs["begin"] - origin
+        t2 = obs["end"] - origin
+        shape = mpatches.Rectangle((0, t1), 1, t2 - t1, ec="none", color=color)
+        ax.add_artist(shape)
+        tk1 = obs["begin_kernel"] - origin
+        tk2 = (obs["begin_kernel"] + obs["duration"]) - origin
+        ax.plot([0, 1], [tk1, tk1], "b--")
+        ax.plot([0, 1], [tk2, tk2], "b--")
+        if i == 0:
+            ax.plot([0, 2], [tk1, tk1], "b")
+        elif i == len(data) - 1:
+            ax.plot([0, 2], [tk2, tk2], "b")
+        # text
+        y = (tk1 + tk2) / 2
+        text = obs["op_type"]
+        prov = obs["provider"]
+        if isinstance(prov, str):
+            prov = prov.replace("ExecutionProvider", "")
+        name = obs["op_name"]
+        if len(name) >= 10:
+            # long names are shortened: first and last five characters
+            name = name[:5] + "..." + name[-5:]
+        ax.text(1, y, f"{i}:{prov}:{text}-{name}", fontsize=fontsize, va="center")
+    ax.invert_yaxis()
+    return ax

onnx_diagnostic/helpers/torch_helper.py CHANGED Viewed

@@ -450,6 +450,11 @@ def fake_torchdynamo_exporting():
     """
     memorize = torch.compiler._is_exporting_flag
     torch.compiler._is_exporting_flag = True
+    assert torch.compiler.is_exporting(), (
+        f"Changes not detected "
+        f"torch.compiler._is_exporting_flag={torch.compiler._is_exporting_flag} "
+        f"and torch.compiler.is_exporting()={torch.compiler.is_exporting()}"
+    )
     try:
         yield
     finally:

onnx_diagnostic/tasks/image_text_to_text.py CHANGED Viewed

@@ -311,7 +311,11 @@ def get_inputs_default(
         attention_mask=torch.cat(
             [
                 torch.ones((batch_size, sequence_length), dtype=torch.int64),
-                input_ids.ne(pad_token_id).to(torch.int64),
+                (
+                    torch.ones(input_ids.shape)
+                    if pad_token_id is None
+                    else input_ids.ne(pad_token_id)
+                ).to(torch.int64),
             ],
             axis=-1,
         ),

onnx_diagnostic/torch_export_patches/eval/model_cases.py CHANGED Viewed

@@ -570,6 +570,34 @@ class ControlFlowScanDecomposition_151564(torch.nn.Module):
     _dynamic = {"images": {0: DYN, 1: DYN}, "position": {0: DYN}}
+class ControlFlowWhileDec(torch.nn.Module):
+    """
+    Model case calling ``torch._higher_order_ops.while_loop`` with
+    a counter decremented by the loop body, the loop goes on
+    while the counter is strictly positive.
+    """
+    def forward(self, ci, a, b):
+        def cond_fn(i, x, y):
+            # keeps looping while the counter is strictly positive
+            return i > 0
+        def body_fn(i, x, y):
+            # one step: the counter decreases by one,
+            # the two other carried values become x + y and y - x
+            return i - 1, x + y, y - x
+        return torch._higher_order_ops.while_loop(cond_fn, body_fn, [ci, a, b])
+    # one set of inputs: the counter (scalar tensor 1) and two random 2x3 tensors
+    _inputs = [(torch.tensor(1), torch.randn(2, 3), torch.randn(2, 3))]
+    # one entry per input (ci, a, b); presumably the dimensions mapped to DYN are
+    # the dynamic ones -- TODO confirm against the code reading _dynamic
+    _dynamic = {}, {0: DYN, 1: DYN}, {0: DYN}
+class ControlFlowWhileInc(torch.nn.Module):
+    """
+    Model case calling ``torch._higher_order_ops.while_loop`` with
+    a counter incremented by the loop body, the loop goes on while
+    the counter is below the first dimension of the second carried value.
+    """
+    def forward(self, ci, a, b):
+        def cond_fn(i, x, y):
+            # keeps looping while the counter is below x.size(0)
+            return i < x.size(0)
+        def body_fn(i, x, y):
+            # one step: the counter increases by one,
+            # the two other carried values become x + y and y - x
+            return i + 1, x + y, y - x
+        return torch._higher_order_ops.while_loop(cond_fn, body_fn, [ci, a, b])
+    # one set of inputs: the counter (scalar tensor 1) and two random 2x3 tensors
+    _inputs = [(torch.tensor(1), torch.randn(2, 3), torch.randn(2, 3))]
+    # one entry per input (ci, a, b); presumably the dimensions mapped to DYN are
+    # the dynamic ones -- TODO confirm against the code reading _dynamic
+    _dynamic = {}, {0: DYN, 1: DYN}, {0: DYN}
 class SignatureInt1(torch.nn.Module):
     def __init__(self, n_dims: int = 3, n_targets: int = 1):
         super().__init__()

onnx_diagnostic/torch_export_patches/onnx_export_errors.py CHANGED Viewed

@@ -32,7 +32,7 @@ def get_patches(mod, verbose: int = 0) -> Tuple[str, List[Any]]:
             v = getattr(mod, k)
             if hasattr(v, "_PATCHED_CLASS_") and hasattr(v, "_PATCHES_"):
                 to_patch.append(v)
-            else:
+            elif v.__doc__:
                 # a function
                 doc = v.__doc__.lstrip()
                 if doc.startswith("manual patch"):

onnx_diagnostic/torch_export_patches/onnx_export_serialization.py CHANGED Viewed

@@ -4,14 +4,18 @@ import packaging.version as pv
 import optree
 import torch
 import transformers
-from transformers.cache_utils import (
-    DynamicCache,
-    EncoderDecoderCache,
-    HybridCache,
-    SlidingWindowCache,
-    StaticCache,
-)
+from transformers.cache_utils import DynamicCache, StaticCache
+try:
+    from transformers.cache_utils import (
+        EncoderDecoderCache,
+        HybridCache,
+        SlidingWindowCache,
+    )
+except ImportError:
+    EncoderDecoderCache = None
+    HybridCache = None
+    SlidingWindowCache = None
 from ..helpers import string_type
 from .serialization import _lower_name_with_

onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl

onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl