PyPI - onnx-diagnostic - Versions diffs - 0.7.12__py3-none-any.whl → 0.7.14__py3-none-any.whl - Mend

onnx-diagnostic 0.7.12__py3-none-any.whl → 0.7.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

onnx_diagnostic/__init__.py CHANGED Viewed

@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
-__version__ = "0.7.12"
+__version__ = "0.7.14"
 __author__ = "Xavier Dupré"

onnx_diagnostic/_command_lines_parser.py CHANGED Viewed

@@ -400,12 +400,17 @@ def get_parser_validate() -> ArgumentParser:
             position_ids is usually not needed, they can be removed by adding:
-            --drop position_ids
+                --drop position_ids
             The behaviour may be modified compare the original configuration,
             the following argument can be rope_scaling to dynamic:
-            --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
+                --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
+            You can profile the command line by running:
+                pyinstrument -m onnx_diagnostic validate ...
+                pyinstrument -r html -o profile.html -m onnx_diagnostic validate ...
             """
         ),
         formatter_class=RawTextHelpFormatter,

onnx_diagnostic/export/dynamic_shapes.py CHANGED Viewed

@@ -56,6 +56,14 @@ class CoupleInputsDynamicShapes:
         self.kwargs = kwargs
         self.dynamic_shapes = dynamic_shapes
         self.args_names = args_names
+        if not self.kwargs and isinstance(self.dynamic_shapes, dict):
+            # This assumes the dictionary for the dynamic shapes is ordered
+            # the same way the args are. The input names are not known.
+            assert len(self.dynamic_shapes) == len(self.args), (
+                f"Length mismatch, kwargs is empty, len(dynamic_shapes)="
+                f"{len(self.dynamic_shapes)}, len(args)={len(self.args)}"
+            )
+            self.dynamic_shapes = tuple(self.dynamic_shapes.values())
     def __str__(self) -> str:
         return "\n".join(
@@ -232,8 +240,9 @@ class CoupleInputsDynamicShapes:
         """
         if not self.args:
             assert isinstance(self.kwargs, dict) and isinstance(self.dynamic_shapes, dict), (
-                f"Type mismatch, args={string_type(self.args)} and "
-                f"dynamic_shapes={self.dynamic_shapes} should have the same type."
+                f"Type mismatch, args={string_type(self.args)}, "
+                f"kwargs={string_type(self.kwargs)} and dynamic_shapes="
+                f"{string_type(self.dynamic_shapes)} should have the same type."
             )
             res = self._generic_walker_step(
                 processor,

onnx_diagnostic/helpers/helper.py CHANGED Viewed

@@ -397,7 +397,7 @@ def string_type(
             return "AUTO"
         if verbose:
             print(f"[string_type] Y7:{type(obj)}")
-        return str(obj)
+        return str(obj).replace("DimHint(DYNAMIC)", "DYNAMIC").replace("DimHint(AUTO)", "AUTO")
     if isinstance(obj, bool):
         if with_min_max:
@@ -516,8 +516,10 @@ def string_type(
                 print(f"[string_type] V2:{type(obj)}")
             return "OV(NOTENSOR)"
         if with_min_max:
+            from .torch_helper import to_numpy
             try:
-                t = obj.numpy()
+                t = to_numpy(obj)
             except Exception:
                 # pass unable to convert into numpy (bfloat16, ...)
                 if verbose:
@@ -939,7 +941,7 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any:
             return flatten_object(list(x.values()), drop_keys=drop_keys)
         return flatten_object(list(x.items()), drop_keys=drop_keys)
-    if x.__class__.__name__ in {"DynamicCache", "StaticCache"}:
+    if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
         from .cache_helper import CacheKeyValue
         kc = CacheKeyValue(x)
@@ -1233,9 +1235,13 @@ def max_diff(
     if isinstance(expected, np.ndarray) or isinstance(got, np.ndarray):
         if isinstance(expected, torch.Tensor):
-            expected = expected.detach().cpu().numpy()
+            from .torch_helper import to_numpy
+            expected = to_numpy(expected)
         if isinstance(got, torch.Tensor):
-            got = got.detach().cpu().numpy()
+            from .torch_helper import to_numpy
+            got = to_numpy(got)
         if verbose >= 6:
             print(f"[max_diff] tensor: {string_type(expected)} ? {string_type(got)}")

onnx_diagnostic/helpers/log_helper.py CHANGED Viewed

@@ -1167,7 +1167,7 @@ class CubeLogs:
                     df.to_excel(
                         writer,
                         sheet_name=name,
-                        freeze_panes=(df.columns.nlevels + df.index.nlevels, df.index.nlevels),
+                        freeze_panes=(df.columns.nlevels + 1, df.index.nlevels),
                     )
                     f_highlights[name] = tview.f_highlight
                     if tview.plots:
@@ -1210,7 +1210,7 @@ class CubeLogs:
                     for k, v in sbs.items():
                         print(f"[CubeLogs.to_excel] sbs {k}: {v}")
                 name = "∧".join(sbs)
-                sbs_raw, sbs_agg = self.sbs(sbs)
+                sbs_raw, sbs_agg, sbs_col = self.sbs(sbs)
                 if verbose:
                     print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
                     print(
@@ -1222,7 +1222,7 @@ class CubeLogs:
                     writer,
                     sheet_name=name,
                     freeze_panes=(
-                        sbs_raw.columns.nlevels + sbs_raw.index.nlevels,
+                        sbs_raw.columns.nlevels + 1,
                         sbs_raw.index.nlevels,
                     ),
                 )
@@ -1230,10 +1230,18 @@ class CubeLogs:
                     writer,
                     sheet_name=f"{name}-AGG",
                     freeze_panes=(
-                        sbs_agg.columns.nlevels + sbs_agg.index.nlevels,
+                        sbs_agg.columns.nlevels + 1,
                         sbs_agg.index.nlevels,
                     ),
                 )
+                sbs_col.to_excel(
+                    writer,
+                    sheet_name=f"{name}-COL",
+                    freeze_panes=(
+                        sbs_col.columns.nlevels + 1,
+                        sbs_col.index.nlevels,
+                    ),
+                )
             if plots:
                 from openpyxl.drawing.image import Image
@@ -1314,7 +1322,7 @@ class CubeLogs:
     def sbs(
         self, configs: Dict[str, Dict[str, Any]], column_name: str = "CONF"
-    ) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
+    ) -> Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]:
         """
         Creates a side-by-side for two configurations.
         Every configuration a dictionary column:value which filters in
@@ -1325,7 +1333,7 @@ class CubeLogs:
         :param configs: example
             ``dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
         :param column_name: column to add with the name of the configuration
-        :return: data and aggregated date
+        :return: data, aggregated date, data with a row per model
         """
         assert (
             len(configs) >= 2
@@ -1433,6 +1441,8 @@ class CubeLogs:
                             _mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
                             _mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
                             _mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
+                            _mkc(m, f"{n1}*({n1}∧{n2})"): si * (~sinan & ~sjnan).astype(float),
+                            _mkc(m, f"{n2}*({n1}∧{n2})"): sj * (~sinan & ~sjnan).astype(float),
                         }
                     )
                     nas.columns.names = view_res.columns.names
@@ -1452,13 +1462,11 @@ class CubeLogs:
         }
         flat = view_res.groupby(self.time).agg(aggs)
         flat = flat.stack("METRICS", future_stack=True)
-        return res, flat
+        return res, flat, view_res.T.sort_index().T
 class CubeLogsPerformance(CubeLogs):
-    """
-    Processes logs coming from experiments.
-    """
+    """Processes logs coming from experiments."""
     def __init__(
         self,
@@ -1511,20 +1519,25 @@ class CubeLogsPerformance(CubeLogs):
             "n_model_faster2x",
             "n_model_faster3x",
             "n_model_faster4x",
+            "n_model_faster5x",
             "n_node_attention",
             "n_node_attention23",
-            "n_node_rotary_embedding",
-            "n_node_rotary_embedding23",
-            "n_node_layer_normalization",
-            "n_node_layer_normalization23",
+            "n_node_causal_mask",
+            "n_node_constant",
             "n_node_control_flow",
-            "n_node_scatter",
+            "n_node_expand",
             "n_node_function",
+            "n_node_gqa",
             "n_node_initializer",
             "n_node_initializer_small",
-            "n_node_constant",
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
+            "n_node_reshape",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_scatter",
+            "n_node_sequence",
             "n_node_shape",
-            "n_node_expand",
             "onnx_n_nodes_no_cst",
             "peak_gpu_torch",
             "peak_gpu_nvidia",
@@ -1690,6 +1703,11 @@ class CubeLogsPerformance(CubeLogs):
                     "time_latency",
                     gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
                 ),
+                n_model_faster5x=lambda df: gpreserve(
+                    df,
+                    "time_latency",
+                    gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 4.98,
+                ),
                 n_node_attention23=lambda df: gpreserve(
                     df, "time_latency_eager", gdf(df, "op_onnx__Attention")
                 ),
@@ -1720,6 +1738,11 @@ class CubeLogsPerformance(CubeLogs):
                     + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
                     + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
                 ),
+                n_node_gqa=lambda df: gpreserve(
+                    df,
+                    "time_latency_eager",
+                    gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0),
+                ),
                 n_node_layer_normalization=lambda df: gpreserve(
                     df,
                     "time_latency_eager",
@@ -1764,9 +1787,22 @@ class CubeLogsPerformance(CubeLogs):
                 n_node_shape=lambda df: gpreserve(
                     df, "time_latency_eager", gdf(df, "op_onnx__Shape")
                 ),
+                n_node_reshape=lambda df: gpreserve(
+                    df, "time_latency_eager", gdf(df, "op_onnx__Reshape")
+                ),
                 n_node_expand=lambda df: gpreserve(
                     df, "time_latency_eager", gdf(df, "op_onnx__Expand")
                 ),
+                n_node_causal_mask=lambda df: gpreserve(
+                    df,
+                    "time_latency_eager",
+                    gdf(df, "op_onnx__CausalMask", 0),
+                ),
+                n_node_sequence=lambda df: gpreserve(
+                    df,
+                    "time_latency_eager",
+                    gdf(df, "op_onnx__SequenceAt", 0) + gdf(df, "op_onnx__SplitToSequence", 0),
+                ),
             )
             assert (
                 formula in lambdas

onnx_diagnostic/helpers/mini_onnx_builder.py CHANGED Viewed

@@ -381,6 +381,23 @@ def _flatten_iterator(obj: Any, sep: str) -> Iterator:
                 else:
                     for p, o in _flatten_iterator(getattr(obj, att), sep):
                         yield f"DynamicCache_{att}{sep}{p}", o
+        elif obj.__class__.__name__ == "StaticCache":
+            # transformers
+            import transformers
+            from .cache_helper import CacheKeyValue
+            assert isinstance(
+                obj, transformers.cache_utils.StaticCache
+            ), f"Unexpected type {type(obj)}"
+            obj = CacheKeyValue(obj)
+            atts = ["key_cache", "value_cache"]
+            for i, att in enumerate(atts):
+                if i == len(atts) - 1:
+                    for p, o in _flatten_iterator(getattr(obj, att), sep):
+                        yield f"StaticCache._{att}{sep}{p}", o
+                else:
+                    for p, o in _flatten_iterator(getattr(obj, att), sep):
+                        yield f"StaticCache_{att}{sep}{p}", o
         else:
             raise NotImplementedError(f"Unexpected type {type(obj)}")

onnx_diagnostic/helpers/model_builder_helper.py CHANGED Viewed

@@ -203,6 +203,7 @@ def create_model_builder(
         "ChatGLMModel": builder.ChatGLMModel,
         "Ernie4_5_ForCausalLM": builder.ErnieModel,
         "GemmaForCausalLM": builder.Gemma2Model,
+        "Gemma2ForCausalLM": builder.Gemma2Model,
         "Gemma3ForCausalLM": builder.Gemma3Model,
         "Gemma3ForConditionalGeneration": builder.Gemma3Model,
         "GraniteForCausalLM": builder.GraniteModel,

onnx_diagnostic/helpers/rt_helper.py CHANGED Viewed

@@ -3,6 +3,7 @@ import numpy as np
 import onnx
 import torch
 from .helper import string_type, flatten_object
+from .torch_helper import to_numpy
 from .cache_helper import is_cache_dynamic_registered
@@ -56,7 +57,7 @@ def make_feeds(
         f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True)}"
     )
     if use_numpy:
-        flat = [t.detach().cpu().numpy() if isinstance(t, torch.Tensor) else t for t in flat]
+        flat = [to_numpy(t) if isinstance(t, torch.Tensor) else t for t in flat]
     names = (
         [i.name for i in proto.graph.input]
         if isinstance(proto, onnx.ModelProto)

onnx_diagnostic/helpers/torch_helper.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import sys
 import warnings
 from collections.abc import Iterable
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 import numpy as np
 import onnx
 from onnx.external_data_helper import load_external_data_for_tensor, uses_external_data
@@ -283,9 +283,11 @@ def steal_forward(
     ],
     fprint: Callable = string_type,
     dump_file: Optional[str] = None,
+    dump_drop: Optional[Set[str]] = None,
     submodules: bool = False,
     verbose: int = 0,
     storage_limit: int = 2**27,
+    save_as_external_data: bool = True,
     **kwargs,
 ):
     """
@@ -303,6 +305,9 @@ def steal_forward(
     :param dump_file: dumps stolen inputs and outputs in an onnx model,
         they can be restored with :func:`create_input_tensors_from_onnx_model
         <onnx_diagnostic.helpers.mini_onnx_builder.create_input_tensors_from_onnx_model>`
+    :param dump_drop: to drop some inputs too big (only if dump_file is specified)
+    :param save_as_external_data: True by default, but maybe better to have everything
+        in a single file if possible
     :param submodules: if True and model is a module, the list extended with all the submodules
         the module contains
     :param verbose: verbosity
@@ -411,6 +416,15 @@ def steal_forward(
             if verbose:
                 size = torch_tensor_size(storage)
                 print(f"-- gather stored {len(storage)} objects, size={size // 2 ** 20} Mb")
+            if dump_drop:
+                for k, v in storage.items():
+                    if k[-1] == "I":
+                        _args, kwargs = v
+                        ii = set(kwargs) & dump_drop
+                        if ii:
+                            for i in ii:
+                                print("---", i)
+                                del kwargs[i]
             proto = create_onnx_model_from_input_tensors(storage)
             if verbose:
                 print("-- dumps stored objects")
@@ -420,7 +434,7 @@ def steal_forward(
             onnx.save(
                 proto,
                 dump_file,
-                save_as_external_data=True,
+                save_as_external_data=save_as_external_data,
                 all_tensors_to_one_file=True,
                 location=location,
             )
@@ -464,10 +478,10 @@ def is_torchdynamo_exporting() -> bool:
             return False
-def to_numpy(tensor: "torch.Tensor"):  # noqa: F821
+def to_numpy(tensor: "torch.Tensor") -> np.ndarray:  # noqa: F821
     """Converts a :class:`torch.Tensor` to :class:`numpy.ndarray`."""
     try:
-        return tensor.numpy()
+        return tensor.detach().cpu().numpy()
     except TypeError:
         # We try with ml_dtypes
         pass
@@ -476,7 +490,7 @@ def to_numpy(tensor: "torch.Tensor"):  # noqa: F821
     conv = {torch.bfloat16: ml_dtypes.bfloat16}
     assert tensor.dtype in conv, f"Unsupported type {tensor.dtype}, not in {conv}"
-    return tensor.to(torch.float32).numpy().astype(conv[tensor.dtype])
+    return tensor.detach().to(torch.float32).cpu().numpy().astype(conv[tensor.dtype])
 def replace_string_by_dynamic(dynamic_shapes: Any) -> Any:
@@ -765,7 +779,12 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
 def torch_deepcopy(value: Any) -> Any:
-    """Makes a deepcopy."""
+    """
+    Makes a deep copy.
+    :param value: any value
+    :return: a deep copy
+    """
     if value is None:
         return None
     if isinstance(value, (int, float, str)):
@@ -794,9 +813,14 @@ def torch_deepcopy(value: Any) -> Any:
         from .cache_helper import CacheKeyValue
         ca = CacheKeyValue(value)
+        if len(ca.key_cache) == 0:
+            # Use of deepcopy.
+            import copy
+            return copy.deepcopy(value)
         return make_static_cache(
             torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))),
-            max_cache_len=value.max_cache_len,
+            max_cache_len=max([value.max_cache_len, *[t.shape[2] for t in ca.key_cache]]),
         )
     if value.__class__.__name__ == "HybridCache":
         from .cache_helper import CacheKeyValue

onnx_diagnostic/reference/torch_evaluator.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
 import numpy as np
 import onnx
 import torch
-from ..helpers.torch_helper import to_tensor
+from ..helpers.torch_helper import to_tensor, to_numpy
 from ..torch_onnx.runtime_info import first_used_last_used, RuntimeValue
 from .report_results_comparison import ReportResultComparison
 from . import torch_ops
@@ -578,7 +578,7 @@ class TorchOnnxEvaluator:
                 print(f"- clean {o}")
         if use_numpy:
-            return [None if a is None else a.detach().cpu().numpy() for a in fres]
+            return [None if a is None else to_numpy(a) for a in fres]
         return fres
     def run_with_values(

onnx_diagnostic/tasks/data/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+import os
+def get_data(name: str):
+    """Returns data stored in this folder."""
+    filename = os.path.join(os.path.dirname(__file__), name)
+    assert os.path.exists(
+        filename
+    ), f"Unable to find a file with {name!r}, looked for {filename!r}"
+    from ...helpers.mini_onnx_builder import create_input_tensors_from_onnx_model
+    return create_input_tensors_from_onnx_model(filename)

onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx ADDED Viewed

Binary file

onnx-diagnostic 0.7.12__py3-none-any.whl → 0.7.14__py3-none-any.whl

onnx-diagnostic 0.7.12__py3-none-any.whl → 0.7.14__py3-none-any.whl