onnx-diagnostic 0.7.13__py3-none-any.whl → 0.7.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
 
-__version__ = "0.7.13"
+__version__ = "0.7.15"
 __author__ = "Xavier Dupré"
@@ -400,12 +400,17 @@ def get_parser_validate() -> ArgumentParser:
 
             position_ids is usually not needed; it can be removed by adding:
 
-                --drop position_ids
+                --drop position_ids
 
             The behaviour may be modified compared to the original configuration;
             the following argument sets rope_scaling to dynamic:
 
-                --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
+                --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
+
+            You can profile the command line by running:
+
+                pyinstrument -m onnx_diagnostic validate ...
+                pyinstrument -r html -o profile.html -m onnx_diagnostic validate ...
             """
         ),
         formatter_class=RawTextHelpFormatter,
@@ -548,6 +553,12 @@ def get_parser_validate() -> ArgumentParser:
         action=BooleanOptionalAction,
         help="Enables onnxruntime logging when the session is created",
     )
+    parser.add_argument(
+        "--quiet-input-sets",
+        default="",
+        help="Avoids raising an exception when an input set does not work with "
+        "the exported model, example: --quiet-input-sets=inputs,inputs22",
+    )
     return parser
 
 
@@ -609,6 +620,7 @@ def _cmd_validate(argv: List[Any]):
         warmup=args.warmup,
         inputs2=args.inputs2,
         ort_logs=args.ort_logs,
+        quiet_input_sets=set(args.quiet_input_sets.split(",")),
         output_names=(
             None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
         ),
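Taken together, the two hunks above wire the new flag into the validation entry point. A hypothetical invocation (the `...` stands for the usual model arguments, as in the epilog above) would be:

    python -m onnx_diagnostic validate ... --quiet-input-sets=inputs,inputs22

One subtlety of the new parsing line, since the flag defaults to the empty string: `"".split(",")` returns `[""]`, so the default yields the set `{""}` rather than an empty set. This is harmless as long as no input set is named with the empty string:

    >>> set("".split(","))
    {''}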
@@ -829,7 +841,7 @@ def get_parser_agg() -> ArgumentParser:
         "n_model_pass,n_model_faster,"
         "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
         "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
-        "n_node_layer_normalization,n_node_layer_normalization23,"
+        "n_node_gqa,n_node_layer_normalization,n_node_layer_normalization23,"
         "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
         "n_node_constant,n_node_shape,n_node_expand,"
         "n_node_function,n_node_initializer,n_node_scatter,"
@@ -108,7 +108,7 @@ def flatten_unflatten_for_dynamic_shapes(
 
 def is_cache_dynamic_registered(fast: bool = False) -> bool:
     """
-    Tells class :class:`transformers.cache_utils.DynamicCache` can be
+    Tells if class :class:`transformers.cache_utils.DynamicCache` can be
     serialized and deserialized. Only then, :func:`torch.export.export`
     can export a model.
 
@@ -95,7 +95,8 @@ def config_class_from_architecture(arch: str, exc: bool = False) -> Optional[typ
     mod_name = cls.__module__
     mod = importlib.import_module(mod_name)
     source = inspect.getsource(mod)
-    reg = re.compile("config: ([A-Za-z0-9]+)")
+    # [^O] avoids capturing Optional[Something]
+    reg = re.compile("config: ([^O][A-Za-z0-9]+)")
     fall = reg.findall(source)
     if len(fall) == 0:
         assert not exc, (
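A quick check of what the tightened pattern does and does not capture; this is a standalone sketch, not code from the package:

    import re

    reg = re.compile("config: ([^O][A-Za-z0-9]+)")
    source = "config: LlamaConfig\n    config: Optional[LlamaConfig]"
    # The [^O] guard rejects annotations starting with 'O', so the
    # Optional[...] form is skipped and only the plain class name is kept.
    print(reg.findall(source))  # ['LlamaConfig']

Note the guard also skips any config class whose own name starts with 'O', a trade-off the comment in the hunk accepts implicitly.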
@@ -1167,7 +1167,7 @@ class CubeLogs:
             df.to_excel(
                 writer,
                 sheet_name=name,
-                freeze_panes=(df.columns.nlevels + df.index.nlevels, df.index.nlevels),
+                freeze_panes=(df.columns.nlevels + 1, df.index.nlevels),
             )
             f_highlights[name] = tview.f_highlight
             if tview.plots:
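For context on the freeze_panes fix (here and in the two later hunks touching sbs_raw and sbs_agg): pandas' `DataFrame.to_excel` takes `freeze_panes=(rows, cols)`, the count of top rows and left columns to keep pinned while scrolling. The `+ 1` presumably accounts for the extra spacer row pandas writes under a multi-level column header, instead of freezing `index.nlevels` data rows as before. A minimal sketch, assuming openpyxl is installed:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    # Freeze the header row(s) plus one extra row, and the index column(s).
    df.to_excel("out.xlsx", freeze_panes=(df.columns.nlevels + 1, df.index.nlevels))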
@@ -1210,7 +1210,7 @@ class CubeLogs:
                 for k, v in sbs.items():
                     print(f"[CubeLogs.to_excel] sbs {k}: {v}")
             name = "∧".join(sbs)
-            sbs_raw, sbs_agg = self.sbs(sbs)
+            sbs_raw, sbs_agg, sbs_col = self.sbs(sbs)
             if verbose:
                 print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
                 print(
@@ -1222,7 +1222,7 @@ class CubeLogs:
                 writer,
                 sheet_name=name,
                 freeze_panes=(
-                    sbs_raw.columns.nlevels + sbs_raw.index.nlevels,
+                    sbs_raw.columns.nlevels + 1,
                     sbs_raw.index.nlevels,
                 ),
             )
@@ -1230,10 +1230,18 @@ class CubeLogs:
                 writer,
                 sheet_name=f"{name}-AGG",
                 freeze_panes=(
-                    sbs_agg.columns.nlevels + sbs_agg.index.nlevels,
+                    sbs_agg.columns.nlevels + 1,
                     sbs_agg.index.nlevels,
                 ),
             )
+            sbs_col.to_excel(
+                writer,
+                sheet_name=f"{name}-COL",
+                freeze_panes=(
+                    sbs_col.columns.nlevels + 1,
+                    sbs_col.index.nlevels,
+                ),
+            )
 
         if plots:
             from openpyxl.drawing.image import Image
@@ -1314,7 +1322,7 @@ class CubeLogs:
 
     def sbs(
         self, configs: Dict[str, Dict[str, Any]], column_name: str = "CONF"
-    ) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
+    ) -> Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]:
         """
         Creates a side-by-side for two configurations.
         Every configuration is a dictionary column:value which filters in
@@ -1325,7 +1333,7 @@ class CubeLogs:
         :param configs: example
             ``dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
         :param column_name: column to add with the name of the configuration
-        :return: data and aggregated date
+        :return: data, aggregated data, and data with a row per model
         """
         assert (
             len(configs) >= 2
@@ -1433,6 +1441,8 @@ class CubeLogs:
                 _mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
                 _mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
                 _mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
+                _mkc(m, f"{n1}*({n1}∧{n2})"): si * (~sinan & ~sjnan).astype(float),
+                _mkc(m, f"{n2}*({n1}∧{n2})"): sj * (~sinan & ~sjnan).astype(float),
             }
         )
         nas.columns.names = view_res.columns.names
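The two new columns multiply each side by an indicator that both configurations produced a value, so later aggregates compare the two configurations on the same set of rows. A small standalone illustration, with si/sj standing for the two metric series and sinan/sjnan for their isna() masks as in the surrounding code:

    import pandas as pd

    si = pd.Series([1.0, 2.0, None])
    sj = pd.Series([4.0, None, 6.0])
    sinan, sjnan = si.isna(), sj.isna()
    both = (~sinan & ~sjnan).astype(float)  # 1.0 where both sides have a value
    # Rows missing on the other side become 0 and drop out of a sum;
    # rows missing on this side stay NaN (NaN * 0 is NaN).
    print(si * both)  # [1.0, 0.0, NaN]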
@@ -1452,13 +1462,11 @@ class CubeLogs:
         }
         flat = view_res.groupby(self.time).agg(aggs)
         flat = flat.stack("METRICS", future_stack=True)
-        return res, flat
+        return res, flat, view_res.T.sort_index().T
 
 
 class CubeLogsPerformance(CubeLogs):
-    """
-    Processes logs coming from experiments.
-    """
+    """Processes logs coming from experiments."""
 
     def __init__(
         self,
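The third returned frame, `view_res.T.sort_index().T`, uses a double-transpose idiom that sorts the columns; `sort_index(axis=1)` would achieve the same result without the two copies:

    import pandas as pd

    df = pd.DataFrame([[1, 2]], columns=["b", "a"])
    print(df.T.sort_index().T.columns.tolist())    # ['a', 'b']
    print(df.sort_index(axis=1).columns.tolist())  # ['a', 'b']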
@@ -1511,20 +1519,25 @@ class CubeLogsPerformance(CubeLogs):
             "n_model_faster2x",
             "n_model_faster3x",
             "n_model_faster4x",
+            "n_model_faster5x",
             "n_node_attention",
             "n_node_attention23",
-            "n_node_rotary_embedding",
-            "n_node_rotary_embedding23",
-            "n_node_layer_normalization",
-            "n_node_layer_normalization23",
+            "n_node_causal_mask",
+            "n_node_constant",
             "n_node_control_flow",
-            "n_node_scatter",
+            "n_node_expand",
             "n_node_function",
+            "n_node_gqa",
             "n_node_initializer",
             "n_node_initializer_small",
-            "n_node_constant",
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
+            "n_node_reshape",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_scatter",
+            "n_node_sequence",
             "n_node_shape",
-            "n_node_expand",
             "onnx_n_nodes_no_cst",
             "peak_gpu_torch",
             "peak_gpu_nvidia",
@@ -1690,6 +1703,11 @@ class CubeLogsPerformance(CubeLogs):
                 "time_latency",
                 gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
             ),
+            n_model_faster5x=lambda df: gpreserve(
+                df,
+                "time_latency",
+                gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 4.98,
+            ),
             n_node_attention23=lambda df: gpreserve(
                 df, "time_latency_eager", gdf(df, "op_onnx__Attention")
             ),
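Two details of the new n_model_faster5x formula mirror the existing 2x/3x/4x ones: the np.inf fallback makes a missing time_latency fail the comparison, so models without a measurement are never counted as faster, and the 4.98 threshold (rather than 5.0) presumably leaves a little slack for timing noise just below the round multiple.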
@@ -1720,6 +1738,11 @@ class CubeLogsPerformance(CubeLogs):
                 + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
                 + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
             ),
+            n_node_gqa=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0),
+            ),
             n_node_layer_normalization=lambda df: gpreserve(
                 df,
                 "time_latency_eager",
@@ -1764,9 +1787,22 @@ class CubeLogsPerformance(CubeLogs):
             n_node_shape=lambda df: gpreserve(
                 df, "time_latency_eager", gdf(df, "op_onnx__Shape")
             ),
+            n_node_reshape=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__Reshape")
+            ),
             n_node_expand=lambda df: gpreserve(
                 df, "time_latency_eager", gdf(df, "op_onnx__Expand")
             ),
+            n_node_causal_mask=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__CausalMask", 0),
+            ),
+            n_node_sequence=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__SequenceAt", 0) + gdf(df, "op_onnx__SplitToSequence", 0),
+            ),
         )
         assert (
             formula in lambdas
@@ -3,8 +3,6 @@ import numpy as np
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .torch_helper import to_numpy
-from .cache_helper import is_cache_dynamic_registered
 
 
 def name_type_to_onnx_dtype(name: str) -> int:
@@ -49,7 +47,7 @@ def make_feeds(
     assert (
         not check_flatten
         or not all(isinstance(obj, torch.Tensor) for obj in flat)
-        or not is_cache_dynamic_registered(fast=True)
+        # or not is_cache_dynamic_registered(fast=True)
         or len(flat) == len(torch.utils._pytree.tree_flatten(inputs)[0])
     ), (
         f"Unexpected number of flattened objects, "
@@ -57,6 +55,8 @@ def make_feeds(
         f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True)}"
     )
     if use_numpy:
+        from .torch_helper import to_numpy
+
         flat = [to_numpy(t) if isinstance(t, torch.Tensor) else t for t in flat]
     names = (
         [i.name for i in proto.graph.input]
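Moving the to_numpy import inside the use_numpy branch (matching its removal from the module header in the earlier hunk) defers the dependency until it is actually needed, a common way to cut import time or break an import cycle. A minimal standalone sketch of the pattern, not package code:

    def mean(values, use_numpy=False):
        if use_numpy:
            # Deferred import: the module is only loaded on the branch
            # that needs it, avoiding its import cost (or a cycle) otherwise.
            import numpy as np

            return float(np.mean(values))
        return sum(values) / len(values)

    print(mean([1.0, 2.0, 3.0]))                   # 2.0
    print(mean([1.0, 2.0, 3.0], use_numpy=True))   # 2.0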
@@ -186,12 +186,13 @@ def _get_inputs_gemma3(
         f"total_sequence_length={total_sequence_length} != 860 "
         f"for model {model.__class__.__name__}"
     )
-    assert (
-        head_dim == 256
-    ), f"head_dim={head_dim} != 256 for model {model.__class__.__name__}"
+    assert head_dim in (
+        256,
+        32,
+    ), f"head_dim={head_dim} not in (32, 256) for model {model.__class__.__name__}"
     assert n_images == 1, f"n_images={n_images} != 1 for model {model.__class__.__name__}"
-    assert num_key_value_heads == 4, (
-        f"num_key_value_heads={num_key_value_heads} != 256 "
+    assert num_key_value_heads in (1, 4), (
+        f"num_key_value_heads={num_key_value_heads} not in (1, 4) "
         f"for this model {model.__class__.__name__}"
     )
 
@@ -19,6 +19,9 @@ __TASK__ = "text-generation"
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
     # FalconMambaConfig: use_mambapy
+    if hasattr(config, "text_config"):
+        # The model is probably a mixture of models used only for text.
+        config = config.text_config
     check_hasattr(
         config,
         ("head_dim", ("hidden_size", "num_attention_heads"), "use_mambapy"),
@@ -284,6 +287,21 @@ def get_inputs(
         add_second_input=0,
         **kwargs,
     )["inputs"]
+    res["inputs_batch1"] = get_inputs(
+        model=model,
+        config=config,
+        dummy_max_token_id=dummy_max_token_id,
+        num_hidden_layers=num_hidden_layers,
+        batch_size=1,
+        sequence_length=sequence_length,
+        sequence_length2=sequence_length2,
+        dynamic_rope=dynamic_rope,
+        num_key_value_heads=num_key_value_heads,
+        head_dim=head_dim,
+        cls_cache=cls_cache,
+        add_second_input=0,
+        **kwargs,
+    )["inputs"]
     return res
 
 
@@ -293,6 +311,9 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
 
     If the configuration is None, the function selects typical dimensions.
     """
+    if hasattr(config, "text_config"):
+        # The model is probably a mixture of models used only for text.
+        config = config.text_config
     if config is not None:
         check_hasattr(
             config,
@@ -676,7 +676,13 @@ def run_exporter(
 
     if dynamic and len(inputs) > 1:
         for index, i in enumerate(inputs):
-            expected = model(*_clone(i))
+            if quiet:
+                try:
+                    expected = model(*_clone(i))
+                except Exception as e:
+                    return dict(error=str(e), success=0, error_step=f"run0.{index}")
+            else:
+                expected = model(*_clone(i))
             try:
                 got = mod(*i)
             except Exception as e:
@@ -353,12 +353,9 @@ class ControlFlowCondNonZero(torch.nn.Module):
 
 
 class ControlFlowCondIdentity_153832(torch.nn.Module):
-    """
-    `#153832 <https://github.com/pytorch/pytorch/issues/153832>`_
-    """
+    """`#153832 <https://github.com/pytorch/pytorch/issues/153832>`_"""
 
     def forward(self, x, y):
-
         def branch_cond_then_1(x):
             x = torch.abs(x) + 1
             return x
@@ -340,6 +340,7 @@ def torch_export_patches(
     ###############
 
     if patch_torch:
+        from torch.fx.experimental.symbolic_shapes import ShapeEnv
         from .patches.patch_torch import (
             patched_infer_size,
             patched_vmap,
@@ -347,6 +348,9 @@ def torch_export_patches(
             patched__constrain_user_specified_dimhint_range,
             _catch_produce_guards_and_solve_constraints,
             patch__check_input_constraints_for_graph,
+            patched__broadcast_in_dim_meta,
+            patched__maybe_broadcast,
+            patched_ShapeEnv,
         )
 
         if verbose:
@@ -383,6 +387,20 @@ def torch_export_patches(
             patched__constrain_user_specified_dimhint_range
         )
 
+        # torch._prims._broadcast_in_dim_meta
+        f_broadcast_in_dim = torch._prims.broadcast_in_dim
+        f__broadcast_in_dim_meta = torch._prims._broadcast_in_dim_meta
+        torch._prims._broadcast_in_dim_meta = patched__broadcast_in_dim_meta
+        torch._prims.broadcast_in_dim = patched__broadcast_in_dim_meta
+
+        # torch._refs._maybe_broadcast
+        f__maybe_broadcast = torch._refs._maybe_broadcast
+        torch._refs._maybe_broadcast = patched__maybe_broadcast
+
+        # ShapeEnv
+        f_shape_env__evaluate_expr = ShapeEnv._evaluate_expr
+        ShapeEnv._evaluate_expr = patched_ShapeEnv._evaluate_expr
+
     # torch._export.non_strict_utils.produce_guards_and_solve_constraints
     if patch_torch and catch_constraints:
         if verbose:
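Each patched callable is first saved under an f_-prefixed name so the restore hunk further down can undo the patch. A minimal sketch of the same save/patch/restore discipline on a toy target:

    import math

    f_sqrt = math.sqrt                    # save the original
    math.sqrt = lambda x: f_sqrt(abs(x))  # install the patch
    try:
        print(math.sqrt(-4.0))  # 2.0 while the patch is active
    finally:
        math.sqrt = f_sqrt                # always restore

Note the hunk assigns patched__broadcast_in_dim_meta to both torch._prims._broadcast_in_dim_meta and torch._prims.broadcast_in_dim, even though the saved originals differ; the restore hunk below puts each original back under its own name.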
@@ -404,10 +422,7 @@ def torch_export_patches(
             )
         )
 
-    if stop_if_static:
-        from torch.fx.experimental.symbolic_shapes import ShapeEnv
-        from .patches.patch_torch import patched_ShapeEnv
-
+    if patch_torch and stop_if_static:
         ShapeEnv._log_guard_remember = ShapeEnv._log_guard
 
         if verbose:
@@ -584,6 +599,10 @@ def torch_export_patches(
         torch._export.non_strict_utils._constrain_user_specified_dimhint_range = (
             f___constrain_user_specified_dimhint_range
         )
+        torch._prims._broadcast_in_dim_meta = f__broadcast_in_dim_meta
+        torch._prims.broadcast_in_dim = f_broadcast_in_dim
+        torch._refs._maybe_broadcast = f__maybe_broadcast
+        ShapeEnv._evaluate_expr = f_shape_env__evaluate_expr
 
         if verbose:
             print("[torch_export_patches] restored pytorch functions")
@@ -723,9 +742,7 @@ def torch_export_patches(
 
 
 def replacement_before_exporting(args: Any) -> Any:
-    """
-    Does replacements on the given inputs if needed.
-    """
+    """Does replacements on the given inputs if needed."""
     if args is None:
         return None
     if isinstance(args, (int, float)):
@@ -12,17 +12,26 @@ from transformers.cache_utils import (
     StaticCache,
 )
 
-try:
-    from transformers.models.mamba.modeling_mamba import MambaCache
-except ImportError:
-    from transformers.cache_utils import MambaCache
-
 from ..helpers import string_type
 from .serialization import _lower_name_with_
 
 PATCH_OF_PATCHES: Set[Any] = set()
 
 
+def get_mamba_cache_cls() -> type:
+    try:
+        from transformers.models.mamba.modeling_mamba import MambaCache
+
+        return MambaCache
+    except ImportError:
+        try:
+            from transformers.cache_utils import MambaCache
+
+            return MambaCache
+        except ImportError:
+            return None
+
+
 def register_class_serialization(
     cls,
     f_flatten: Callable,
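get_mamba_cache_cls replaces the module-level try/except import: MambaCache moved between transformers releases, so the helper returns the class from whichever location still provides it, or None when neither import succeeds (the annotation is thus effectively Optional[type], and the callers below guard on the result). A generalized sketch of the same probe-several-locations pattern, with find_class a hypothetical helper:

    import importlib

    def find_class(candidates):
        # Try historical import locations in order; None when the class is gone.
        for mod_name, attr in candidates:
            try:
                return getattr(importlib.import_module(mod_name), attr)
            except (ImportError, AttributeError):
                continue
        return None

    MambaCache = find_class([
        ("transformers.models.mamba.modeling_mamba", "MambaCache"),
        ("transformers.cache_utils", "MambaCache"),
    ])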
@@ -203,13 +212,6 @@ def serialization_functions(
             # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
             verbose=verbose,
         ),
-        MambaCache: lambda verbose=verbose: register_class_serialization(
-            MambaCache,
-            flatten_mamba_cache,
-            unflatten_mamba_cache,
-            flatten_with_keys_mamba_cache,
-            verbose=verbose,
-        ),
         EncoderDecoderCache: lambda verbose=verbose: register_class_serialization(
             EncoderDecoderCache,
             flatten_encoder_decoder_cache,
@@ -232,6 +234,17 @@ def serialization_functions(
             verbose=verbose,
         ),
     }
+    MambaCache = get_mamba_cache_cls()
+    if MambaCache:
+        transformers_classes[MambaCache] = (
+            lambda verbose=verbose: register_class_serialization(
+                MambaCache,
+                flatten_mamba_cache,
+                unflatten_mamba_cache,
+                flatten_with_keys_mamba_cache,
+                verbose=verbose,
+            )
+        )
     classes.update(transformers_classes)
 
     if patch_diffusers:
  if patch_diffusers:
@@ -287,7 +300,12 @@ def unregister_class_serialization(cls: type, verbose: int = 0):
287
300
 
288
301
  def unregister_cache_serialization(undo: Dict[str, bool], verbose: int = 0):
289
302
  """Undo all registrations."""
290
- cls_ensemble = {MambaCache, DynamicCache, EncoderDecoderCache} | set(undo)
303
+ MambaCache = get_mamba_cache_cls()
304
+ cls_ensemble = (
305
+ {DynamicCache, EncoderDecoderCache}
306
+ | set(undo)
307
+ | ({MambaCache} if MambaCache else set())
308
+ )
291
309
  for cls in cls_ensemble:
292
310
  if undo.get(cls.__name__, False):
293
311
  unregister_class_serialization(cls, verbose)