onnx-diagnostic 0.7.13__py3-none-any.whl → 0.7.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +7 -2
- onnx_diagnostic/helpers/log_helper.py +53 -17
- onnx_diagnostic/tasks/text_generation.py +15 -0
- onnx_diagnostic/torch_export_patches/eval/__init__.py +7 -1
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +1 -4
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +23 -6
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +425 -1
- onnx_diagnostic/torch_models/hghub/model_inputs.py +24 -2
- onnx_diagnostic/torch_models/validate.py +5 -17
- {onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/RECORD +15 -15
- {onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py CHANGED

onnx_diagnostic/_command_lines_parser.py CHANGED
@@ -400,12 +400,17 @@ def get_parser_validate() -> ArgumentParser:
 
     position_ids is usually not needed, they can be removed by adding:
 
-
+        --drop position_ids
 
     The behaviour may be modified compare the original configuration,
     the following argument can be rope_scaling to dynamic:
 
-
+        --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
+
+    You can profile the command line by running:
+
+        pyinstrument -m onnx_diagnostic validate ...
+        pyinstrument -r html -o profile.html -m onnx_diagnostic validate ...
     """
     ),
     formatter_class=RawTextHelpFormatter,
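The two options documented above act on different objects: --drop removes one of the dummy inputs, while --mop overrides one entry of the model configuration before the model is built. A minimal sketch of those two effects, with placeholder dictionaries standing in for the real inputs and transformers configuration (none of this is the CLI's implementation):

    # Placeholder structures; the real objects are the exporter's dummy
    # inputs and the model's transformers configuration.
    inputs = {"input_ids": [[1, 2, 3]], "position_ids": [[0, 1, 2]]}
    config = {}

    # Effect of: --drop position_ids
    inputs.pop("position_ids", None)

    # Effect of: --mop "rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}"
    config["rope_scaling"] = {"rope_type": "dynamic", "factor": 10.0}
    print(inputs, config)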
onnx_diagnostic/helpers/log_helper.py CHANGED
@@ -1167,7 +1167,7 @@ class CubeLogs:
             df.to_excel(
                 writer,
                 sheet_name=name,
-                freeze_panes=(df.columns.nlevels +
+                freeze_panes=(df.columns.nlevels + 1, df.index.nlevels),
             )
             f_highlights[name] = tview.f_highlight
             if tview.plots:
@@ -1210,7 +1210,7 @@ class CubeLogs:
                 for k, v in sbs.items():
                     print(f"[CubeLogs.to_excel] sbs {k}: {v}")
             name = "∧".join(sbs)
-            sbs_raw, sbs_agg = self.sbs(sbs)
+            sbs_raw, sbs_agg, sbs_col = self.sbs(sbs)
             if verbose:
                 print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
                 print(
@@ -1222,7 +1222,7 @@ class CubeLogs:
                 writer,
                 sheet_name=name,
                 freeze_panes=(
-                    sbs_raw.columns.nlevels +
+                    sbs_raw.columns.nlevels + 1,
                     sbs_raw.index.nlevels,
                 ),
             )
@@ -1230,10 +1230,18 @@ class CubeLogs:
                 writer,
                 sheet_name=f"{name}-AGG",
                 freeze_panes=(
-                    sbs_agg.columns.nlevels +
+                    sbs_agg.columns.nlevels + 1,
                     sbs_agg.index.nlevels,
                 ),
             )
+            sbs_col.to_excel(
+                writer,
+                sheet_name=f"{name}-COL",
+                freeze_panes=(
+                    sbs_col.columns.nlevels + 1,
+                    sbs_col.index.nlevels,
+                ),
+            )
 
         if plots:
             from openpyxl.drawing.image import Image
@@ -1314,7 +1322,7 @@ class CubeLogs:
 
     def sbs(
         self, configs: Dict[str, Dict[str, Any]], column_name: str = "CONF"
-    ) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
+    ) -> Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]:
         """
         Creates a side-by-side for two configurations.
         Every configuration a dictionary column:value which filters in
@@ -1325,7 +1333,7 @@ class CubeLogs:
         :param configs: example
             ``dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
         :param column_name: column to add with the name of the configuration
-        :return: data
+        :return: data, aggregated date, data with a row per model
         """
         assert (
             len(configs) >= 2
@@ -1433,6 +1441,8 @@ class CubeLogs:
                 _mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
                 _mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
                 _mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
+                _mkc(m, f"{n1}*({n1}∧{n2})"): si * (~sinan & ~sjnan).astype(float),
+                _mkc(m, f"{n2}*({n1}∧{n2})"): sj * (~sinan & ~sjnan).astype(float),
             }
         )
         nas.columns.names = view_res.columns.names
@@ -1452,13 +1462,11 @@ class CubeLogs:
         }
         flat = view_res.groupby(self.time).agg(aggs)
         flat = flat.stack("METRICS", future_stack=True)
-        return res, flat
+        return res, flat, view_res.T.sort_index().T
 
 
 class CubeLogsPerformance(CubeLogs):
-    """
-    Processes logs coming from experiments.
-    """
+    """Processes logs coming from experiments."""
 
     def __init__(
         self,
@@ -1511,20 +1519,25 @@ class CubeLogsPerformance(CubeLogs):
             "n_model_faster2x",
             "n_model_faster3x",
             "n_model_faster4x",
+            "n_model_faster5x",
             "n_node_attention",
             "n_node_attention23",
-            "
-            "
-            "n_node_layer_normalization",
-            "n_node_layer_normalization23",
+            "n_node_causal_mask",
+            "n_node_constant",
             "n_node_control_flow",
-            "
+            "n_node_expand",
             "n_node_function",
+            "n_node_gqa",
             "n_node_initializer",
             "n_node_initializer_small",
-            "
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
+            "n_node_reshape",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_scatter",
+            "n_node_sequence",
             "n_node_shape",
-            "n_node_expand",
             "onnx_n_nodes_no_cst",
             "peak_gpu_torch",
             "peak_gpu_nvidia",
@@ -1690,6 +1703,11 @@ class CubeLogsPerformance(CubeLogs):
                 "time_latency",
                 gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
             ),
+            n_model_faster5x=lambda df: gpreserve(
+                df,
+                "time_latency",
+                gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 4.98,
+            ),
             n_node_attention23=lambda df: gpreserve(
                 df, "time_latency_eager", gdf(df, "op_onnx__Attention")
             ),
@@ -1720,6 +1738,11 @@ class CubeLogsPerformance(CubeLogs):
                 + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
                 + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
             ),
+            n_node_gqa=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0),
+            ),
             n_node_layer_normalization=lambda df: gpreserve(
                 df,
                 "time_latency_eager",
@@ -1764,9 +1787,22 @@ class CubeLogsPerformance(CubeLogs):
             n_node_shape=lambda df: gpreserve(
                 df, "time_latency_eager", gdf(df, "op_onnx__Shape")
             ),
+            n_node_reshape=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__Reshape")
+            ),
             n_node_expand=lambda df: gpreserve(
                 df, "time_latency_eager", gdf(df, "op_onnx__Expand")
            ),
+            n_node_causal_mask=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__CausalMask", 0),
+            ),
+            n_node_sequence=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__SequenceAt", 0) + gdf(df, "op_onnx__SplitToSequence", 0),
+            ),
         )
         assert (
             formula in lambdas
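The freeze_panes changes above fix an off-by-one: with MultiIndex columns, DataFrame.to_excel writes columns.nlevels header rows plus one extra row (the index names) before the data, so freezing at nlevels + 1 keeps the whole header in view. A minimal sketch of the fixed call, assuming openpyxl is installed and using a throwaway file name:

    import pandas as pd

    df = pd.DataFrame(
        [[1.0, 2.0], [3.0, 4.0]],
        index=pd.Index(["m1", "m2"], name="model"),
        columns=pd.MultiIndex.from_tuples([("t", "a"), ("t", "b")], names=["k1", "k2"]),
    )
    with pd.ExcelWriter("cube.xlsx") as writer:  # placeholder file name
        df.to_excel(
            writer,
            sheet_name="data",
            # nlevels header rows + 1 index-name row stay frozen above the data
            freeze_panes=(df.columns.nlevels + 1, df.index.nlevels),
        )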
onnx_diagnostic/tasks/text_generation.py CHANGED
@@ -284,6 +284,21 @@ def get_inputs(
         add_second_input=0,
         **kwargs,
     )["inputs"]
+    res["inputs_batch1"] = get_inputs(
+        model=model,
+        config=config,
+        dummy_max_token_id=dummy_max_token_id,
+        num_hidden_layers=num_hidden_layers,
+        batch_size=1,
+        sequence_length=sequence_length,
+        sequence_length2=sequence_length2,
+        dynamic_rope=dynamic_rope,
+        num_key_value_heads=num_key_value_heads,
+        head_dim=head_dim,
+        cls_cache=cls_cache,
+        add_second_input=0,
+        **kwargs,
+    )["inputs"]
     return res
 
 
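The added block gives callers a second input set that is identical in every parameter except batch_size=1. A hedged sketch of how such a pair can be used to exercise a dynamic batch dimension (the function and its calling convention are hypothetical, not part of the diff):

    # Hypothetical consumer: run the model on both input sets so the batch
    # dimension is exercised with batch_size > 1 and batch_size == 1.
    def check_dynamic_batch(model, res):
        out_full = model(**res["inputs"])          # default batch size
        out_b1 = model(**res["inputs_batch1"])     # batch_size=1 variant
        return out_full, out_b1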
onnx_diagnostic/torch_export_patches/eval/__init__.py CHANGED
@@ -676,7 +676,13 @@ def run_exporter(
 
     if dynamic and len(inputs) > 1:
         for index, i in enumerate(inputs):
-
+            if quiet:
+                try:
+                    expected = model(*_clone(i))
+                except Exception as e:
+                    return dict(error=str(e), success=0, error_step=f"run0.{index}")
+            else:
+                expected = model(*_clone(i))
             try:
                 got = mod(*i)
             except Exception as e:
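The new branch keeps run_exporter from raising when quiet=True: a failure of the eager model is folded into the returned summary, with error_step recording which input index failed. The same pattern in isolation (a sketch, not the library's API):

    # Sketch of the quiet-mode discipline: return a structured error record
    # naming the failing step instead of propagating the exception.
    def run_step(fn, step, quiet):
        if not quiet:
            return fn(), None
        try:
            return fn(), None
        except Exception as e:
            return None, dict(error=str(e), success=0, error_step=step)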
onnx_diagnostic/torch_export_patches/eval/model_cases.py CHANGED
@@ -353,12 +353,9 @@ class ControlFlowCondNonZero(torch.nn.Module):
 
 
 class ControlFlowCondIdentity_153832(torch.nn.Module):
-    """
-    `#153832 <https://github.com/pytorch/pytorch/issues/153832>`_
-    """
+    """`#153832 <https://github.com/pytorch/pytorch/issues/153832>`_"""
 
     def forward(self, x, y):
-
         def branch_cond_then_1(x):
             x = torch.abs(x) + 1
             return x
onnx_diagnostic/torch_export_patches/onnx_export_errors.py CHANGED
@@ -340,6 +340,7 @@ def torch_export_patches(
     ###############
 
     if patch_torch:
+        from torch.fx.experimental.symbolic_shapes import ShapeEnv
         from .patches.patch_torch import (
             patched_infer_size,
             patched_vmap,
@@ -347,6 +348,9 @@ def torch_export_patches(
             patched__constrain_user_specified_dimhint_range,
             _catch_produce_guards_and_solve_constraints,
             patch__check_input_constraints_for_graph,
+            patched__broadcast_in_dim_meta,
+            patched__maybe_broadcast,
+            patched_ShapeEnv,
         )
 
         if verbose:
@@ -383,6 +387,20 @@ def torch_export_patches(
             patched__constrain_user_specified_dimhint_range
         )
 
+        # torch._prims._broadcast_in_dim_meta
+        f_broadcast_in_dim = torch._prims.broadcast_in_dim
+        f__broadcast_in_dim_meta = torch._prims._broadcast_in_dim_meta
+        torch._prims._broadcast_in_dim_meta = patched__broadcast_in_dim_meta
+        torch._prims.broadcast_in_dim = patched__broadcast_in_dim_meta
+
+        # torch._refs._maybe_broadcast
+        f__maybe_broadcast = torch._refs._maybe_broadcast
+        torch._refs._maybe_broadcast = patched__maybe_broadcast
+
+        # ShapeEnv
+        f_shape_env__evaluate_expr = ShapeEnv._evaluate_expr
+        ShapeEnv._evaluate_expr = patched_ShapeEnv._evaluate_expr
+
     # torch._export.non_strict_utils.produce_guards_and_solve_constraints
     if patch_torch and catch_constraints:
         if verbose:
@@ -405,9 +423,6 @@ def torch_export_patches(
         )
 
     if stop_if_static:
-        from torch.fx.experimental.symbolic_shapes import ShapeEnv
-        from .patches.patch_torch import patched_ShapeEnv
-
         ShapeEnv._log_guard_remember = ShapeEnv._log_guard
 
         if verbose:
@@ -584,6 +599,10 @@ def torch_export_patches(
         torch._export.non_strict_utils._constrain_user_specified_dimhint_range = (
             f___constrain_user_specified_dimhint_range
         )
+        torch._prims._broadcast_in_dim_meta = f__broadcast_in_dim_meta
+        torch._prims.broadcast_in_dim = f_broadcast_in_dim
+        torch._refs._maybe_broadcast = f__maybe_broadcast
+        ShapeEnv._evaluate_expr = f_shape_env__evaluate_expr
 
         if verbose:
             print("[torch_export_patches] restored pytorch functions")
@@ -723,9 +742,7 @@ def torch_export_patches(
 
 
 def replacement_before_exporting(args: Any) -> Any:
-    """
-    Does replacements on the given inputs if needed.
-    """
+    """Does replacements on the given inputs if needed."""
     if args is None:
         return None
     if isinstance(args, (int, float)):
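All of these hunks follow one save-patch-restore discipline: the original callable is kept in an f_*-prefixed local, the patched version is installed, and the exit path puts the original back. A generic sketch of that discipline as a context manager (patch_attr is a hypothetical helper, not part of the package):

    import contextlib

    @contextlib.contextmanager
    def patch_attr(owner, name, replacement):
        # Save the original attribute, install the patch, and always
        # restore, mirroring the f_* locals used by torch_export_patches.
        original = getattr(owner, name)
        setattr(owner, name, replacement)
        try:
            yield
        finally:
            setattr(owner, name, original)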
onnx_diagnostic/torch_export_patches/patches/patch_torch.py CHANGED
@@ -1,7 +1,10 @@
+import functools
 import inspect
+import operator
 import os
 import traceback
-from
+from functools import reduce
+from typing import Any, Callable, cast, Dict, List, Optional, Sequence, Tuple, Union
 import torch
 from torch._subclasses.fake_tensor import FakeTensorMode
 
@@ -396,6 +399,284 @@ class patched_ShapeEnv:
         #     stacklevel=0,
         # )
 
+    def _evaluate_expr(
+        self,
+        orig_expr: "sympy.Basic",  # noqa: F821
+        hint: Optional[Union[bool, int, float]] = None,
+        fx_node: Optional[torch.fx.Node] = None,
+        size_oblivious: bool = False,
+        fallback_value: Optional[bool] = None,
+        *,
+        forcing_spec: bool = False,
+    ) -> "sympy.Basic":  # noqa: F821
+        # TODO: split conjunctions and evaluate them separately
+        import sympy
+        from torch.fx.experimental import _config as config
+        from torch.fx.experimental.symbolic_shapes import (
+            SympyBoolean,
+            log,
+            SymT,
+            symbol_is_type,
+        )
+        from torch._guards import ShapeGuard
+
+        if isinstance(
+            orig_expr,
+            (sympy.logic.boolalg.BooleanTrue, sympy.logic.boolalg.BooleanFalse),
+        ):
+            return orig_expr
+
+        # Don't track this one. (Because this cache is inside this function the
+        # cache only lasts for the invocation of this function call)
+        @functools.cache
+        def compute_concrete_val() -> sympy.Basic:
+            if hint is None:
+                # This is only ever called for expressions WITHOUT unbacked
+                # symbols
+                r = self.size_hint(orig_expr)
+                assert r is not None
+                return r
+            else:
+                return sympy.sympify(hint)
+
+        concrete_val: Optional[sympy.Basic]
+
+        # Check if:
+        # 1. 'translation_validation' is set
+        # 2. the corresponding 'fx_node' is not 'None'
+        # 3. the guard should not be suppressed
+        # 4. the guard doesn't contain backed symfloat symbols
+        #    since z3 can't handle floats
+        # 5. fallback_value is none.
+        # If all of the above check, we create an FX node representing the
+        # actual expression to be guarded.
+        node = None
+        fresh = False
+        if (
+            self._translation_validation_enabled
+            and fx_node is not None
+            and not self._suppress_guards_tls()
+            and not size_oblivious
+            and not any(symbol_is_type(s, SymT.FLOAT) for s in orig_expr.free_symbols)
+            and fallback_value is None
+        ):
+            # TODO: does this even worked with unbacked :think:
+            concrete_val = compute_concrete_val()
+            if concrete_val is sympy.true:
+                node, fresh = self._create_fx_call_function(torch._assert, (fx_node,))
+            elif concrete_val is sympy.false:
+                neg, _ = self._create_fx_call_function(operator.not_, (fx_node,))
+                node, fresh = self._create_fx_call_function(torch._assert, (neg,))
+            else:
+                eql, _ = self._create_fx_call_function(operator.eq, (fx_node, concrete_val))
+                node, fresh = self._create_fx_call_function(torch._assert, (eql,))
+
+            assert node is not None
+            # If this is a fresh node, we have to remember the event index that
+            # corresponds to this assertion node.
+            # Reason: so that, given an assertion node, we can replay the ShapeEnv
+            # events until the point where this assertion node was freshly created.
+            if fresh:
+                self._add_fx_node_metadata(node)
+
+        # After creating the FX node corresponding to orig_expr, we must make sure that
+        # no error will be raised until the end of this function.
+        #
+        # Reason: the translation validation may become invalid otherwise.
+        #
+        # If an error is raised before the end of this function, we remove the FX node
+        # inserted, and re-raise the error.
+        guard = None
+
+        try:
+            if orig_expr.is_number:
+                self.log.debug("eval %s [trivial]", orig_expr)
+                if hint is not None:
+                    if isinstance(hint, bool):
+                        assert orig_expr == hint, f"{orig_expr} != {hint}"
+                    else:
+                        assert sympy.Eq(orig_expr, hint), f"{orig_expr} != {hint}"
+                return orig_expr
+
+            expr = orig_expr
+
+            static_expr = self._maybe_evaluate_static(expr, size_oblivious=size_oblivious)
+            if static_expr is not None:
+                self.log.debug(
+                    "eval %s == %s [statically known]",
+                    (f"size_oblivious({orig_expr})" if size_oblivious else size_oblivious),
+                    static_expr,
+                )
+                if not size_oblivious and config.backed_size_oblivious and hint is not None:
+                    # TODO: maybe reconcile this with use of counterfactual hints
+                    # in unbacked case
+                    assert static_expr == hint, f"{static_expr} != {hint}"
+                return static_expr
+
+            transmute_into_runtime_assert = False
+
+            concrete_val = None
+            if not (expr.free_symbols <= self.var_to_val.keys()):
+                # TODO: dedupe this with _maybe_evaluate_static
+                # Attempt to eliminate the unbacked SymInt
+                new_expr = self._maybe_evaluate_static(expr, unbacked_only=True)
+                assert new_expr is not None
+                if not (new_expr.free_symbols <= self.var_to_val.keys()):
+                    ok = False
+
+                    # fallback_value is set when guard_or_true or guard_or_false are used.
+                    if not ok and fallback_value is not None:
+                        self._log_suppressed_dde(orig_expr, fallback_value)
+                        return fallback_value
+
+                    # oblivious_var_to_val will be defined iff we have sizes
+                    # with DimDynamic.OBLIVIOUS_SIZE type.
+                    # See https://github.com/pytorch/pytorch/issues/137100#issuecomment-2495778113
+                    if (
+                        self.oblivious_var_to_val
+                        and not (
+                            correct_hint := orig_expr.xreplace(self.oblivious_var_to_val)
+                        ).free_symbols
+                        and not (
+                            counterfactual_hint := orig_expr.xreplace(
+                                {k: max(2, v) for k, v in self.oblivious_var_to_val.items()}
+                            )
+                        ).free_symbols
+                        and correct_hint == counterfactual_hint
+                    ):
+                        # TODO: better logging
+                        log.info(
+                            "oblivious_size %s -> %s (passed counterfactual)",
+                            orig_expr,
+                            # pyrefly: ignore # unbound-name
+                            correct_hint,
+                        )
+                        # pyrefly: ignore # unbound-name
+                        concrete_val = correct_hint
+                        # NB: do NOT transmute into runtime assert
+                        ok = True
+
+                    # unbacked_var_to_val is not None iff propagate_real_tensors is on.
+                    # if propagate_real_tensors is on, we check the example values
+                    # to generate (unsound_result)
+                    # and if they pass we add a runtime assertions and continue.
+                    if (
+                        not ok
+                        and self.unbacked_var_to_val
+                        and not (
+                            unsound_result := orig_expr.xreplace(
+                                self.unbacked_var_to_val
+                            ).xreplace(self.var_to_val)
+                        ).free_symbols
+                    ):
+                        # pyrefly: ignore # unbound-name
+                        self._log_real_tensor_propagation(orig_expr, unsound_result)
+                        transmute_into_runtime_assert = True
+                        # pyrefly: ignore # unbound-name
+                        concrete_val = unsound_result
+                        ok = True
+
+                    # Check if this is coming from a python assert statement,
+                    # if so, convert it to a runtime assertion
+                    # instead of failing.
+                    if not ok and self.trace_asserts and self._is_python_assert():
+                        concrete_val = sympy.true
+                        transmute_into_runtime_assert = True
+                        ok = True
+
+                    # PATCHED: ok -> True
+                    ok = True
+                    # if not ok:
+                    #     raise self._make_data_dependent_error(
+                    #         expr.xreplace(self.var_to_val),
+                    #         expr,
+                    #         expr_sym_node_id=self._expr_sym_node_id,
+                    #     )
+                else:
+                    expr = new_expr
+
+            if concrete_val is None:
+                concrete_val = compute_concrete_val()
+            self._check_frozen(expr, concrete_val)
+
+            if (
+                config.inject_EVALUATE_EXPR_flip_equality_TESTING_ONLY
+                and isinstance(hint, bool)
+                and isinstance(expr, (sympy.Eq, sympy.Ne))
+            ):
+                expr = sympy.Not(expr)
+
+            # Turn this into a boolean expression, no longer need to consult
+            # concrete_val
+            if concrete_val is sympy.true:
+                g = cast(SympyBoolean, expr)
+            elif concrete_val is sympy.false:
+                g = sympy.Not(expr)
+            else:
+                g = sympy.Eq(expr, concrete_val)  # type: ignore[arg-type]
+
+            if transmute_into_runtime_assert:
+                self.guard_or_defer_runtime_assert(
+                    g, f"propagate_real_tensors: {orig_expr} == {concrete_val}"
+                )
+                return concrete_val
+
+            if not self._suppress_guards_tls():
+                self._log_guard("eval", g, forcing_spec=forcing_spec)
+
+                # TODO: If we successfully eliminate a symbol via equality, it
+                # is not actually necessary to save a guard for the equality,
+                # as we will implicitly generate a guard when we match that
+                # input against the symbol. Probably the easiest way to
+                # implement this is to have maybe_guard_rel return a bool
+                # saying if it "subsumed" the guard (and therefore the guard
+                # is no longer necessary)
+                self._maybe_guard_rel(g)
+
+                if (
+                    torch.compiler.is_exporting()
+                    and self.prefer_deferred_runtime_asserts_over_guards
+                ):
+                    # it's fine to defer simple guards here without checking,
+                    # the _maybe_guard_rel() call above will set replacements if possible,
+                    # and so the result here will be statically known
+                    self.guard_or_defer_runtime_assert(g, f"evaluate_expr: {orig_expr}")
+                else:
+                    # at this point, we've evaluated the concrete expr value, and have
+                    # flipped/negated the guard if necessary. Now we know what to guard
+                    # or defer to runtime assert on.
+                    guard = ShapeGuard(g, self._get_sloc(), size_oblivious=size_oblivious)
+                    self.guards.append(guard)
+                    self.axioms.update(dict(self.get_implications(self.simplify(g))))
+            else:
+                self._log_guard("eval [guard suppressed]", g, forcing_spec=forcing_spec)
+
+        except Exception:
+            if fresh:
+                self._remove_fx_node(node)
+            raise
+
+        if not self._suppress_guards_tls():
+            if guard is not None:  # we might have deferred this to runtime assert
+                for s in g.free_symbols:
+                    self.symbol_guard_counter[s] += 1
+                    # Forcing_spec to avoid infinite recursion
+                    if (
+                        not forcing_spec
+                        and config.symbol_guard_limit_before_specialize is not None
+                        and self.symbol_guard_counter[s]
+                        > config.symbol_guard_limit_before_specialize
+                    ):
+                        # Force specialization
+                        self.log.info(
+                            "symbol_guard_limit_before_specialize=%s exceeded on %s",
+                            config.symbol_guard_limit_before_specialize,
+                            s,
+                        )
+                        self.evaluate_expr(s, forcing_spec=True)
+
+        return concrete_val
+
 
 def patched_vmap(func, in_dims=0, out_dims=0):
     """
@@ -570,3 +851,146 @@ def patched__constrain_user_specified_dimhint_range(
         return msg
 
     return None
+
+
+def patched__maybe_broadcast(*args, preserve_cpu_scalar_tensors=True):
+    """Patches ``torch._refs._maybe_broadcast``."""
+    from torch._prims_common import ShapeType, TensorLike, Number
+
+    # Computes common shape
+    common_shape = patched__broadcast_shapes(
+        *(t.shape if isinstance(t, TensorLike) else None for t in args)
+    )
+
+    def should_expand(a: ShapeType, b: ShapeType) -> bool:
+        from torch.fx.experimental.symbolic_shapes import (
+            guard_or_false,
+            sym_and,
+            sym_or,
+        )
+
+        if len(a) != len(b):
+            return True
+
+        for x, y in zip(a, b):
+            if guard_or_false(x != y):
+                # We know they are not the same.
+                return True
+
+            # They are the same or we do not know if they are the same or not.
+            # 1==1 no-broadcast
+            # u0==1 and 1==u0 cases. We broadcast!
+            if guard_or_false(sym_and(x == 1, y == 1)):
+                pass
+            elif guard_or_false(sym_or(x == 1, y == 1)):
+                # assume broadcasting.
+                return True
+
+            # u0==u1 assume the same, no broadcasting!
+            # PATCHED: avoid errors
+            return True  # guard_or_true(x != y)
+            # torch._check(
+            #     x == y,
+            #     lambda x=x, y=y: (
+            #         f"sizes assumed to be the same due to unbacked "
+            #         f"broadcasting semantics x={x!r}, y={y!r}"
+            #     ),
+            # )
+
+        return False
+
+    def __maybe_broadcast(x, shape):
+        if x is None:
+            return None
+        elif isinstance(x, Number):
+            return x
+        elif isinstance(x, TensorLike):
+            if preserve_cpu_scalar_tensors and torch._prims_common.is_cpu_scalar_tensor(x):
+                return x
+
+            if should_expand(x.shape, common_shape):
+                return x.expand(common_shape)
+
+            return x
+        else:
+            raise RuntimeError(f"Unexpected type when broadcasting: {str(type(x))}!")
+
+    return tuple(__maybe_broadcast(x, common_shape) for x in args)
+
+
+def patched__broadcast_in_dim_meta(
+    a: torch._prims_common.TensorLikeType,
+    shape: torch._prims_common.ShapeType,
+    broadcast_dimensions: Sequence[int],
+):
+    """Patches ``torch._prims._broadcast_in_dim_meta``."""
+    from torch.fx.experimental.symbolic_shapes import (
+        guard_or_false,
+        guard_or_true,
+        sym_or,
+    )
+
+    # Type checks
+    assert isinstance(a, torch._prims_common.TensorLike)
+    assert isinstance(shape, Sequence)
+    assert isinstance(broadcast_dimensions, Sequence)
+
+    # every dimension must be accounted for
+    assert a.ndim == len(broadcast_dimensions)
+
+    # broadcast shape must have weakly more dimensions
+    assert len(shape) >= a.ndim
+
+    # broadcast_dimensions must be an ascending sequence
+    # (no relative reordering of dims) of integers and
+    # each dimension must be within the new shape
+    def _greater_than_reduce(acc, x):
+        assert isinstance(x, (int, torch.export.Dim)), f"unexpected type {type(x)} for x"
+        assert x > acc
+        assert x < len(shape)
+
+        return x
+
+    reduce(_greater_than_reduce, broadcast_dimensions, -1)
+
+    # shape must be broadcastable to
+    for idx, new_idx in enumerate(broadcast_dimensions):
+        torch._check(
+            sym_or(a.shape[idx] == 1, shape[new_idx] == a.shape[idx]),
+            lambda idx=idx, new_idx=new_idx: (
+                f"{a.shape[idx]} must be broadcastable to {shape[new_idx]}"
+            ),
+        )
+
+    new_strides = []
+    original_idx = 0
+    for idx in range(len(shape)):
+        if idx in broadcast_dimensions:
+            # Assigns a stride of zero to dimensions
+            # which were actually broadcast
+            if guard_or_false(a.shape[original_idx] == 1):
+                if guard_or_false(a.shape[original_idx] == shape[idx]):
+                    new_strides.append(a.stride()[original_idx])
+                else:
+                    new_strides.append(0)
+            else:
+                # PATCHED: disabled this check
+                # torch._check(
+                #     a.shape[original_idx] == shape[idx],
+                #     lambda idx=idx, original_idx=original_idx: (
+                #         f"non-broadcasting semantics require "
+                #         f"{a.shape[original_idx]} == {shape[idx]}"
+                #     ),
+                # )
+                new_strides.append(a.stride()[original_idx])
+            original_idx = original_idx + 1
+        else:
+            if guard_or_true(shape[idx] != 1):
+                # consistent with previous use of guard_size_oblivious
                new_strides.append(0)
+            elif original_idx == a.ndim:
+                new_strides.append(1)
+            else:
+                new_strides.append(a.stride()[original_idx] * a.size()[original_idx])
+
+    return a.as_strided(shape, new_strides, a.storage_offset())
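The stride bookkeeping in patched__broadcast_in_dim_meta matches how PyTorch materializes a broadcast: an expanded dimension gets stride 0, so every position along it aliases the same storage. A small check of that fact:

    import torch

    a = torch.ones(3, 1)
    b = a.expand(3, 4)   # broadcast the size-1 dimension to 4
    print(a.stride())    # (1, 1)
    print(b.stride())    # (1, 0): the broadcast axis has stride 0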
onnx_diagnostic/torch_models/hghub/model_inputs.py CHANGED
@@ -25,6 +25,20 @@ def _code_needing_rewriting(model: Any) -> Any:
     return code_needing_rewriting(model)
 
 
+def _preprocess_model_id(
+    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
+) -> Tuple[str, Optional[str], bool, bool]:
+    if subfolder or "//" not in model_id:
+        return model_id, subfolder, same_as_pretrained, use_pretrained
+    spl = model_id.split("//")
+    if spl[-1] == "pretrained":
+        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
+    if spl[-1] in {"transformer", "vae"}:
+        # known subfolder
+        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
+    return model_id, subfolder, same_as_pretrained, use_pretrained
+
+
 def get_untrained_model_with_inputs(
     model_id: str,
     config: Optional[Any] = None,
@@ -85,8 +99,16 @@ def get_untrained_model_with_inputs(
             f"model_id={model_id!r}, preinstalled model is only available "
             f"if use_only_preinstalled is False."
         )
+    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+        model_id,
+        subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+    )
     if verbose:
-        print(
+        print(
+            f"[get_untrained_model_with_inputs] model_id={model_id!r}, subfolder={subfolder!r}"
+        )
         if use_preinstalled:
             print(f"[get_untrained_model_with_inputs] use preinstalled {model_id!r}")
     if config is None:
@@ -178,7 +200,7 @@ def get_untrained_model_with_inputs(
 
     if verbose:
         print(
-            f"[get_untrained_model_with_inputs] package_source={package_source.__name__}
+            f"[get_untrained_model_with_inputs] package_source={package_source.__name__} "
             f"from {package_source.__file__}"
         )
    if use_pretrained:
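_preprocess_model_id gives a // suffix in a model id a meaning: //pretrained switches same_as_pretrained and use_pretrained on, while //transformer and //vae are treated as known subfolders. The mapping, spelled out with placeholder ids:

    # Placeholder ids illustrating the convention implemented above:
    print(_preprocess_model_id("repo/model//pretrained", None, False, False))
    # -> ("repo/model", "", True, True)
    print(_preprocess_model_id("repo/model//transformer", None, False, False))
    # -> ("repo/model", "transformer", False, False)
    print(_preprocess_model_id("repo/model", None, False, False))
    # -> ("repo/model", None, False, False)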
onnx_diagnostic/torch_models/validate.py CHANGED
@@ -19,6 +19,7 @@ from ..tasks import random_input_kwargs
 from ..torch_export_patches import torch_export_patches
 from ..torch_export_patches.patch_inputs import use_dyn_not_str
 from .hghub import get_untrained_model_with_inputs
+from .hghub.model_inputs import _preprocess_model_id
 
 
 def empty(value: Any) -> bool:
@@ -289,20 +290,6 @@ def shrink_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
     return new_cfg
 
 
-def _preprocess_model_id(
-    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
-) -> Tuple[str, Optional[str], bool, bool]:
-    if subfolder or "//" not in model_id:
-        return model_id, subfolder, same_as_pretrained, use_pretrained
-    spl = model_id.split("//")
-    if spl[-1] == "pretrained":
-        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
-    if spl[-1] in {"transformer", "vae"}:
-        # known subfolder
-        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
-    return model_id, subfolder, same_as_pretrained, use_pretrained
-
-
 def validate_model(
     model_id: str,
     task: Optional[str] = None,
@@ -419,14 +406,14 @@ def validate_model(
     such as ``input_empty_cache``
     which refers to a set of inputs using an empty cache.
     """
-
+    main_validation_begin = time.perf_counter()
     model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
         model_id,
         subfolder,
         same_as_pretrained=same_as_pretrained,
         use_pretrained=use_pretrained,
     )
-    time_preprocess_model_id = time.perf_counter() -
+    time_preprocess_model_id = time.perf_counter() - main_validation_begin
     default_patch = dict(patch_transformers=True, patch_diffusers=True, patch=True)
     if isinstance(patch, bool):
         patch_kwargs = default_patch if patch else dict(patch=False)
@@ -921,7 +908,7 @@ def validate_model(
     summary.update(summary_valid)
 
     _compute_final_statistics(summary)
-    summary["time_total"] = time.perf_counter() -
+    summary["time_total"] = time.perf_counter() - main_validation_begin
 
     if verbose:
         print("[validate_model] -- done (final)")
@@ -1744,6 +1731,7 @@ def process_statistics(data: Sequence[Dict[str, float]]) -> Dict[str, Any]:
         "constant_folding",
         "remove_identity",
         "remove_duplicated_initializer",
+        "remove_duplicated_shape",
         "dynamic_dimension_naming",
         "inline",
         "check",
{onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/METADATA CHANGED

{onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/RECORD CHANGED
@@ -1,6 +1,6 @@
-onnx_diagnostic/__init__.py,sha256=
+onnx_diagnostic/__init__.py,sha256=fxgnYe-ZeX2ZhqiqehQfAUIDhdiy2BjpbzcaUtrI5g8,174
 onnx_diagnostic/__main__.py,sha256=YmyV_Aq_ianDlHyKLHMa6h8YK3ZmFPpLVHLKjM91aCk,79
-onnx_diagnostic/_command_lines_parser.py,sha256=
+onnx_diagnostic/_command_lines_parser.py,sha256=qCPdI1_Za7OM1MuR1utyhTcSZQlM4UVmN8Su4HoRjvI,33670
 onnx_diagnostic/api.py,sha256=BhCl_yCd78N7TlVtPOHjeYv1QBEy39TjZ647rcHqLh0,345
 onnx_diagnostic/doc.py,sha256=t3RELgfooYnVMAi0JSpggWkQEgUsREz8NmRvn0TnLI8,2829
 onnx_diagnostic/ext_test_case.py,sha256=emfQGiQSz5FVDhyJ1Acsv_Tast7tWl426TjtpNqxDBU,43558
@@ -17,7 +17,7 @@ onnx_diagnostic/helpers/config_helper.py,sha256=H2mOcMXfrcolFnt8EuqmRFkpQ3YdNRDf
 onnx_diagnostic/helpers/doc_helper.py,sha256=pl5MZd3_FaE8BqQnqoBuSBxoNCFcd2OJd3eITUSku5c,5897
 onnx_diagnostic/helpers/graph_helper.py,sha256=hevQT5a7_QuriVPQcbT5qe18n99Doyl5h3-qshx1-uk,14093
 onnx_diagnostic/helpers/helper.py,sha256=zl7vG6G4ueq931Z9iT8OlKfmtFxvRJD2WJQh_qsMiBs,63401
-onnx_diagnostic/helpers/log_helper.py,sha256=
+onnx_diagnostic/helpers/log_helper.py,sha256=xBKz5rj2-jEtN_tFKsOV4RpBGermrv7CWqG3KUm2psI,87335
 onnx_diagnostic/helpers/memory_peak.py,sha256=OT6mz0muBbBZY0pjgW2_eCk_lOtFRo-5w4jFo2Z6Kok,6380
 onnx_diagnostic/helpers/mini_onnx_builder.py,sha256=Cgx1ojmV0S_JpZ_UqwsNxeULMMDvMInXslhkE34fwec,22051
 onnx_diagnostic/helpers/model_builder_helper.py,sha256=sK40KRN9GWK1vbNJHIXkYAojblbKD0bdom7BFmoNSv4,12860
@@ -86,34 +86,34 @@ onnx_diagnostic/tasks/sentence_similarity.py,sha256=vPqNZgAnIvY0rKWPUTs0IlU3RFQD
 onnx_diagnostic/tasks/summarization.py,sha256=8vB_JiRzDEacIvr8CYTuVQTH73xG_jNkndoS9RHJTSs,8292
 onnx_diagnostic/tasks/text2text_generation.py,sha256=35eF_RlSeMdLTZPooLMAnszs-z0bkKZ34Iej3JgA96A,8602
 onnx_diagnostic/tasks/text_classification.py,sha256=CGc72SpXFzTUyzAHEMPgyy_s187DaYGsRdrosxG80_Q,2711
-onnx_diagnostic/tasks/text_generation.py,sha256
+onnx_diagnostic/tasks/text_generation.py,sha256=-oWq_I1lAUm9wxJnvFM1kXDJAmHbCiM6lUG3waR3o2k,13909
 onnx_diagnostic/tasks/text_to_image.py,sha256=mOS3Ruosi3hzRMxXLDN7ZkAbi7NnQb7MWwQP_okGVHs,2962
 onnx_diagnostic/tasks/zero_shot_image_classification.py,sha256=jJCMWuOqGv5ahCfjrcqxuYCJFhTgHV5KUf2yyv2yxYA,4624
 onnx_diagnostic/tasks/data/__init__.py,sha256=uJoemrWgEjI6oA-tMX7r3__x-b3siPmkgqaY7bgIles,401
 onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx,sha256=UbtvmWMqcZOKJ-I-HXWI1A6YR6QDaFS5u_yXm5C3ZBw,10299
 onnx_diagnostic/torch_export_patches/__init__.py,sha256=0SaZedwznm1hQUCvXZsGZORV5vby954wEExr5faepGg,720
-onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=
+onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=ZMsUeU3Hx5YD8xNgQTaW8Br88HvPSiCmqmKLhMz5jw0,30459
 onnx_diagnostic/torch_export_patches/onnx_export_serialization.py,sha256=klvqiMjccwGhiRnLRVbwTi5WWkMfvtnOV5ycirPcAdA,11354
 onnx_diagnostic/torch_export_patches/patch_expressions.py,sha256=vr4tt61cbDnaaaduzMj4UBZ8OUtr6GfDpIWwOYqjWzs,3213
 onnx_diagnostic/torch_export_patches/patch_inputs.py,sha256=2HQZKQV6TM5430RIvKiMPe4cfGvFdx1UnP1w76CeGE4,8110
 onnx_diagnostic/torch_export_patches/patch_module.py,sha256=R2d9IHM-RwsBKDsxuBIJnEqMoxbS9gd4YWFGG2wwV5A,39881
 onnx_diagnostic/torch_export_patches/patch_module_helper.py,sha256=2U0AdyZuU0W54QTdE7tY7imVzMnpQ5091ADNtTCkT8Y,6967
-onnx_diagnostic/torch_export_patches/eval/__init__.py,sha256=
-onnx_diagnostic/torch_export_patches/eval/model_cases.py,sha256=
+onnx_diagnostic/torch_export_patches/eval/__init__.py,sha256=YQoOGt9XQLWqnJ15NnT7ri_jDevfvpuQwEJo38E-VRU,25056
+onnx_diagnostic/torch_export_patches/eval/model_cases.py,sha256=joDJV1YfrhYBR_6eXYvNO1jbiJM8Whb47NWZxo8SBwg,27172
 onnx_diagnostic/torch_export_patches/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-onnx_diagnostic/torch_export_patches/patches/patch_torch.py,sha256=
+onnx_diagnostic/torch_export_patches/patches/patch_torch.py,sha256=muA2i6Krd6iB2-nIteplxo_pvQEx4LQMZTxDmLe1n44,40825
 onnx_diagnostic/torch_export_patches/patches/patch_transformers.py,sha256=hHO7XOaUw3XrhPSrG2hTpMNzGVm_zigLg8d7hMOK7Gs,79188
 onnx_diagnostic/torch_export_patches/serialization/__init__.py,sha256=BHLdRPtNAtNPAS-bPKEj3-foGSPvwAbZXrHzGGPDLEw,1876
 onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py,sha256=drq3EH_yjcSuIWYsVeUWm8Cx6YCZFU6bP_1PLtPfY5I,945
 onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py,sha256=mcmZGekzQlLgE_o3SdKlRgCx4ewwyyAuNWZ9CaN_zrI,9317
 onnx_diagnostic/torch_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_models/llms.py,sha256=soyg4yC87ptGoeulJhKqw5opGmuLvH1pn_ZDXZ4Jr8E,90
-onnx_diagnostic/torch_models/validate.py,sha256=
+onnx_diagnostic/torch_models/validate.py,sha256=XNGZi7qSSytUczDfJ-X2ff5xvGdWdWkwjyz8ejxUSCE,79107
 onnx_diagnostic/torch_models/hghub/__init__.py,sha256=vi1Q7YHdddj1soiBN42MSvJdFqe2_KUoWafHISjwOu8,58
 onnx_diagnostic/torch_models/hghub/hub_api.py,sha256=rFbiPNLET-KdBpnv-p0nKgwHX6d7C_Z0s9zZ86_92kQ,14307
 onnx_diagnostic/torch_models/hghub/hub_data.py,sha256=8V_pAgACPLPsLRYUododg7MSL6str-T3tBEGY4OaeYQ,8724
 onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py,sha256=aSa_7Rjider6ruqQ2-fXQMyyDS8VhB1xKxcPNk8qUeU,288776
-onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=
+onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=FaNFmWmzAqQQ7nM-L0eypeHG-YmCReXxwOOAb3UN7D0,15493
 onnx_diagnostic/torch_models/hghub/model_specific.py,sha256=j50Nu7wddJMoqmD4QzMbNdFDUUgUmSBKRzPDH55TlUQ,2498
 onnx_diagnostic/torch_models/untrained/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_models/untrained/llm_phi2.py,sha256=JbGZmW41MPJcQgqaJc9R2G00nI79nI-lABN-ffA1lmY,4037
@@ -121,8 +121,8 @@ onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py,sha256=QXw_Bs2SzfeiQMf-tm
 onnx_diagnostic/torch_onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_onnx/runtime_info.py,sha256=1g9F_Jf9AAgYQU4stbsrFXwQl-30mWlQrFbQ7val8Ps,9268
 onnx_diagnostic/torch_onnx/sbs.py,sha256=IoKLA5UwS6kY8g4OOf_bdQwCziIsQfBczZ3w8wo4wZM,16905
-onnx_diagnostic-0.7.
-onnx_diagnostic-0.7.
-onnx_diagnostic-0.7.
-onnx_diagnostic-0.7.
-onnx_diagnostic-0.7.
+onnx_diagnostic-0.7.14.dist-info/licenses/LICENSE.txt,sha256=Vv6TXglX6Rc0d-f8aREhayhT-6PMQXEyOmI2NKlUCMc,1045
+onnx_diagnostic-0.7.14.dist-info/METADATA,sha256=id7f09epUAspAc4BxIlxRp0HhfGpR4SXI3BnYx0bjts,6730
+onnx_diagnostic-0.7.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+onnx_diagnostic-0.7.14.dist-info/top_level.txt,sha256=KwNkXewmcobM3ZT1DJLVWH6ebJzA5qKg7cWqKfpGNT4,16
+onnx_diagnostic-0.7.14.dist-info/RECORD,,

{onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/WHEEL
File without changes

{onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/licenses/LICENSE.txt
File without changes

{onnx_diagnostic-0.7.13.dist-info → onnx_diagnostic-0.7.14.dist-info}/top_level.txt
File without changes