PyPI - onnx-diagnostic - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

onnx-diagnostic 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

onnx_diagnostic/__init__.py CHANGED Viewed

@@ -3,5 +3,5 @@ Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 __author__ = "Xavier Dupré"

onnx_diagnostic/cache_helpers.py CHANGED Viewed

@@ -4,6 +4,28 @@ import torch
 import transformers
 import transformers.cache_utils
+def is_cache_dynamic_registered() -> bool:
+    """
+    Tells class :class:`transformers.cache_utils.DynamicCache` can be
+    serialized and deserialized. Only then, :func:`torch.export.export`
+    can export a model.
+    """
+    bsize, nheads, slen, dim = 2, 4, 3, 7
+    cache = make_dynamic_cache(
+        [
+            (
+                torch.randn(bsize, nheads, slen, dim),
+                torch.randn(bsize, nheads, slen, dim),
+            )
+            for i in range(2)
+        ]
+    )
+    values, spec = torch.utils._pytree.tree_flatten(cache)
+    cache2 = torch.utils._pytree.tree_unflatten(values, spec)
+    return len(cache2.key_cache) == len(cache.value_cache)
 if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
     def make_dynamic_cache(

onnx_diagnostic/doc.py ADDED Viewed

@@ -0,0 +1,32 @@
+def reset_torch_transformers(gallery_conf, fname):
+    "Resets torch dynamo for :epkg:`sphinx-gallery`."
+    import matplotlib.pyplot as plt
+    import torch
+    plt.style.use("ggplot")
+    torch._dynamo.reset()
+def plot_legend(
+    text: str, text_bottom: str = "", color: str = "green", fontsize: int = 35
+) -> "matplotlib.axes.Axes":  # noqa: F821
+    import matplotlib.pyplot as plt
+    fig = plt.figure()
+    ax = fig.add_subplot()
+    ax.axis([0, 5, 0, 5])
+    ax.text(2.5, 4, "END", fontsize=50, horizontalalignment="center")
+    ax.text(
+        2.5,
+        2.5,
+        text,
+        fontsize=fontsize,
+        bbox={"facecolor": color, "alpha": 0.5, "pad": 10},
+        horizontalalignment="center",
+        verticalalignment="center",
+    )
+    if text_bottom:
+        ax.text(4.5, 0.5, text_bottom, fontsize=20, horizontalalignment="right")
+    ax.grid(False)
+    ax.set_axis_off()
+    return ax

onnx_diagnostic/ext_test_case.py CHANGED Viewed

@@ -1090,3 +1090,7 @@ class ExtTestCase(unittest.TestCase):
             and not numpy.isnan(diff["rel"])
             and diff["rel"] <= rtol
         ), f"discrepancies in {test_name!r}, diff={string_diff(diff)}"
+    def _debug(self):
+        "Tells if DEBUG=1 is set up."
+        return os.environ.get("DEBUG") in BOOLEAN_VALUES

onnx_diagnostic/ort_session.py CHANGED Viewed

@@ -240,7 +240,16 @@ class InferenceSessionForNumpy(_InferenceSession):
             el_type = ortvalues[i].element_type()
             if el_type < onnx.TensorProto.BFLOAT16:
-                res.append(np.from_dlpack(ortvalues[i]))
+                try:
+                    a = np.from_dlpack(ortvalues[i])
+                except RuntimeError as e:
+                    assert "ORT only supports contiguous tensor for now." in str(e), (
+                        f"As it says, non-contiguous OrtValue are not supported "
+                        f"though DLPack, i={i}, the error is different {e}"
+                    )
+                    # We make a copy in that case.
+                    a = ortvalues[i].numpy()
+                res.append(a)
                 continue
             # no easy conversion, let's use torch
@@ -430,6 +439,8 @@ class InferenceSessionForTorch(_InferenceSession):
         new_feeds = {}
         for k, v in feeds.items():
             assert hasattr(v, "__dlpack__"), f"class {type(v)} should be serialized"
+            if not v.is_contiguous():
+                v = v.contiguous()
             new_feeds[k] = ORTC.OrtValue.from_dlpack(v.__dlpack__(), v.dtype == torch.bool)
         if self.nvtx:
             self.torch.cuda.nvtx.range_push("run_with_ort_values")

onnx_diagnostic/torch_export_patches/onnx_export_errors.py CHANGED Viewed

@@ -145,6 +145,11 @@ def _unregister(cls: type, verbose: int = 0):
         # torch >= 2.7
         torch.utils._pytree._deregister_pytree_node(cls)
     optree.unregister_pytree_node(cls, namespace="torch")
+    if cls in torch.utils._pytree.SUPPORTED_NODES:
+        import packaging.version as pv
+        if pv.Version(torch.__version__) < pv.Version("2.7.0"):
+            del torch.utils._pytree.SUPPORTED_NODES[cls]
     assert cls not in torch.utils._pytree.SUPPORTED_NODES, (
         f"{cls} was not successful unregistered "
         f"from torch.utils._pytree.SUPPORTED_NODES="
@@ -190,6 +195,7 @@ def bypass_export_some_errors(
     patch_torch: bool = True,
     patch_transformers: bool = False,
     catch_constraints: bool = True,
+    stop_if_static: bool = False,
     verbose: int = 0,
     patch: bool = True,
 ) -> Callable:
@@ -203,8 +209,12 @@ def bypass_export_some_errors(
         as a result, some dynamic dimension may turn into static ones,
         the environment variable ``SKIP_SOLVE_CONSTRAINTS=0``
         can be put to stop at that stage.
+    :param stop_if_static: see example :ref:`l-plot-export-locale-issue`,
+        to stop the export as soon as an issue is detected with dynamic shapes
+        and show a stack trace indicating the exact location of the issue
     :param patch: if False, disable all patches except the registration of
         serialization function
+    :param verbose: to show which patches is applied
     The list of available patches.
@@ -348,6 +358,18 @@ def bypass_export_some_errors(
                 )
             )
+        if stop_if_static:
+            if verbose:
+                print(
+                    "[bypass_export_some_errors] assert when a dynamic dimension turns static"
+                )
+            from torch.fx.experimental.symbolic_shapes import ShapeEnv
+            from .patches.patch_torch import patched_ShapeEnv
+            f_shape_env__set_replacement = ShapeEnv._set_replacement
+            ShapeEnv._set_replacement = patched_ShapeEnv._set_replacement
         ####################
         # patch transformers
         ####################
@@ -401,6 +423,12 @@ def bypass_export_some_errors(
                 if verbose:
                     print("[bypass_export_some_errors] restored pytorch functions")
+            if stop_if_static:
+                if verbose:
+                    print("[bypass_export_some_errors] restored ShapeEnv._set_replacement")
+                ShapeEnv._set_replacement = f_shape_env__set_replacement
             if catch_constraints:
                 # to catch or skip dynamic_shapes issues
                 torch._export.non_strict_utils.produce_guards_and_solve_constraints = (

onnx_diagnostic/torch_export_patches/patches/patch_torch.py CHANGED Viewed

@@ -146,3 +146,186 @@ def patched__broadcast_shapes(*_shapes):
                 common_shape[idx] = torch.sym_max(common_shape[idx], shape[idx])
     return common_shape
+class patched_ShapeEnv:
+    def _set_replacement(
+        self, a: "sympy.Symbol", tgt: "sympy.Expr", msg: str  # noqa: F821
+    ) -> None:
+        """
+        Adds or updates a replacement for a symbol.
+        Use this instead of `self.replacements[a] = tgt`.
+        """
+        if tgt == self.replacements.get(a, None):
+            return
+        if a in tgt.free_symbols:
+            return
+        import sympy
+        from torch._logging import structured
+        from torch.utils._traceback import CapturedTraceback
+        from torch._logging import trace_structured
+        from torch._guards import TracingContext
+        from torch.utils._sympy.functions import FloorToInt, CeilToInt
+        from torch.utils._sympy.solve import try_solve
+        from torch.fx.experimental.symbolic_shapes import (
+            _is_supported_equivalence,
+            ValueRanges,
+        )
+        # Precondition: a == tgt
+        assert isinstance(a, sympy.Symbol)
+        if self.allow_complex_guards_as_runtime_asserts and not _is_supported_equivalence(tgt):
+            # continuing leads to placeholder shapes
+            # having complex expressions that we can't resolve
+            return
+        # Handles nested tensor symbolic variables which don't have
+        # var_to_range bounds
+        tgt_bound = None
+        if a in self.var_to_range:
+            src_bound = self.var_to_range[a]
+            # First, refine the value range of a based on the computed value range
+            # of tgt.  This is always OK to do, even if we decide not to do the
+            # substitution in the end.  This might be a no-op, if a already has
+            # a tighter bound
+            tgt_bound = self.bound_sympy(tgt)
+            self._update_var_to_range(a, tgt_bound)
+            # Next, check if we can update the range of free symbols in tgt
+            # based on the range in a. But only do it if:
+            #  - the source bound non-trivially improves over what we get out of
+            #    the existing bounds.
+            #  - the replacement is univariate and we can invert the tgt expression
+            if not tgt_bound.issubset(src_bound) and len(tgt.free_symbols) == 1:
+                b = next(iter(tgt.free_symbols))
+                # Try to invert the equality
+                r = try_solve(sympy.Eq(a, tgt), b, floordiv_inequality=False)
+                if r is not None:
+                    self.log.debug(
+                        "set_replacement: solve for %s in %s == %s gives %s",
+                        b,
+                        a,
+                        tgt,
+                        r,
+                    )
+                    # The solution here can be non-integral, for example, if
+                    # we have s0 = 2*s1, then s1 = s0/2.  What we would like
+                    # to do is calculated the bounds in arbitrary precision,
+                    # and then requantize the bound to integers when we are
+                    # done.
+                    rat_b_bound = self.bound_sympy(r[1])
+                    b_bound = ValueRanges(
+                        CeilToInt(rat_b_bound.lower), FloorToInt(rat_b_bound.upper)
+                    )
+                    self._update_var_to_range(b, b_bound, self.var_to_range_sloc[a])
+                    tgt_bound = self.bound_sympy(tgt)
+                    assert tgt_bound.issubset(
+                        src_bound
+                    ), f"{tgt_bound=} not a subset of {src_bound=}"
+            # TODO: Should we propagate size-like-ness?
+            #
+            # Pros: if u0 is size-like, intuitively u0 == u1 should cause u1
+            # to become size-like.
+            #
+            # Cons: if u0 is size-like, what about u0 - 1 == u1?  You CAN'T
+            # propagate in this case, because what if u0 == 0, then u1 is negative
+            # and clearly isn't a size.  So, at minimum, any f(x) whose value
+            # range isn't [0, inf] given x in [0, inf] cannot propagate
+            # size-like-ness.  But there are many situations where you could
+            # imagine u1 is going to be size-like and actually you just didn't
+            # have a refined enough value range on u0.  Since even innocuous
+            # looking arithmetic operations can destroy size-like-ness, it's
+            # best to not propagate it at all and force the user to annotate it
+            # as necessary.
+            #
+            # Compromise: we preserve size-like-ness only for exact equality
+            # and nothing else.
+            if a in self.size_like and isinstance(tgt, sympy.Symbol):
+                self.size_like.add(tgt)
+            elif isinstance(tgt, sympy.Symbol) and tgt in self.size_like:
+                self.size_like.add(a)
+            # Now, decide if we will do the substitution.
+            #
+            #  - If the source has a non-trivial range, only substitute if
+            #    we preserve this range.  Note that we may have propagated
+            #    the src_range to free variables in tgt when tgt is univariate
+            #    and we could find an inverse, which helps us achieve this.
+            #    This ensures we never "forget" about user defined ranges,
+            #    even if they end up being defined on composite formulas
+            #    like s0 + s1.
+            #
+            #  - If the variable is unbacked, only substitute if the substitution
+            #    would preserve the bounds also under size-like-ness conditions.
+            if not tgt_bound.issubset(src_bound):
+                self.log.debug(
+                    "skipped set_replacement %s = %s (%s) [%s not subset of %s]",
+                    a,
+                    tgt,
+                    msg,
+                    tgt_bound,
+                    src_bound,
+                )
+                return
+            elif a in self.size_like:
+                tgt_bound_so = self.bound_sympy(tgt, size_oblivious=True)
+                src_bound_so = self.bound_sympy(a, size_oblivious=True)
+                if not tgt_bound_so.issubset(src_bound_so):
+                    self.log.debug(
+                        "skipped set_replacement %s = %s (%s) "
+                        "[%s not subset of %s (size-oblivious conditions)]",
+                        a,
+                        tgt,
+                        msg,
+                        tgt_bound_so,
+                        src_bound_so,
+                    )
+                    return
+        if isinstance(tgt, (sympy.Integer, sympy.Float)):
+            # specializing to a constant, which is likely unexpected (unless
+            # you specified dynamic=True)
+            user_tb = TracingContext.extract_stack()
+            trace_structured(
+                "symbolic_shape_specialization",
+                metadata_fn=lambda: {
+                    "symbol": repr(a),
+                    "sources": [s.name() for s in self.var_to_sources.get(a, [])],
+                    "value": repr(tgt),
+                    "reason": msg,
+                    "stack": structured.from_traceback(
+                        CapturedTraceback.extract(skip=1).summary()
+                    ),
+                    "user_stack": (structured.from_traceback(user_tb) if user_tb else None),
+                },
+            )
+            # if config.print_specializations:
+            #    self.log.warning(
+            #         "Specializing %s to %s", self.var_to_sources[a][0].name(), tgt
+            #     )
+            #     self.log.debug("SPECIALIZATION", stack_info=True)
+        assert msg != "range_refined_to_singleton", (
+            f"A dynamic dimension becomes static! "
+            f"a={a!r}, tgt={tgt!r}, msg={msg!r}, tgt_bound={tgt_bound}"
+        )
+        # log.info("set_replacement %s = %s (%s) %s", a, tgt, msg, tgt_bound)
+        self.replacements[a] = tgt
+        # NB: the replacement may get refined, but the user will find the
+        # FIRST one most useful (TODO: Maybe we could consider tracking all of
+        # them)
+        if a not in self.replacements_slocs:
+            self.replacements_slocs[a] = self._get_sloc()
+        self._update_version_counter()
+        # When specializing 'a == tgt', the equality should be also conveyed to
+        # Z3, in case an expression uses 'a'.
+        self._add_target_expr(sympy.Eq(a, tgt, evaluate=False))

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py CHANGED Viewed

@@ -102,9 +102,22 @@ class patched_DynamicCache:
     `transformers/#36652 <https://github.com/huggingface/transformers/pull/36652>`_.
     """
-    _PATCHES_ = ["reorder_cache", "update", "crop", "from_batch_splits"]
+    _PATCHES_ = ["reorder_cache", "update", "crop", "from_batch_splits", "get_seq_length"]
     _PATCHED_CLASS_ = transformers.cache_utils.DynamicCache
+    def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
+        """Returns the sequence length of the cached states.
+        A layer index can be optionally passed."""
+        # TODO: deprecate this function in favor of `cache_position`
+        is_empty_layer = (
+            len(self.key_cache) == 0  # no cache in any layer
+            or len(self.key_cache)
+            <= layer_idx  # skipped `layer_idx` and hasn't run a layer with cache after it
+            or self.key_cache[layer_idx].numel() == 0  # the layer has no cache
+        )
+        layer_seq_length = self.key_cache[layer_idx].shape[-2] if not is_empty_layer else 0
+        return layer_seq_length
     def reorder_cache(self, beam_idx: torch.LongTensor):
         """Reorders the cache for beam search, given the selected beam indices."""
         for layer_idx in range(len(self.key_cache)):

onnx_diagnostic/torch_models/llms.py CHANGED Viewed

@@ -1,96 +1,2 @@
-from typing import Any, Dict
-import torch
-import transformers
-from ..cache_helpers import make_dynamic_cache
-def get_tiny_llm(
-    batch_size: int = 2,
-    input_cache: bool = True,
-    dynamic_rope: bool = False,
-    **kwargs,
-) -> Dict[str, Any]:
-    """
-    Gets a non initialized model.
-    :param batch_size: batch size
-    :param input_cache: generate data for this iteration with or without cache
-    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
-    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
-    :return: dictionary
-    See :ref:`l-plot-tiny-llm-export` for an example.
-    """
-    config = {
-        "architectures": ["LlamaForCausalLM"],
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 192,
-        "initializer_range": 0.02,
-        "intermediate_size": 1024,
-        "max_position_embeddings": 1024,
-        "model_type": "llama",
-        "num_attention_heads": 2,
-        "num_hidden_layers": 1,
-        "num_key_value_heads": 1,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
-        "tie_word_embeddings": False,
-        "torch_dtype": "float32",
-        "transformers_version": "4.31.0.dev0",
-        "use_cache": True,
-        "vocab_size": 32000,
-    }
-    config.update(**kwargs)
-    conf = transformers.LlamaConfig(**config)
-    model = transformers.LlamaForCausalLM(conf)
-    model.eval()
-    # now the inputs
-    cache_last_dim = 96
-    sequence_length = 30
-    sequence_length2 = 3
-    num_key_value_heads = 1
-    max_token_id = config["vocab_size"] - 1
-    n_layers = config["num_hidden_layers"]
-    batch = torch.export.Dim("batch", min=1, max=1024)
-    seq_length = torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = torch.export.Dim("cache_length", min=1, max=4096)
-    shapes = {
-        "input_ids": {0: batch, 1: seq_length},
-        "attention_mask": {
-            0: batch,
-            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
-        },
-        "past_key_values": [
-            [{0: batch, 2: cache_length} for _ in range(n_layers)],
-            [{0: batch, 2: cache_length} for _ in range(n_layers)],
-        ],
-    }
-    inputs = dict(
-        input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
-            torch.int64
-        ),
-        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
-            torch.int64
-        ),
-        past_key_values=make_dynamic_cache(
-            [
-                (
-                    torch.randn(
-                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
-                    ),
-                    torch.randn(
-                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
-                    ),
-                )
-                for i in range(n_layers)
-            ]
-        ),
-    )
-    return dict(inputs=inputs, model=model, dynamic_shapes=shapes)
+from .untrained.llm_phi2 import get_phi2
+from .untrained.llm_tiny_llm import get_tiny_llm

onnx_diagnostic/torch_models/untrained/__init__.py ADDED Viewed

File without changes

onnx_diagnostic/torch_models/untrained/llm_phi2.py ADDED Viewed

@@ -0,0 +1,109 @@
+from typing import Any, Dict
+import torch
+import transformers
+from ...cache_helpers import make_dynamic_cache
+def get_phi2(
+    batch_size: int = 1,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
+    dynamic_rope: bool = False,
+    **kwargs,
+) -> Dict[str, Any]:
+    """
+    Gets a non initialized model
+    similar to `microsoft/phi-2 <https://huggingface.co/microsoft/phi-2>`_
+    :param batch_size: batch size
+    :param sequence_length: sequence length
+    :param sequence_length2: new sequence length
+    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
+    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
+    :return: dictionary
+    See :ref:`l-plot-tiny-llm-export-patched` for an example with a similar model.
+    """
+    config = {
+        "_name_or_path": "microsoft/phi-2",
+        "architectures": ["PhiForCausalLM"],
+        "attention_dropout": 0.0,
+        "bos_token_id": 50256,
+        "embd_pdrop": 0.0,
+        "eos_token_id": 50256,
+        "hidden_act": "gelu_new",
+        "hidden_size": 2560,
+        "initializer_range": 0.02,
+        "intermediate_size": 10240,
+        "layer_norm_eps": 1e-05,
+        "max_position_embeddings": 2048,
+        "model_type": "phi",
+        "num_attention_heads": 32,
+        "num_hidden_layers": 32,
+        "num_key_value_heads": 32,
+        "partial_rotary_factor": 0.4,
+        "qk_layernorm": False,
+        "resid_pdrop": 0.1,
+        "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
+        "rope_theta": 10000.0,
+        "tie_word_embeddings": False,
+        "torch_dtype": "float16",
+        "transformers_version": "4.37.0",
+        "use_cache": True,
+        "vocab_size": 51200,
+    }
+    config.update(**kwargs)
+    conf = transformers.PhiConfig(**config)
+    model = transformers.PhiForCausalLM(conf)
+    model.eval()
+    # now the inputs
+    cache_last_dim = 80
+    max_token_id = config["vocab_size"] - 1
+    n_layers = config["num_hidden_layers"]
+    num_key_value_heads = config["num_key_value_heads"]
+    batch = torch.export.Dim("batch", min=1, max=1024)
+    seq_length = torch.export.Dim("seq_length", min=1, max=4096)
+    cache_length = torch.export.Dim("cache_length", min=1, max=4096)
+    shapes = {
+        "input_ids": {0: batch, 1: seq_length},
+        "position_ids": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
+        "attention_mask": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
+        "past_key_values": [
+            [{0: batch, 2: cache_length} for _ in range(n_layers)],
+            [{0: batch, 2: cache_length} for _ in range(n_layers)],
+        ],
+    }
+    inputs = dict(
+        input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
+            torch.int64
+        ),
+        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
+            torch.int64
+        ),
+        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
+        .to(torch.int64)
+        .expand((batch_size, -1)),
+        past_key_values=make_dynamic_cache(
+            [
+                (
+                    torch.randn(
+                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
+                    ),
+                    torch.randn(
+                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
+                    ),
+                )
+                for i in range(n_layers)
+            ]
+        ),
+    )
+    return dict(inputs=inputs, model=model, dynamic_shapes=shapes)

onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py ADDED Viewed

@@ -0,0 +1,104 @@
+from typing import Any, Dict
+import torch
+import transformers
+from ...cache_helpers import make_dynamic_cache
+def get_tiny_llm(
+    batch_size: int = 2,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
+    dynamic_rope: bool = False,
+    **kwargs,
+) -> Dict[str, Any]:
+    """
+    Gets a non initialized model
+    similar to `arnir0/Tiny-LLM <https://huggingface.co/arnir0/Tiny-LLM>`_
+    :param batch_size: batch size
+    :param sequence_length: sequence length
+    :param sequence_length2: new sequence length
+    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
+    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
+    :return: dictionary
+    See :ref:`l-plot-tiny-llm-export` or :ref:`l-plot-tiny-llm-export-patched` for examples.
+    """
+    config = {
+        "architectures": ["LlamaForCausalLM"],
+        "bos_token_id": 1,
+        "eos_token_id": 2,
+        "hidden_act": "silu",
+        "hidden_size": 192,
+        "initializer_range": 0.02,
+        "intermediate_size": 1024,
+        "max_position_embeddings": 1024,
+        "model_type": "llama",
+        "num_attention_heads": 2,
+        "num_hidden_layers": 1,
+        "num_key_value_heads": 1,
+        "pretraining_tp": 1,
+        "rms_norm_eps": 1e-05,
+        "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
+        "tie_word_embeddings": False,
+        "torch_dtype": "float32",
+        "transformers_version": "4.31.0.dev0",
+        "use_cache": True,
+        "vocab_size": 32000,
+    }
+    config.update(**kwargs)
+    conf = transformers.LlamaConfig(**config)
+    model = transformers.LlamaForCausalLM(conf)
+    model.eval()
+    # now the inputs
+    cache_last_dim = 96
+    max_token_id = config["vocab_size"] - 1
+    n_layers = config["num_hidden_layers"]
+    num_key_value_heads = config["num_key_value_heads"]
+    batch = torch.export.Dim("batch", min=1, max=1024)
+    seq_length = torch.export.Dim("seq_length", min=1, max=4096)
+    cache_length = torch.export.Dim("cache_length", min=1, max=4096)
+    shapes = {
+        "input_ids": {0: batch, 1: seq_length},
+        "attention_mask": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
+        "position_ids": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
+        "past_key_values": [
+            [{0: batch, 2: cache_length} for _ in range(n_layers)],
+            [{0: batch, 2: cache_length} for _ in range(n_layers)],
+        ],
+    }
+    inputs = dict(
+        input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
+            torch.int64
+        ),
+        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
+            torch.int64
+        ),
+        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
+        .to(torch.int64)
+        .expand((batch_size, -1)),
+        past_key_values=make_dynamic_cache(
+            [
+                (
+                    torch.randn(
+                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
+                    ),
+                    torch.randn(
+                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
+                    ),
+                )
+                for i in range(n_layers)
+            ]
+        ),
+    )
+    return dict(inputs=inputs, model=model, dynamic_shapes=shapes)

onnx_diagnostic/torch_test_helper.py CHANGED Viewed

@@ -13,6 +13,10 @@ def is_torchdynamo_exporting() -> bool:
     """Tells if torch is exporting a model."""
     import torch
+    if not hasattr(torch.compiler, "is_exporting"):
+        # torch.compiler.is_exporting requires torch>=2.7
+        return False
     try:
         return torch.compiler.is_exporting()
     except Exception:

{onnx_diagnostic-0.2.1.dist-info → onnx_diagnostic-0.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: onnx-diagnostic
-Version: 0.2.1
+Version: 0.2.2
 Summary: Investigate ONNX models
 Home-page: https://github.com/sdpython/onnx-diagnostic
 Author: Xavier Dupré
@@ -87,6 +87,8 @@ Enlightening Examples
 * `Use DYNAMIC or AUTO when exporting if dynamic shapes has constraints
   <https://sdpython.github.io/doc/onnx-diagnostic/dev/auto_examples/plot_export_with_dynamic_shapes_auto.html>`_
+* `Find and fix an export issue due to dynamic shapes
+  <https://sdpython.github.io/doc/onnx-diagnostic/dev/auto_examples/plot_export_locate_issue.html>`_
 * `Export with DynamicCache and dynamic shapes
   <https://sdpython.github.io/doc/onnx-diagnostic/dev/auto_examples/plot_export_with_dynamic_cache.html>`_
 * `Steel method forward to guess the dynamic shapes (with Tiny-LLM)

{onnx_diagnostic-0.2.1.dist-info → onnx_diagnostic-0.2.2.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,12 @@
-onnx_diagnostic/__init__.py,sha256=_Od-M8LjUrc4uDtezDVkld8vhaBzkJ0r5WdsOWcCUG0,164
+onnx_diagnostic/__init__.py,sha256=G8fJzYgyo9GW9yrpyPU-kbp-3X-LMyXMXxZiqPLDLEg,164
 onnx_diagnostic/args.py,sha256=7pTrw1A1wuNvLdXJdpda5spPI140FylwSmxxZTGu_4E,4389
-onnx_diagnostic/cache_helpers.py,sha256=3wMsOseENzbsjzo7yhgt44cH0S1t-n1e-_dj-os3uGc,2698
-onnx_diagnostic/ext_test_case.py,sha256=XvrgU7_TBo2MB3ilhOawatmSwvtHeGfYnl_a0t69QAo,37222
+onnx_diagnostic/cache_helpers.py,sha256=rtiJZDzuIXR88p3e9ALeMhOiJ66SONM7oUO1t5-N7FM,3374
+onnx_diagnostic/doc.py,sha256=qKBHk5pFGK0WwecMqAB5fhAdTRF5g9-rsMWLsyczYFc,913
+onnx_diagnostic/ext_test_case.py,sha256=dlpwu4CunVA35W1VycCYVreEIOZyeYQpvU8fHKfurNE,37340
 onnx_diagnostic/helpers.py,sha256=qOArpaRV2W3LcD48Ewi6QsBHWZcSpvaHaA0f9bOMGWA,61494
 onnx_diagnostic/onnx_tools.py,sha256=avk1GkA8QRPb_a6JEmN55YfbefxL4wCBRKZMNMs_VuI,8304
-onnx_diagnostic/ort_session.py,sha256=hcXwSOLLosHzmQYQRhzFVd5kHbj7frymZiw_WTXSg44,26221
-onnx_diagnostic/torch_test_helper.py,sha256=ZRKZDzuv_IZogt79ubJpbD6YDxRX52BX74er7OI0VOQ,9336
+onnx_diagnostic/ort_session.py,sha256=z_uBekAkQMHbt1XdxQUM3HSA9SPdrkoEJqETe-yDdd0,26746
+onnx_diagnostic/torch_test_helper.py,sha256=f6cbHwEexsY9-wwTDu9_y5_-57QzUW8UrC8YWIa4ID4,9468
 onnx_diagnostic/export/__init__.py,sha256=nngeNcP71r_Mmvt2U-qbG-t4-0Kr623zcUhHgNA3IiA,40
 onnx_diagnostic/export/dynamic_shapes.py,sha256=9lVZEq9fY_97HGeXVbZlStYXJQ3bs_T5jVSp3iu46Kc,15662
 onnx_diagnostic/reference/__init__.py,sha256=0Al5kins8LlBICAsszEZ59thMwmaARBO6fMwtYpKOOQ,98
@@ -41,15 +42,18 @@ onnx_diagnostic/reference/ops/op_slice.py,sha256=yRxfYBs8b7QezyyG9JHCD8MIJHij2qR
 onnx_diagnostic/reference/ops/op_transpose_cast.py,sha256=ifef74rvh0Yvq1Zx51B4mfnISbxV9uRg9DFjkdL1_68,361
 onnx_diagnostic/reference/ops/op_tri_matrix.py,sha256=Yn2gxAyygcwtF5Hjau9ihXDAzul0BAkdqVimVahtFBU,519
 onnx_diagnostic/torch_export_patches/__init__.py,sha256=RZzVGgouNNXaPirQJYQThiq5wrliwH4unVszeU18oJw,116
-onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=B7OSjS9dbwa5NNh7kpPexL1eb03vSD7L7TgaMfiH60Q,16223
+onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=o_Q6DHWUL9_zwHNwGHnzkBvhdJd3LV968-6mVKEr3Ko,17441
 onnx_diagnostic/torch_export_patches/onnx_export_serialization.py,sha256=0DAa1wP4kFRoru2J3i2r1sNNxUdpF0Op5noTcmLWRow,4087
 onnx_diagnostic/torch_export_patches/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-onnx_diagnostic/torch_export_patches/patches/patch_torch.py,sha256=uUraqyHGnj9QykRtA_CFjlTGmk8NsmbY2Ufu7ITPSJQ,5783
-onnx_diagnostic/torch_export_patches/patches/patch_transformers.py,sha256=okNlYyWxM4P6TDxZv-_vEKI8nJSx4tnwXZRmcO1ZnPg,21118
+onnx_diagnostic/torch_export_patches/patches/patch_torch.py,sha256=SPvwag9oelMDmK5St0c1DXOKPO7_5iA9Nck8P9JCGk0,14111
+onnx_diagnostic/torch_export_patches/patches/patch_transformers.py,sha256=RSoIPTbiZj9IV7Gd9NjAnJGhLHveyB2Tw1kKpATw3f8,21814
 onnx_diagnostic/torch_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-onnx_diagnostic/torch_models/llms.py,sha256=pEMzD5GwmQ-X2WmYHw1OnNT9RKMRwJdGVyoGxWEbaio,3111
-onnx_diagnostic-0.2.1.dist-info/licenses/LICENSE.txt,sha256=Vv6TXglX6Rc0d-f8aREhayhT-6PMQXEyOmI2NKlUCMc,1045
-onnx_diagnostic-0.2.1.dist-info/METADATA,sha256=_mDC2LPuSjXECIosSLHGsPLqjWlUGmQH62_-xLKOT0w,4696
-onnx_diagnostic-0.2.1.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
-onnx_diagnostic-0.2.1.dist-info/top_level.txt,sha256=KwNkXewmcobM3ZT1DJLVWH6ebJzA5qKg7cWqKfpGNT4,16
-onnx_diagnostic-0.2.1.dist-info/RECORD,,
+onnx_diagnostic/torch_models/llms.py,sha256=soyg4yC87ptGoeulJhKqw5opGmuLvH1pn_ZDXZ4Jr8E,90
+onnx_diagnostic/torch_models/untrained/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+onnx_diagnostic/torch_models/untrained/llm_phi2.py,sha256=wfc_sp7DDbVPNMPtrYrKSndOPSuL3YLd_vTgjjaeGAc,3720
+onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py,sha256=9pculGIzkhjKSNwna6nbpUoLBpm2JEWhYvHeq_TFEts,3525
+onnx_diagnostic-0.2.2.dist-info/licenses/LICENSE.txt,sha256=Vv6TXglX6Rc0d-f8aREhayhT-6PMQXEyOmI2NKlUCMc,1045
+onnx_diagnostic-0.2.2.dist-info/METADATA,sha256=ea8SGO-nBmEa4OHFkbMDY3VhqGGdvZn3iS-l-YXz97A,4851
+onnx_diagnostic-0.2.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+onnx_diagnostic-0.2.2.dist-info/top_level.txt,sha256=KwNkXewmcobM3ZT1DJLVWH6ebJzA5qKg7cWqKfpGNT4,16
+onnx_diagnostic-0.2.2.dist-info/RECORD,,

{onnx_diagnostic-0.2.1.dist-info → onnx_diagnostic-0.2.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.0.2)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{onnx_diagnostic-0.2.1.dist-info → onnx_diagnostic-0.2.2.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{onnx_diagnostic-0.2.1.dist-info → onnx_diagnostic-0.2.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

onnx-diagnostic 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

onnx-diagnostic 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl