onnx-diagnostic 0.8.10__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (56)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +136 -140
  3. onnx_diagnostic/ci_models/data/Blanca_Lake_Hudak.jpg +0 -0
  4. onnx_diagnostic/ci_models/data/Ice_worm_glacier.jpg +0 -0
  5. onnx_diagnostic/ci_models/data/__init__.py +0 -0
  6. onnx_diagnostic/ci_models/export_phi4_mm.py +10 -7
  7. onnx_diagnostic/export/api.py +13 -4
  8. onnx_diagnostic/export/dynamic_shapes.py +1 -1
  9. onnx_diagnostic/export/validate.py +2 -0
  10. onnx_diagnostic/ext_test_case.py +32 -15
  11. onnx_diagnostic/helpers/args_helper.py +1 -0
  12. onnx_diagnostic/helpers/bench_run.py +0 -1
  13. onnx_diagnostic/helpers/cache_helper.py +102 -36
  14. onnx_diagnostic/helpers/doc_helper.py +7 -4
  15. onnx_diagnostic/helpers/graph_helper.py +6 -6
  16. onnx_diagnostic/helpers/helper.py +39 -0
  17. onnx_diagnostic/helpers/log_helper.py +37 -14
  18. onnx_diagnostic/helpers/memory_peak.py +5 -1
  19. onnx_diagnostic/helpers/mini_onnx_builder.py +9 -14
  20. onnx_diagnostic/helpers/model_builder_helper.py +1 -1
  21. onnx_diagnostic/helpers/onnx_helper.py +283 -110
  22. onnx_diagnostic/helpers/ort_session.py +5 -2
  23. onnx_diagnostic/helpers/rt_helper.py +53 -9
  24. onnx_diagnostic/helpers/torch_helper.py +15 -11
  25. onnx_diagnostic/investigate/__init__.py +0 -0
  26. onnx_diagnostic/investigate/input_observer.py +970 -0
  27. onnx_diagnostic/reference/evaluator.py +0 -1
  28. onnx_diagnostic/reference/ort_evaluator.py +0 -1
  29. onnx_diagnostic/reference/report_results_comparison.py +9 -3
  30. onnx_diagnostic/reference/torch_evaluator.py +5 -1
  31. onnx_diagnostic/reference/torch_ops/_op_run.py +3 -5
  32. onnx_diagnostic/reference/torch_ops/sequence_ops.py +1 -1
  33. onnx_diagnostic/tasks/feature_extraction.py +0 -1
  34. onnx_diagnostic/torch_export_patches/__init__.py +0 -1
  35. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +32 -14
  36. onnx_diagnostic/torch_export_patches/patch_module.py +1 -1
  37. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +107 -6
  38. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +2 -2
  39. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +13 -3
  40. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +1 -0
  41. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +70 -23
  42. onnx_diagnostic/torch_models/code_sample.py +5 -10
  43. onnx_diagnostic/torch_models/hghub/hub_data.py +2 -4
  44. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +6 -12
  45. onnx_diagnostic/torch_models/validate.py +1 -1
  46. onnx_diagnostic/torch_onnx/compare.py +0 -1
  47. onnx_diagnostic/torch_onnx/runtime_info.py +1 -1
  48. onnx_diagnostic/torch_onnx/sbs.py +1 -1
  49. onnx_diagnostic/torch_onnx/sbs_dataclasses.py +2 -4
  50. onnx_diagnostic/typing.py +15 -0
  51. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/METADATA +2 -2
  52. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/RECORD +55 -50
  53. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/WHEEL +1 -1
  54. onnx_diagnostic/api.py +0 -15
  55. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/licenses/LICENSE.txt +0 -0
  56. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/top_level.txt +0 -0
@@ -42,7 +42,6 @@ from .ops.op_slice import Slice_1, Slice_10
  from .ops.op_transpose_cast import Transpose2DCastFP16, Transpose2DCastFP32
  from .ops.op_tri_matrix import TriMatrix

-
  logger = getLogger("onnx-diagnostic-eval")


@@ -34,7 +34,6 @@ from ..helpers.torch_helper import to_tensor
  from .report_results_comparison import ReportResultComparison
  from .evaluator import ExtendedReferenceEvaluator

-
  PROTO = (FunctionProto, ModelProto, GraphProto, NodeProto)
  Proto = Union[FunctionProto, ModelProto, GraphProto, NodeProto]

@@ -1,5 +1,4 @@
- from typing import Any, Dict, List, Tuple, Union
-
+ from typing import Any, Dict, List, Set, Tuple, Union

  ReportKeyNameType = Union[str, Tuple[str, int, str]]
  ReportKeyValueType = Tuple[int, Tuple[int, ...]]
@@ -14,6 +13,7 @@ class ReportResultComparison:
  :param tensors: tensor
  """

+ # pyrefly: ignore[unknown-name]
  def __init__(self, tensors: Dict[ReportKeyNameType, "torch.Tensor"]): # noqa: F821
  from ..helpers.onnx_helper import dtype_to_tensor_dtype
  from ..helpers import max_diff, string_type
@@ -25,7 +25,9 @@ class ReportResultComparison:
  self.max_diff = max_diff
  self.tensors = tensors
  self._build_mapping()
+ self.unique_run_names: Set[str] = set()

+ # pyrefly: ignore[unknown-name]
  def key(self, tensor: "torch.Tensor") -> ReportKeyValueType: # noqa: F821
  "Returns a key for a tensor, (onnx dtype, shape)."
  return self.dtype_to_tensor_dtype(tensor.dtype), tuple(map(int, tensor.shape))
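
Note: the comparison report keys every tensor by (onnx element type, integer shape), so only dtype- and shape-compatible tensors are ever compared against each other. A quick sketch of the key `key` computes for a float32 tensor of shape (2, 3), using the fact that onnx.TensorProto.FLOAT is 1:

    import onnx
    import torch

    t = torch.zeros(2, 3, dtype=torch.float32)
    # Mirrors ReportResultComparison.key: (onnx dtype, integer shape tuple).
    k = (onnx.TensorProto.FLOAT, tuple(map(int, t.shape)))
    print(k)  # (1, (2, 3))
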
@@ -59,12 +61,15 @@ class ReportResultComparison:
  for k, v in self.value.items():
  (i_run, run_name), ref_name = k
  d = dict(run_index=i_run, run_name=run_name, ref_name=ref_name)
+ # pyrefly: ignore[no-matching-overload]
  d.update(v)
  rows.append(d)
  return rows

  def report(
- self, outputs: Dict[str, "torch.Tensor"] # noqa: F821
+ self,
+ # pyrefly: ignore[unknown-name]
+ outputs: Dict[str, "torch.Tensor"], # noqa: F821
  ) -> List[Tuple[Tuple[int, str], ReportKeyNameType, Dict[str, Union[float, str]]]]:
  """
  For every tensor in outputs, compares it to every tensor held by
@@ -79,6 +84,7 @@ class ReportResultComparison:
  key = self.key(tensor)
  if key not in self.mapping:
  continue
+ # pyrefly: ignore[unknown-name]
  cache: Dict["torch.device", "torch.Tensor"] = {} # noqa: F821, UP037
  for held_key in self.mapping[key]:
  t2 = self.tensors[held_key]
@@ -63,7 +63,7 @@ class TorchOnnxEvaluator:
  * `functions`: local functions

  The class is not multithreaded. `runtime_info` gets updated
- by the the class. The list of available kernels is returned by function
+ by the class. The list of available kernels is returned by function
  :func:`onnx_diagnostic.reference.torch_evaluator.get_kernels`.
  Example:

@@ -494,8 +494,10 @@ class TorchOnnxEvaluator:
  r = self.runtime_info[k]
  r.set_value(
  torch_ops.OpRunTensor(
+ # pyrefly: ignore[missing-attribute]
  v.to(self.CUDA) if not r.is_shape and self.on_cuda else v,
  is_constant=False,
+ # pyrefly: ignore[missing-attribute]
  may_cpu=len(v.shape) == 1 and v.numel() < 8 and v.dtype == torch.int64,
  )
  )
@@ -524,6 +526,7 @@ class TorchOnnxEvaluator:
  f"for kernel {type(kernel)}."
  )
  for name, t in zip(kernel.output, res):
+ # pyrefly: ignore[bad-argument-type]
  self.runtime_info[name].set_value(t)
  if self.verbose:
  for name in kernel.output:
@@ -644,6 +647,7 @@ class TorchOnnxEvaluator:
  f"for kernel {type(kernel)}."
  )
  for name, t in zip(kernel.output, res):
+ # pyrefly: ignore[bad-argument-type]
  self.runtime_info[name].set_value(t)
  else:
  assert isinstance(
@@ -1,7 +1,7 @@
  from typing import Any, Dict, List, Optional, Union, Tuple
  import onnx
  import torch
- from ...api import TensorLike
+ from ...typing import TensorLike
  from ...helpers import string_type
  from ...helpers.torch_helper import to_tensor

@@ -149,7 +149,7 @@ class OpRunSequence(OpRunValue):
  ) -> "OpRunSequence":
  "Inserts a value at a given position."
  assert isinstance(tensor, OpRunTensor), f"Unexpected type {type(tensor)} for tensor"
- new_seq = OpRunSequence()
+ new_seq = OpRunSequence() # type: ignore[abstract]
  seq = self.sequence.copy()
  new_seq.sequence = seq
  if position is None:
@@ -314,9 +314,7 @@ class OpRunKernel:


  class OpRunFunction(OpRunKernel):
- """
- Defines a kernel based on a local functions.
- """
+ """Defines a kernel based on a local functions."""

  def __init__(
  self,
@@ -46,7 +46,7 @@ class SequenceEmpty_11(OpRunOpSequence):
  )

  def run(self) -> OpRunSequence:
- return OpRunSequence(dtype=self.dtype)
+ return OpRunSequence(dtype=self.dtype) # type: ignore[abstract]


  class SequenceInsert_11(OpRunOpSequence):
@@ -3,7 +3,6 @@ import torch
  from ..helpers.config_helper import update_config, check_hasattr
  from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache

-
  __TASK__ = "feature-extraction"


@@ -4,7 +4,6 @@ from .onnx_export_errors import (
  )
  from .patch_module import torch_export_rewrite

-
  # bypass_export_some_errors is the first name given to the patches.
  bypass_export_some_errors = torch_export_patches # type: ignore

@@ -562,6 +562,7 @@ def _patch_transformers(
  "[torch_export_patches] patches "
  "transformers.masking_utils.sdpa_mask_recent_torch"
  )
+
  f_transformers_sdpa_mask_recent_torch = masking_utils.sdpa_mask_recent_torch
  masking_utils.sdpa_mask_recent_torch = (
  patch_transformers_list.patched_sdpa_mask_recent_torch
@@ -574,7 +575,9 @@ def _patch_transformers(
  )
  if masking_utils.sdpa_mask == f_transformers_sdpa_mask_recent_torch:
  if verbose:
- print("[torch_export_patches] patches transformers.masking_utils.sdpa_mask")
+ print(
+ "[torch_export_patches] patches transformers.masking_utils.sdpa_mask (1)"
+ )
  f_transformers_sdpa_mask = masking_utils.sdpa_mask
  masking_utils.sdpa_mask = patch_transformers_list.patched_sdpa_mask_recent_torch
  if patch_details:
@@ -583,8 +586,23 @@ def _patch_transformers(
  f_transformers_sdpa_mask,
  patch_transformers_list.patched_sdpa_mask_recent_torch,
  )
- else:
- f_transformers_sdpa_mask = None
+
+ if ( # vmap
+ masking_utils
+ and patch_transformers_list.patch_masking_utils
+ and hasattr(masking_utils, "sdpa_mask")
+ and f_transformers_sdpa_mask is None
+ ):
+ if verbose:
+ print("[torch_export_patches] patches transformers.masking_utils.sdpa_mask (3)")
+ f_transformers_sdpa_mask = masking_utils.sdpa_mask
+ masking_utils.sdpa_mask = patch_transformers_list.patched_sdpa_mask
+ if patch_details:
+ patch_details.append(
+ "transformers",
+ f_transformers_sdpa_mask,
+ patch_transformers_list.patched_sdpa_mask,
+ )

  if ( # eager_mask
  masking_utils
@@ -742,17 +760,17 @@ def _unpatch_transformers(
  "transformers.masking_utils.sdpa_mask_recent_torch"
  )

- if f_transformers_sdpa_mask is not None:
- assert f_transformers_sdpa_mask.__name__ in (
- "sdpa_mask",
- "sdpa_mask_recent_torch",
- ), (
- f"corrupted function 'sdpa_mask', its name is "
- f"{f_transformers_sdpa_mask.__name__!r}"
- )
- masking_utils.sdpa_mask = f_transformers_sdpa_mask
- if verbose:
- print("[torch_export_patches] restored transformers.masking_utils.sdpa_mask")
+ if f_transformers_sdpa_mask is not None:
+ assert f_transformers_sdpa_mask.__name__ in (
+ "sdpa_mask",
+ "sdpa_mask_recent_torch",
+ ), (
+ f"corrupted function 'sdpa_mask', its name is "
+ f"{f_transformers_sdpa_mask.__name__!r}"
+ )
+ masking_utils.sdpa_mask = f_transformers_sdpa_mask
+ if verbose:
+ print("[torch_export_patches] restored transformers.masking_utils.sdpa_mask")

  if ( # eager_mask
  masking_utils
@@ -986,7 +986,7 @@ def torch_export_rewrite(
  name = me.__qualname__
  spl = name.split(".")
  if len(spl) == 1:
- # This a function
+ # This is a function
  module = me.__module__
  if module in me.__globals__:
  mod = me.__globals__[module]
@@ -36,6 +36,26 @@ if patch_masking_utils:
  _ignore_bidirectional_mask_sdpa = None
  bidirectional_mask_function = None

+ try:
+ from transformers.masking_utils import _non_vmap_expansion_sdpa
+ except ImportError:
+
+ def _non_vmap_expansion_sdpa(
+ batch_indices: torch.Tensor,
+ head_indices: torch.Tensor,
+ q_indices: torch.Tensor,
+ kv_indices: torch.Tensor,
+ ):
+ """
+ https://github.com/huggingface/optimum-onnx/blob/
+ c123e8f4fab61b54a8e0e31ce74462bcacca576e/optimum/exporters/onnx/model_patcher.py#L362-L365
+ """
+ batch_indices = batch_indices[:, None, None, None]
+ head_indices = head_indices[None, :, None, None]
+ q_indices = q_indices[None, None, :, None]
+ kv_indices = kv_indices[None, None, None, :]
+ return batch_indices, head_indices, q_indices, kv_indices
+
  def patched__vmap_for_bhqkv(mask_function: Callable, bh_indices: bool = True) -> Callable:
  """manual patch for function ``transformers.masking_utils._vmap_for_bhqkv``."""
  from ...helpers import string_type
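
The fallback above mirrors optimum-onnx's index expansion: each 1-D index tensor receives its own broadcast axis, so an element-wise mask function applied to the four of them materializes a (batch, heads, q, kv) grid without vmap. A small sketch of what the reshaping buys (shapes are illustrative):

    import torch

    batch = torch.arange(2)   # (2,)
    heads = torch.arange(1)   # (1,)
    q_idx = torch.arange(4)   # (4,)
    kv_idx = torch.arange(6)  # (6,)

    # After expansion: (2,1,1,1), (1,1,1,1), (1,1,4,1), (1,1,1,6).
    b, h, q, kv = (
        batch[:, None, None, None],
        heads[None, :, None, None],
        q_idx[None, None, :, None],
        kv_idx[None, None, None, :],
    )
    # Any element-wise mask function now broadcasts over the full 4D grid.
    causal = q >= kv
    print(causal.shape)  # torch.Size([1, 1, 4, 6]); (B, H, Q, KV) once b and h are used
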
@@ -146,12 +166,13 @@ if patch_masking_utils:
  padding_mask, q_length, kv_length, kv_offset, local_size
  ):
  return None
- if (
- allow_is_bidirectional_skip
- and _ignore_bidirectional_mask_sdpa
- and _ignore_bidirectional_mask_sdpa(padding_mask)
- ):
- return None
+ if allow_is_bidirectional_skip and _ignore_bidirectional_mask_sdpa:
+ # transformers<=5.0: 1 parameter, 3 for transformers>5.0
+ n_parameters = len(inspect.signature(_ignore_bidirectional_mask_sdpa).parameters)
+ if _ignore_bidirectional_mask_sdpa(
+ *[padding_mask, kv_length, kv_offset][:n_parameters]
+ ):
+ return None

  if mask_function is bidirectional_mask_function:
  if padding_mask is not None:
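
The rewritten guard calls `_ignore_bidirectional_mask_sdpa` with however many of `(padding_mask, kv_length, kv_offset)` its signature accepts, so one code path covers both transformers generations. A minimal illustration of that arity-dispatch pattern:

    import inspect

    def old_api(padding_mask):  # one-parameter form (transformers<=5.0 style)
        return padding_mask is None

    def new_api(padding_mask, kv_length, kv_offset):  # three-parameter form
        return padding_mask is None and kv_offset == 0

    for fn in (old_api, new_api):
        n_parameters = len(inspect.signature(fn).parameters)
        # Slice the candidate arguments down to what the callee accepts.
        print(fn(*[None, 8, 0][:n_parameters]))  # True, True
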
@@ -180,3 +201,83 @@ if patch_masking_utils:
  batch_arange, head_arange, cache_position, kv_arange
  )
  return causal_mask
+
+ def patched_sdpa_mask(
+ batch_size: int,
+ cache_position: torch.Tensor,
+ kv_length: int,
+ kv_offset: int = 0,
+ mask_function: Callable = causal_mask_function,
+ attention_mask: torch.Tensor | None = None,
+ local_size: int | None = None,
+ allow_is_causal_skip: bool = True,
+ allow_is_bidirectional_skip: bool = False,
+ allow_torch_fix: bool = True,
+ use_vmap: bool = False,
+ **kwargs,
+ ) -> torch.Tensor | None:
+ """manual patch for function ``transformers.masking_utils.sdpa_mask``."""
+ q_length = cache_position.shape[0]
+
+ # Potentially pad the 2D mask
+ padding_mask = prepare_padding_mask(attention_mask, kv_length, kv_offset)
+
+ # Under specific conditions, we can avoid materializing the mask
+ # 1. Causal masks can rely on the `is_causal` argument
+ # 2. Bidirectional do not need any further processing (no bias)
+ if allow_is_causal_skip and _ignore_causal_mask_sdpa(
+ padding_mask, q_length, kv_length, kv_offset, local_size
+ ):
+ return None
+ if allow_is_bidirectional_skip and _ignore_bidirectional_mask_sdpa(
+ padding_mask, kv_length, local_size
+ ):
+ return None
+
+ # Potentially add the padding 2D mask
+ if padding_mask is not None:
+ mask_function = and_masks(mask_function, padding_mask_function(padding_mask))
+
+ batch_arange = torch.arange(batch_size, device=cache_position.device)
+ head_arange = torch.arange(1, device=cache_position.device)
+ # Similar to `kv_arange = torch.arange(start=kv_offset,
+ # end=kv_offset + kv_length, device=cache_position.device)`
+ # but without data-dependent slicing (i.e. torch.compile friendly)
+ kv_arange = torch.arange(kv_length, device=cache_position.device) + kv_offset
+
+ # Actual mask creation
+ # Option 1: Fast non-vmap mask creation (default)
+ # PATCHED
+ use_vmap = False
+ if not use_vmap:
+ # Apply mask function element-wise through broadcasting
+ attention_mask = mask_function(
+ *_non_vmap_expansion_sdpa(batch_arange, head_arange, cache_position, kv_arange)
+ )
+ # Expand the mask to match batch size
+ # and query length if they weren't used in the mask function
+ attention_mask = attention_mask.expand(batch_size, -1, q_length, kv_length)
+
+ # Option 2: Vmap mask creation (torch>=2.6 and custom patterns)
+ # elif _is_torch_greater_or_equal_than_2_6:
+ # This creates the 4D mask easily.
+ # Note that we need this context manager as vmap cannot handle slicing a tensor from
+ # scalar tensor (it internally calls `.item()` which vmap does not allow,
+ # but this context works around it
+ # We don't need to add an offset to the mask_function either,
+ # as we vmap directly the correct indices for k and kv indices
+ # with TransformGetItemToIndex():
+ # attention_mask = _vmap_expansion_sdpa(mask_function)(
+ # batch_arange, head_arange, cache_position, kv_arange
+ # )
+
+ # Option 3: Error out since it indicates that the user did something custom,
+ # which they shouldn't have (torch<2.6)
+ else:
+ raise ValueError(
+ "The vmap functionality for mask creation "
+ "is only supported from torch>=2.6. "
+ "Please update your torch version or use "
+ "`use_vmap=False` with index-based masks."
+ )
+ return attention_mask
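
The patch pins `use_vmap = False`, so the mask is always materialized by broadcasting: aranges are built for batch, head, query and kv positions, expanded via `_non_vmap_expansion_sdpa`, fed to `mask_function`, and the result is expanded to `(batch_size, heads, q_length, kv_length)`. Roughly, for the default causal case (an illustrative sketch, not the patched code itself):

    import torch

    batch_size, q_length, kv_length, kv_offset = 2, 4, 6, 0
    cache_position = torch.arange(q_length)

    batch_arange = torch.arange(batch_size)
    head_arange = torch.arange(1)
    kv_arange = torch.arange(kv_length) + kv_offset  # offset added, no data-dependent slice

    def causal(b, h, q, kv):  # stand-in for mask_function
        return kv <= q

    mask = causal(
        batch_arange[:, None, None, None],
        head_arange[None, :, None, None],
        cache_position[None, None, :, None],
        kv_arange[None, None, None, :],
    ).expand(batch_size, -1, q_length, kv_length)
    print(mask.shape)  # torch.Size([2, 1, 4, 6])
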
@@ -7,10 +7,10 @@ import transformers

  def patched__compute_dynamic_ntk_parameters(
  config: Optional[transformers.PretrainedConfig] = None,
- device: Optional["torch.device"] = None,
+ device: Optional[torch.device] = None,
  seq_len: Optional[int] = None,
  **rope_kwargs,
- ) -> Tuple["torch.Tensor", float]:
+ ) -> Tuple[torch.Tensor, float]:
  """
  manual patch:
  ``[patch:transformers.modeling_rope_utils._compute_dynamic_ntk_parameters]``
@@ -188,6 +188,11 @@ def patched__broadcast_shapes(*_shapes):
  return common_shape


+ def value_ranges_is_positive(value_ranges: torch.utils._sympy.value_ranges.ValueRanges):
+ """Tells if an interval is equivalent to a positive or null integer."""
+ return value_ranges.lower == 0 and value_ranges.upper > 4623372036854775806
+
+
  class patched_ShapeEnv:

  def _check_frozen(
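
`value_ranges_is_positive` flags a symbolic bound whose interval starts at 0 and whose upper bound exceeds a huge constant, i.e. it is effectively "any non-negative integer". A plain-Python analogue of the check (`ValueRanges` itself is a torch internal; this mock is only illustrative):

    from dataclasses import dataclass

    @dataclass
    class FakeValueRanges:  # stand-in for torch.utils._sympy.value_ranges.ValueRanges
        lower: int
        upper: int

    def is_positive(vr: FakeValueRanges) -> bool:
        # Same test as the patch: lower pinned at 0, upper effectively unbounded.
        return vr.lower == 0 and vr.upper > 4623372036854775806

    print(is_positive(FakeValueRanges(0, 2**63 - 1)))  # True: an unbounded index
    print(is_positive(FakeValueRanges(2, 8)))          # False: a genuinely finite range
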
@@ -281,7 +286,10 @@ class patched_ShapeEnv:
  )
  self._update_var_to_range(b, b_bound, self.var_to_range_sloc[a])
  tgt_bound = self.bound_sympy(tgt)
- assert tgt_bound.issubset(
+ assert (
+ value_ranges_is_positive(tgt_bound)
+ and value_ranges_is_positive(src_bound)
+ ) or tgt_bound.issubset(
  src_bound
  ), f"{tgt_bound=} not a subset of {src_bound=}"

@@ -524,8 +532,10 @@

  transmute_into_runtime_assert = False

- backed_var_to_val = getattr(
- self, "backed_var_to_val", getattr(self, "var_to_val", {})
+ backed_var_to_val = (
+ self.backed_var_to_val
+ if hasattr(self, "backed_var_to_val")
+ else self.var_to_val
  )
  concrete_val = None
  if not (expr.free_symbols <= backed_var_to_val.keys()):
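
The second change is not just cosmetic: `getattr` with a default evaluates that default eagerly, so the old form always probed `var_to_val` (and silently fell back to `{}` when both attributes were missing). The conditional form touches `var_to_val` only when `backed_var_to_val` is absent and lets a genuinely missing attribute raise. For example:

    class Env:
        var_to_val = {"s0": 4}

    env = Env()

    # Old form: the inner getattr runs even when backed_var_to_val exists,
    # and a missing var_to_val would silently become {}.
    old = getattr(env, "backed_var_to_val", getattr(env, "var_to_val", {}))

    # New form: var_to_val is consulted only when backed_var_to_val is absent,
    # and a missing attribute now raises instead of hiding behind {}.
    new = env.backed_var_to_val if hasattr(env, "backed_var_to_val") else env.var_to_val

    assert old == new == {"s0": 4}
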
@@ -38,6 +38,7 @@ if patch_masking_utils:
  from ._patch_transformers_masking_utils import (
  patched__vmap_for_bhqkv,
  patched_eager_mask,
+ patched_sdpa_mask,
  patched_sdpa_mask_recent_torch,
  )

@@ -1,6 +1,7 @@
  import itertools
  from typing import Any, Callable, List, Set, Tuple
  import torch
+ import transformers.cache_utils
  from transformers.cache_utils import Cache, DynamicCache, EncoderDecoderCache, StaticCache

  try:
@@ -22,22 +23,63 @@ from transformers.modeling_outputs import BaseModelOutput
  from ...helpers.cache_helper import make_dynamic_cache, make_static_cache, CacheKeyValue
  from . import make_serialization_function_for_dataclass

-
  SUPPORTED_DATACLASSES: Set[type] = set()
+
  WRONG_REGISTRATIONS = {
  DynamicCache: "4.50",
  BaseModelOutput: None,
  }

+ SHORTEN_LAYER_NAMES = {
+ "DynamicLayer": "D",
+ "DynamicSlidingWindowLayer": "W",
+ "StaticLayer": "S",
+ "StaticSlidingWindowLayer": "X",
+ "D": "DynamicLayer",
+ "W": "DynamicSlidingWindowLayer",
+ "S": "StaticLayer",
+ "X": "StaticSlidingWindowLayer",
+ }
+
+ KWARGS_LAYER_NAMES = {
+ "DynamicLayer": lambda layer: "",
+ "DynamicSlidingWindowLayer": lambda layer: str(layer.sliding_window),
+ "StaticLayer": lambda layer: "",
+ "StaticSlidingWindowLayer": lambda layer: str(layer.sliding_window),
+ }
+
+ PARSE_LAYER_NAMES = {
+ "DynamicLayer": lambda skw: {},
+ "DynamicSlidingWindowLayer": lambda skw: dict(sliding_window=int(skw[1:])),
+ "StaticLayer": lambda skw: {},
+ "StaticSlidingWindowLayer": lambda skw: dict(sliding_window=int(skw[1:])),
+ }
+

  def _flatten_key_value_cache(cache: Cache) -> Tuple[List[Any], torch.utils._pytree.Context]:
  ca = CacheKeyValue(cache)
  flat = list(itertools.chain.from_iterable(zip(ca.key_cache, ca.value_cache)))
- keys = list(
- itertools.chain.from_iterable(
- (f"key_{i}", f"value_{i}") for i in range(len(ca.key_cache))
+ unique = set(ca.cls_layers) if ca.cls_layers else None
+ if (
+ cache.__class__.__name__ != "DynamicCache"
+ or unique is None
+ or (len(unique) == 1 and unique.pop().__name__ == "DynamicLayer")
+ ):
+ keys = list(
+ itertools.chain.from_iterable(
+ (f"key_{i}", f"value_{i}") for i in range(len(ca.key_cache))
+ )
  )
- )
+ return flat, keys
+
+ keys = []
+ for i in range(len(ca.key_cache)):
+ letter = SHORTEN_LAYER_NAMES[ca.cls_layers[i].__name__]
+ if hasattr(cache, "layers"):
+ kwargs = KWARGS_LAYER_NAMES[ca.cls_layers[i].__name__](cache.layers[i])
+ else:
+ kwargs = ""
+ keys.extend([f"key_{letter}{kwargs}_{i}", f"value_{letter}{kwargs}_{i}"])
  return flat, keys


@@ -55,7 +97,26 @@ def _unflatten_cache(
  output_type=None,
  ) -> DynamicCache:
  """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
- res = make_cache(list(zip(values[::2], values[1::2])))
+ expected = list(
+ itertools.chain.from_iterable(
+ (f"key_{i}", f"value_{i}") for i in range(len(values) // 2)
+ )
+ )
+ if expected == context:
+ res = make_cache(list(zip(values[::2], values[1::2])))
+ else:
+ cls_layer_names = [SHORTEN_LAYER_NAMES[name.split("_")[1][0]] for name in context][::2]
+ cls_kwargs = [
+ PARSE_LAYER_NAMES[SHORTEN_LAYER_NAMES[name.split("_")[1][0]]](name.split("_")[1])
+ for name in context
+ ][::2]
+ cls_layers = [
+ getattr(transformers.cache_utils, cls_name) for cls_name in cls_layer_names
+ ]
+ res = make_cache(
+ list(zip(values[::2], values[1::2])), cls_layers=cls_layers, cls_kwargs=cls_kwargs
+ )
+
  assert output_type is None or isinstance(
  res, output_type
  ), f"Type mismatch between {output_type} (expected) and {type(res)}"
@@ -71,14 +132,6 @@ def flatten_dynamic_cache(
  dynamic_cache: DynamicCache,
  ) -> Tuple[List[Any], torch.utils._pytree.Context]:
  """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
- assert (
- not hasattr(dynamic_cache, "layers")
- or not dynamic_cache.layers
- or all(lay.__class__.__name__ == "DynamicLayer" for lay in dynamic_cache.layers)
- ), (
- f"The serialization does not work yet on other layers "
- f"than DynamicLayer, but layers={[lay.__class__ for lay in dynamic_cache.layers]}"
- )
  return _flatten_key_value_cache(dynamic_cache)


@@ -86,14 +139,6 @@ def flatten_with_keys_dynamic_cache(
  dynamic_cache: DynamicCache,
  ) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
  """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
- assert (
- not hasattr(dynamic_cache, "layers")
- or not dynamic_cache.layers
- or all(lay.__class__.__name__ == "DynamicLayer" for lay in dynamic_cache.layers)
- ), (
- f"The serialization does not work yet on other layers "
- f"than DynamicLayer, but layers={[lay.__class__ for lay in dynamic_cache.layers]}"
- )
  return _flatten_with_keys_cache(dynamic_cache)


@@ -161,7 +206,9 @@
  ) -> StaticCache:
  """Restores a :class:`transformers.cache_utils.StaticCache` from python objects."""
  return _unflatten_cache(
- lambda *args: make_static_cache(*args, max_cache_len=values[0].shape[2]),
+ lambda *args, **kwargs: make_static_cache(
+ *args, max_cache_len=values[0].shape[2], **kwargs
+ ),
  values,
  context,
  output_type=output_type,
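
The lambda change here is what lets the layer metadata reach `make_static_cache`: the old `lambda *args: ...` dropped any keyword arguments `_unflatten_cache` now passes (such as `cls_layers`/`cls_kwargs`), while the new one forwards them. A toy version of the difference:

    def make(*pairs, max_cache_len=None, **kwargs):
        return pairs, max_cache_len, kwargs

    old = lambda *args: make(*args, max_cache_len=8)              # swallows keywords
    new = lambda *args, **kw: make(*args, max_cache_len=8, **kw)  # forwards them

    print(new(("k", "v"), cls_layers=["DynamicLayer"]))
    # ((('k', 'v'),), 8, {'cls_layers': ['DynamicLayer']})
    # old(("k", "v"), cls_layers=[...]) would raise TypeError instead.
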
@@ -8,11 +8,9 @@ from .hghub.model_inputs import _preprocess_model_id
  from .hghub import get_untrained_model_with_inputs
  from .validate import filter_inputs, make_patch_kwargs

-
  CODE_SAMPLES = {
  "imports": "from typing import Any\nimport torch",
- "get_model_with_inputs": textwrap.dedent(
- """
+ "get_model_with_inputs": textwrap.dedent("""
  def get_model_with_inputs(
  model_id:str,
  subfolder: str | None = None,
@@ -57,8 +55,7 @@ CODE_SAMPLES = {
  if device:
  data["model"] = data["model"].to(device)
  return data["model"]
- """
- ),
+ """),
  }


@@ -198,7 +195,7 @@ def code_sample(
  this is not always possible
  :param use_pretrained: use the trained model, not the untrained one
  :param optimization: optimization to apply to the exported model,
- depend on the the exporter
+ depend on the exporter
  :param quiet: if quiet, catches exception if any issue
  :param patch: applies patches (``patch_transformers=True, path_diffusers=True``)
  if True before exporting
@@ -326,11 +323,9 @@ def code_sample(
  imports,
  cache_import,
  CODE_SAMPLES["get_model_with_inputs"],
- textwrap.dedent(
- f"""
+ textwrap.dedent(f"""
  model = get_model_with_inputs({model_args})
- """
- ),
+ """),
  f"inputs = {input_code}",
  exporter_code,
  ]
@@ -10,8 +10,7 @@ __data_arch_values__ = {
  "ResNetForImageClassification": dict(image_size=224),
  }

- __data_arch__ = textwrap.dedent(
- """
+ __data_arch__ = textwrap.dedent("""
  architecture,task
  ASTModel,feature-extraction
  AutoencoderKL,image-to-image
@@ -166,8 +165,7 @@ __data_arch__ = textwrap.dedent(
  YolosModel,image-feature-extraction
  Alibaba-NLP/gte-large-en-v1.5,sentence-similarity
  emilyalsentzer/Bio_ClinicalBERT,fill-mask
- nvidia/Cosmos-Predict2-2B-Video2World//transformer,image-to-video"""
- )
+ nvidia/Cosmos-Predict2-2B-Video2World//transformer,image-to-video""")

  __data_tasks__ = [
  "audio-classification",