onnx-diagnostic 0.7.8__py3-none-any.whl → 0.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +2 -2
  3. onnx_diagnostic/helpers/_log_helper.py +4 -2
  4. onnx_diagnostic/helpers/cache_helper.py +4 -4
  5. onnx_diagnostic/helpers/helper.py +8 -0
  6. onnx_diagnostic/helpers/log_helper.py +7 -1
  7. onnx_diagnostic/helpers/model_builder_helper.py +5 -0
  8. onnx_diagnostic/helpers/onnx_helper.py +1 -1
  9. onnx_diagnostic/helpers/torch_helper.py +14 -4
  10. onnx_diagnostic/reference/ops/op_scan.py +5 -5
  11. onnx_diagnostic/reference/ort_evaluator.py +2 -2
  12. onnx_diagnostic/tasks/automatic_speech_recognition.py +1 -1
  13. onnx_diagnostic/tasks/feature_extraction.py +1 -1
  14. onnx_diagnostic/tasks/fill_mask.py +1 -1
  15. onnx_diagnostic/tasks/image_text_to_text.py +2 -2
  16. onnx_diagnostic/tasks/sentence_similarity.py +1 -1
  17. onnx_diagnostic/tasks/summarization.py +1 -1
  18. onnx_diagnostic/tasks/text2text_generation.py +1 -1
  19. onnx_diagnostic/tasks/text_classification.py +1 -1
  20. onnx_diagnostic/tasks/text_generation.py +1 -1
  21. onnx_diagnostic/tasks/zero_shot_image_classification.py +1 -1
  22. onnx_diagnostic/torch_export_patches/eval/model_cases.py +3 -3
  23. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +98 -4
  24. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
  25. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +37 -2
  26. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +0 -1
  27. onnx_diagnostic/torch_models/hghub/hub_data.py +2 -0
  28. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +142 -0
  29. onnx_diagnostic/torch_models/hghub/model_inputs.py +139 -126
  30. onnx_diagnostic/torch_models/hghub/model_specific.py +49 -0
  31. onnx_diagnostic/torch_models/untrained/llm_phi2.py +11 -3
  32. onnx_diagnostic/torch_models/validate.py +44 -4
  33. onnx_diagnostic/torch_onnx/sbs.py +1 -1
  34. {onnx_diagnostic-0.7.8.dist-info → onnx_diagnostic-0.7.10.dist-info}/METADATA +2 -2
  35. {onnx_diagnostic-0.7.8.dist-info → onnx_diagnostic-0.7.10.dist-info}/RECORD +38 -37
  36. {onnx_diagnostic-0.7.8.dist-info → onnx_diagnostic-0.7.10.dist-info}/WHEEL +0 -0
  37. {onnx_diagnostic-0.7.8.dist-info → onnx_diagnostic-0.7.10.dist-info}/licenses/LICENSE.txt +0 -0
  38. {onnx_diagnostic-0.7.8.dist-info → onnx_diagnostic-0.7.10.dist-info}/top_level.txt +0 -0

onnx_diagnostic/__init__.py
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
  Functions, classes to dig into a model when this one is right, slow, wrong...
  """

- __version__ = "0.7.8"
+ __version__ = "0.7.10"
  __author__ = "Xavier Dupré"

onnx_diagnostic/_command_lines_parser.py
@@ -850,13 +850,13 @@ def get_parser_agg() -> ArgumentParser:
  "--filter-in",
  default="",
  help="adds a filter to filter in data, syntax is\n"
- '``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
+ '``"<column1>:<value1>;<value2>//<column2>:<value3>"`` ...',
  )
  parser.add_argument(
  "--filter-out",
  default="",
  help="adds a filter to filter out data, syntax is\n"
- '``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
+ '``"<column1>:<value1>;<value2>//<column2>:<value3>"`` ...',
  )
  parser.add_argument(
  "--sbs",

onnx_diagnostic/helpers/_log_helper.py
@@ -118,9 +118,11 @@ def filter_data(
  if isinstance(fmt, str):
  cols = fmt.split("//")
  for c in cols:
- assert ":" in c, f"Unexpected value {c!r} in fmt={fmt!r}"
+ assert ":" in c, f"Unexpected value {c!r} in fmt={fmt!r}, cols={cols!r}"
  spl = c.split(":")
- assert len(spl) == 2, f"Unexpected value {c!r} in fmt={fmt!r}"
+ assert (
+ len(spl) == 2
+ ), f"Unexpected value {c!r} in fmt={fmt!r}, spl={spl}, cols={cols}"
  name, fil = spl
  cond[name] = set(fil.split(";"))
  return cond
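
Both the ``--filter-in``/``--filter-out`` help strings and ``filter_data`` above now agree on ``//`` as the separator between column clauses. A minimal standalone sketch of that syntax (the column and value names are invented; this is not the library function itself):

```python
def parse_filter(fmt: str) -> dict:
    # "//" separates column clauses, ":" splits a column from its values, ";" separates values
    cond = {}
    for clause in fmt.split("//"):
        name, values = clause.split(":")
        cond[name] = set(values.split(";"))
    return cond

print(parse_filter("model:phi2;llama//exporter:onnx-dynamo"))
# -> {'model': {'phi2', 'llama'}, 'exporter': {'onnx-dynamo'}} (set order may vary)
```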

onnx_diagnostic/helpers/cache_helper.py
@@ -270,7 +270,7 @@ def make_static_cache(
  self.num_attention_heads = key_value_pairs[0][0].shape[1]
  self.num_hidden_layers = len(key_value_pairs)

- def get_text_config(self):
+ def get_text_config(self, *args, **kwargs):
  return self

  assert max_cache_len is not None, (
@@ -366,7 +366,7 @@ def make_mamba_cache(key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]]) -
  self.num_hidden_layers = len(key_value_pairs)
  self.dtype = dtype

- def get_text_config(self):
+ def get_text_config(self, *args, **kwargs):
  return self

  cache = MambaCache(
@@ -409,7 +409,7 @@ def make_sliding_window_cache(
  self.num_hidden_layers = len(key_value_pairs)
  self.sliding_window = key_value_pairs[0][0].shape[2]

- def get_text_config(self):
+ def get_text_config(self, *args, **kwargs):
  return self

  cache = transformers.cache_utils.SlidingWindowCache(
@@ -577,7 +577,7 @@ def make_hybrid_cache(
  sliding_window = _sliding_window
  num_key_value_heads = key_value_pairs[0][1].shape[1] # transformers 4.48.3

- def get_text_config(self):
+ def get_text_config(self, *args, **kwargs):
  return self

  if layer_types:

onnx_diagnostic/helpers/helper.py
@@ -774,6 +774,14 @@ def string_type(
  return f"{obj.__class__.__name__}(**{s})"
  if obj.__class__.__name__ in {"TorchModelContainer", "InferenceSession"}:
  return f"{obj.__class__.__name__}(...)"
+ if obj.__class__.__name__ == "Results":
+ import ultralytics
+
+ assert isinstance(
+ obj, ultralytics.engine.results.Results
+ ), f"Unexpected type={type(obj)}"
+ return f"ultralytics.{obj.__class__.__name__}(...)"
+
  if verbose:
  print(f"[string_type] END:{type(obj)}")
  raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}")
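
``string_type`` is the helper the package uses to render inputs and outputs compactly, and the new branch makes it print ``ultralytics.Results(...)`` instead of raising on that type. A hedged usage sketch, assuming ``string_type`` is importable from ``onnx_diagnostic.helpers.helper`` (as the file layout in this diff suggests) and that its remaining arguments have defaults:

```python
import torch
from onnx_diagnostic.helpers.helper import string_type  # import path assumed from this diff

# Renders a compact, type-oriented description of a (possibly nested) value.
print(string_type({"input_ids": torch.ones((2, 8), dtype=torch.int64)}))
```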

onnx_diagnostic/helpers/log_helper.py
@@ -1,5 +1,6 @@
  import enum
  import io
+ import os
  import pprint
  import re
  import warnings
@@ -270,6 +271,10 @@ class CubePlot:
  def _to_images_bar(
  self, verbose: int = 0, merge: bool = True, title_suffix: Optional[str] = None
  ) -> List[bytes]:
+ """
+ Environment variable ``FIGSIZEH`` can be set to increase the
+ graph height. Default is 1.0.
+ """
  assert merge, f"merge={merge} not implemented yet"
  import matplotlib.pyplot as plt

@@ -279,7 +284,8 @@ class CubePlot:
  n_cols = 3
  nn = df.shape[1] // n_cols
  nn += int(df.shape[1] % n_cols != 0)
- fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * df.shape[0] / 5))
+ ratio = float(os.environ.get("FIGSIZEH", "1"))
+ fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * df.shape[0] / 3 * ratio))
  pos = 0
  imgs = []
  for c in self._make_loop(df.columns, verbose):
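
``FIGSIZEH`` is read from the environment each time ``_to_images_bar`` builds its figure, so the bar-chart height can be scaled per run without touching code. A small sketch (the value ``2`` is arbitrary and must be set before the plot is produced):

```python
import os

# The figure height above is nn * df.shape[0] / 3 * ratio, so FIGSIZEH=2
# roughly doubles the height of each generated bar chart.
os.environ["FIGSIZEH"] = "2"
```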

onnx_diagnostic/helpers/model_builder_helper.py
@@ -201,10 +201,12 @@ def create_model_builder(
  arch_map = {
  "ChatGLMForConditionalGeneration": builder.ChatGLMModel,
  "ChatGLMModel": builder.ChatGLMModel,
+ "Ernie4_5_ForCausalLM": builder.ErnieModel,
  "GemmaForCausalLM": builder.Gemma2Model,
  "Gemma3ForCausalLM": builder.Gemma3Model,
  "Gemma3ForConditionalGeneration": builder.Gemma3Model,
  "GraniteForCausalLM": builder.GraniteModel,
+ "GptOssForCausalLM": builder.GPTOSSModel,
  "LlamaForCausalLM": builder.LlamaModel,
  "MistralForCausalLM": builder.MistralModel,
  "NemotronForCausalLM": builder.NemotronModel,
@@ -235,6 +237,7 @@ def create_model_builder(
  "Phi4MMForCausalLM": builder.Phi4MMModel,
  "Qwen2ForCausalLM": builder.QwenModel,
  "Qwen3ForCausalLM": builder.Qwen3Model,
+ "SmolLM3ForCausalLM": builder.SmolLM3Model,
  }

  assert config.architectures[0] in arch_map, (
@@ -276,6 +279,8 @@ def create_model_builder(
  for key in text_config:
  if not hasattr(config, key):
  setattr(config, key, getattr(text_config, key))
+ elif config.architectures[0] == "GptOssForCausalLM":
+ delattr(config, "quantization_config")
  elif (
  config.architectures[0] == "PhiMoEForCausalLM"
  and config.max_position_embeddings != config.original_max_position_embeddings

onnx_diagnostic/helpers/onnx_helper.py
@@ -1186,7 +1186,7 @@ def shadowing_names(
  shadow |= set(i.name for i in g.input) & shadow_context
  shadow |= set(i.name for i in g.initializer) & shadow_context
  shadow |= set(i.name for i in g.sparse_initializer) & shadow_context
- s, ps, c = shadowing_names(
+ s, _ps, c = shadowing_names(
  g.node, verbose=verbose, existing=existing, shadow_context=existing
  )
  shadow |= s

onnx_diagnostic/helpers/torch_helper.py
@@ -543,7 +543,7 @@ def dummy_llm(
  )

  def forward(self, x):
- B, T, C = x.shape
+ _B, T, C = x.shape

  query = self.query(x)
  key = self.key(x)
@@ -721,9 +721,10 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
  return {to_any(t, to_value) for t in value}
  if type(value) is dict:
  return {k: to_any(t, to_value) for k, t in value.items()}
- if value.__class__.__name__ == "DynamicCache":
+ if value.__class__.__name__ in {"DynamicCache", "HybridCache"}:
+ make = dict(DynamicCache=make_dynamic_cache, HybridCache=make_hybrid_cache)
  cc = CacheKeyValue(value)
- return make_dynamic_cache(
+ return make[value.__class__.__name__](  # type: ignore[operator]
  list(
  zip(
  [t.to(to_value) if t is not None else t for t in cc.key_cache],
@@ -822,6 +823,15 @@ def torch_deepcopy(value: Any) -> Any:
  new_args = torch_deepcopy(args)
  return torch.utils._pytree.tree_unflatten(new_args, spec)

+ if value.__class__.__name__ == "Results":
+ import copy
+ import ultralytics
+
+ assert isinstance(
+ value, ultralytics.engine.results.Results
+ ), f"Unexpected type={type(value)}"
+ return copy.deepcopy(value)
+
  # We should have a code using serialization, deserialization assuming a model
  # cannot be exported without them.
  raise NotImplementedError(f"torch_deepcopy not implemented for type {type(value)}")
@@ -856,7 +866,7 @@ def torch_tensor_size(value: Any) -> Any:
  if value.__class__.__name__ == "MambaCache":
  return torch_tensor_size(value.conv_states) + torch_tensor_size(value.ssm_states)
  if value.__class__ in torch.utils._pytree.SUPPORTED_NODES:
- args, spec = torch.utils._pytree.tree_flatten(value)
+ args, _spec = torch.utils._pytree.tree_flatten(value)
  return sum(torch_tensor_size(a) for a in args)

  # We should have a code using serialization, deserialization assuming a model
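
With this change ``to_any`` converts a ``HybridCache`` through the same path it already used for ``DynamicCache``: unpack the key/value tensors, convert them, and rebuild the cache with the matching ``make_*`` helper. A hedged usage sketch (shapes are arbitrary; the import paths follow the file layout shown in this diff):

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache  # location assumed from this diff
from onnx_diagnostic.helpers.torch_helper import to_any

# One layer of key/value tensors with shape (batch, heads, seq, head_dim).
cache = make_dynamic_cache([(torch.randn(1, 4, 8, 16), torch.randn(1, 4, 8, 16))])
cache_f16 = to_any(cache, torch.float16)  # same structure, tensors cast to float16
```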

onnx_diagnostic/reference/ops/op_scan.py
@@ -26,11 +26,11 @@ class Scan(_Scan):
  ):
  (
  num_loop_state_vars,
- num_scan_outputs,
- output_directions,
- max_dir_out,
- output_axes,
- max_axe_out,
+ _num_scan_outputs,
+ _output_directions,
+ _max_dir_out,
+ _output_axes,
+ _max_axe_out,
  state_names_in,
  state_names_out,
  scan_names_in,

onnx_diagnostic/reference/ort_evaluator.py
@@ -562,7 +562,7 @@ class OnnxruntimeEvaluator:
  if key in self._cache:
  sess = self._cache[key][1]
  else:
- self._cache[key] = onx, sess = self._get_sess_if(node, name, inputs, results)
+ self._cache[key] = _onx, sess = self._get_sess_if(node, name, inputs, results)

  assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}"
  feeds = {name: results[name] for name in sess.input_names}
@@ -616,7 +616,7 @@ class OnnxruntimeEvaluator:
  if key in self._cache:
  sess = self._cache[key][1]
  else:
- self._cache[key] = onx, sess = self._get_sess_scan(node, name, inputs, results)
+ self._cache[key] = _onx, sess = self._get_sess_scan(node, name, inputs, results)

  assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}"
  feeds = {name: results[name] for name in sess.input_names}

onnx_diagnostic/tasks/automatic_speech_recognition.py
@@ -76,7 +76,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length"

  shapes = {

onnx_diagnostic/tasks/feature_extraction.py
@@ -47,7 +47,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "sequence_length"
  shapes = {
  "input_ids": {0: batch, 1: seq_length},

onnx_diagnostic/tasks/fill_mask.py
@@ -42,7 +42,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "sequence_length"
  shapes = {
  "input_ids": {0: batch, 1: seq_length},

onnx_diagnostic/tasks/image_text_to_text.py
@@ -107,7 +107,7 @@ def _get_inputs_gemma3(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
  # cache_length = "cache_length" # torch.export.Dim("cache_length", min=1, max=4096)

@@ -230,7 +230,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  batch_img = torch.export.Dim("batch_img", min=1, max=1024)
  seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
  cache_length = "cache_length" # torch.export.Dim("cache_length", min=1, max=4096)

onnx_diagnostic/tasks/sentence_similarity.py
@@ -42,7 +42,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length"
  shapes = {
  "input_ids": {0: batch, 1: seq_length},

onnx_diagnostic/tasks/summarization.py
@@ -70,7 +70,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
  cache_length = "cache_length_key" # torch.export.Dim("cache_length", min=1, max=4096)
  cache_length2 = "cache_length_val" # torch.export.Dim("cache_length2", min=1, max=4096)

onnx_diagnostic/tasks/text2text_generation.py
@@ -72,7 +72,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
  cache_length = "cache_length_key"
  cache_length2 = "cache_length_val"

onnx_diagnostic/tasks/text_classification.py
@@ -42,7 +42,7 @@ def get_inputs(
  assert (
  "cls_cache" not in kwargs
  ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length" # torch.export.Dim("sequence_length", min=1, max=1024)
  shapes = {
  "input_ids": {0: batch, 1: seq_length},

onnx_diagnostic/tasks/text_generation.py
@@ -83,7 +83,7 @@ def get_inputs(
  :class:`transformers.cache_utils.DynamicCache`
  :return: dictionary
  """
- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
  cache_length = "cache_length" # torch.export.Dim("cache_length", min=1, max=4096)

onnx_diagnostic/tasks/zero_shot_image_classification.py
@@ -65,7 +65,7 @@ def get_inputs(
  input_width, int
  ), f"Unexpected type for input_height {type(input_height)}{config}"

- batch = torch.export.Dim("batch", min=1, max=1024)
+ batch = "batch"
  seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
  shapes = {
  "input_ids": {0: batch, 1: seq_length},

onnx_diagnostic/torch_export_patches/eval/model_cases.py
@@ -384,7 +384,7 @@ class ControlFlowScan(torch.nn.Module):

  def forward(self, x):
  init = torch.zeros_like(x[0])
- carry, out = torch.ops.higher_order.scan(
+ carry, _out = torch.ops.higher_order.scan(
  ControlFlowScan.add, [init], [x], additional_inputs=[]
  )
  return carry
@@ -429,7 +429,7 @@ class ControlFlowScanCDist(torch.nn.Module):
  return [carry.clone(), rd]

  def forward(self, x):
- carry, out = torch.ops.higher_order.scan(
+ _carry, out = torch.ops.higher_order.scan(
  ControlFlowScanCDist.dist,
  [x],
  [x],
@@ -483,7 +483,7 @@ class ControlFlowScanCDistXY(torch.nn.Module):
  return [y.clone(), rd]

  def forward(self, x, y):
- carry, out = torch.ops.higher_order.scan(
+ _carry, out = torch.ops.higher_order.scan(
  ControlFlowScanCDistXY.dist,
  [y],
  [x],

onnx_diagnostic/torch_export_patches/onnx_export_errors.py
@@ -439,6 +439,28 @@ def torch_export_patches(
  f_transformers__vmap_for_bhqkv = masking_utils._vmap_for_bhqkv
  masking_utils._vmap_for_bhqkv = patch_transformers_list.patched__vmap_for_bhqkv

+ if verbose:
+ print(
+ "[torch_export_patches] patches "
+ "transformers.masking_utils.sdpa_mask_recent_torch"
+ )
+ f_transformers_sdpa_mask_recent_torch = masking_utils.sdpa_mask_recent_torch
+ masking_utils.sdpa_mask_recent_torch = (
+ patch_transformers_list.patched_sdpa_mask_recent_torch
+ )
+ if masking_utils.sdpa_mask == f_transformers_sdpa_mask_recent_torch:
+ if verbose:
+ print(
+ "[torch_export_patches] patches "
+ "transformers.masking_utils.sdpa_mask"
+ )
+ f_transformers_sdpa_mask = masking_utils.sdpa_mask
+ masking_utils.sdpa_mask = (
+ patch_transformers_list.patched_sdpa_mask_recent_torch
+ )
+ else:
+ f_transformers_sdpa_mask = None
+
  if (
  masking_utils
  and patch_transformers_list.patch_masking_utils
@@ -456,10 +478,37 @@ def torch_export_patches(
  and masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"]
  == f_transformers_eager_mask
  ):
+ if verbose:
+ print(
+ "[torch_export_patches] patches "
+ "transformers.masking_utils.eager_mask "
+ "in ALL_MASK_ATTENTION_FUNCTIONS"
+ )
  masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"] = (
  patch_transformers_list.patched_eager_mask
  )

+ if (
+ masking_utils
+ and patch_transformers_list.patch_masking_utils
+ and hasattr(masking_utils, "sdpa_mask")
+ and f_transformers_sdpa_mask is not None
+ ):
+ if verbose:
+ print(
+ "[torch_export_patches] patches "
+ "transformers.masking_utils.sdpa_mask "
+ "in ALL_MASK_ATTENTION_FUNCTIONS"
+ )
+ if (
+ "sdpa" in masking_utils.ALL_MASK_ATTENTION_FUNCTIONS
+ and masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["sdpa"]
+ == f_transformers_sdpa_mask
+ ):
+ masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["sdpa"] = (
+ patch_transformers_list.patched_sdpa_mask_recent_torch
+ )
+
  if custom_patches:
  if verbose:
  print("[torch_export_patches] applies custom patches")
@@ -568,12 +617,31 @@ def torch_export_patches(
  and hasattr(masking_utils, "_vmap_for_bhqkv")
  ):
  masking_utils._vmap_for_bhqkv = f_transformers__vmap_for_bhqkv
+
  if verbose:
  print(
  "[torch_export_patches] restored "
  "transformers.masking_utils._vmap_for_bhqkv"
  )

+ masking_utils.sdpa_mask_recent_torch = (
+ f_transformers_sdpa_mask_recent_torch
+ )
+
+ if verbose:
+ print(
+ "[torch_export_patches] restored "
+ "transformers.masking_utils.sdpa_mask_recent_torch"
+ )
+
+ if f_transformers_sdpa_mask is not None:
+ masking_utils.sdpa_mask = f_transformers_sdpa_mask
+ if verbose:
+ print(
+ "[torch_export_patches] restored "
+ "transformers.masking_utils.sdpa_mask"
+ )
+
  if (
  masking_utils
  and patch_transformers_list.patch_masking_utils
@@ -581,6 +649,11 @@
  ):
  f_transformers_eager_mask = masking_utils.eager_mask
  masking_utils.eager_mask = f_transformers_eager_mask
+ if verbose:
+ print(
+ "[torch_export_patches] restored "
+ "transformers.masking_utils.eager_mask"
+ )
  if (
  "eager" in masking_utils.ALL_MASK_ATTENTION_FUNCTIONS
  and masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"]
@@ -589,11 +662,32 @@
  masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"] = (
  f_transformers_eager_mask
  )
- if verbose:
- print(
- "[torch_export_patches] restored "
- "transformers.masking_utils.eager_mask"
+ if verbose:
+ print(
+ "[torch_export_patches] restored "
+ "transformers.masking_utils.eager_mask "
+ "in ALL_MASK_ATTENTION_FUNCTIONS"
+ )
+
+ if (
+ masking_utils
+ and patch_transformers_list.patch_masking_utils
+ and hasattr(masking_utils, "sdpa_mask")
+ ):
+ if (
+ "sdpa" in masking_utils.ALL_MASK_ATTENTION_FUNCTIONS
+ and masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["sdpa"]
+ == patch_transformers_list.patched_sdpa_mask_recent_torch
+ ):
+ masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["sdpa"] = (
+ f_transformers_sdpa_mask
  )
+ if verbose:
+ print(
+ "[torch_export_patches] restored "
+ "transformers.masking_utils.sdpa_mask "
+ "in ALL_MASK_ATTENTION_FUNCTIONS"
+ )

  ########
  # caches
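
The new ``sdpa_mask`` patching follows the same discipline as the existing patches in this function: remember the original attribute, install the patched callable, and put the original back when the context exits, including the ``ALL_MASK_ATTENTION_FUNCTIONS`` registry entries. A minimal standalone sketch of that save/replace/restore pattern (generic, not the library's implementation):

```python
import contextlib

@contextlib.contextmanager
def swap_attr(owner, name, replacement):
    # Save the current attribute, install the replacement, always restore on exit.
    original = getattr(owner, name)
    setattr(owner, name, replacement)
    try:
        yield original
    finally:
        setattr(owner, name, original)

# e.g. with swap_attr(masking_utils, "sdpa_mask", patched_sdpa_mask_recent_torch): ...
```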

onnx_diagnostic/torch_export_patches/patches/patch_torch.py
@@ -205,7 +205,10 @@ class patched_ShapeEnv:
  # Precondition: a == tgt
  assert isinstance(a, sympy.Symbol)

- if self.allow_complex_guards_as_runtime_asserts and not _is_supported_equivalence(tgt):
+ if (
+ getattr(self, "allow_complex_guards_as_runtime_asserts", False)
+ or getattr(self, "prefer_deferred_runtime_asserts_over_guards", False)
+ ) and not _is_supported_equivalence(tgt):
  # continuing leads to placeholder shapes
  # having complex expressions that we can't resolve
  return

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
@@ -37,7 +37,13 @@ from ...helpers.torch_helper import is_torchdynamo_exporting

  if patch_masking_utils:
  # Introduced in 4.52
- from transformers.masking_utils import causal_mask_function, sdpa_mask
+ from transformers.masking_utils import (
+ causal_mask_function,
+ padding_mask_function,
+ and_masks,
+ _ignore_causal_mask_sdpa,
+ prepare_padding_mask,
+ )

  def patched__vmap_for_bhqkv(mask_function: Callable, bh_indices: bool = True) -> Callable:
  """manual patch for function ``transformers.masking_utils._vmap_for_bhqkv``."""
@@ -105,7 +111,7 @@ if patch_masking_utils:
  """manual patch for function ``transformers.masking_utils.eager_mask``."""
  # The masks for eager attention are simply boolean mask from sdpa, casted to 0 and -inf
  _ = kwargs.pop("allow_is_causal_skip", None)
- mask = sdpa_mask(
+ mask = patched_sdpa_mask_recent_torch(
  batch_size=batch_size,
  cache_position=cache_position,
  kv_length=kv_length,
@@ -125,6 +131,35 @@ if patch_masking_utils:
  mask = (~mask).to(dtype) * min_dtype
  return mask

+ def patched_sdpa_mask_recent_torch(
+ batch_size: int,
+ cache_position: torch.Tensor,
+ kv_length: int,
+ kv_offset: int = 0,
+ mask_function: Callable = causal_mask_function,
+ attention_mask: Optional[torch.Tensor] = None,
+ local_size: Optional[int] = None,
+ allow_is_causal_skip: bool = True,
+ **kwargs,
+ ) -> Optional[torch.Tensor]:
+ """manual patch for function ``transformers.masking_utils.sdpa_mask_recent_torch``."""
+ q_length = cache_position.shape[0]
+ padding_mask = prepare_padding_mask(attention_mask, kv_length, kv_offset, _slice=False)
+ if allow_is_causal_skip and _ignore_causal_mask_sdpa(
+ padding_mask, q_length, kv_length, kv_offset, local_size
+ ):
+ return None
+ kv_arange = torch.arange(kv_length, device=cache_position.device)
+ kv_arange += kv_offset
+ if padding_mask is not None:
+ mask_function = and_masks(mask_function, padding_mask_function(padding_mask))
+ batch_arange = torch.arange(batch_size, device=cache_position.device)
+ head_arange = torch.arange(1, device=cache_position.device)
+ causal_mask = patched__vmap_for_bhqkv(mask_function)(
+ batch_arange, head_arange, cache_position, kv_arange
+ )
+ return causal_mask
+

  if patch_parse_processor_args:
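
``patched_sdpa_mask_recent_torch`` builds the boolean mask by evaluating ``mask_function`` on broadcast index grids (batch, head, query position, key/value position) instead of going through ``torch.vmap``, which is presumably what makes it friendlier to export. A toy illustration of that broadcasting idea (not the patched function itself):

```python
import torch

# A mask function of (batch_idx, head_idx, q_idx, kv_idx) evaluated on index grids
# that each occupy one axis yields a (batch, heads, q_len, kv_len) boolean mask
# without loops or vmap.
valid_len = torch.tensor([6, 3])  # per-batch number of valid kv positions

def mask_fn(batch_idx, head_idx, q_idx, kv_idx):
    causal = kv_idx <= q_idx                 # lower-triangular part
    padding = kv_idx < valid_len[batch_idx]  # drop padded kv positions
    return causal & padding

batch = torch.arange(2)[:, None, None, None]
head = torch.arange(1)[None, :, None, None]
q_pos = torch.arange(4)[None, None, :, None]
kv_pos = torch.arange(6)[None, None, None, :]

mask = mask_fn(batch, head, q_pos, kv_pos)  # boolean tensor of shape (2, 1, 4, 6)
```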

onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py
@@ -218,7 +218,6 @@ def unflatten_sliding_window_cache(
  values: List[Any], context: torch.utils._pytree.Context, output_type=None
  ) -> SlidingWindowCache:
  """Restores a :class:`transformers.cache_utils.SlidingWindowCache` from python objects."""
- key_cache, value_cache = values
  return make_sliding_window_cache(list(zip(values[0], values[1])))

onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -11,6 +11,7 @@ __data_arch__ = textwrap.dedent(
  """
  architecture,task
  ASTModel,feature-extraction
+ AutoencoderKL,image-to-image
  AlbertModel,feature-extraction
  BeitForImageClassification,image-classification
  BartForConditionalGeneration,summarization
@@ -154,6 +155,7 @@ __data_arch__ = textwrap.dedent(
  Wav2Vec2ForCTC,automatic-speech-recognition
  YolosForObjectDetection,object-detection
  YolosModel,image-feature-extraction
+ Alibaba-NLP/gte-large-en-v1.5,sentence-similarity
  emilyalsentzer/Bio_ClinicalBERT,fill-mask"""
  )