onnx-diagnostic 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
@@ -2,6 +2,7 @@ import ast
 import enum
 import inspect
 import itertools
+import json
 from dataclasses import is_dataclass, fields
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 import numpy as np
@@ -1373,11 +1374,7 @@ def max_diff(
         if hist:
             if isinstance(hist, bool):
                 hist = np.array([0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100], dtype=diff.dtype)
-            ind = np.digitize(diff.reshape((-1,)), hist, right=True)
-            cou = np.bincount(ind, minlength=ind.shape[0] + 1)
-            res["rep"] = dict(
-                zip([f">{x}" for x in hist], [int(i) for i in (cou.sum() - np.cumsum(cou))])
-            )
+            res["rep"] = {f">{h}": (diff > h).sum().item() for h in hist}
         return res  # type: ignore

     if isinstance(expected, torch.Tensor) and isinstance(got, torch.Tensor):
@@ -1493,27 +1490,11 @@ def max_diff(
             dev=dev,
         )
         if hist:
-            if isinstance(hist, list) and len(hist) == 1:
-                res["rep"] = {f">{hist[0]}": (diff > hist[0]).sum().item()}
-            elif isinstance(hist, list) and len(hist) == 2:
-                res["rep"] = {
-                    f">{hist[0]}": (diff > hist[0]).sum().item(),
-                    f">{hist[1]}": (diff > hist[1]).sum().item(),
-                }
-            else:
-                if isinstance(hist, bool):
-                    hist = torch.tensor(
-                        [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100], dtype=diff.dtype
-                    )
-                hist = torch.tensor(hist).to(diff.device)
-                ind = torch.bucketize(diff.reshape((-1,)), hist, right=False)
-                cou = torch.bincount(ind, minlength=ind.shape[0] + 1)
-                res["rep"] = dict(
-                    zip(
-                        [f">{x}" for x in hist],
-                        [int(i) for i in (cou.sum() - torch.cumsum(cou, 0))],
-                    )
+            if isinstance(hist, bool):
+                hist = torch.tensor(
+                    [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100], dtype=diff.dtype
                 )
+            res["rep"] = {f">{h}": (diff > h).sum().item() for h in hist}
         return res  # type: ignore

     if isinstance(expected, int) and isinstance(got, torch.Tensor):
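
Both the NumPy and the torch branches of max_diff now build the histogram report the same way: for each threshold in hist, count how many absolute differences exceed it. A minimal sketch of that counting logic with made-up values (not a call into the library):

import numpy as np

# Made-up absolute differences; the thresholds mirror the default list above.
diff = np.array([0.0, 0.002, 0.02, 0.2, 2.0])
hist = np.array([0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100], dtype=diff.dtype)

# One entry per threshold: the number of elements strictly greater than it.
rep = {f">{h}": (diff > h).sum().item() for h in hist}
print(rep)
# {'>0.0': 4, '>0.0001': 4, '>0.001': 4, '>0.01': 3, '>0.1': 2, '>1.0': 1, '>10.0': 0, '>100.0': 0}
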
@@ -1750,8 +1731,26 @@ def max_diff(
     )


-def string_diff(diff: Dict[str, Any]) -> str:
-    """Renders discrepancies return by :func:`max_diff` into one string."""
+def string_diff(diff: Dict[str, Any], js: bool = False, ratio: bool = False, **kwargs) -> str:
+    """
+    Renders discrepancies return by :func:`max_diff` into one string.
+
+    :param diff: differences
+    :param js: json format
+    :param ratio: display mismatch ratio
+    :param kwargs: addition values to add in the json format
+    """
+    if js:
+        if "rep" in diff:
+            rep = diff["rep"]
+            diff = {**{k: v for k, v in diff.items() if k != "rep"}, **rep}
+            if ratio:
+                for k, v in rep.items():
+                    diff[f"%{k}"] = v / diff["n"]
+        diff["mean"] = diff["sum"] / diff["n"]
+        diff.update(kwargs)
+        return json.dumps(diff)
+
     # dict(abs=, rel=, sum=, n=n_diff, dnan=)
     if "dev" in diff:
         ddiff = {k: v for k, v in diff.items() if k != "dev"}
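
A rough standalone sketch of what the new js/ratio path of string_diff produces, applied to a hand-built dictionary shaped like the comment above (abs, rel, sum, n, dnan, plus the optional rep histogram); the values are made up and the snippet mirrors the logic rather than calling the library:

import json

diff = {
    "abs": 0.02, "rel": 0.5, "sum": 3.2, "n": 160, "dnan": 0,
    "rep": {">0.001": 40, ">0.01": 5},
}

# js=True: flatten "rep" into the top level; ratio=True additionally adds one
# "%>threshold" ratio per rep entry; the mean discrepancy is sum / n.
rep = diff.pop("rep")
flat = {**diff, **rep}
flat.update({f"%{k}": v / flat["n"] for k, v in rep.items()})
flat["mean"] = flat["sum"] / flat["n"]
print(json.dumps(flat))
# ... ">0.001": 40, ">0.01": 5, "%>0.001": 0.25, "%>0.01": 0.03125, "mean": 0.02}
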
@@ -818,6 +818,7 @@ def torch_export_patches(
     rewrite: Optional[List[Callable]] = None,
     dump_rewriting: Optional[str] = None,
     patch_details: Optional[PatchDetails] = None,
+    profile: Optional[str] = None,
 ) -> Callable:
     """
     Tries to bypass some situations :func:`torch.export.export` does not support.
@@ -850,6 +851,8 @@ def torch_export_patches(
     :param dump_rewriting: dumps rewriting information in file beginning with that prefix
     :param patch_details: if specified, this class is used to stored every rewritten done.
     :param verbose: to show which patches is applied
+    :param profile: starts profiling whatever is called inside the context manager,
+        output the profiling into a text file

     The list of available patches.

@@ -1017,10 +1020,23 @@ def torch_export_patches(
     if verbose:
         print("[torch_export_patches] done patching")

+    if profile:
+        from pyinstrument import Profiler
+
+        profiler = Profiler()
+        profiler.start()
+    else:
+        profiler = None
+
     try:
         yield fct_callable
     finally:

+        if profiler:
+            profiler.stop()
+            with open(profile, "w") as f:
+                f.write(profiler.output_html())
+
         # unpatch

         if verbose:
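
A hedged usage sketch of the new profile argument (the import path and the toy model are assumptions, not taken from the diff). Since the context manager writes profiler.output_html(), an .html file name is the natural choice, and pyinstrument must be installed:

import torch
from onnx_diagnostic.torch_export_patches import torch_export_patches  # assumed import path

class TinyModel(torch.nn.Module):
    def forward(self, x):
        return x.abs() + 1

# profile=... starts a pyinstrument Profiler when the context is entered and
# writes the HTML report to that file when the context exits.
with torch_export_patches(profile="export_profile.html"):
    ep = torch.export.export(TinyModel(), (torch.randn(2, 3),))
print(ep)
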
@@ -1,3 +1,4 @@
+import inspect
 from typing import Callable, List, Optional, Tuple
 import torch

@@ -19,6 +20,12 @@ if patch_masking_utils:
        prepare_padding_mask,
    )

+    _prepare_padding_mask_kwargs = (
+        dict(_slice=False)
+        if "_slice" in inspect.signature(prepare_padding_mask).parameters
+        else {}
+    )
+
    try:
        # transformers>=5.0
        from transformers.masking_utils import (
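
The signature probe above in isolation: _slice=False is only forwarded when the installed prepare_padding_mask actually declares that parameter, so the patch keeps working with transformers versions whose helper does not accept _slice. A self-contained sketch with a stand-in function (the real helper lives in transformers.masking_utils):

import inspect

def prepare_padding_mask(attention_mask, kv_length, kv_offset, _slice=True):
    # Stand-in for the transformers helper; only its signature matters here.
    return attention_mask, _slice

_prepare_padding_mask_kwargs = (
    dict(_slice=False)
    if "_slice" in inspect.signature(prepare_padding_mask).parameters
    else {}
)
print(_prepare_padding_mask_kwargs)  # {'_slice': False} because the stand-in declares _slice
print(prepare_padding_mask(None, 8, 0, **_prepare_padding_mask_kwargs))  # (None, False)
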
@@ -132,7 +139,9 @@ if patch_masking_utils:
    ) -> Optional[torch.Tensor]:
        """manual patch for function ``transformers.masking_utils.sdpa_mask_recent_torch``."""
        q_length = cache_position.shape[0]
-        padding_mask = prepare_padding_mask(attention_mask, kv_length, kv_offset, _slice=False)
+        padding_mask = prepare_padding_mask(
+            attention_mask, kv_length, kv_offset, **_prepare_padding_mask_kwargs
+        )
        if allow_is_causal_skip and _ignore_causal_mask_sdpa(
            padding_mask, q_length, kv_length, kv_offset, local_size
        ):
@@ -24,7 +24,7 @@ if patch_qwen2_5:

    onnx_plugs_op = onnxscript.values.Opset("onnx_plug", 1)
    op = onnxscript.opset22
-    op24 = onnxscript.onnx_opset.opset24
+    op23 = onnxscript.onnx_opset.opset23
    msft_op = onnxscript.values.Opset("com.microsoft", 1)
    STOPAT = (
        int(os.environ.get("STOPAT", None))
@@ -101,7 +101,7 @@ if patch_qwen2_5:
        return attn_output_4d

    @onnxscript.script(opset=onnx_plugs_op)
-    def LoopAttention24(
+    def LoopAttention23(
        query_states,
        key_states,
        value_states,
@@ -109,26 +109,26 @@ if patch_qwen2_5:
        scaling: float = 0.11180339887498948,
        num_heads: int = 16,
    ):
-        to_3d_shape = op24.Constant(value_ints=[0, 0, -1])
-        query_transposed = op24.Transpose(query_states, perm=[0, 2, 1, 3])
-        output_shape = op24.Shape(query_transposed)
-        query_3d = op24.Reshape(query_transposed, to_3d_shape)
-        value_3d = op24.Reshape(op24.Transpose(value_states, perm=[0, 2, 1, 3]), to_3d_shape)
-        key_3d = op24.Reshape(op24.Transpose(key_states, perm=[0, 2, 1, 3]), to_3d_shape)
-        cu_seqlens = op24.Cast(cu_seqlens, to=onnx.TensorProto.INT32)
-        num_patches = op24.Size(cu_seqlens) - 1
-        seq_axis = op24.Constant(value_ints=[1])
-        seq_axis_int32 = op24.Cast(seq_axis, to=onnx.TensorProto.INT32)
-        seq_attn = op24.SequenceEmpty(dtype=onnx.TensorProto.FLOAT)
+        to_3d_shape = op23.Constant(value_ints=[0, 0, -1])
+        query_transposed = op23.Transpose(query_states, perm=[0, 2, 1, 3])
+        output_shape = op23.Shape(query_transposed)
+        query_3d = op23.Reshape(query_transposed, to_3d_shape)
+        value_3d = op23.Reshape(op23.Transpose(value_states, perm=[0, 2, 1, 3]), to_3d_shape)
+        key_3d = op23.Reshape(op23.Transpose(key_states, perm=[0, 2, 1, 3]), to_3d_shape)
+        cu_seqlens = op23.Cast(cu_seqlens, to=onnx.TensorProto.INT32)
+        num_patches = op23.Size(cu_seqlens) - 1
+        seq_axis = op23.Constant(value_ints=[1])
+        seq_axis_int32 = op23.Cast(seq_axis, to=onnx.TensorProto.INT32)
+        seq_attn = op23.SequenceEmpty(dtype=onnx.TensorProto.FLOAT)
        for i_patch in range(num_patches):
-            i_1d = op24.Reshape(i_patch, [1])
+            i_1d = op23.Reshape(i_patch, [1])
            i_plus_1_1d = i_1d + 1
-            start = op24.Gather(cu_seqlens, i_1d, axis=0)
-            end = op24.Gather(cu_seqlens, i_plus_1_1d, axis=0)
-            query_i = op24.Slice(query_3d, start, end, seq_axis_int32)
-            key_i = op24.Slice(key_3d, start, end, seq_axis_int32)
-            value_i = op24.Slice(value_3d, start, end, seq_axis_int32)
-            mha_output = op24.Attention(
+            start = op23.Gather(cu_seqlens, i_1d, axis=0)
+            end = op23.Gather(cu_seqlens, i_plus_1_1d, axis=0)
+            query_i = op23.Slice(query_3d, start, end, seq_axis_int32)
+            key_i = op23.Slice(key_3d, start, end, seq_axis_int32)
+            value_i = op23.Slice(value_3d, start, end, seq_axis_int32)
+            mha_output = op23.Attention(
                query_i,
                key_i,
                value_i,
@@ -137,9 +137,9 @@ if patch_qwen2_5:
                kv_num_heads=num_heads,
                softmax_precision=onnx.TensorProto.FLOAT,
            )
-            seq_attn = op24.SequenceInsert(seq_attn, mha_output)
-        attn_output = op24.ConcatFromSequence(seq_attn, axis=1)
-        attn_output_4d = op24.Reshape(attn_output, output_shape)
+            seq_attn = op23.SequenceInsert(seq_attn, mha_output)
+        attn_output = op23.ConcatFromSequence(seq_attn, axis=1)
+        attn_output_4d = op23.Reshape(attn_output, output_shape)
        return attn_output_4d

    @onnxscript.script(opset=onnx_plugs_op)
@@ -256,20 +256,24 @@ if patch_qwen2_5:
        return attn_output

    def qwen_version_selector(opset: int, *args: torch.Tensor) -> Tuple[str, torch.dtype]:
-        first_tensor = next(a for a in args if a is not None)
-        dtype = first_tensor.dtype
+        first_float_tensor = next(
+            a
+            for a in args
+            if a is not None and a.dtype in {torch.float16, torch.float32, torch.bfloat16}
+        )
+        dtype = first_float_tensor.dtype
        strategy = patched_Qwen2_5_VLVisionAttention.STRATEGY_FOR_ATTENTION()
        itype = torch_dtype_to_onnx_dtype(dtype)
        if strategy is not None:
            return strategy, itype
        if dtype == torch.float32 or itype == onnx.TensorProto.FLOAT:
-            if opset >= 24:
-                return "LOOPA24", itype
+            if opset >= 23:
+                return "LOOPA23", itype
            return "LOOPMHA", itype
        if dtype == torch.float16 or itype == onnx.TensorProto.FLOAT16:
            # first_tensor may be a SymbolicTensor (onnx).
            # is_cuda is not available.
-            if hasattr(first_tensor, "is_cuda") and first_tensor.is_cuda:
+            if hasattr(first_float_tensor, "is_cuda") and first_float_tensor.is_cuda:
                return "PACKED", itype
            return "LOOPMHA", itype
        raise AssertionError(
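
qwen_version_selector no longer keys its decision on the first non-None argument, which can be an integer tensor, but on the first floating-point tensor. A small illustration with made-up tensors (the argument order is illustrative only):

import torch

# An int64 tensor first (something like cu_seqlens), then a float16 tensor:
# the old rule would have picked int64 here, the new one skips it.
args = (torch.tensor([0, 4, 8]), None, torch.randn(8, 2, dtype=torch.float16))

first_float_tensor = next(
    a
    for a in args
    if a is not None and a.dtype in {torch.float16, torch.float32, torch.bfloat16}
)
print(first_float_tensor.dtype)  # torch.float16
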
@@ -288,9 +292,9 @@ if patch_qwen2_5:
        ("PACKED", onnx.TensorProto.FLOAT16): _add_com_microsoft_opset(
            PackedAttention.to_function_proto()
        ),
-        ("LOOPA24", onnx.TensorProto.FLOAT): LoopAttention24.to_function_proto(),
-        ("LOOPA24", onnx.TensorProto.FLOAT16): _update_sequence_type(
-            onnx.TensorProto.FLOAT16, LoopAttention24.to_function_proto()
+        ("LOOPA23", onnx.TensorProto.FLOAT): LoopAttention23.to_function_proto(),
+        ("LOOPA23", onnx.TensorProto.FLOAT16): _update_sequence_type(
+            onnx.TensorProto.FLOAT16, LoopAttention23.to_function_proto()
        ),
        ("LOOPMHA", onnx.TensorProto.FLOAT): _add_com_microsoft_opset(
            LoopMHAAttention.to_function_proto()
@@ -733,3 +737,71 @@ if patch_qwen2_5:
        attn_output = attn_output.reshape(seq_length, -1).contiguous()
        attn_output = self.proj(attn_output)
        return attn_output
+
+    class patched_Qwen2_5_VLModel:
+        _PATCHES_ = ["get_placeholder_mask"]
+        _PATCHED_CLASS_ = transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.Qwen2_5_VLModel
+
+        def get_placeholder_mask(
+            self,
+            input_ids: torch.LongTensor,
+            inputs_embeds: torch.FloatTensor,
+            image_features: Optional[torch.FloatTensor] = None,
+            video_features: Optional[torch.FloatTensor] = None,
+        ):
+            if input_ids is None:
+                special_image_mask = inputs_embeds == self.get_input_embeddings()(
+                    torch.tensor(
+                        self.config.image_token_id,
+                        dtype=torch.long,
+                        device=inputs_embeds.device,
+                    )
+                )
+                special_image_mask = special_image_mask.all(-1)
+                special_video_mask = inputs_embeds == self.get_input_embeddings()(
+                    torch.tensor(
+                        self.config.video_token_id,
+                        dtype=torch.long,
+                        device=inputs_embeds.device,
+                    )
+                )
+                special_video_mask = special_video_mask.all(-1)
+            else:
+                special_image_mask = input_ids == self.config.image_token_id
+                special_video_mask = input_ids == self.config.video_token_id
+
+            special_image_mask = (
+                special_image_mask.unsqueeze(-1)
+                .expand_as(inputs_embeds)
+                .to(inputs_embeds.device)
+            )
+
+            # PATCHED: we should use torch._check
+            # but this fails for compilation. It cannot be verified with FakeTensors
+            # torch._check(
+            #     image_features is None
+            #     or inputs_embeds[special_image_mask].numel() == image_features.numel(),
+            #     lambda: (
+            #         f"Image features and image tokens do not match: tokens: "
+            #         f"{special_image_mask.sum()}, features {image_features.shape[0]}"
+            #     ),
+            # )
+
+            special_video_mask = (
+                special_video_mask.unsqueeze(-1)
+                .expand_as(inputs_embeds)
+                .to(inputs_embeds.device)
+            )
+
+            # PATCHED: we should use torch._check
+            # but this fails for compilation. It cannot be verified with FakeTensors
+            # torch._check(
+            #     video_features is None
+            #     or inputs_embeds[special_video_mask].numel() == video_features.numel(),
+            #     lambda: (
+            #         f"Videos features and video tokens do not match: tokens: "
+            #         f"{special_video_mask.sum()}, features {video_features.shape[0]}"
+            #     ),
+            # )
+
+            return special_image_mask, special_video_mask
@@ -77,6 +77,7 @@ if patch_qwen2_5:
        patched_Qwen2_5_VisionTransformerPretrainedModel,
        patched_Qwen2_5_VLVisionAttentionOneIteration,
        patched_Qwen2_5_VLVisionAttention,
+        patched_Qwen2_5_VLModel,
        PLUGS as PLUGS_Qwen25,
    )

@@ -55,6 +55,7 @@ Automatically generated:
 import base64
 import json
 import textwrap
+from typing import Any
 import transformers

 null = None
@@ -62,6 +63,22 @@ true = True
 false = False


+def _enforce_default(config_type: type, **kwargs) -> Any:
+    config = config_type(**kwargs)
+    for name in [
+        *[k for k in kwargs if k.endswith("_token_id")],
+        "attention_dropout",
+        "hidden_size",
+        "hidden_act",
+        "intermediate_size",
+        "max_position_embeddings",
+        "vocab_size",
+    ]:
+        if name in kwargs and (not hasattr(config, name) or getattr(config, name) is None):
+            setattr(config, name, kwargs[name])
+    return config
+
+
 def _ccached_arnir0_tiny_LLM():
     "arnir0/Tiny-LLM"
     return transformers.LlamaConfig(
@@ -4691,7 +4708,8 @@ def _ccached_zai_glm_45():

 def _ccached_microsoft_phi3_mini_128k_instruct():
     "microsoft/Phi-3-mini-128k-instruct"
-    return transformers.Phi3Config(
+    return _enforce_default(
+        transformers.Phi3Config,
         **{
             "_name_or_path": "Phi-3-mini-128k-instruct",
             "architectures": ["Phi3ForCausalLM"],
@@ -4827,13 +4845,14 @@ def _ccached_microsoft_phi3_mini_128k_instruct():
             "use_cache": true,
             "attention_bias": false,
             "vocab_size": 32064,
-        }
+        },
     )


 def _ccached_google_gemma_3_4b_it_like():
     "google/gemma-3-4b-it"
-    return transformers.Gemma3Config(
+    return _enforce_default(
+        transformers.Gemma3Config,
         **{
             "architectures": ["Gemma3ForConditionalGeneration"],
             "boi_token_index": 255999,
@@ -4863,13 +4882,14 @@ def _ccached_google_gemma_3_4b_it_like():
                 "patch_size": 14,
                 "vision_use_head": false,
             },
-        }
+        },
     )


 def _ccached_hf_internal_testing_tiny_random_gemma3_for_causal_lm():
     "hf-internal-testing/tiny-random-Gemma3ForCausalLM"
-    return transformers.Gemma3TextConfig(
+    return _enforce_default(
+        transformers.Gemma3TextConfig,
         **{
             "architectures": ["Gemma3ForCausalLM"],
             "attention_bias": false,
@@ -4901,13 +4921,14 @@ def _ccached_hf_internal_testing_tiny_random_gemma3_for_causal_lm():
             "transformers_version": "4.52.0.dev0",
             "use_cache": true,
             "vocab_size": 262144,
-        }
+        },
     )


 def _ccached_qwen_qwen2_5_vl_7b_instruct():
     "Qwen/Qwen2.5-VL-7B-Instruct"
-    return transformers.Qwen2_5_VLConfig(
+    return _enforce_default(
+        transformers.Qwen2_5_VLConfig,
         **{
             "architectures": ["Qwen2_5_VLForConditionalGeneration"],
             "attention_dropout": 0.0,
@@ -4954,5 +4975,5 @@ def _ccached_qwen_qwen2_5_vl_7b_instruct():
             },
             "rope_scaling": {"type": "mrope", "mrope_section": [16, 24, 24]},
             "vocab_size": 152064,
-        }
+        },
     )