onnx-diagnostic 0.7.3-py3-none-any.whl → 0.7.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +82 -12
- onnx_diagnostic/export/shape_helper.py +71 -0
- onnx_diagnostic/helpers/_log_helper.py +461 -0
- onnx_diagnostic/helpers/cache_helper.py +11 -1
- onnx_diagnostic/helpers/log_helper.py +404 -315
- onnx_diagnostic/reference/ops/op_cast_like.py +12 -8
- onnx_diagnostic/tasks/automatic_speech_recognition.py +6 -2
- onnx_diagnostic/tasks/feature_extraction.py +92 -7
- onnx_diagnostic/tasks/fill_mask.py +6 -2
- onnx_diagnostic/tasks/image_classification.py +7 -3
- onnx_diagnostic/tasks/image_text_to_text.py +6 -2
- onnx_diagnostic/tasks/mixture_of_expert.py +1 -1
- onnx_diagnostic/tasks/object_detection.py +7 -3
- onnx_diagnostic/tasks/sentence_similarity.py +6 -2
- onnx_diagnostic/tasks/summarization.py +6 -2
- onnx_diagnostic/tasks/text2text_generation.py +8 -4
- onnx_diagnostic/tasks/text_classification.py +6 -2
- onnx_diagnostic/tasks/text_generation.py +5 -3
- onnx_diagnostic/tasks/text_to_image.py +6 -2
- onnx_diagnostic/tasks/zero_shot_image_classification.py +6 -2
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +63 -7
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +188 -51
- onnx_diagnostic/torch_models/hghub/model_inputs.py +6 -1
- onnx_diagnostic/torch_models/validate.py +49 -10
- {onnx_diagnostic-0.7.3.dist-info → onnx_diagnostic-0.7.5.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.3.dist-info → onnx_diagnostic-0.7.5.dist-info}/RECORD +30 -29
- {onnx_diagnostic-0.7.3.dist-info → onnx_diagnostic-0.7.5.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.3.dist-info → onnx_diagnostic-0.7.5.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.3.dist-info → onnx_diagnostic-0.7.5.dist-info}/top_level.txt +0 -0

onnx_diagnostic/reference/ops/op_cast_like.py
@@ -1,13 +1,17 @@
 from onnx.onnx_pb import TensorProto
 from onnx.reference.op_run import OpRun
-
-
-
-
-
-
-
-
+try:
+    from onnx.reference.ops.op_cast import (
+        bfloat16,
+        cast_to,
+        float8e4m3fn,
+        float8e4m3fnuz,
+        float8e5m2,
+        float8e5m2fnuz,
+    )
+except ImportError:
+    from onnx.reference.ops.op_cast import cast_to
 from ...helpers.onnx_helper import np_dtype_to_tensor_dtype
 
 
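The new guarded import appears to account for onnx builds whose `onnx.reference.ops.op_cast` does not expose the bfloat16/float8 helpers. A minimal sketch of the same pattern, assuming only `cast_to` is available everywhere; the `HAS_EXTENDED_TYPES` flag is a hypothetical name added here for illustration:

```python
# Sketch of the optional-dtype import guard; HAS_EXTENDED_TYPES is a
# hypothetical name, not part of onnx or onnx-diagnostic.
try:
    from onnx.reference.ops.op_cast import (
        bfloat16,
        cast_to,
        float8e4m3fn,
        float8e4m3fnuz,
        float8e5m2,
        float8e5m2fnuz,
    )

    HAS_EXTENDED_TYPES = True
except ImportError:
    # Older onnx releases only provide cast_to.
    from onnx.reference.ops.op_cast import cast_to

    HAS_EXTENDED_TYPES = False
```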

onnx_diagnostic/tasks/automatic_speech_recognition.py
@@ -33,7 +33,7 @@ def get_inputs(
     head_dim: int,
     batch_size: int = 2,
     sequence_length: int = 30,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -132,6 +132,9 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
@@ -144,7 +147,8 @@ def get_inputs(
             decoder_layers=decoder_layers,
             head_dim=head_dim,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res
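The same change recurs in every task module below: `add_second_input` is now an integer offset rather than a bare flag, the generator asserts it is positive, and the nested call passes `add_second_input=0` so only one extra input set is built. A simplified, self-contained sketch of that pattern (the `make_inputs` helper is hypothetical, not the library API):

```python
# Hypothetical, stripped-down version of the add_second_input pattern used by
# the task-specific get_inputs functions in this release.
def make_inputs(batch_size: int = 2, sequence_length: int = 30, add_second_input: int = 1):
    inputs = dict(batch_size=batch_size, sequence_length=sequence_length)
    res = dict(inputs=inputs)
    if add_second_input:
        assert (
            add_second_input > 0
        ), f"Not implemented for add_second_input={add_second_input}."
        res["inputs2"] = make_inputs(
            batch_size=batch_size + 1,
            # the offset controls how much longer the second sequence is
            sequence_length=sequence_length + add_second_input,
            add_second_input=0,  # stop the recursion after one level
        )["inputs"]
    return res


print(make_inputs(add_second_input=2))
# {'inputs': {'batch_size': 2, 'sequence_length': 30},
#  'inputs2': {'batch_size': 3, 'sequence_length': 32}}
```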

onnx_diagnostic/tasks/feature_extraction.py
@@ -1,17 +1,15 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 from ..helpers.config_helper import update_config, check_hasattr
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 
 __TASK__ = "feature-extraction"
 
 
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "
-    kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers, 2),
-        num_attention_heads=min(config.num_attention_heads, 4),
-    )
+    check_hasattr(config, "num_hidden_layers")
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, 2))
     update_config(config, kwargs)
     return kwargs
 
@@ -22,7 +20,13 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
-
+    sequence_length2: int = 3,
+    decoder_attention_heads: Optional[int] = None,
+    encoder_attention_heads: Optional[int] = None,
+    encoder_ffn_dim: Optional[int] = None,
+    decoder_ffn_dim: Optional[int] = None,
+    num_hidden_layers: Optional[int] = None,
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -50,14 +54,84 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
+    if (
+        encoder_attention_heads
+        and decoder_attention_heads
+        and encoder_ffn_dim
+        and decoder_ffn_dim
+        and num_hidden_layers
+    ):
+        inputs["past_key_values"] = make_encoder_decoder_cache(
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+        )
+        cache_length = "cache_length_key"
+        cache_length2 = "cache_length_val"
+        shapes["past_key_values"] = [  # type: ignore[assignment]
+            [
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            [
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+            ],
+        ]
+
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             dummy_max_token_id=dummy_max_token_id,
+            sequence_length2=sequence_length2,
+            decoder_attention_heads=decoder_attention_heads,
+            encoder_attention_heads=encoder_attention_heads,
+            encoder_ffn_dim=encoder_ffn_dim,
+            decoder_ffn_dim=decoder_ffn_dim,
+            num_hidden_layers=num_hidden_layers,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res
@@ -76,4 +150,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         sequence_length=30,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
+    for att in [
+        "decoder_attention_heads",
+        "encoder_attention_heads",
+        "encoder_ffn_dim",
+        "decoder_ffn_dim",
+        "num_hidden_layers",
+    ]:
+        if hasattr(config, att):
+            kwargs[att] = getattr(config, att)
+    kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
+    print(kwargs)
     return kwargs, get_inputs
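feature-extraction now also feeds an encoder-decoder `past_key_values` built from the package's cache helpers. A small usage sketch, assuming (as the diff suggests) that `make_dynamic_cache` takes a list of `(key, value)` tensor pairs and `make_encoder_decoder_cache` combines two such caches; the dimensions are illustrative only:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,
    make_encoder_decoder_cache,
)

# Illustrative dimensions, not taken from a real configuration.
num_hidden_layers, batch_size, heads, seq, seq2, dim = 2, 2, 4, 30, 3, 64

past_key_values = make_encoder_decoder_cache(
    make_dynamic_cache(
        [
            (torch.randn(batch_size, heads, seq, dim), torch.randn(batch_size, heads, seq, dim))
            for _ in range(num_hidden_layers)
        ]
    ),
    make_dynamic_cache(
        [
            (torch.randn(batch_size, heads, seq2, dim), torch.randn(batch_size, heads, seq2, dim))
            for _ in range(num_hidden_layers)
        ]
    ),
)
```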

onnx_diagnostic/tasks/fill_mask.py
@@ -22,7 +22,7 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -54,12 +54,16 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             dummy_max_token_id=dummy_max_token_id,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/image_classification.py
@@ -34,7 +34,7 @@ def get_inputs(
     input_channels: int,
     batch_size: int = 2,
     dynamic_rope: bool = False,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -75,14 +75,18 @@ def get_inputs(
         shapes["interpolate_pos_encoding"] = None  # type: ignore[assignment]
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
-            input_width=input_width +
-            input_height=input_height +
+            input_width=input_width + add_second_input,
+            input_height=input_height + add_second_input,
             input_channels=input_channels,
             batch_size=batch_size + 1,
             dynamic_rope=dynamic_rope,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/image_text_to_text.py
@@ -32,7 +32,7 @@ def get_inputs(
     sequence_length2: int = 3,
     n_images: int = 2,
     dynamic_rope: bool = False,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -105,6 +105,9 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
@@ -116,10 +119,11 @@ def get_inputs(
             height=height,
             num_channels=num_channels,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             sequence_length2=sequence_length2 + 1,
             n_images=n_images + 1,
             dynamic_rope=dynamic_rope,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/object_detection.py
@@ -27,7 +27,7 @@ def get_inputs(
     input_channels: int,
     batch_size: int = 2,
     dynamic_rope: bool = False,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -65,14 +65,18 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
-            input_width=input_width +
-            input_height=input_height +
+            input_width=input_width + add_second_input,
+            input_height=input_height + add_second_input,
             input_channels=input_channels,
             batch_size=batch_size + 1,
             dynamic_rope=dynamic_rope,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/sentence_similarity.py
@@ -22,7 +22,7 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -54,12 +54,16 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             dummy_max_token_id=dummy_max_token_id,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/summarization.py
@@ -29,7 +29,7 @@ def get_inputs(
     batch_size: int = 2,
     sequence_length: int = 30,
     sequence_length2: int = 3,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -144,6 +144,9 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
@@ -154,8 +157,9 @@ def get_inputs(
             head_dim_encoder=head_dim_encoder,
             head_dim_decoder=head_dim_decoder,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             sequence_length2=sequence_length2 + 1,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/text2text_generation.py
@@ -30,7 +30,7 @@ def get_inputs(
     batch_size: int = 2,
     sequence_length: int = 30,
     sequence_length2: int = 3,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -69,8 +69,8 @@ def get_inputs(
     ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = "cache_length_key"
-    cache_length2 = "cache_length_val"
+    cache_length = "cache_length_key"
+    cache_length2 = "cache_length_val"
 
     shapes = {
         "input_ids": {0: batch, 1: seq_length},
@@ -149,6 +149,9 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
@@ -160,8 +163,9 @@ def get_inputs(
             head_dim_decoder=head_dim_decoder,
             encoder_dim=encoder_dim,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             sequence_length2=sequence_length2 + 1,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/text_classification.py
@@ -22,7 +22,7 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -54,12 +54,16 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             dummy_max_token_id=dummy_max_token_id,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/text_generation.py
@@ -72,7 +72,7 @@ def get_inputs(
     num_key_value_heads: Optional[int] = None,
     head_dim: Optional[int] = None,
     cls_cache: Optional[Union[type, str]] = None,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -260,13 +260,15 @@ def get_inputs(
             config=config,
             dummy_max_token_id=dummy_max_token_id,
             num_hidden_layers=num_hidden_layers,
-            batch_size=batch_size + 1,
+            batch_size=(batch_size + 1) if add_second_input > 0 else 1,
             sequence_length=sequence_length + 1,
-            sequence_length2=sequence_length2
+            sequence_length2=sequence_length2
+            + (add_second_input if add_second_input > 0 else -add_second_input),
             dynamic_rope=dynamic_rope,
             num_key_value_heads=num_key_value_heads,
             head_dim=head_dim,
             cls_cache=cls_cache,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res
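text-generation treats the offset slightly differently from the other tasks: a positive `add_second_input` grows the batch by one, while the second `sequence_length2` is shifted by the absolute value of the offset, so a negative value can request a second input set with `batch_size=1`. A small sketch of that arithmetic (hypothetical helper name, not the library API):

```python
# Hypothetical helper reproducing the arithmetic used for the second input set.
def second_input_dims(batch_size: int, sequence_length2: int, add_second_input: int):
    batch2 = (batch_size + 1) if add_second_input > 0 else 1
    seq2 = sequence_length2 + (
        add_second_input if add_second_input > 0 else -add_second_input
    )
    return batch2, seq2


print(second_input_dims(2, 3, 1))   # (3, 4)
print(second_input_dims(2, 3, -2))  # (1, 5)
```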

onnx_diagnostic/tasks/text_to_image.py
@@ -25,7 +25,7 @@ def get_inputs(
     in_channels: int,
     sample_size: int,
     cross_attention_dim: int,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -58,15 +58,19 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
             batch_size=batch_size + 1,
             sequence_length=sequence_length,
-            cache_length=cache_length +
+            cache_length=cache_length + add_second_input,
             in_channels=in_channels,
             sample_size=sample_size,
             cross_attention_dim=cross_attention_dim,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/tasks/zero_shot_image_classification.py
@@ -34,7 +34,7 @@ def get_inputs(
     input_height: int = 224,
     input_channels: int = 3,
     batch_size_image=3,
-    add_second_input:
+    add_second_input: int = 1,
     **kwargs,  # unused
 ):
     """
@@ -87,16 +87,20 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
             dummy_max_token_id=dummy_max_token_id,
             batch_size=batch_size + 1,
-            sequence_length=sequence_length +
+            sequence_length=sequence_length + add_second_input,
             input_width=input_width,
             input_height=input_height,
             input_channels=input_channels,
             batch_size_image=batch_size_image + 1,
+            add_second_input=0,
             **kwargs,
         )["inputs"]
     return res

onnx_diagnostic/torch_export_patches/onnx_export_errors.py
@@ -16,6 +16,8 @@ def get_function(name: str) -> Tuple[type, Callable]:
     module_name = ".".join(spl[:-1])
     fname = spl[-1]
     mod = importlib.import_module(module_name)
+    if not hasattr(mod, fname):
+        return None, None
     return mod, getattr(mod, fname)
 
 
@@ -33,12 +35,16 @@ def get_patches(mod, verbose: int = 0) -> Tuple[str, List[Any]]:
         doc = v.__doc__.lstrip()
         if doc.startswith("manual patch"):
             continue
-        reg = re.compile("[[]patch:([a-z_A-Z.]+)[]]")
+        reg = re.compile("[\\[]patch:([a-z_A-Z.]+)[\\]]")
         fall = reg.findall(doc)
         assert (
            len(fall) == 1
        ), f"Unable to find patching information for {v} in \n{doc}"
        fmod, f = get_function(fall[0])
+        if fmod is None and f is None:
+            # The function does not exist in this version of transformers.
+            # No patch is needed.
+            continue
         to_patch.append({"module": fmod, "function": f, "patch": v})
 
     name = mod.__name__
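`get_patches` resolves the function named in a patch's `[patch:...]` docstring marker; with this change `get_function` returns `(None, None)` when that target is missing from the installed transformers, and the patch is skipped instead of raising. A self-contained sketch of the lookup, using `math.tanh` as a hypothetical patch target:

```python
import importlib
import re
from typing import Callable, Optional, Tuple


def get_function(name: str) -> Tuple[Optional[object], Optional[Callable]]:
    # "pkg.module.func" -> (module, function), or (None, None) when the
    # attribute does not exist in the installed version.
    spl = name.split(".")
    mod = importlib.import_module(".".join(spl[:-1]))
    fname = spl[-1]
    if not hasattr(mod, fname):
        return None, None
    return mod, getattr(mod, fname)


def patched_example(x):
    "[patch:math.tanh]"  # hypothetical target, only used for this sketch
    return x


reg = re.compile("[\\[]patch:([a-z_A-Z.]+)[\\]]")
target = reg.findall(patched_example.__doc__)[0]  # 'math.tanh'
print(get_function(target))                       # (<module 'math' ...>, <built-in function tanh>)
print(get_function("math.does_not_exist"))        # (None, None)
```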
@@ -420,7 +426,11 @@ def torch_export_patches(
             patch_transformers_list, verbose=verbose
         )
 
-        if
+        if (
+            masking_utils
+            and patch_transformers_list.patch_masking_utils
+            and hasattr(masking_utils, "_vmap_for_bhqkv")
+        ):
             if verbose:
                 print(
                     "[torch_export_patches] patches "
@@ -429,6 +439,27 @@ def torch_export_patches(
             f_transformers__vmap_for_bhqkv = masking_utils._vmap_for_bhqkv
             masking_utils._vmap_for_bhqkv = patch_transformers_list.patched__vmap_for_bhqkv
 
+        if (
+            masking_utils
+            and patch_transformers_list.patch_masking_utils
+            and hasattr(masking_utils, "eager_mask")
+        ):
+            if verbose:
+                print(
+                    "[torch_export_patches] patches "
+                    "transformers.masking_utils.eager_mask"
+                )
+            f_transformers_eager_mask = masking_utils.eager_mask
+            masking_utils.eager_mask = patch_transformers_list.patched_eager_mask
+            if (
+                "eager" in masking_utils.ALL_MASK_ATTENTION_FUNCTIONS
+                and masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"]
+                == f_transformers_eager_mask
+            ):
+                masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"] = (
+                    patch_transformers_list.patched_eager_mask
+                )
+
     if custom_patches:
         if verbose:
             print("[torch_export_patches] applies custom patches")
@@ -511,7 +542,7 @@ def torch_export_patches(
 
     if custom_patches:
         if verbose:
-            print("[torch_export_patches]
+            print("[torch_export_patches] unpatches custom patches")
         unpatch_module_or_classes(
             custom_patches, revert_custom_patches_info, verbose=verbose
         )
@@ -526,18 +557,43 @@ def torch_export_patches(
         except ImportError:
             masking_utils = None
         if verbose:
-            print("[torch_export_patches]
+            print("[torch_export_patches] unpatches transformers")
         unpatch_module_or_classes(
             patch_transformers_list, revert_patches_info, verbose=verbose
         )
 
-        if
+        if (
+            masking_utils
+            and patch_transformers_list.patch_masking_utils
+            and hasattr(masking_utils, "_vmap_for_bhqkv")
+        ):
+            masking_utils._vmap_for_bhqkv = f_transformers__vmap_for_bhqkv
             if verbose:
                 print(
-                    "[torch_export_patches]
+                    "[torch_export_patches] restored "
                     "transformers.masking_utils._vmap_for_bhqkv"
                 )
-
+
+        if (
+            masking_utils
+            and patch_transformers_list.patch_masking_utils
+            and hasattr(masking_utils, "eager_mask")
+        ):
+            f_transformers_eager_mask = masking_utils.eager_mask
+            masking_utils.eager_mask = f_transformers_eager_mask
+            if (
+                "eager" in masking_utils.ALL_MASK_ATTENTION_FUNCTIONS
+                and masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"]
+                == patch_transformers_list.patched_eager_mask
+            ):
+                masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["eager"] = (
+                    f_transformers_eager_mask
+                )
+            if verbose:
+                print(
+                    "[torch_export_patches] restored "
+                    "transformers.masking_utils.eager_mask"
+                )
 
     ########
     # caches