PyPI - onnx-diagnostic - Versions diffs - 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl - Mend

onnx-diagnostic 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py CHANGED Viewed

@@ -7,59 +7,107 @@ import torch
 import transformers
 from transformers.modeling_attn_mask_utils import AttentionMaskConverter
 from transformers.cache_utils import StaticCache, Cache, DynamicCache
+try:
+    import transformers.masking_utils
+    patch_masking_utils = True
+except ImportError:
+    patch_masking_utils = False
 from ...ext_test_case import has_transformers
 from ...helpers.torch_helper import is_torchdynamo_exporting
-def patched__vmap_for_bhqkv(mask_function: Callable, bh_indices: bool = True) -> Callable:
-    """manual patch for function ``transformers.masking_utils._vmap_for_bhqkv``."""
-    from ...helpers import string_type
-    dimensions: List[Tuple[Optional[int], ...]] = [
-        (None, None, None, 0),
-        (None, None, 0, None),
-    ]
-    if bh_indices:
-        dimensions.extend([(None, 0, None, None), (0, None, None, None)])
-    # reshape
-    dimensions = [tuple(1 if d is None else -1 for d in shape) for shape in dimensions]
-    dimensions = tuple(reversed(dimensions))
-    indices = tuple(shape.index(-1) for shape in dimensions)
-    # unsqueeze
-    udimensions = [tuple(di for di, d in enumerate(shape) if d == 1) for shape in dimensions]
-    def vector_mask_function(
-        *args, mask_function=mask_function, dimensions=dimensions, indices=indices
-    ):
-        assert len(args) == len(dimensions) == len(udimensions), (
-            f"Mismatch between args={string_type(args)} and dimensions={dimensions} "
-            f"and udimensions={udimensions}."
-        )
-        assert len(indices) == len(args), (
-            f"Mismatch between args={string_type(args)} and indices={indices}, "
-            f"they should have the same length."
+if patch_masking_utils:
+    # Introduced in 4.52
+    from transformers.masking_utils import causal_mask_function, sdpa_mask
+    def patched__vmap_for_bhqkv(mask_function: Callable, bh_indices: bool = True) -> Callable:
+        """manual patch for function ``transformers.masking_utils._vmap_for_bhqkv``."""
+        from ...helpers import string_type
+        dimensions: List[Tuple[Optional[int], ...]] = [
+            (None, None, None, 0),
+            (None, None, 0, None),
+        ]
+        if bh_indices:
+            dimensions.extend([(None, 0, None, None), (0, None, None, None)])
+        # reshape
+        dimensions = [tuple(1 if d is None else -1 for d in shape) for shape in dimensions]
+        dimensions = tuple(reversed(dimensions))
+        indices = tuple(shape.index(-1) for shape in dimensions)
+        # unsqueeze
+        udimensions = [
+            tuple(di for di, d in enumerate(shape) if d == 1) for shape in dimensions
+        ]
+        def vector_mask_function(
+            *args, mask_function=mask_function, dimensions=dimensions, indices=indices
+        ):
+            assert len(args) == len(dimensions) == len(udimensions), (
+                f"Mismatch between args={string_type(args)} and dimensions={dimensions} "
+                f"and udimensions={udimensions}."
+            )
+            assert len(indices) == len(args), (
+                f"Mismatch between args={string_type(args)} and indices={indices}, "
+                f"they should have the same length."
+            )
+            for a in args:
+                assert (
+                    a.ndim == 1
+                ), f"Expected a tensor with 1 dimension not {string_type(a, with_shape=True)}"
+                torch._check(a.shape[0] > 0)
+            new_args = [a.reshape(shape) for a, shape in zip(args, dimensions)]
+            # new_args = [
+            #    a.unsqueeze(dims[0]).unsqueeze(dims[1]).unsqueeze(dims[2])
+            #    for a, dims in zip(args, udimensions)
+            # ]
+            max_shape = tuple(args[i].shape[0] for i in indices)
+            # if is_torchdynamo_exporting():
+            #     for a in args:
+            #         # The exporter should export with a dimension > 1
+            #         # to make sure it is dynamic.
+            #         torch._check(a.shape[0] > 1)
+            expanded_args = [a.expand(max_shape) for a in new_args]
+            return mask_function(*expanded_args)
+        return vector_mask_function
+    def patched_eager_mask(
+        batch_size: int,
+        cache_position: torch.Tensor,
+        kv_length: int,
+        kv_offset: int = 0,
+        mask_function: Callable = causal_mask_function,
+        attention_mask: Optional[torch.Tensor] = None,
+        dtype: torch.dtype = torch.float32,
+        **kwargs,
+    ) -> torch.Tensor:
+        """manual patch for function ``transformers.masking_utils.eager_mask``."""
+        # The masks for eager attention are simply boolean mask from sdpa, casted to 0 and -inf
+        _ = kwargs.pop("allow_is_causal_skip", None)
+        mask = sdpa_mask(
+            batch_size=batch_size,
+            cache_position=cache_position,
+            kv_length=kv_length,
+            kv_offset=kv_offset,
+            mask_function=mask_function,
+            attention_mask=attention_mask,
+            allow_is_causal_skip=False,
+            allow_torch_fix=False,
+            **kwargs,
         )
-        for a in args:
-            assert (
-                a.ndim == 1
-            ), f"Expected a tensor with 1 dimension not {string_type(a, with_shape=True)}"
-            torch._check(a.shape[0] > 0)
-        new_args = [a.reshape(shape) for a, shape in zip(args, dimensions)]
-        # new_args = [
-        #    a.unsqueeze(dims[0]).unsqueeze(dims[1]).unsqueeze(dims[2])
-        #    for a, dims in zip(args, udimensions)
-        # ]
-        max_shape = tuple(args[i].shape[0] for i in indices)
-        # if is_torchdynamo_exporting():
-        #     for a in args:
-        #         # The exporter should export with a dimension > 1 to make sure it is dynamic.
-        #         torch._check(a.shape[0] > 1)
-        expanded_args = [a.expand(max_shape) for a in new_args]
-        return mask_function(*expanded_args)
-    return vector_mask_function
+        min_dtype = torch.finfo(dtype).min
+        # The patched line.
+        # we need 0s where the tokens should be taken into account,
+        # and -inf otherwise (mask is already of boolean type)
+        # mask =
+        #   torch.where(mask, torch.tensor(0.0, device=mask.device, dtype=dtype), min_dtype)
+        mask = (~mask).to(dtype) * min_dtype
+        return mask
 def _patch_make_causal_mask(
@@ -207,7 +255,8 @@ class patched_DynamicCache:
         """
         # Update the number of seen tokens
         if layer_idx == 0:
-            self._seen_tokens += key_states.shape[-2]
+            if hasattr(self, "_seen_tokens"):
+                self._seen_tokens += key_states.shape[-2]
         # Update the cache
         if key_states is not None:
@@ -246,7 +295,8 @@ class patched_DynamicCache:
         if self.get_seq_length() <= max_length:
             return
-        self._seen_tokens = max_length
+        if hasattr(self, "_seen_tokens"):
+            self._seen_tokens = max_length
         for idx in range(len(self.key_cache)):
             if self.key_cache[idx].numel():
                 self.key_cache[idx] = self.key_cache[idx][..., :max_length, :]
@@ -814,6 +864,91 @@ def patched_dynamic_rope_update(rope_forward):
     return wrapper
+def common_eager_attention_forward(
+    module: torch.nn.Module,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attention_mask: Optional[torch.Tensor],
+    scaling: Optional[float] = None,
+    dropout: float = 0.0,
+    head_mask: Optional[torch.Tensor] = None,
+    **kwargs,
+):
+    if scaling is None:
+        scaling = query.size(-1) ** -0.5
+    attn_weights = torch.matmul(query, key.transpose(2, 3)) * scaling
+    if attention_mask is not None:
+        # The two following lines were added.
+        if attention_mask is not None and attention_mask.ndim == 4:
+            attention_mask = attention_mask[:, :, :, : key.shape[-2]]
+        attn_weights = attn_weights + attention_mask
+    attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
+    if head_mask is not None:
+        attn_weights = attn_weights * head_mask.view(1, -1, 1, 1)
+    attn_weights = torch.nn.functional.dropout(
+        attn_weights, p=dropout, training=module.training
+    )
+    attn_output = torch.matmul(attn_weights, value)
+    attn_output = attn_output.transpose(1, 2).contiguous()
+    return attn_output, attn_weights
+def patched_model_bart_eager_attention_forward(
+    module: torch.nn.Module,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attention_mask: Optional[torch.Tensor],
+    scaling: Optional[float] = None,
+    dropout: float = 0.0,
+    head_mask: Optional[torch.Tensor] = None,
+    **kwargs,
+):
+    """[patch:transformers.models.bart.modeling_bart.eager_attention_forward]"""
+    return common_eager_attention_forward(
+        module,
+        query,
+        key,
+        value,
+        attention_mask=attention_mask,
+        scaling=scaling,
+        dropout=dropout,
+        head_mask=head_mask,
+        **kwargs,
+    )
+def patched_modeling_marian_eager_attention_forward(
+    module: torch.nn.Module,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attention_mask: Optional[torch.Tensor],
+    scaling: Optional[float] = None,
+    dropout: float = 0.0,
+    head_mask: Optional[torch.Tensor] = None,
+    **kwargs,
+):
+    """[patch:transformers.models.marian.modeling_marian.eager_attention_forward]"""
+    return common_eager_attention_forward(
+        module,
+        query,
+        key,
+        value,
+        attention_mask=attention_mask,
+        scaling=scaling,
+        dropout=dropout,
+        head_mask=head_mask,
+        **kwargs,
+    )
 class common_RotaryEmbedding(torch.nn.Module):
     @torch.no_grad()
     @patched_dynamic_rope_update
@@ -1045,4 +1180,6 @@ class patched_IdeficsAttention(torch.nn.Module):
         if output_attentions:
             attn_weights = None
-        return attn_output, attn_weights, past_key_value
+        if pv.Version(transformers.__version__) < pv.Version("4.53.99"):
+            return attn_output, attn_weights, past_key_value
+        return attn_output, attn_weights

onnx_diagnostic/torch_models/hghub/model_inputs.py CHANGED Viewed

@@ -26,7 +26,7 @@ def get_untrained_model_with_inputs(
     use_pretrained: bool = False,
     same_as_pretrained: bool = False,
     use_preinstalled: bool = True,
-    add_second_input: bool = False,
+    add_second_input: int = 1,
     subfolder: Optional[str] = None,
     use_only_preinstalled: bool = False,
 ) -> Dict[str, Any]:
@@ -144,6 +144,11 @@ def get_untrained_model_with_inputs(
                 f"[get_untrained_model_with_inputs] config._attn_implementation="
                 f"{config._attn_implementation!r}"  # type: ignore[union-attr]
             )
+    elif verbose:
+        print(
+            f"[get_untrained_model_with_inputs] default config._attn_implementation="
+            f"{getattr(config, '_attn_implementation', '?')!r}"  # type: ignore[union-attr]
+        )
     if type(config) is dict and "_diffusers_version" in config:
         import diffusers

onnx_diagnostic/torch_models/validate.py CHANGED Viewed

@@ -18,7 +18,6 @@ from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 from ..tasks import random_input_kwargs
 from ..torch_export_patches import torch_export_patches
 from ..torch_export_patches.patch_inputs import use_dyn_not_str
-from ..reference import TorchOnnxEvaluator
 from .hghub import get_untrained_model_with_inputs
@@ -157,6 +156,12 @@ def version_summary() -> Dict[str, Union[int, float, str]]:
         "version_torch": torch.__version__,
         "version_numpy": numpy.__version__,
     }
+    try:
+        import scipy
+        summary["version_scipy"] = getattr(scipy, "__version__", "?")
+    except ImportError:
+        pass
     try:
         import transformers
@@ -181,6 +186,12 @@ def version_summary() -> Dict[str, Union[int, float, str]]:
         summary["version_onnxruntime"] = getattr(onnxruntime, "__version__", "?")
     except ImportError:
         pass
+    try:
+        import onnx_ir
+        summary["version_onnx_ir"] = getattr(onnx_ir, "__version__", "?")
+    except ImportError:
+        pass
     import onnx_diagnostic
     summary["version_onnx_diagnostic"] = onnx_diagnostic.__version__
@@ -276,7 +287,8 @@ def validate_model(
     runtime: str = "onnxruntime",
     repeat: int = 1,
     warmup: int = 0,
-    inputs2: bool = True,
+    inputs2: int = 1,
+    output_names: Optional[List[str]] = None,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Validates a model.
@@ -325,7 +337,9 @@ def validate_model(
     :param repeat: number of time to measure the model
     :param warmup: warmup the model first
     :param inputs2: checks that the second set of inputs is reunning as well,
-        this ensures that the model does support dynamism
+        this ensures that the model does support dynamism, the value is used
+        as an increment to the first set of values (added to dimensions)
+    :param output_names: output names the onnx exporter should use
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
@@ -421,6 +435,7 @@ def validate_model(
         )
         print(f"[validate_model] exporter={exporter!r}, optimization={optimization!r}")
         print(f"[validate_model] dump_folder={dump_folder!r}")
+        print(f"[validate_model] output_names={output_names}")
         summary["model_id"] = model_id
         summary["model_subfolder"] = subfolder or ""
@@ -619,6 +634,7 @@ def validate_model(
                     optimization=optimization,
                     do_run=do_run,
                     dump_folder=dump_folder,
+                    output_names=output_names,
                 )
         else:
             data["inputs_export"] = data["inputs"]
@@ -631,6 +647,7 @@ def validate_model(
                 optimization=optimization,
                 do_run=do_run,
                 dump_folder=dump_folder,
+                output_names=output_names,
             )
         summary.update(summary_export)
@@ -856,6 +873,7 @@ def call_exporter(
     optimization: Optional[str] = None,
     do_run: bool = False,
     dump_folder: Optional[str] = None,
+    output_names: Optional[List[str]] = None,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Calls an exporter on a model;
@@ -868,6 +886,7 @@ def call_exporter(
     :param optimization: optimization to do
     :param do_run: runs and compute discrepancies
     :param dump_folder: to dump additional information
+    :param output_names: list of output names to use with the onnx exporter
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -890,6 +909,7 @@ def call_exporter(
             quiet=quiet,
             verbose=verbose,
             optimization=optimization,
+            output_names=output_names,
         )
         return summary, data
     if exporter == "custom" or exporter.startswith("custom"):
@@ -901,6 +921,7 @@ def call_exporter(
             verbose=verbose,
             optimization=optimization,
             dump_folder=dump_folder,
+            output_names=output_names,
         )
         return summary, data
     if exporter == "modelbuilder":
@@ -911,6 +932,7 @@ def call_exporter(
             quiet=quiet,
             verbose=verbose,
             optimization=optimization,
+            output_names=output_names,
         )
         return summary, data
     raise NotImplementedError(
@@ -1054,7 +1076,7 @@ def validate_onnx_model(
     runtime: str = "onnxruntime",
     repeat: int = 1,
     warmup: int = 0,
-    inputs2: bool = True,
+    inputs2: int = 1,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Verifies that an onnx model produces the same
@@ -1070,14 +1092,15 @@ def validate_onnx_model(
     :param runtime: onnx runtime to use, onnxruntime or torch
     :param repeat: run that number of times the model
     :param warmup: warmup the model
-    :param inputs: to validate the model on the second input set
-        to make sure the exported model supports dynamism
+    :param inputs2: to validate the model on the second input set
+        to make sure the exported model supports dynamism, the value is
+        used as an increment added to the first set of inputs (added to dimensions)
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
     import onnxruntime
-    def _mk(key):
+    def _mk(key, flavour=flavour):
         return f"{key}_{flavour}" if flavour else key
     summary: Dict[str, Any] = {}
@@ -1113,6 +1136,9 @@ def validate_onnx_model(
             f"{providers}..., flavour={flavour!r}"
         )
+    if runtime != "onnxruntime":
+        from ..reference import TorchOnnxEvaluator
     cls_runtime = (
         (
             lambda model, providers: onnxruntime.InferenceSession(
@@ -1122,14 +1148,14 @@ def validate_onnx_model(
         )
         if runtime == "onnxruntime"
         else (
-            lambda model, providers: TorchOnnxEvaluator(
+            lambda model, providers, _cls_=TorchOnnxEvaluator: _cls_(  # type: ignore[misc]
                 model, providers=providers, verbose=max(verbose - 1, 0)
             )
         )
     )
     sess = _quiet_or_not_quiet(
         quiet,
-        _mk("onnx_ort_create"),
+        _mk("create_onnx_ort"),
         summary,
         data,
         (lambda source=source, providers=providers: cls_runtime(source, providers)),
@@ -1164,7 +1190,7 @@ def validate_onnx_model(
         got = _quiet_or_not_quiet(
             quiet,
-            _mk(f"time_onnx_ort_run{suffix}"),
+            _mk(f"run_onnx_ort{suffix}"),
             summary,
             data,
             (lambda sess=sess, feeds=feeds: sess.run(None, feeds)),
@@ -1195,6 +1221,7 @@ def call_torch_export_onnx(
     quiet: bool = False,
     verbose: int = 0,
     optimization: Optional[str] = None,
+    output_names: Optional[List[str]] = None,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Exports a model into onnx.
@@ -1206,6 +1233,7 @@ def call_torch_export_onnx(
     :param quiet: catch exception or not
     :param verbose: verbosity
     :param optimization: optimization to do
+    :param output_names: output names to use
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -1260,6 +1288,8 @@ def call_torch_export_onnx(
             print("[call_torch_export_onnx] dynamo=False so...")
             print(f"[call_torch_export_onnx] args={string_type(args, with_shape=True)}")
             print(f"[call_torch_export_onnx] kwargs={string_type(kwargs, with_shape=True)}")
+    if output_names:
+        export_export_kwargs["output_names"] = output_names
     if opset:
         export_export_kwargs["opset_version"] = opset
     if verbose:
@@ -1330,6 +1360,7 @@ def call_torch_export_model_builder(
     quiet: bool = False,
     verbose: int = 0,
     optimization: Optional[str] = None,
+    output_names: Optional[List[str]] = None,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Exports a model into onnx with :epkg:`ModelBuilder`.
@@ -1340,6 +1371,7 @@ def call_torch_export_model_builder(
     :param quiet: catch exception or not
     :param verbose: verbosity
     :param optimization: optimization to do
+    :param output_names: list of output names to use
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -1353,6 +1385,9 @@ def call_torch_export_model_builder(
     provider = data.get("model_device", "cpu")
     dump_folder = data.get("model_dump_folder", "")
     assert dump_folder, "dump_folder cannot be empty with ModelBuilder"
+    assert (
+        not output_names
+    ), f"output_names not empty, not supported yet, output_names={output_names}"
     cache_dir = os.path.join(dump_folder, "cache_mb")
     if not os.path.exists(cache_dir):
         os.makedirs(cache_dir)
@@ -1392,6 +1427,7 @@ def call_torch_export_custom(
     verbose: int = 0,
     optimization: Optional[str] = None,
     dump_folder: Optional[str] = None,
+    output_names: Optional[List[str]] = None,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Exports a model into onnx.
@@ -1404,6 +1440,7 @@ def call_torch_export_custom(
     :param verbose: verbosity
     :param optimization: optimization to do
     :param dump_folder: to store additional information
+    :param output_names: list of output names to use
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -1488,6 +1525,8 @@ def call_torch_export_custom(
     )
     if opset:
         kws["target_opset"] = opset
+    if output_names:
+        kws["output_names"] = output_names
     epo, opt_stats = _quiet_or_not_quiet(
         quiet,

{onnx_diagnostic-0.7.3.dist-info → onnx_diagnostic-0.7.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: onnx-diagnostic
-Version: 0.7.3
+Version: 0.7.5
 Summary: Investigate ONNX models
 Home-page: https://github.com/sdpython/onnx-diagnostic
 Author: Xavier Dupré

onnx-diagnostic 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

onnx-diagnostic 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl