onnx-diagnostic 0.7.16__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +78 -22
- onnx_diagnostic/export/api.py +124 -0
- onnx_diagnostic/export/dynamic_shapes.py +2 -1
- onnx_diagnostic/export/shape_helper.py +47 -70
- onnx_diagnostic/ext_test_case.py +11 -0
- onnx_diagnostic/helpers/cache_helper.py +38 -7
- onnx_diagnostic/helpers/fake_tensor_helper.py +224 -104
- onnx_diagnostic/helpers/helper.py +27 -33
- onnx_diagnostic/helpers/log_helper.py +109 -5
- onnx_diagnostic/helpers/memory_peak.py +2 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +1 -1
- onnx_diagnostic/helpers/model_builder_helper.py +132 -2
- onnx_diagnostic/helpers/onnx_helper.py +1 -1
- onnx_diagnostic/helpers/ort_session.py +4 -0
- onnx_diagnostic/helpers/rt_helper.py +393 -43
- onnx_diagnostic/helpers/torch_helper.py +20 -1
- onnx_diagnostic/tasks/__init__.py +7 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +2 -8
- onnx_diagnostic/tasks/feature_extraction.py +2 -8
- onnx_diagnostic/tasks/image_text_to_text.py +10 -8
- onnx_diagnostic/tasks/summarization.py +2 -8
- onnx_diagnostic/tasks/text2text_generation.py +3 -8
- onnx_diagnostic/tasks/text_generation.py +86 -65
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +718 -438
- onnx_diagnostic/torch_export_patches/patch_details.py +340 -0
- onnx_diagnostic/torch_export_patches/patch_inputs.py +1 -1
- onnx_diagnostic/torch_export_patches/patch_module.py +9 -36
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +12 -6
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +162 -24
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +140 -104
- onnx_diagnostic/torch_models/untrained/llm_phi2.py +1 -4
- onnx_diagnostic/torch_models/validate.py +626 -228
- {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/RECORD +38 -36
- {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/top_level.txt +0 -0
--- onnx_diagnostic/torch_models/validate.py
+++ onnx_diagnostic/torch_models/validate.py
@@ -1,22 +1,25 @@
-import gc
 import datetime
+import gc
 import inspect
 import os
 import pprint
 import sys
-from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
 import time
+from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 import onnx
 import torch
 from ..export import CoupleInputsDynamicShapes
 from ..helpers import max_diff, string_type, string_diff
 from ..helpers.helper import flatten_object
-from ..helpers.rt_helper import make_feeds
+from ..helpers.rt_helper import make_feeds
 from ..helpers.torch_helper import to_any, torch_deepcopy
 from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 from ..tasks import random_input_kwargs
-from ..torch_export_patches import
+from ..torch_export_patches import (
+    torch_export_patches,
+    register_additional_serialization_functions,
+)
 from ..torch_export_patches.patch_inputs import use_dyn_not_str
 from .hghub import get_untrained_model_with_inputs
 from .hghub.model_inputs import _preprocess_model_id
@@ -270,8 +273,8 @@ def _quiet_or_not_quiet(
     summary[f"time_{suffix}_latency_std"] = a.std()
     summary[f"time_{suffix}_latency_min"] = a.min()
     summary[f"time_{suffix}_latency_max"] = a.max()
-    summary[f"time_{suffix}_latency_098"] = a[-i2]
-    summary[f"time_{suffix}_latency_095"] = a[-i5]
+    summary[f"time_{suffix}_latency_098"] = a[-(max(i2, 1))]
+    summary[f"time_{suffix}_latency_095"] = a[-max(i5, 1)]
     summary[f"time_{suffix}_latency_005"] = a[i5]
     summary[f"time_{suffix}_latency_002"] = a[i2]
     summary[f"time_{suffix}_n"] = len(a)
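The `max(i2, 1)` / `max(i5, 1)` guard above matters when few latency measurements are available. A minimal sketch, assuming `a` is the sorted latency array and `i2` is the 2% tail index as in the surrounding code (both names taken from the hunk, the computation of `i2` is an assumption):

```python
import numpy as np

a = np.sort(np.random.rand(10))  # 10 sorted latency measurements (hypothetical data)
i2 = int(len(a) * 0.02)          # == 0 for small samples
print(a[-i2])                    # a[-0] is a[0]: the fastest run, not an upper percentile
print(a[-max(i2, 1)])            # a[-1]: the slowest run, a sensible 98th-percentile proxy
```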
@@ -320,125 +323,33 @@ def make_patch_kwargs(
     return patch_kwargs


-def validate_model(
-    model_id: str,
-    task: Optional[str] = None,
-    do_run: bool = False,
-    exporter: Optional[str] = None,
-    do_same: bool = False,
-    verbose: int = 0,
-    dtype: Optional[Union[str, torch.dtype]] = None,
-    device: Optional[Union[str, torch.device]] = None,
-    same_as_pretrained: bool = False,
-    use_pretrained: bool = False,
-    optimization: Optional[str] = None,
-    quiet: bool = False,
-    patch: Union[bool, str, Dict[str, bool]] = False,
-    rewrite: bool = False,
-    stop_if_static: int = 1,
-    dump_folder: Optional[str] = None,
-    drop_inputs: Optional[List[str]] = None,
-    ortfusiontype: Optional[str] = None,
-    input_options: Optional[Dict[str, Any]] = None,
-    model_options: Optional[Dict[str, Any]] = None,
-    subfolder: Optional[str] = None,
-    opset: Optional[int] = None,
-    runtime: str = "onnxruntime",
-    repeat: int = 1,
-    warmup: int = 0,
-    inputs2: int = 1,
-    output_names: Optional[List[str]] = None,
-    ort_logs: bool = False,
-    quiet_input_sets: Optional[Set[str]] = None,
-) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
-    """
-    Validates a model.
-    The function can also be called through the command line
-    :ref:`l-cmd-validate`.
-
-    :param model_id: model id to validate
-    :param task: task used to generate the necessary inputs,
-        can be left empty to use the default task for this model
-        if it can be determined
-    :param do_run: checks the model works with the defined inputs
-    :param exporter: exporter the model using this exporter,
-        available list: ``export-strict``, ``export-nostrict``, ...
-        see below
-    :param do_same: checks the discrepancies of the exported model
-    :param verbose: verbosity level
-    :param dtype: uses this dtype to check the model
-    :param device: do the verification on this device
-    :param same_as_pretrained: use a model equivalent to the trained,
-        this is not always possible
-    :param use_pretrained: use the trained model, not the untrained one
-    :param optimization: optimization to apply to the exported model,
-        depend on the the exporter
-    :param quiet: if quiet, catches exception if any issue
-    :param patch: applies patches (``patch_transformers=True, path_diffusers=True``)
-        if True before exporting
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`,
-        a string can be used to specify only one of them
-    :param rewrite: applies known rewriting (``patch_transformers=True``) before exporting,
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
-    :param stop_if_static: stops if a dynamic dimension becomes static,
-        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
-    :param dump_folder: dumps everything in a subfolder of this one
-    :param drop_inputs: drops this list of inputs (given their names)
-    :param ortfusiontype: runs ort fusion, the parameters defines the fusion type,
-        it accepts multiple values separated by ``|``,
-        see :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`
-    :param input_options: additional options to define the dummy inputs
-        used to export
-    :param model_options: additional options when creating the model such as
-        ``num_hidden_layers`` or ``attn_implementation``
-    :param subfolder: version or subfolders to uses when retrieving a model id
-    :param opset: onnx opset to use for the conversion
-    :param runtime: onnx runtime to use to check about discrepancies,
-        possible values ``onnxruntime``, ``torch``, ``orteval``,
-        ``orteval10``, ``ref`` only if `do_run` is true
-    :param repeat: number of time to measure the model
-    :param warmup: warmup the model first
-    :param inputs2: checks that other sets of inputs are running as well,
-        this ensures that the model does support dynamism, the value is used
-        as an increment to the first set of values (added to dimensions),
-        or an empty cache for example
-    :param output_names: output names the onnx exporter should use
-    :param ort_logs: increases onnxruntime verbosity when creating the session
-    :param quiet_input_sets: avoid raising an exception if the inputs belongs to that set
-        even if quiet is False
-    :return: two dictionaries, one with some metrics,
-        another one with whatever the function produces
-
-    The following environment variables can be used to print out some
-    information:
-
-    * ``PRINT_CONFIG``: prints the model configuration
-
-    The following exporters are available:
-
-    * ``export-nostrict``: run :func:`torch.export.export` (..., strict=False)
-    * ``onnx-dynamo``: run :func:`torch.onnx.export` (...),
-      models can be optimized with ``optimization`` in ``("ir", "os_ort")``
-    * ``modelbuilder``: use :epkg:`ModelBuilder` to builds the onnx model
-    * ``custom``: custom exporter (see :epkg:`experimental-experiment`),
-      models can be optimized with ``optimization`` in
-      ``("default", "default+onnxruntime", "default+os_ort", "default+onnxruntime+os_ort")``
-
-    The default runtime, :epkg:`onnxruntime` is used to validate a model and check the
-    exported model returns the same outputs as the original one, otherwise,
-    :class:`onnx_diagnostic.reference.TorchOnnxEvaluator`
-    if ``runtime == 'torch'`` or
-    :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`
-    if ``runtime == 'orteval'`` or
-    :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
-    if ``runtime == 'ref'``,
-    ``orteval10`` increases the verbosity.
-
-    .. versionchanged:: 0.7.13
-        *inputs2* not only means a second set of inputs but many
-        such as ``input_empty_cache``
-        which refers to a set of inputs using an empty cache.
-    """
+def _prepare_validation(
+    model_id,
+    subfolder,
+    same_as_pretrained,
+    use_pretrained,
+    patch,
+    rewrite,
+    do_run,
+    dtype,
+    device,
+    optimization,
+    quiet,
+    drop_inputs,
+    ortfusiontype,
+    stop_if_static,
+    exporter,
+    runtime,
+    inputs2,
+    input_options,
+    model_options,
+    exporter_options,
+    opset,
+    task,
+    verbose,
+    output_names,
+    dump_folder,
+):
     main_validation_begin = time.perf_counter()
     model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
         model_id,
@@ -470,6 +381,10 @@ def validate_model(
             version_exporter=exporter or "",
             version_runtime=runtime,
             version_inputs2=inputs2,
+            version_input_options=str(input_options),
+            version_drop_input=str(drop_inputs),
+            version_model_options=str(model_options),
+            version_exporter_options=str(exporter_options),
             time_preprocess_model_id=time_preprocess_model_id,
         )
     )
@@ -520,6 +435,32 @@ def validate_model(
     summary["model_id"] = model_id
     summary["model_subfolder"] = subfolder or ""

+    return (
+        summary,
+        model_id,
+        subfolder,
+        same_as_pretrained,
+        use_pretrained,
+        dump_folder,
+        folder_name,
+        patch_kwargs,
+    )
+
+
+def _get_untrained_model_with_inputs(
+    summary,
+    model_id,
+    verbose,
+    task,
+    use_pretrained,
+    same_as_pretrained,
+    input_options,
+    model_options,
+    subfolder,
+    inputs2,
+    quiet,
+    dump_folder,
+):
     iop = input_options or {}
     mop = model_options or {}
     data = _quiet_or_not_quiet(
@@ -544,8 +485,6 @@ def validate_model(
         ),
     )

-    second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
-
     if dump_folder:
         with open(os.path.join(dump_folder, "model_config.txt"), "w") as f:
             f.write(f"model_id: {model_id}\n------\n")
@@ -562,30 +501,45 @@ def validate_model(
             f.write(f"model_id: {model_id}\n------\n")
             f.write(pprint.pformat(dump_info))

-
-
-    # Let's change that.
-    for k in ["inputs", "inputs2"]:
-        if k not in data:
-            continue
-        if verbose:
-            print(f"[validate_model] set batch=1 for data[{k!r}]")
-            print(f"[validate_model] batch=1 === {string_type(data[k], with_shape=True)}")
-        cpl = CoupleInputsDynamicShapes(
-            tuple(), data[k], dynamic_shapes=data["dynamic_shapes"]
-        )
-        if patch_kwargs.get("patch", False):
-            with torch_export_patches(**patch_kwargs):  # type: ignore[arg-type]
-                data[k] = cpl.change_dynamic_dimensions(
-                    desired_values=dict(batch=1), only_desired=True
-                )
-        else:
-            data[k] = cpl.change_dynamic_dimensions(
-                desired_values=dict(batch=1), only_desired=True
-            )
-        if verbose:
-            print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}")
+    return data, iop, mop
+

+def _update_data_for_modelbuilder(data, verbose):
+    # Models used with ModelBuilder do not like batch size > 1.
+    # Let's change that.
+    for k in ["inputs", "inputs2"]:
+        if k not in data:
+            continue
+        if verbose:
+            print(f"[validate_model] set batch=1 for data[{k!r}]")
+            print(f"[validate_model] batch=1 === {string_type(data[k], with_shape=True)}")
+        cpl = CoupleInputsDynamicShapes(
+            tuple(), data[k], dynamic_shapes=data["dynamic_shapes"]
+        )
+        with register_additional_serialization_functions(patch_transformers=True):  # type: ignore[arg-type]
+            data[k] = cpl.change_dynamic_dimensions(
+                desired_values=dict(batch=1), only_desired=True
+            )
+        if verbose:
+            print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}")
+
+
+def _update_inputs_outputs(
+    data,
+    summary,
+    exporter,
+    iop,
+    mop,
+    dump_folder,
+    opset,
+    device,
+    dtype,
+    rewrite,
+    drop_inputs,
+    verbose,
+    second_input_keys,
+    model_id,
+):
     # modelbuilder needs different treatments sometimes, so
     # we mark it for later usage.
     # for example, it has different past_kv ordering than
@@ -672,7 +626,7 @@ def validate_model(
     for k in ["task", "size", "n_weights"]:
         summary[f"model_{k.replace('_','')}"] = data[k]
     summary["second_input_keys"] = ",".join(second_input_keys)
-    summary["model_inputs_options"] = str(
+    summary["model_inputs_options"] = str(iop or "")
     summary["model_inputs"] = string_type(data["inputs"], with_shape=True)
     summary["model_shapes"] = string_type(data["dynamic_shapes"])
     summary["model_class"] = data["model"].__class__.__name__
@@ -689,6 +643,8 @@ def validate_model(
     ).replace(" ", "")
     summary["model_id"] = model_id

+
+def _verbose_validate(data, second_input_keys, verbose):
     if verbose:
         print("[validate_model] --")
         print(f"[validate_model] task={data['task']}")
@@ -701,33 +657,30 @@ def validate_model(
         print(f"[validate_model] second_input_keys={second_input_keys}")
         print("[validate_model] --")

-    if do_run:
-        validation_begin = time.perf_counter()
-
-        _validate_do_run_model(
-            data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet
-        )
-        if second_input_keys:
-            for k in second_input_keys:
-                _validate_do_run_model(
-                    data,
-                    summary,
-                    k,
-                    f"run2{k[6:]}",
-                    f"run_expected2{k[6:]}",
-                    verbose,
-                    1,
-                    0,
-                    quiet,
-                )
-
-        summary["time_total_validation_torch"] = time.perf_counter() - validation_begin

+def _call_exporter(
+    data,
+    summary,
+    exporter,
+    patch_kwargs,
+    stop_if_static,
+    verbose,
+    dump_folder,
+    quiet,
+    optimization,
+    do_run,
+    output_names,
+    exporter_options,
+):
     if exporter:
-
-
-
-
+        expop = exporter_options or {}
+        if verbose:
+            print(
+                f"[validate_model] -- export the model with {exporter!r}, "
+                f"optimization={optimization!r}"
+            )
+            if expop:
+                print(f"[validate_model] -- exporter options {expop}")
         exporter_begin = time.perf_counter()
         if patch_kwargs:
             if verbose:
@@ -757,6 +710,7 @@ def validate_model(
                     do_run=do_run,
                     dump_folder=dump_folder,
                     output_names=output_names,
+                    exporter_options=expop,
                 )
         else:
             data["inputs_export"] = data["inputs"]
@@ -770,11 +724,14 @@ def validate_model(
                 do_run=do_run,
                 dump_folder=dump_folder,
                 output_names=output_names,
+                exporter_options=expop,
             )

        summary.update(summary_export)
        summary["time_total_exporter"] = time.perf_counter() - exporter_begin

+
+def _dump_onnx_model(data, summary, dump_folder, verbose, exporter, folder_name):
     dump_stats = None
     if dump_folder:
         if "exported_program" in data:
@@ -839,26 +796,392 @@ def validate_model(
         ):
             if verbose:
                 print("[validate_model] -- done (final)")
-
-
-
-
+            return False, dump_stats
+    return True, dump_stats
+
+
+def validate_model(
+    model_id: str,
+    task: Optional[str] = None,
+    do_run: bool = False,
+    exporter: Optional[str] = None,
+    do_same: bool = False,
+    verbose: int = 0,
+    dtype: Optional[Union[str, torch.dtype]] = None,
+    device: Optional[Union[str, torch.device]] = None,
+    same_as_pretrained: bool = False,
+    use_pretrained: bool = False,
+    optimization: Optional[str] = None,
+    quiet: bool = False,
+    patch: Union[bool, str, Dict[str, bool]] = False,
+    rewrite: bool = False,
+    stop_if_static: int = 1,
+    dump_folder: Optional[str] = None,
+    drop_inputs: Optional[List[str]] = None,
+    ortfusiontype: Optional[str] = None,
+    input_options: Optional[Dict[str, Any]] = None,
+    model_options: Optional[Dict[str, Any]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
+    subfolder: Optional[str] = None,
+    opset: Optional[int] = None,
+    runtime: str = "onnxruntime",
+    repeat: int = 1,
+    warmup: int = 0,
+    inputs2: int = 1,
+    output_names: Optional[List[str]] = None,
+    ort_logs: bool = False,
+    quiet_input_sets: Optional[Set[str]] = None,
+) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
+    """
+    Validates a model.
+    The function can also be called through the command line
+    :ref:`l-cmd-validate`.
+
+    :param model_id: model id to validate
+    :param task: task used to generate the necessary inputs,
+        can be left empty to use the default task for this model
+        if it can be determined
+    :param do_run: checks the model works with the defined inputs
+    :param exporter: exporter the model using this exporter,
+        available list: ``export-strict``, ``export-nostrict``, ...
+        see below
+    :param do_same: checks the discrepancies of the exported model
+    :param verbose: verbosity level
+    :param dtype: uses this dtype to check the model
+    :param device: do the verification on this device
+    :param same_as_pretrained: use a model equivalent to the trained,
+        this is not always possible
+    :param use_pretrained: use the trained model, not the untrained one
+    :param optimization: optimization to apply to the exported model,
+        depend on the the exporter
+    :param quiet: if quiet, catches exception if any issue
+    :param patch: applies patches (``patch_transformers=True, path_diffusers=True``)
+        if True before exporting
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`,
+        a string can be used to specify only one of them
+    :param rewrite: applies known rewriting (``patch_transformers=True``) before exporting,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
+    :param stop_if_static: stops if a dynamic dimension becomes static,
+        see :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`
+    :param dump_folder: dumps everything in a subfolder of this one
+    :param drop_inputs: drops this list of inputs (given their names)
+    :param ortfusiontype: runs ort fusion, the parameters defines the fusion type,
+        it accepts multiple values separated by ``|``,
+        see :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`
+    :param input_options: additional options to define the dummy inputs
+        used to export
+    :param model_options: additional options when creating the model such as
+        ``num_hidden_layers`` or ``attn_implementation``
+    :param exporter_options: additional options when exporting the model such as
+        ``report=True`` or ``verify=True``
+    :param subfolder: version or subfolders to uses when retrieving a model id
+    :param opset: onnx opset to use for the conversion
+    :param runtime: onnx runtime to use to check about discrepancies,
+        possible values ``onnxruntime``, ``torch``, ``orteval``,
+        ``orteval10``, ``ref`` only if `do_run` is true
+    :param repeat: number of time to measure the model
+    :param warmup: warmup the model first
+    :param inputs2: checks that other sets of inputs are running as well,
+        this ensures that the model does support dynamism, the value is used
+        as an increment to the first set of values (added to dimensions),
+        or an empty cache for example
+    :param output_names: output names the onnx exporter should use
+    :param ort_logs: increases onnxruntime verbosity when creating the session
+    :param quiet_input_sets: avoid raising an exception if the inputs belongs to that set
+        even if quiet is False
+    :return: two dictionaries, one with some metrics,
+        another one with whatever the function produces
+
+    The following environment variables can be used to print out some
+    information:
+
+    * ``PRINT_CONFIG``: prints the model configuration
+
+    The following exporters are available:
+
+    * ``export-nostrict``: run :func:`torch.export.export` (..., strict=False)
+    * ``onnx-dynamo``: run :func:`torch.onnx.export` (...),
+      models can be optimized with ``optimization`` in ``("ir", "os_ort")``
+    * ``modelbuilder``: use :epkg:`ModelBuilder` to builds the onnx model
+    * ``custom``: custom exporter (see :epkg:`experimental-experiment`),
+      models can be optimized with ``optimization`` in
+      ``("default", "default+onnxruntime", "default+os_ort", "default+onnxruntime+os_ort")``
+
+    The default runtime, :epkg:`onnxruntime` is used to validate a model and check the
+    exported model returns the same outputs as the original one, otherwise,
+    :class:`onnx_diagnostic.reference.TorchOnnxEvaluator`
+    if ``runtime == 'torch'`` or
+    :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`
+    if ``runtime == 'orteval'`` or
+    :class:`onnx_diagnostic.reference.ExtendedReferenceEvaluator`
+    if ``runtime == 'ref'``,
+    ``orteval10`` increases the verbosity.
+
+    .. versionchanged:: 0.7.13
+        *inputs2* not only means a second set of inputs but many
+        such as ``input_empty_cache``
+        which refers to a set of inputs using an empty cache.
+    """
+    main_validation_begin = time.perf_counter()
+    cont, summary, data, dump_stats, second_input_keys = _validate_model_step1(
+        model_id=model_id,
+        do_same=do_same,
+        do_run=do_run,
+        patch=patch,
+        rewrite=rewrite,
+        dtype=dtype,
+        device=device,
+        optimization=optimization,
+        quiet=quiet,
+        drop_inputs=drop_inputs,
+        ortfusiontype=ortfusiontype,
+        stop_if_static=stop_if_static,
+        exporter=exporter,
+        verbose=verbose,
+        task=task,
+        runtime=runtime,
+        inputs2=inputs2,
+        input_options=input_options,
+        model_options=model_options,
+        exporter_options=exporter_options,
+        opset=opset,
+        output_names=output_names,
+        repeat=repeat,
+        warmup=warmup,
+        dump_folder=dump_folder,
+        subfolder=subfolder,
+        use_pretrained=use_pretrained,
+        same_as_pretrained=same_as_pretrained,
+    )
+    if dump_folder:
+        with open(dump_stats, "w") as f:
+            for k, v in sorted(summary.items()):
+                f.write(f":{k}:{v};\n")
+    if not cont:
         return summary, data
+    data, summary = _clean_data_remove_model_and_proto(data, summary)
+    _validate_model_step2(
+        summary=summary,
+        data=data,
+        do_run=do_run,
+        quiet=quiet,
+        verbose=verbose,
+        runtime=runtime,
+        repeat=repeat,
+        warmup=warmup,
+        second_input_keys=second_input_keys,
+        ort_logs=ort_logs,
+        quiet_input_sets=quiet_input_sets,
+        ortfusiontype=ortfusiontype,
+        model_id=model_id,
+    )
+
+    summary["time_total"] = time.perf_counter() - main_validation_begin
+
+    if verbose:
+        print("[validate_model] -- done (final)")
+    with open(dump_stats, "w") as f:
+        for k, v in sorted(summary.items()):
+            f.write(f":{k}:{v};\n")
+    return summary, data
+
+
+def _clean_data_remove_model_and_proto(data, summary):
+    assert isinstance(data, dict) and isinstance(data, dict)
+    data = _clean_data_remove_model_and_proto_(data)
+    summary = _clean_data_remove_model_and_proto_(summary)
+    gc.collect()
+    return data, summary
+
+
+def _clean_data_remove_model_and_proto_(obj):
+    if type(obj) is dict:
+        # do not use isinstance otherwise CausalLMOutputWithPast becomes a dictionary
+        return {k: _clean_data_remove_model_and_proto_(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_clean_data_remove_model_and_proto_(v) for v in obj]
+    if isinstance(obj, tuple):
+        return tuple(_clean_data_remove_model_and_proto_(v) for v in obj)
+    if isinstance(obj, set):
+        return {_clean_data_remove_model_and_proto_(v) for v in obj}
+    if isinstance(obj, (torch.nn.Module, onnx.ModelProto)):
+        return None
+    return obj
+
+
+def _validate_model_step1(
+    model_id,
+    do_same,
+    do_run,
+    patch,
+    rewrite,
+    dtype,
+    device,
+    optimization,
+    quiet,
+    drop_inputs,
+    ortfusiontype,
+    stop_if_static,
+    exporter,
+    verbose,
+    task,
+    runtime,
+    inputs2,
+    input_options,
+    model_options,
+    exporter_options,
+    opset,
+    output_names,
+    repeat,
+    warmup,
+    dump_folder,
+    subfolder,
+    use_pretrained,
+    same_as_pretrained,
+):
+    assert not do_same or do_run, (
+        f"Discrepancies cannot be measured if the model is not run, "
+        f"do_run={do_run}, do_same={do_same}"
+    )
+    (
+        summary,
+        model_id,
+        subfolder,
+        same_as_pretrained,
+        use_pretrained,
+        dump_folder,
+        folder_name,
+        patch_kwargs,
+    ) = _prepare_validation(
+        model_id=model_id,
+        subfolder=subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+        patch=patch,
+        rewrite=rewrite,
+        do_run=do_run,
+        dtype=dtype,
+        device=device,
+        optimization=optimization,
+        quiet=quiet,
+        drop_inputs=drop_inputs,
+        ortfusiontype=ortfusiontype,
+        stop_if_static=stop_if_static,
+        exporter=exporter,
+        runtime=runtime,
+        inputs2=inputs2,
+        input_options=input_options,
+        model_options=model_options,
+        exporter_options=exporter_options,
+        opset=opset,
+        task=task,
+        verbose=verbose,
+        output_names=output_names,
+        dump_folder=dump_folder,
+    )
+
+    data, iop, mop = _get_untrained_model_with_inputs(
+        summary=summary,
+        model_id=model_id,
+        verbose=verbose,
+        task=task,
+        use_pretrained=use_pretrained,
+        same_as_pretrained=same_as_pretrained,
+        input_options=input_options,
+        model_options=model_options,
+        subfolder=subfolder,
+        inputs2=inputs2,
+        quiet=quiet,
+        dump_folder=dump_folder,
+    )
+
+    second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]
+    if exporter == "modelbuilder":
+        _update_data_for_modelbuilder(data, verbose)
+
+    _update_inputs_outputs(
+        data=data,
+        summary=summary,
+        exporter=exporter,
+        iop=iop,
+        mop=mop,
+        dump_folder=dump_folder,
+        opset=opset,
+        device=device,
+        dtype=dtype,
+        rewrite=rewrite,
+        drop_inputs=drop_inputs,
+        verbose=verbose,
+        second_input_keys=second_input_keys,
+        model_id=model_id,
+    )
+
+    _verbose_validate(data, second_input_keys, verbose)

     if do_run:
-
-        if verbose:
-            # It does not really work for the time being and the model
-            # gets loaded twice, one by torch, one by onnxruntime
-            print("[validation_model] -- delete the model")
-        for key in ["model", "onnx_program", "config"]:
-            if key in data:
-                del data[key]
-        if device is not None and "cuda" in str(device).lower():
-            torch.cuda.empty_cache()
-        gc.collect()
-        print("[validation_model] -- done")
+        validation_begin = time.perf_counter()

+        _validate_do_run_model(
+            data, summary, "inputs", "run", "run_expected", verbose, repeat, warmup, quiet
+        )
+        if second_input_keys:
+            for k in second_input_keys:
+                _validate_do_run_model(
+                    data,
+                    summary,
+                    k,
+                    f"run2{k[6:]}",
+                    f"run_expected2{k[6:]}",
+                    verbose,
+                    1,
+                    0,
+                    quiet,
+                )
+
+        summary["time_total_validation_torch"] = time.perf_counter() - validation_begin
+
+    _call_exporter(
+        data=data,
+        summary=summary,
+        exporter=exporter,
+        patch_kwargs=patch_kwargs,
+        stop_if_static=stop_if_static,
+        verbose=verbose,
+        dump_folder=dump_folder,
+        quiet=quiet,
+        optimization=optimization,
+        do_run=do_run,
+        output_names=output_names,
+        exporter_options=exporter_options,
+    )
+
+    cont, dump_stats = _dump_onnx_model(
+        data=data,
+        summary=summary,
+        dump_folder=dump_folder,
+        verbose=verbose,
+        exporter=exporter,
+        folder_name=folder_name,
+    )
+    return cont, summary, data, dump_stats, second_input_keys
+
+
+def _validate_model_step2(
+    summary,
+    data,
+    do_run,
+    quiet,
+    verbose,
+    runtime,
+    repeat,
+    warmup,
+    second_input_keys,
+    ort_logs,
+    quiet_input_sets,
+    ortfusiontype,
+    model_id,
+):
+    if do_run:
         validation_begin = time.perf_counter()
         summary_valid, data = validate_onnx_model(
             data=data,
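For reference, a minimal sketch of how the refactored `validate_model` and its new `exporter_options` argument might be called from Python. Only the parameter names and the import path come from the diff above; the model id and the option values are hypothetical placeholders:

```python
from onnx_diagnostic.torch_models.validate import validate_model

# Hypothetical call: model id and option values are illustrative only.
summary, data = validate_model(
    "hf-internal-testing/tiny-random-LlamaForCausalLM",
    exporter="onnx-dynamo",
    do_run=True,
    patch=True,
    exporter_options={"report": True},  # forwarded to the exporter (report/verify per the docstring)
    verbose=1,
)
print(summary.get("time_total"))
```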
@@ -937,16 +1260,6 @@ def validate_model(
     summary.update(summary_valid)

     _compute_final_statistics(summary)
-    summary["time_total"] = time.perf_counter() - main_validation_begin
-
-    if verbose:
-        print("[validate_model] -- done (final)")
-    if dump_stats:
-        # Dumps again the statistics.
-        with open(dump_stats, "w") as f:
-            for k, v in sorted(summary.items()):
-                f.write(f":{k}:{v};\n")
-    return summary, data


 def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
@@ -1030,7 +1343,7 @@ def _validate_do_run_model(
     summary[expected_tag] = string_type(expected, with_shape=True)
     if verbose:
-        print(f"[validate_model] done ([{tag}])")
+        print(f"[validate_model] done ([{tag}]) - {string_type(expected, with_shape=True)}")
     data[expected_tag] = expected
     assert hash_inputs == string_type(data[key], with_shape=True), (
         f"The model execution did modified the inputs:\n"
@@ -1040,7 +1353,6 @@ def _validate_do_run_model(


 def _validate_do_run_exported_program(data, summary, verbose, quiet):
-
     # We run a second time the model to check the patch did not
     # introduce any discrepancies
     if verbose:
@@ -1065,7 +1377,13 @@ def _validate_do_run_exported_program(data, summary, verbose, quiet):
     if "ERR_run_patched" in summary:
         return summary, data

-
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print("[_validate_do_run_exported_program] with inputs_export")
+    disc = max_diff(data["run_expected"], expected, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"something went wrong disc={disc}"
     for k, v in disc.items():
         summary[f"disc_patched_{k}"] = str(v)
     if verbose:
@@ -1107,6 +1425,7 @@ def call_exporter(
     do_run: bool = False,
     dump_folder: Optional[str] = None,
     output_names: Optional[List[str]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Calls an exporter on a model;
@@ -1120,6 +1439,7 @@ def call_exporter(
     :param do_run: runs and compute discrepancies
     :param dump_folder: to dump additional information
     :param output_names: list of output names to use with the onnx exporter
+    :param exporter_options: exporter options
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -1135,6 +1455,7 @@ def call_exporter(
             verbose=verbose,
             optimization=optimization,
             do_run=do_run,
+            exporter_options=exporter_options,
         )
         _restore_torch_export_export(summary)
         return summary, data
@@ -1147,6 +1468,7 @@ def call_exporter(
             verbose=verbose,
             optimization=optimization,
             output_names=output_names,
+            exporter_options=exporter_options,
         )
         _restore_torch_export_export(summary)
         return summary, data
@@ -1160,6 +1482,7 @@ def call_exporter(
             optimization=optimization,
             dump_folder=dump_folder,
             output_names=output_names,
+            exporter_options=exporter_options,
         )
         _restore_torch_export_export(summary)
         return summary, data
@@ -1172,6 +1495,7 @@ def call_exporter(
             verbose=verbose,
             optimization=optimization,
             output_names=output_names,
+            exporter_options=exporter_options,
         )
         _restore_torch_export_export(summary)
         return summary, data
@@ -1191,6 +1515,7 @@ def call_torch_export_export(
     verbose: int = 0,
     optimization: Optional[str] = None,
     do_run: bool = False,
+    exporter_options: Optional[Dict[str, Any]] = None,
 ):
     """
     Exports a model with :func:`torch.export.export`.
@@ -1203,9 +1528,11 @@ def call_torch_export_export(
     :param verbose: verbosity
     :param optimization: optimization to do
     :param do_run: runs and compute discrepancies
+    :param exporter_options: additional options given to the exporter
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
+    exporter_options = exporter_options or {}
     assert exporter in {
         "export",
         "export-strict",
@@ -1214,8 +1541,12 @@ def call_torch_export_export(
     assert not optimization, f"No optimization is implemented for exporter={exporter!r}"
     assert "model" in data, f"model is missing from data: {sorted(data)}"
     assert "inputs_export" in data, f"inputs_export is missing from data: {sorted(data)}"
+    assert ("-strict" not in exporter) or ("strict" not in exporter_options), (
+        f"Options strict cannot be specified in the exporter name {exporter!r} "
+        f"and in the options {exporter_options}"
+    )
     summary: Dict[str, Union[str, int, float]] = {}
-    strict = "-strict" in exporter
+    strict = "-strict" in exporter or exporter_options.pop("strict", False)
     args, kwargs = split_args_kwargs(data["inputs_export"])
     ds = data.get("dynamic_shapes", None)

@@ -1225,6 +1556,7 @@ def call_torch_export_export(
     summary["export_args"] = string_type(args, with_shape=True)
     summary["export_kwargs"] = string_type(kwargs, with_shape=True)
     summary["export_dynamic_shapes"] = string_type(ds)
+    summary["export_options"] = str(exporter_options)

     # There is an issue with DynamicShape [[],[]] becomes []
     dse = use_dyn_not_str(ds)
@@ -1251,7 +1583,9 @@ def call_torch_export_export(
         data,
         (
             lambda m=model, args=args, kws=kwargs, dse=dse, s=strict: (
-                torch.export.export(
+                torch.export.export(
+                    m, args, kwargs=kws, dynamic_shapes=dse, strict=s, **exporter_options
+                )
             )
         ),
     )
@@ -1294,7 +1628,14 @@ def call_torch_export_export(
     if "ERR_export_export" in summary:
         return summary, data

-
+    verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+    if verbose_diff >= 10:
+        print("[call_torch_export_export] with inputs_export")
+    disc = max_diff(data["run_expected"], expected, verbose=verbose_diff)
+    assert not verbose_diff or (
+        not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+    ), f"something went wrong disc={disc}"
+
     for k, v in disc.items():
         summary[f"disc_exported_{k}"] = str(v)
     if verbose:
@@ -1465,6 +1806,9 @@ def validate_onnx_model(
     if verbose:
         print(f"[validate_onnx_model] -- keys={keys}")
     for k_input, k_expected, suffix in keys:
+        if k_input == "inputs_prompt":
+            # this must used onnx_generate
+            continue
         # make_feeds
         assert k_input in data, f"Unable to find {k_input!r} in {sorted(data)}"
         assert k_expected in data, f"Unable to find {k_expected!r} in {sorted(data)}"
@@ -1478,7 +1822,7 @@ def validate_onnx_model(
             data[k_input],
             use_numpy=True,
             check_flatten=False,
-            is_modelbuilder=data["exporter"] == "modelbuilder",
+            is_modelbuilder=data["exporter"] == "modelbuilder",  # to remove position_ids
         )
         if verbose:
             print(f"[validate_onnx_model] ort inputs={string_type(feeds, with_shape=True)}")
@@ -1501,13 +1845,6 @@ def validate_onnx_model(
             repeat=repeat,
             warmup=warmup,
         )
-        # NOTE: modelbuilder has different order on past_kv outputs
-        if data["exporter"] == "modelbuilder":
-            logits = got[:1]
-            past_key_values = got[1:]
-            reorder_past_key_values = reorder_modelbuilder_cache_to_torch(past_key_values)
-            got = logits + reorder_past_key_values
-
         if f"ERR_{_mk(f'time_onnx_ort_run{suffix}')}" in summary:
             return summary, data

@@ -1518,7 +1855,16 @@ def validate_onnx_model(
             print(f"[validate_onnx_model] got={string_type(got, with_shape=True)}")

         # compute discrepancies
-
+        verbose_diff = int(os.environ.get("MAXDIFF", "0"))
+        if verbose_diff >= 10:
+            print(
+                f"[validate_onnx_model] k_input={k_input!r}, "
+                f"k_expected={k_expected!r}, suffix={suffix!r}"
+            )
+        disc = max_diff(data[k_expected], got, flatten=True, verbose=verbose_diff)
+        assert not verbose_diff or (
+            not np.isnan(disc["abs"]) and not np.isinf(disc["abs"])
+        ), f"something went wrong disc={disc}"
         if verbose:
             print(f"[validate_onnx_model] discrepancies={string_diff(disc)}")
         for k, v in disc.items():
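Several of the checks added in this hunk and in the two earlier ones read a `MAXDIFF` environment variable. A small sketch of how one might turn them on before running a validation; the threshold of 10 comes from the `verbose_diff >= 10` tests in the diff, everything else is an assumption:

```python
import os

# Hypothetical: enable the extra max_diff logging and NaN/inf asserts added in 0.8.1
# before calling validate_model (any value >= 10 triggers the verbose path).
os.environ["MAXDIFF"] = "10"
```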
@@ -1533,6 +1879,7 @@ def call_torch_export_onnx(
     verbose: int = 0,
     optimization: Optional[str] = None,
     output_names: Optional[List[str]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Exports a model into onnx.
@@ -1545,10 +1892,12 @@ def call_torch_export_onnx(
     :param verbose: verbosity
     :param optimization: optimization to do
     :param output_names: output names to use
+    :param exporter_options: additional options to give the exporter
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
     available = {None, "", "ir", "os_ort", "ir+default"}
+    exporter_options = exporter_options or {}
     assert (
         optimization in available
     ), f"unexpected value for optimization={optimization}, available={available}"
@@ -1576,6 +1925,7 @@ def call_torch_export_onnx(
     summary["export_dynamo"] = dynamo
     summary["export_args"] = string_type(args, with_shape=True)
     summary["export_kwargs"] = string_type(kwargs, with_shape=True)
+    summary["export_exporter"] = str(exporter_options)
     opset = data.get("model_opset", None)
     if opset:
         summary["export_opset"] = opset
@@ -1603,6 +1953,11 @@ def call_torch_export_onnx(
         export_export_kwargs["output_names"] = output_names
     if opset:
         export_export_kwargs["opset_version"] = opset
+    assert not (set(export_export_kwargs) & set(exporter_options)), (
+        f"Some options were defined twice, "
+        f"{set(export_export_kwargs) & set(exporter_options)}, "
+        f"you should remove them from exporter_options={exporter_options}"
+    )
     if verbose:
         print(
             f"[call_torch_export_onnx] export_export_kwargs="
@@ -1622,6 +1977,7 @@ def call_torch_export_onnx(
                     args,
                     kwargs=kws,
                     **ekws,
+                    **exporter_options,
                 )
             )
         ),
@@ -1694,6 +2050,7 @@ def call_torch_export_model_builder(
     verbose: int = 0,
     optimization: Optional[str] = None,
     output_names: Optional[List[str]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Exports a model into onnx with :epkg:`ModelBuilder`.
@@ -1705,11 +2062,13 @@ def call_torch_export_model_builder(
     :param verbose: verbosity
     :param optimization: optimization to do
     :param output_names: list of output names to use
+    :param exporter_options: additional options to give the exporter
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
     from ..helpers.model_builder_helper import create_model_builder, save_model_builder

+    exporter_options = exporter_options or {}
     assert optimization in (
         None,
         "",
@@ -1737,7 +2096,12 @@ def call_torch_export_model_builder(
         ], p=precision, pr=provider, cd=cache_dir: (
             save_model_builder(
                 create_model_builder(
-                    c,
+                    c,
+                    m,
+                    precision=p,
+                    execution_provider=pr,
+                    cache_dir=cd,
+                    **exporter_options,
                 )
             )
         )
@@ -1854,6 +2218,7 @@ def call_torch_export_custom(
     optimization: Optional[str] = None,
     dump_folder: Optional[str] = None,
     output_names: Optional[List[str]] = None,
+    exporter_options: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Exports a model into onnx.
@@ -1867,9 +2232,11 @@ def call_torch_export_custom(
     :param optimization: optimization to do
     :param dump_folder: to store additional information
     :param output_names: list of output names to use
+    :param exporter_options: additional exporter options
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
+    exporter_options = exporter_options or {}
     available = {
         "",
         "default",
@@ -1905,11 +2272,20 @@ def call_torch_export_custom(
     assert exporter in available, f"Unexpected value for exporter={exporter!r} in {available}"
     assert "model" in data, f"model is missing from data: {sorted(data)}"
     assert "inputs_export" in data, f"inputs_export is missing from data: {sorted(data)}"
+    assert ("-strict" not in exporter) or ("strict" not in exporter_options), (
+        f"Options strict cannot be specified in the exporter name {exporter!r} "
+        f"and in the options {exporter_options}"
+    )
+    assert ("-fake" not in exporter) or ("fake" not in exporter_options), (
+        f"Options strict cannot be specified in the exporter name {exporter!r} "
+        f"and in the options {exporter_options}"
+    )
     summary: Dict[str, Union[str, int, float]] = {}
-    strict = "-strict" in exporter
+    strict = "-strict" in exporter or exporter_options.pop("strict", False)
     args, kwargs = split_args_kwargs(data["inputs_export"])
     ds = data.get("dynamic_shapes", None)
-
+    fake = "-fake" in exporter or exporter_options.pop("fake", False)
+    if fake:
         from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions

     if verbose:
@@ -1932,8 +2308,10 @@ def call_torch_export_custom(
     summary["export_exporter"] = exporter
     summary["export_optimization"] = optimization or ""
     summary["export_strict"] = strict
+    summary["export_fake"] = fake
     summary["export_args"] = string_type(args, with_shape=True)
     summary["export_kwargs"] = string_type(kwargs, with_shape=True)
+    summary["export_options"] = str(exporter_options)

     from experimental_experiment.torch_interpreter import to_onnx, ExportOptions
     from experimental_experiment.xbuilder import OptimizationOptions
@@ -1941,17 +2319,35 @@ def call_torch_export_custom(
     spl = optimization.split("+") if optimization else []
     os_ort = "os_ort" in spl
     optimization = "+".join(_ for _ in spl if _ != "os_ort")
-
-
-
-    decomposition_table
+    inline = "-noinline" not in exporter or exporter_options.pop("inline", True)
+    decomposition_table = (
+        exporter_options.pop("decomposition_table")
+        if "decomposition_table" in exporter_options
+        else (
             "default"
             if ("-default" in exporter or "-dec" in exporter)
             else ("all" if ("-all" in exporter or "-decall" in exporter) else None)
-    )
+        )
+    )
+    large_model = bool(exporter_options.pop("large_model", True))
+    return_optimize_report = bool(exporter_options.pop("return_optimize_report", True))
+    export_modules_as_functions = bool(
+        exporter_options.pop("export_modules_as_functions", False)
+    )
+    external_threshold = int(exporter_options.pop("external_threshold", 1024))
+    summary["export_decomposition_table"] = str(decomposition_table)
+    summary["export_inline"] = str(inline)
+    summary["export_large_model"] = str(large_model)
+    summary["export_return_optimize_report"] = str(return_optimize_report)
+    summary["export_export_modules_as_functions"] = str(export_modules_as_functions)
+    summary["export_external_threshold"] = str(external_threshold)
+
+    export_options = ExportOptions(
+        strict=strict,
+        decomposition_table=decomposition_table,
         save_ep=(os.path.join(dump_folder, f"{exporter}.ep") if dump_folder else None),
+        **exporter_options,
     )
-    inline = "-noinline" not in exporter
     options = OptimizationOptions(patterns=optimization) if optimization else None
     model = data["model"]
     kws = dict(
@@ -1959,10 +2355,12 @@ def call_torch_export_custom(
         export_options=export_options,
         options=options,
         optimize=bool(optimization),
-        large_model=True,
-        return_optimize_report=True,
         verbose=max(verbose - 2, 0),
         inline=inline,
+        large_model=large_model,
+        return_optimize_report=return_optimize_report,
+        export_modules_as_functions=export_modules_as_functions,
+        external_threshold=external_threshold,
     )
     if opset:
         kws["target_opset"] = opset