PyPI - onnx-diagnostic - Versions diffs - 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

onnx-diagnostic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

onnx_diagnostic/__init__.py +1 -1
onnx_diagnostic/_command_lines_parser.py +18 -0
onnx_diagnostic/api.py +15 -0
onnx_diagnostic/ext_test_case.py +3 -1
onnx_diagnostic/helpers/args_helper.py +1 -1
onnx_diagnostic/helpers/helper.py +6 -5
onnx_diagnostic/helpers/model_builder_helper.py +24 -8
onnx_diagnostic/helpers/rt_helper.py +5 -1
onnx_diagnostic/helpers/torch_helper.py +2 -0
onnx_diagnostic/reference/__init__.py +1 -0
onnx_diagnostic/reference/torch_evaluator.py +518 -0
onnx_diagnostic/reference/torch_ops/__init__.py +55 -0
onnx_diagnostic/reference/torch_ops/_op_run.py +326 -0
onnx_diagnostic/reference/torch_ops/access_ops.py +84 -0
onnx_diagnostic/reference/torch_ops/binary_ops.py +108 -0
onnx_diagnostic/reference/torch_ops/controlflow_ops.py +118 -0
onnx_diagnostic/reference/torch_ops/generator_ops.py +35 -0
onnx_diagnostic/reference/torch_ops/nn_ops.py +176 -0
onnx_diagnostic/reference/torch_ops/other_ops.py +106 -0
onnx_diagnostic/reference/torch_ops/reduce_ops.py +130 -0
onnx_diagnostic/reference/torch_ops/sequence_ops.py +65 -0
onnx_diagnostic/reference/torch_ops/shape_ops.py +120 -0
onnx_diagnostic/reference/torch_ops/unary_ops.py +86 -0
onnx_diagnostic/tasks/__init__.py +22 -1
onnx_diagnostic/tasks/image_classification.py +2 -2
onnx_diagnostic/tasks/text_generation.py +3 -3
onnx_diagnostic/torch_export_patches/eval/__init__.py +106 -37
onnx_diagnostic/torch_export_patches/eval/model_cases.py +12 -25
onnx_diagnostic/torch_export_patches/patch_module_helper.py +130 -16
onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +88 -0
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +142 -0
onnx_diagnostic/torch_models/test_helper.py +115 -15
onnx_diagnostic/torch_onnx/runtime_info.py +289 -0
{onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.1.dist-info}/METADATA +1 -1
{onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.1.dist-info}/RECORD +38 -23
{onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.1.dist-info}/WHEEL +1 -1
{onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.1.dist-info}/licenses/LICENSE.txt +0 -0
{onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.1.dist-info}/top_level.txt +0 -0

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import inspect
 from dataclasses import dataclass
+from functools import wraps
 from typing import Any, Callable, Dict, List, Optional, Tuple
 import torch
 import transformers
@@ -531,3 +532,90 @@ class patched_GenerationMixin:
         # 8. Remove unexpected `generate` inputs (TODO @joao: fix trainer and examples)
         model_inputs.pop("labels", None)
         return model_inputs
+def patched_dynamic_rope_update(rope_forward):
+    """
+    patch:transformers.modeling_rope_utils.dynamic_rope_update
+    """
+    def longrope_frequency_update(self, position_ids, device):
+        seq_len = torch.max(position_ids) + 1
+        if hasattr(self.config, "original_max_position_embeddings"):
+            original_max_position_embeddings = self.config.original_max_position_embeddings
+        else:
+            original_max_position_embeddings = self.config.max_position_embeddings
+        # At export time, seq_len is unknown.
+        long_inv_freq, _ = self.rope_init_fn(
+            self.config, device, seq_len=original_max_position_embeddings + 1
+        )
+        original_inv_freq = self.original_inv_freq.to(device)
+        cond = (seq_len > original_max_position_embeddings).item()
+        inv_freq = torch.cond(
+            cond,
+            (lambda x, y: x.clone()),
+            (lambda x, y: y.clone()),
+            [long_inv_freq, original_inv_freq],
+        )
+        self.inv_freq = inv_freq
+        # if seq_len > original_max_position_embeddings:
+        #    self.inv_freq = self.long_inv_freq
+        # else:
+        #    self.inv_freq = self.original_inv_freq
+    def dynamic_frequency_update(self, position_ids, device):
+        seq_len = torch.max(position_ids) + 1
+        if seq_len > self.max_seq_len_cached:  # growth
+            inv_freq, self.attention_scaling = self.rope_init_fn(
+                self.config, device, seq_len=seq_len
+            )
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
+            self.max_seq_len_cached = seq_len
+        if (
+            seq_len < self.original_max_seq_len
+            and self.max_seq_len_cached > self.original_max_seq_len
+        ):
+            self.original_inv_freq = self.original_inv_freq.to(device)
+            self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
+            self.max_seq_len_cached = self.original_max_seq_len
+    @wraps(rope_forward)
+    def wrapper(self, x, position_ids):
+        if "dynamic" in self.rope_type:
+            dynamic_frequency_update(self, position_ids, device=x.device)
+        elif self.rope_type == "longrope":
+            longrope_frequency_update(self, position_ids, device=x.device)
+        return rope_forward(self, x, position_ids)
+    return wrapper
+class patched_Phi3RotaryEmbedding(torch.nn.Module):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.phi3.modeling_phi3.Phi3RotaryEmbedding
+    @torch.no_grad()
+    @patched_dynamic_rope_update
+    def forward(self, x, position_ids):
+        inv_freq_expanded = (
+            self.inv_freq[None, :, None]
+            .float()
+            .expand(position_ids.shape[0], -1, 1)
+            .to(x.device)
+        )
+        position_ids_expanded = position_ids[:, None, :].float()
+        device_type = (
+            x.device.type
+            if isinstance(x.device.type, str) and x.device.type != "mps"
+            else "cpu"
+        )
+        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+            freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
+            emb = torch.cat((freqs, freqs), dim=-1)
+            cos = emb.cos() * self.attention_scaling
+            sin = emb.sin() * self.attention_scaling
+        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)

onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py CHANGED Viewed

@@ -3951,3 +3951,145 @@ def _ccached_facebook_bart_large_cnn():
             "vocab_size": 50264,
         }
     )
+def _ccached_microsoft_phi4_reasoning():
+    "microsoft/Phi-4-mini-reasoning"
+    return transformers.Phi3Config(
+        **{
+            "architectures": ["Phi3ForCausalLM"],
+            "attention_bias": false,
+            "attention_dropout": 0.0,
+            "bos_token_id": 199999,
+            "embd_pdrop": 0.0,
+            "eos_token_id": 199999,
+            "full_attn_mod": 1,
+            "hidden_act": "silu",
+            "hidden_size": 3072,
+            "initializer_range": 0.02,
+            "intermediate_size": 8192,
+            "interpolate_factor": 1,
+            "lm_head_bias": false,
+            "max_position_embeddings": 131072,
+            "mlp_bias": false,
+            "model_type": "phi3",
+            "num_attention_heads": 24,
+            "num_hidden_layers": 32,
+            "num_key_value_heads": 8,
+            "original_max_position_embeddings": 4096,
+            "pad_token_id": 199999,
+            "partial_rotary_factor": 0.75,
+            "resid_pdrop": 0.0,
+            "rms_norm_eps": 1e-05,
+            "rope_scaling": {
+                "long_factor": [
+                    1,
+                    1.118320672,
+                    1.250641126,
+                    1.398617824,
+                    1.564103225,
+                    1.74916897,
+                    1.956131817,
+                    2.187582649,
+                    2.446418898,
+                    2.735880826,
+                    3.059592084,
+                    3.421605075,
+                    3.826451687,
+                    4.279200023,
+                    4.785517845,
+                    5.351743533,
+                    5.984965424,
+                    6.693110555,
+                    7.485043894,
+                    8.370679318,
+                    9.36110372,
+                    10.4687158,
+                    11.70738129,
+                    13.09260651,
+                    14.64173252,
+                    16.37415215,
+                    18.31155283,
+                    20.47818807,
+                    22.90118105,
+                    25.61086418,
+                    28.64115884,
+                    32.03,
+                    32.1,
+                    32.13,
+                    32.23,
+                    32.6,
+                    32.61,
+                    32.64,
+                    32.66,
+                    32.7,
+                    32.71,
+                    32.93,
+                    32.97,
+                    33.28,
+                    33.49,
+                    33.5,
+                    44.16,
+                    47.77,
+                ],
+                "short_factor": [
+                    1,
+                    1.118320672,
+                    1.250641126,
+                    1.398617824,
+                    1.564103225,
+                    1.74916897,
+                    1.956131817,
+                    2.187582649,
+                    2.446418898,
+                    2.735880826,
+                    3.059592084,
+                    3.421605075,
+                    3.826451687,
+                    4.279200023,
+                    4.785517845,
+                    5.351743533,
+                    5.984965424,
+                    6.693110555,
+                    7.485043894,
+                    8.370679318,
+                    9.36110372,
+                    10.4687158,
+                    11.70738129,
+                    13.09260651,
+                    14.64173252,
+                    16.37415215,
+                    18.31155283,
+                    20.47818807,
+                    22.90118105,
+                    25.61086418,
+                    28.64115884,
+                    32.03,
+                    32.1,
+                    32.13,
+                    32.23,
+                    32.6,
+                    32.61,
+                    32.64,
+                    32.66,
+                    32.7,
+                    32.71,
+                    32.93,
+                    32.97,
+                    33.28,
+                    33.49,
+                    33.5,
+                    44.16,
+                    47.77,
+                ],
+                "type": "longrope",
+            },
+            "rope_theta": 10000.0,
+            "sliding_window": 262144,
+            "tie_word_embeddings": true,
+            "torch_dtype": "bfloat16",
+            "transformers_version": "4.50.0",
+            "use_cache": true,
+            "vocab_size": 200064,
+        }
+    )

onnx_diagnostic/torch_models/test_helper.py CHANGED Viewed

@@ -4,6 +4,7 @@ import os
 import sys
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import time
+import numpy as np
 import onnx
 import onnxscript
 import onnxscript.rewriter.ort_fusions as ort_fusions
@@ -17,6 +18,7 @@ from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 from ..tasks import random_input_kwargs
 from ..torch_export_patches import torch_export_patches
 from ..torch_export_patches.patch_inputs import use_dyn_not_str
+from ..reference import TorchOnnxEvaluator
 from .hghub import get_untrained_model_with_inputs
@@ -192,11 +194,16 @@ def _quiet_or_not_quiet(
     summary: Dict[str, Any],
     data: Optional[Dict[str, Any]],
     fct: Callable,
+    repeat: int = 1,
+    warmup: int = 0,
 ) -> Any:
     begin = time.perf_counter()
     if quiet:
         try:
-            return fct()
+            res = fct()
+            summary[f"time_{suffix}"] = time.perf_counter() - begin
+            if warmup + repeat == 1:
+                return res
         except Exception as e:
             summary[f"ERR_{suffix}"] = str(e)
             summary[f"time_{suffix}"] = time.perf_counter() - begin
@@ -204,11 +211,45 @@ def _quiet_or_not_quiet(
                 return {f"ERR_{suffix}": e}
             data[f"ERR_{suffix}"] = e
             return None
-    res = fct()
+    else:
+        res = fct()
     summary[f"time_{suffix}"] = time.perf_counter() - begin
+    if warmup + repeat > 1:
+        if suffix == "run":
+            res = torch_deepcopy(res)
+        summary[f"{suffix}_output"] = string_type(res, with_shape=True, with_min_max=True)
+        summary[f"{suffix}_warmup"] = warmup
+        summary[f"{suffix}_repeat"] = repeat
+        for _w in range(max(0, warmup - 1)):
+            t = fct()
+            summary[f"io_{suffix}_{_w+1}"] = string_type(t, with_shape=True, with_min_max=True)
+        summary[f"time_{suffix}_warmup"] = time.perf_counter() - begin
+        times = []
+        for _r in range(repeat):
+            begin = time.perf_counter()
+            t = fct()
+            times.append(time.perf_counter() - begin)
+        a = np.array(times)
+        summary[f"time_{suffix}_latency"] = a.mean()
+        summary[f"time_{suffix}_latency_std"] = a.std()
+        summary[f"time_{suffix}_latency_min"] = a.min()
+        summary[f"time_{suffix}_latency_min"] = a.max()
     return res
+def shrink_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
+    """Shrinks the configuration before it gets added to the information to log."""
+    new_cfg = {}
+    for k, v in cfg.items():
+        new_cfg[k] = (
+            v
+            if (not isinstance(v, (list, tuple, set, dict)) or len(v) < 50)
+            else (v.__class__("...") if isinstance(v, (list, tuple)) else "...")
+        )
+    return new_cfg
 def validate_model(
     model_id: str,
     task: Optional[str] = None,
@@ -231,6 +272,9 @@ def validate_model(
     model_options: Optional[Dict[str, Any]] = None,
     subfolder: Optional[str] = None,
     opset: Optional[int] = None,
+    runtime: str = "onnxruntime",
+    repeat: int = 1,
+    warmup: int = 0,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Validates a model.
@@ -267,6 +311,10 @@ def validate_model(
         ``num_hidden_layers`` or ``attn_implementation``
     :param subfolder: version or subfolders to uses when retrieving a model id
     :param opset: onnx opset to use for the conversion
+    :param runtime: onnx runtime to use to check about discrepancies,
+        only if `do_run` is true
+    :param repeat: number of time to measure the model
+    :param warmup: warmup the model first
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
@@ -295,6 +343,7 @@ def validate_model(
             version_ortfusiontype=ortfusiontype or "",
             version_stop_if_static=str(stop_if_static),
             version_exporter=exporter or "",
+            version_runtime=runtime,
         )
     )
     if opset:
@@ -436,7 +485,9 @@ def validate_model(
     if summary["model_module"] in sys.modules:
         summary["model_file"] = str(sys.modules[summary["model_module"]].__file__)  # type: ignore[index]
     summary["model_config_class"] = data["configuration"].__class__.__name__
-    summary["model_config"] = str(data["configuration"].to_dict()).replace(" ", "")
+    summary["model_config"] = str(shrink_config(data["configuration"].to_dict())).replace(
+        " ", ""
+    )
     summary["model_id"] = model_id
     if verbose:
@@ -460,7 +511,13 @@ def validate_model(
         model = data["model"]
         expected = _quiet_or_not_quiet(
-            quiet, "run", summary, data, (lambda m=model, inp=inputs: m(**inp))
+            quiet,
+            "run",
+            summary,
+            data,
+            (lambda m=model, inp=inputs: m(**torch_deepcopy(inp))),
+            repeat=repeat,
+            warmup=warmup,
         )
         if "ERR_run" in summary:
             return summary, data
@@ -522,7 +579,7 @@ def validate_model(
                     disc = max_diff(data["expected"], expected)
                     for k, v in disc.items():
-                        summary[f"disc_patched_{k}"] = v
+                        summary[f"disc_patched_{k}"] = str(v)
                     if verbose:
                         print("[validate_model] done (patched run)")
                         print(f"[validate_model] patched discrepancies={string_diff(disc)}")
@@ -618,7 +675,14 @@ def validate_model(
         return summary, data
     if do_run:
-        summary_valid, data = validate_onnx_model(data=data, quiet=quiet, verbose=verbose)
+        summary_valid, data = validate_onnx_model(
+            data=data,
+            quiet=quiet,
+            verbose=verbose,
+            runtime=runtime,
+            repeat=repeat,
+            warmup=warmup,
+        )
         summary.update(summary_valid)
     if ortfusiontype and "onnx_filename" in data:
@@ -671,7 +735,13 @@ def validate_model(
             if do_run:
                 summary_valid, data = validate_onnx_model(
-                    data=data, quiet=quiet, verbose=verbose, flavour=flavour
+                    data=data,
+                    quiet=quiet,
+                    verbose=verbose,
+                    flavour=flavour,
+                    runtime=runtime,
+                    repeat=repeat,
+                    warmup=warmup,
                 )
                 summary.update(summary_valid)
@@ -883,6 +953,9 @@ def validate_onnx_model(
     quiet: bool = False,
     verbose: int = 0,
     flavour: Optional[str] = None,
+    runtime: str = "onnxruntime",
+    repeat: int = 1,
+    warmup: int = 0,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Verifies that an onnx model produces the same
@@ -895,6 +968,9 @@ def validate_onnx_model(
     :param quiet: catch exception or not
     :param verbose: verbosity
     :param flavour: use a different version of the inputs
+    :param runtime: onnx runtime to use, onnxruntime or torch
+    :param repeat: run that number of times the model
+    :param warmup: warmup the model
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -936,18 +1012,28 @@ def validate_onnx_model(
             f"{providers}..., flavour={flavour!r}"
         )
+    cls_runtime = (
+        (
+            lambda model, providers: onnxruntime.InferenceSession(
+                (model.SerializeToString() if isinstance(model, onnx.ModelProto) else model),
+                providers=providers,
+            )
+        )
+        if runtime == "onnxruntime"
+        else (
+            lambda model, providers: TorchOnnxEvaluator(
+                model, providers=providers, verbose=max(verbose - 1, 0)
+            )
+        )
+    )
     sess = _quiet_or_not_quiet(
         quiet,
-        _mk("time_onnx_ort_create"),
+        _mk("onnx_ort_create"),
         summary,
         data,
-        (
-            lambda source=source, providers=providers: onnxruntime.InferenceSession(
-                source, providers=providers
-            )
-        ),
+        (lambda source=source, providers=providers: cls_runtime(source, providers)),
     )
-    if f"ERR_{_mk('time_onnx_ort_create')}" in summary:
+    if f"ERR_{_mk('onnx_ort_create')}" in summary:
         return summary, data
     data[_mk("onnx_ort_sess")] = sess
@@ -975,6 +1061,8 @@ def validate_onnx_model(
         summary,
         data,
         (lambda sess=sess, feeds=feeds: sess.run(None, feeds)),
+        repeat=repeat,
+        warmup=warmup,
     )
     if f"ERR_{_mk('time_onnx_ort_run')}" in summary:
         return summary, data
@@ -1051,7 +1139,7 @@ def call_torch_export_onnx(
             dynamo=False,
             dynamic_axes={
                 k: v
-                for k, v in CoupleInputsDynamicShapes(args, kwargs, ds)
+                for k, v in CoupleInputsDynamicShapes(args, kwargs, ds)  # type: ignore[arg-type]
                 .replace_by_string()
                 .items()
                 if isinstance(v, dict)
@@ -1229,6 +1317,13 @@ def call_torch_export_custom(
         "custom-nostrict",
         "custom-nostrict-default",
         "custom-nostrict-all",
+        "custom-inline",
+        "custom-strict-inline",
+        "custom-strict-default-inline",
+        "custom-strict-all-inline",
+        "custom-nostrict-inline",
+        "custom-nostrict-default-inline",
+        "custom-nostrict-all-inline",
     }
     assert exporter in available, f"Unexpected value for exporter={exporter!r} in {available}"
     assert "model" in data, f"model is missing from data: {sorted(data)}"
@@ -1269,6 +1364,10 @@ def call_torch_export_custom(
         ),
         save_ep=(os.path.join(dump_folder, f"{exporter}.ep") if dump_folder else None),
     )
+    inline = "-inline" in exporter
+    if inline:
+        export_options.aten_as_function = set()
     options = OptimizationOptions(patterns=optimization) if optimization else None
     model = data["model"]
     kws = dict(
@@ -1279,6 +1378,7 @@ def call_torch_export_custom(
         large_model=True,
         return_optimize_report=True,
         verbose=max(verbose - 2, 0),
+        inline=inline,
     )
     if opset:
         kws["target_opset"] = opset

onnx-diagnostic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

onnx-diagnostic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl