PyPI - onnx-diagnostic - Versions diffs - 0.6.3__py3-none-any.whl → 0.7.1__py3-none-any.whl - Mend

onnx-diagnostic 0.6.3__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

onnx_diagnostic/__init__.py +1 -1
onnx_diagnostic/_command_lines_parser.py +281 -80
onnx_diagnostic/doc.py +22 -0
onnx_diagnostic/export/dynamic_shapes.py +48 -20
onnx_diagnostic/export/shape_helper.py +126 -0
onnx_diagnostic/ext_test_case.py +1 -1
onnx_diagnostic/helpers/cache_helper.py +78 -8
onnx_diagnostic/helpers/config_helper.py +8 -4
onnx_diagnostic/helpers/helper.py +30 -3
onnx_diagnostic/helpers/log_helper.py +1744 -0
onnx_diagnostic/helpers/mini_onnx_builder.py +4 -1
onnx_diagnostic/helpers/model_builder_helper.py +54 -73
onnx_diagnostic/helpers/torch_helper.py +18 -2
onnx_diagnostic/reference/__init__.py +1 -0
onnx_diagnostic/reference/ort_evaluator.py +29 -4
onnx_diagnostic/reference/report_results_comparison.py +95 -0
onnx_diagnostic/reference/torch_evaluator.py +21 -0
onnx_diagnostic/tasks/automatic_speech_recognition.py +3 -0
onnx_diagnostic/tasks/feature_extraction.py +3 -0
onnx_diagnostic/tasks/fill_mask.py +3 -0
onnx_diagnostic/tasks/image_classification.py +7 -1
onnx_diagnostic/tasks/image_text_to_text.py +72 -18
onnx_diagnostic/tasks/mixture_of_expert.py +3 -0
onnx_diagnostic/tasks/object_detection.py +3 -0
onnx_diagnostic/tasks/sentence_similarity.py +3 -0
onnx_diagnostic/tasks/summarization.py +3 -0
onnx_diagnostic/tasks/text2text_generation.py +3 -0
onnx_diagnostic/tasks/text_classification.py +3 -0
onnx_diagnostic/tasks/text_generation.py +90 -43
onnx_diagnostic/tasks/zero_shot_image_classification.py +3 -0
onnx_diagnostic/torch_export_patches/onnx_export_errors.py +78 -25
onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +37 -0
onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +365 -17
onnx_diagnostic/torch_models/hghub/hub_api.py +81 -8
onnx_diagnostic/torch_models/hghub/hub_data.py +6 -2
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +209 -0
onnx_diagnostic/torch_models/hghub/model_inputs.py +58 -14
onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py +23 -50
onnx_diagnostic/torch_models/{test_helper.py → validate.py} +166 -106
{onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/METADATA +2 -2
{onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/RECORD +44 -41
{onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/WHEEL +0 -0
{onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/licenses/LICENSE.txt +0 -0
{onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/top_level.txt +0 -0

onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py CHANGED Viewed

@@ -3953,6 +3953,46 @@ def _ccached_facebook_bart_large_cnn():
     )
+def _ccached_microsoft_phi3_mini_4k_instruct():
+    "microsoft/Phi-3-mini-4k-instruct"
+    return transformers.Phi3Config(
+        **{
+            "_name_or_path": "Phi-3-mini-4k-instruct",
+            "architectures": ["Phi3ForCausalLM"],
+            "attention_dropout": 0.0,
+            "auto_map": {
+                "AutoConfig": "configuration_phi3.Phi3Config",
+                "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
+            },
+            "bos_token_id": 1,
+            "embd_pdrop": 0.0,
+            "eos_token_id": 32000,
+            "hidden_act": "silu",
+            "hidden_size": 3072,
+            "initializer_range": 0.02,
+            "intermediate_size": 8192,
+            "max_position_embeddings": 4096,
+            "model_type": "phi3",
+            "num_attention_heads": 32,
+            "num_hidden_layers": 32,
+            "num_key_value_heads": 32,
+            "original_max_position_embeddings": 4096,
+            "pad_token_id": 32000,
+            "resid_pdrop": 0.0,
+            "rms_norm_eps": 1e-05,
+            "rope_scaling": null,
+            "rope_theta": 10000.0,
+            "sliding_window": 2047,
+            "tie_word_embeddings": false,
+            "torch_dtype": "bfloat16",
+            "transformers_version": "4.40.2",
+            "use_cache": true,
+            "attention_bias": false,
+            "vocab_size": 32064,
+        }
+    )
 def _ccached_microsoft_phi4_reasoning():
     "microsoft/Phi-4-mini-reasoning"
     return transformers.Phi3Config(
@@ -4093,3 +4133,172 @@ def _ccached_microsoft_phi4_reasoning():
             "vocab_size": 200064,
         }
     )
+def _ccached_ydshieh_tiny_random_vit_for_image_classification():
+    "ydshieh/tiny-random-ViTForImageClassification"
+    return transformers.Phi3Config(
+        **{
+            "_name_or_path": ".temp/dummy/vit/ViTForImageClassification",
+            "architectures": ["ViTForImageClassification"],
+            "attention_probs_dropout_prob": 0.1,
+            "encoder_stride": 2,
+            "hidden_act": "gelu",
+            "hidden_dropout_prob": 0.1,
+            "hidden_size": 32,
+            "image_size": 30,
+            "initializer_range": 0.02,
+            "intermediate_size": 37,
+            "layer_norm_eps": 1e-12,
+            "model_type": "vit",
+            "num_attention_heads": 4,
+            "num_channels": 3,
+            "num_hidden_layers": 5,
+            "patch_size": 2,
+            "qkv_bias": true,
+            "torch_dtype": "float32",
+            "transformers_version": "4.24.0.dev0",
+        }
+    )
+def _ccached_microsoft_phi_35_mini_instruct():
+    "microsoft/Phi-3.5-mini-instruct"
+    return transformers.Phi3Config(
+        **{
+            "_name_or_path": "Phi-3.5-mini-instruct",
+            "architectures": ["Phi3ForCausalLM"],
+            "attention_dropout": 0.0,
+            "auto_map": {
+                "AutoConfig": "configuration_phi3.Phi3Config",
+                "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
+            },
+            "bos_token_id": 1,
+            "embd_pdrop": 0.0,
+            "eos_token_id": 32000,
+            "hidden_act": "silu",
+            "hidden_size": 3072,
+            "initializer_range": 0.02,
+            "intermediate_size": 8192,
+            "max_position_embeddings": 131072,
+            "model_type": "phi3",
+            "num_attention_heads": 32,
+            "num_hidden_layers": 32,
+            "num_key_value_heads": 32,
+            "original_max_position_embeddings": 4096,
+            "pad_token_id": 32000,
+            "resid_pdrop": 0.0,
+            "rms_norm_eps": 1e-05,
+            "rope_scaling": {
+                "long_factor": [
+                    1.0800000429153442,
+                    1.1100000143051147,
+                    1.1399999856948853,
+                    1.340000033378601,
+                    1.5899999141693115,
+                    1.600000023841858,
+                    1.6200000047683716,
+                    2.620000123977661,
+                    3.2300000190734863,
+                    3.2300000190734863,
+                    4.789999961853027,
+                    7.400000095367432,
+                    7.700000286102295,
+                    9.09000015258789,
+                    12.199999809265137,
+                    17.670000076293945,
+                    24.46000099182129,
+                    28.57000160217285,
+                    30.420001983642578,
+                    30.840002059936523,
+                    32.590003967285156,
+                    32.93000411987305,
+                    42.320003509521484,
+                    44.96000289916992,
+                    50.340003967285156,
+                    50.45000457763672,
+                    57.55000305175781,
+                    57.93000411987305,
+                    58.21000289916992,
+                    60.1400032043457,
+                    62.61000442504883,
+                    62.62000274658203,
+                    62.71000289916992,
+                    63.1400032043457,
+                    63.1400032043457,
+                    63.77000427246094,
+                    63.93000411987305,
+                    63.96000289916992,
+                    63.970001220703125,
+                    64.02999877929688,
+                    64.06999969482422,
+                    64.08000183105469,
+                    64.12000274658203,
+                    64.41000366210938,
+                    64.4800033569336,
+                    64.51000213623047,
+                    64.52999877929688,
+                    64.83999633789062,
+                ],
+                "short_factor": [
+                    1.0,
+                    1.0199999809265137,
+                    1.0299999713897705,
+                    1.0299999713897705,
+                    1.0499999523162842,
+                    1.0499999523162842,
+                    1.0499999523162842,
+                    1.0499999523162842,
+                    1.0499999523162842,
+                    1.0699999332427979,
+                    1.0999999046325684,
+                    1.1099998950958252,
+                    1.1599998474121094,
+                    1.1599998474121094,
+                    1.1699998378753662,
+                    1.2899998426437378,
+                    1.339999794960022,
+                    1.679999828338623,
+                    1.7899998426437378,
+                    1.8199998140335083,
+                    1.8499997854232788,
+                    1.8799997568130493,
+                    1.9099997282028198,
+                    1.9399996995925903,
+                    1.9899996519088745,
+                    2.0199997425079346,
+                    2.0199997425079346,
+                    2.0199997425079346,
+                    2.0199997425079346,
+                    2.0199997425079346,
+                    2.0199997425079346,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0299997329711914,
+                    2.0799996852874756,
+                    2.0899996757507324,
+                    2.189999580383301,
+                    2.2199995517730713,
+                    2.5899994373321533,
+                    2.729999542236328,
+                    2.749999523162842,
+                    2.8399994373321533,
+                ],
+                "type": "longrope",
+            },
+            "rope_theta": 10000.0,
+            "sliding_window": 262144,
+            "tie_word_embeddings": false,
+            "torch_dtype": "bfloat16",
+            "transformers_version": "4.43.3",
+            "use_cache": true,
+            "attention_bias": false,
+            "vocab_size": 32064,
+        }
+    )

onnx_diagnostic/torch_models/hghub/model_inputs.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import inspect
 import os
+import pprint
 from typing import Any, Dict, Optional, Tuple
 import torch
 import transformers
 from ...helpers.config_helper import update_config
 from ...tasks import reduce_model_config, random_input_kwargs
-from .hub_api import task_from_arch, task_from_id, get_pretrained_config
+from .hub_api import task_from_arch, task_from_id, get_pretrained_config, download_code_modelid
 def _code_needing_rewriting(model: Any) -> Any:
@@ -22,10 +23,12 @@ def get_untrained_model_with_inputs(
     model_kwargs: Optional[Dict[str, Any]] = None,
     verbose: int = 0,
     dynamic_rope: Optional[bool] = None,
+    use_pretrained: bool = False,
     same_as_pretrained: bool = False,
     use_preinstalled: bool = True,
     add_second_input: bool = False,
     subfolder: Optional[str] = None,
+    use_only_preinstalled: bool = False,
 ) -> Dict[str, Any]:
     """
     Gets a non initialized model similar to the original model
@@ -42,10 +45,12 @@ def get_untrained_model_with_inputs(
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :param same_as_pretrained: if True, do not change the default values
         to get a smaller model
+    :param use_pretrained: download the pretrained weights as well
     :param use_preinstalled: use preinstalled configurations
     :param add_second_input: provides a second inputs to check a model
         supports different shapes
     :param subfolder: subfolder to use for this model id
+    :param use_only_preinstalled: use only preinstalled version
     :return: dictionary with a model, inputs, dynamic shapes, and the configuration,
         some necessary rewriting as well
@@ -66,6 +71,10 @@ def get_untrained_model_with_inputs(
         print("-- dynamic shapes:", pprint.pformat(data['dynamic_shapes']))
         print("-- configuration:", pprint.pformat(data['configuration']))
     """
+    assert not use_preinstalled or not use_only_preinstalled, (
+        f"model_id={model_id!r}, pretinstalled model is only available "
+        f"if use_only_preinstalled is False."
+    )
     if verbose:
         print(f"[get_untrained_model_with_inputs] model_id={model_id!r}")
         if use_preinstalled:
@@ -74,6 +83,7 @@ def get_untrained_model_with_inputs(
         config = get_pretrained_config(
             model_id,
             use_preinstalled=use_preinstalled,
+            use_only_preinstalled=use_only_preinstalled,
             subfolder=subfolder,
             **(model_kwargs or {}),
         )
@@ -96,7 +106,7 @@ def get_untrained_model_with_inputs(
         print(f"[get_untrained_model_with_inputs] architectures={archs!r}")
         print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
     if task is None:
-        task = task_from_arch(archs[0])
+        task = task_from_arch(archs[0], model_id=model_id)
     if verbose:
         print(f"[get_untrained_model_with_inputs] task={task!r}")
@@ -111,7 +121,6 @@ def get_untrained_model_with_inputs(
         )
     # updating the configuration
     mkwargs = reduce_model_config(config, task) if not same_as_pretrained else {}
     if model_kwargs:
         for k, v in model_kwargs.items():
@@ -136,27 +145,62 @@ def get_untrained_model_with_inputs(
                 f"{config._attn_implementation!r}"  # type: ignore[union-attr]
             )
+    if use_pretrained:
+        model = transformers.AutoModel.from_pretrained(model_id, **mkwargs)
+    else:
+        if archs is not None:
+            try:
+                model = getattr(transformers, archs[0])(config)
+            except AttributeError as e:
+                # The code of the models is not in transformers but in the
+                # repository of the model. We need to download it.
+                pyfiles = download_code_modelid(model_id, verbose=verbose)
+                if pyfiles:
+                    if "." in archs[0]:
+                        cls_name = archs[0]
+                    else:
+                        modeling = [_ for _ in pyfiles if "/modeling_" in _]
+                        assert len(modeling) == 1, (
+                            f"Unable to guess the main file implemented class {archs[0]!r} "
+                            f"from {pyfiles}, found={modeling}."
+                        )
+                        last_name = os.path.splitext(os.path.split(modeling[0])[-1])[0]
+                        cls_name = f"{last_name}.{archs[0]}"
+                    if verbose:
+                        print(
+                            f"[get_untrained_model_with_inputs] custom code for {cls_name!r}"
+                        )
+                        print(
+                            f"[get_untrained_model_with_inputs] from folder "
+                            f"{os.path.split(pyfiles[0])[0]!r}"
+                        )
+                    cls = transformers.dynamic_module_utils.get_class_from_dynamic_module(
+                        cls_name, pretrained_model_name_or_path=os.path.split(pyfiles[0])[0]
+                    )
+                    model = cls(config)
+                else:
+                    raise AttributeError(
+                        f"Unable to find class 'tranformers.{archs[0]}'. "
+                        f"The code needs to be downloaded, config="
+                        f"\n{pprint.pformat(config)}."
+                    ) from e
+        else:
+            assert same_as_pretrained and use_pretrained, (
+                f"Model {model_id!r} cannot be built, the model cannot be built. "
+                f"It must be downloaded. Use same_as_pretrained=True "
+                f"and use_pretrained=True."
+            )
     # input kwargs
     kwargs, fct = random_input_kwargs(config, task)
     if verbose:
         print(f"[get_untrained_model_with_inputs] use fct={fct}")
         if os.environ.get("PRINT_CONFIG") in (1, "1"):
-            import pprint
             print(f"-- input kwargs for task {task!r}")
             pprint.pprint(kwargs)
     if inputs_kwargs:
         kwargs.update(inputs_kwargs)
-    if archs is not None:
-        model = getattr(transformers, archs[0])(config)
-    else:
-        assert same_as_pretrained, (
-            f"Model {model_id!r} cannot be built, the model cannot be built. "
-            f"It must be downloaded. Use same_as_pretrained=True."
-        )
-        model = None
     # This line is important. Some models may produce different
     # outputs even with the same inputs in training mode.
     model.eval()

onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py CHANGED Viewed

@@ -1,7 +1,5 @@
 from typing import Any, Dict
-import torch
 import transformers
-from ...helpers.cache_helper import make_dynamic_cache
 def get_tiny_llm(
@@ -9,6 +7,7 @@ def get_tiny_llm(
     sequence_length: int = 30,
     sequence_length2: int = 3,
     dynamic_rope: bool = False,
+    use_static_cache: bool = False,
     **kwargs,
 ) -> Dict[str, Any]:
     """
@@ -18,11 +17,14 @@ def get_tiny_llm(
     :param sequence_length: sequence length
     :param sequence_length2: new sequence length
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
+    :param use_static_cache: use StaticCache instead of DynamicCache
     :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
     :return: dictionary
     See :ref:`l-plot-tiny-llm-export` or :ref:`l-plot-tiny-llm-export-patched` for examples.
     """
+    from ...tasks.text_generation import get_inputs
     config = {
         "architectures": ["LlamaForCausalLM"],
         "bos_token_id": 1,
@@ -48,56 +50,27 @@ def get_tiny_llm(
     config.update(**kwargs)
     conf = transformers.LlamaConfig(**config)
+    if use_static_cache:
+        conf.cache_implementation = "static"
     model = transformers.LlamaForCausalLM(conf)
     model.eval()
-    # now the inputs
-    cache_last_dim = 96
-    max_token_id = config["vocab_size"] - 1
-    n_layers = config["num_hidden_layers"]
-    num_key_value_heads = config["num_key_value_heads"]
-    batch = torch.export.Dim("batch", min=1, max=1024)
-    seq_length = torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = torch.export.Dim("cache_length", min=1, max=4096)
+    res = get_inputs(
+        model,
+        conf,
+        dummy_max_token_id=config["vocab_size"],  # type: ignore[arg-type]
+        num_hidden_layers=config["num_hidden_layers"],  # type: ignore[arg-type]
+        batch_size=batch_size,
+        sequence_length=sequence_length,
+        sequence_length2=sequence_length2,
+        dynamic_rope=dynamic_rope,
+        num_key_value_heads=config["num_key_value_heads"],  # type: ignore[arg-type]
+        cls_cache="StaticCache" if use_static_cache else "DynamicCache",
+    )
-    shapes = {
-        "input_ids": {0: batch, 1: seq_length},
-        "attention_mask": {
-            0: batch,
-            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
-        },
-        "position_ids": {
-            0: batch,
-            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
-        },
-        "past_key_values": [
-            [{0: batch, 2: cache_length} for _ in range(n_layers)],
-            [{0: batch, 2: cache_length} for _ in range(n_layers)],
-        ],
-    }
-    inputs = dict(
-        input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
-            torch.int64
-        ),
-        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
-            torch.int64
-        ),
-        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
-        .to(torch.int64)
-        .expand((batch_size, -1)),
-        past_key_values=make_dynamic_cache(
-            [
-                (
-                    torch.randn(
-                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
-                    ),
-                    torch.randn(
-                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
-                    ),
-                )
-                for i in range(n_layers)
-            ]
-        ),
+    return dict(
+        inputs=res["inputs"],
+        model=model,
+        dynamic_shapes=res["dynamic_shapes"],
+        configuration=conf,
     )
-    return dict(inputs=inputs, model=model, dynamic_shapes=shapes, configuration=conf)

onnx-diagnostic 0.6.3__py3-none-any.whl → 0.7.1__py3-none-any.whl

onnx-diagnostic 0.6.3__py3-none-any.whl → 0.7.1__py3-none-any.whl