onnx-diagnostic 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +78 -22
- onnx_diagnostic/helpers/helper.py +4 -2
- onnx_diagnostic/helpers/log_helper.py +13 -1
- onnx_diagnostic/helpers/memory_peak.py +2 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +1 -1
- onnx_diagnostic/helpers/onnx_helper.py +1 -1
- onnx_diagnostic/helpers/rt_helper.py +32 -15
- onnx_diagnostic/tasks/text2text_generation.py +1 -0
- onnx_diagnostic/tasks/text_generation.py +84 -54
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +2 -2
- onnx_diagnostic/torch_models/validate.py +620 -213
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.1.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.1.dist-info}/RECORD +18 -18
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.1.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.1.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.1.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED

onnx_diagnostic/_command_lines_parser.py
CHANGED
@@ -265,7 +265,7 @@ def get_parser_config() -> ArgumentParser:
         "--mop",
         metavar="KEY=VALUE",
         nargs="*",
-        help="Additional model options,
+        help="Additional model options, used to change some parameters of the model, "
         "example:\n --mop attn_implementation=sdpa or --mop attn_implementation=eager",
         action=_ParseDict,
     )
@@ -442,11 +442,17 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
         default=True,
         action=_BoolOrParseDictPatch,
         nargs="*",
-        help=
+        help=textwrap.dedent(
+            """
+            Applies patches before exporting, it can be a boolean
+            to enable to disable the patches or be more finetuned
+            (default is True). It is possible to disable patch for torch
+            by adding:
+            --patch "patch_sympy=False" --patch "patch_torch=False"
+            """.strip(
+                "\n"
+            )
+        ),
     )
     parser.add_argument(
         "--rewrite",
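All of the reworked help strings in this release follow the same `textwrap.dedent("""...""".strip("\n"))` pattern. A minimal standalone sketch (not the package's code) of what that pattern evaluates to:

    import textwrap

    help_text = textwrap.dedent(
        """
        Applies patches before exporting, it can be a boolean
        to enable to disable the patches or be more finetuned
        (default is True).
        """.strip(
            "\n"
        )
    )
    # strip("\n") removes the newlines around the triple-quoted block,
    # textwrap.dedent then drops the common leading indentation.
    print(help_text)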
@@ -476,10 +482,16 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
         "--inputs2",
         default=1,
         type=int,
-        help=
+        help=textwrap.dedent(
+            """
+            Validates or exports the model on a second set of inputs
+            to check the exported model supports dynamism. The values is used
+            as an increment to the first set of inputs. A high value may trick
+            a different behavior in the model and missed by the exporter.
+            """.strip(
+                "\n"
+            )
+        ),
     )
     parser.add_argument(
         "--runtime",
@@ -512,9 +524,15 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
     parser.add_argument(
         "--ortfusiontype",
         required=False,
-        help=
+        help=textwrap.dedent(
+            """
+            Applies onnxruntime fusion, this parameter should contain the
+            model type or multiple values separated by `|`. `ALL` can be used
+            to run them all.
+            """.strip(
+                "\n"
+            )
+        ),
     )
     parser.add_argument("-v", "--verbose", default=0, type=int, help="verbosity")
     parser.add_argument("--dtype", help="Changes dtype if necessary.")
@@ -523,18 +541,32 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
         "--iop",
         metavar="KEY=VALUE",
         nargs="*",
-        help=
+        help=textwrap.dedent(
+            """
+            Additional input options, used to change the default
+            inputs use to export. Examples:
+            --iop cls_cache=SlidingWindowCache
+            --iop cls_cache=StaticCache
+            """.strip(
+                "\n"
+            )
+        ),
         action=_ParseDict,
     )
     parser.add_argument(
         "--mop",
         metavar="KEY=VALUE",
         nargs="*",
-        help=
+        help=textwrap.dedent(
+            """
+            Additional model options, used to change some parameters
+            of the model. Example:
+            --mop attn_implementation=sdpa --mop attn_implementation=eager"
+            --mop "rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}"
+            """.strip(
+                "\n"
+            )
+        ),
         action=_ParseDict,
     )
     if name == "validate":
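The `--iop`, `--mop` and the new `--expop` flags all collect KEY=VALUE pairs through the `_ParseDict` action. The package's implementation is not shown in this diff; the following is a hypothetical sketch of how such an argparse action typically turns those tokens into a dictionary (class name and parsing details are assumptions):

    import argparse
    import ast

    class ParseDictSketch(argparse.Action):
        # hypothetical stand-in for _ParseDict
        def __call__(self, parser, namespace, values, option_string=None):
            d = getattr(namespace, self.dest, None) or {}
            for item in values or []:
                key, _, value = item.partition("=")
                try:
                    d[key] = ast.literal_eval(value)  # "True" -> True, "{...}" -> dict
                except (ValueError, SyntaxError):
                    d[key] = value  # keep plain strings such as "sdpa"
            setattr(namespace, self.dest, d)

    parser = argparse.ArgumentParser()
    parser.add_argument("--mop", metavar="KEY=VALUE", nargs="*", action=ParseDictSketch)
    print(parser.parse_args(["--mop", "attn_implementation=sdpa"]).mop)
    # {'attn_implementation': 'sdpa'}
    print(parser.parse_args(["--mop", "rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}"]).mop)
    # {'rope_scaling': {'rope_type': 'dynamic', 'factor': 10.0}}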
@@ -566,9 +598,32 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
     parser.add_argument(
         "--quiet-input-sets",
         default="",
-        help=
+        help=textwrap.dedent(
+            """
+            Avoids raising an exception when an input sets does not work with
+            the exported model. Example:
+            --quiet-input-sets=inputs,inputs22
+            """.strip(
+                "\n"
+            )
+        ),
     )
+    parser.add_argument(
+        "--expop",
+        metavar="KEY=VALUE",
+        nargs="*",
+        help=textwrap.dedent(
+            """
+            Additional exporter options, use to change some parameters
+            of the model. Examples:
+            --expop report=True
+            --expop report=True --expop verify=True
+            """.strip(
+                "\n"
+            )
+        ),
+        action=_ParseDict,
+    )
     return parser
@@ -634,6 +689,7 @@ def _cmd_validate(argv: List[Any]):
         output_names=(
             None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
         ),
+        exporter_options=args.expop,
     )
     print("")
     print("-- summary --")
@@ -940,7 +996,7 @@ def get_parser_agg() -> ArgumentParser:
         "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
         "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
         "n_node_gqa,n_node_layer_normalization,n_node_layer_normalization23,"
-        "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
+        "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,n_node_random,"
         "n_node_constant,n_node_shape,n_node_expand,"
         "n_node_function,n_node_initializer,n_node_scatter,"
         "time_export_unbiased,onnx_n_nodes_no_cst,n_node_initializer_small",
onnx_diagnostic/helpers/helper.py
CHANGED

@@ -1016,6 +1016,8 @@ def max_diff(

     You may use :func:`string_diff` to display the discrepancies in one string.
     """
+    if verbose >= 10:
+        print(f"[max_diff] {type(expected)} ? {type(got)}")
     if expected is None and got is None:
         return dict(abs=0, rel=0, sum=0, n=0, dnan=0)

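The new trace only fires at a high verbosity level. A minimal usage sketch, assuming both helpers live in this module as the docstring above suggests and that `max_diff` accepts plain tensors:

    import torch
    from onnx_diagnostic.helpers.helper import max_diff, string_diff

    expected = torch.tensor([1.0, 2.0, 3.0])
    got = torch.tensor([1.0, 2.1, 3.0])
    diff = max_diff(expected, got, verbose=10)  # verbose >= 10 now also prints the compared types
    print(string_diff(diff))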
@@ -1061,8 +1063,8 @@ def max_diff(
     if expected.__class__.__name__ == "CausalLMOutputWithPast":
         if verbose >= 6:
             print(
-                f"[max_diff] CausalLMOutputWithPast: {string_type(expected)} "
-                f"? {string_type(got)}"
+                f"[max_diff] CausalLMOutputWithPast: {string_type(expected, with_shape=True)} "
+                f"? {string_type(got, with_shape=True)}"
             )
         if got.__class__.__name__ == "CausalLMOutputWithPast":
             return max_diff(
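`string_type(..., with_shape=True)` adds tensor shapes to the printed summary, which is what makes the more verbose message useful. A small sketch, assuming the usual import path for this helper:

    import torch
    from onnx_diagnostic.helpers import string_type

    inputs = (torch.randn(2, 3), torch.ones(2, dtype=torch.int64))
    print(string_type(inputs))                   # types only
    print(string_type(inputs, with_shape=True))  # types plus shapes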
onnx_diagnostic/helpers/log_helper.py
CHANGED

@@ -1169,7 +1169,8 @@ class CubeLogs:
         assuming they should remain stale
     :param sbs: configurations to compare side-by-side, this adds two tabs,
         one gathering raw data about the two configurations, the other one
-        is aggregated by metrics
+        is aggregated by metrics, example:
+        ``=dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
     """
     if verbose:
         print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
@@ -1611,6 +1612,7 @@ class CubeLogsPerformance(CubeLogs):
             "n_node_initializer_small",
             "n_node_layer_normalization",
             "n_node_layer_normalization23",
+            "n_node_random",
             "n_node_reshape",
             "n_node_rotary_embedding",
             "n_node_rotary_embedding23",
@@ -1802,6 +1804,16 @@ class CubeLogsPerformance(CubeLogs):
                 + gdf(df, "op_onnx__InstanceNormlization", 0)
                 + gdf(df, "op_onnx__GroupNormalization", 0),
             ),
+            n_node_random=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__RandomNormal", 0)
+                + gdf(df, "op_onnx__RandomNormalLike", 0)
+                + gdf(df, "op_onnx__RandomUniform", 0)
+                + gdf(df, "op_onnx__RandomUniformLike", 0)
+                + gdf(df, "op_onnx__Multinomial", 0)
+                + gdf(df, "op_onnx__Bernoulli", 0),
+            ),
             n_node_attention=lambda df: gpreserve(
                 df,
                 "time_latency_eager",
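The new `n_node_random` metric sums the per-model counts of the random ONNX operators. The `gdf` helper is internal to the package; assuming it returns the named column when present and a constant otherwise, the aggregation behaves like this sketch:

    import pandas as pd

    def gdf_sketch(df, col, default=0):
        # assumed behavior of the package's gdf helper: column if present, else a constant
        return df[col] if col in df.columns else default

    df = pd.DataFrame({"op_onnx__RandomNormal": [2, 0], "op_onnx__Bernoulli": [1, 1]})
    random_ops = [
        "op_onnx__RandomNormal", "op_onnx__RandomNormalLike", "op_onnx__RandomUniform",
        "op_onnx__RandomUniformLike", "op_onnx__Multinomial", "op_onnx__Bernoulli",
    ]
    n_node_random = sum(gdf_sketch(df, c, 0) for c in random_ops)
    print(n_node_random.tolist())  # [3, 1]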
onnx_diagnostic/helpers/mini_onnx_builder.py
CHANGED

@@ -52,7 +52,7 @@ def proto_from_array(

     tensor = TensorProto()
     tensor.dims.extend(arr_cpu.shape)
-    tensor.name = name
+    tensor.name = name or ""
     itype = dtype_to_tensor_dtype(arr_cpu.dtype)
     assert not hasattr(TensorProto, "INT4") or itype not in {
         TensorProto.INT4,
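The `or ""` guard matters because protobuf string fields reject `None`; a minimal sketch:

    from onnx import TensorProto

    t = TensorProto()
    name = None          # proto_from_array may now be called without a name
    t.name = name or ""  # assigning None directly would raise a TypeError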
onnx_diagnostic/helpers/onnx_helper.py
CHANGED

@@ -331,7 +331,7 @@ def onnx_dtype_name(itype: int, exc: bool = True) -> str:
         print(onnx_dtype_name(7))
     """
     for k in dir(TensorProto):
-        if
+        if k.upper() == k and k != "EXTERNAL":
             v = getattr(TensorProto, k)
             if v == itype:
                 return k
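The new condition skips `TensorProto.EXTERNAL` while scanning the upper-case `TensorProto` attributes; `EXTERNAL` is a `DataLocation` value whose integer happens to equal the element-type code of `FLOAT`. Usage, following the docstring shown above:

    from onnx_diagnostic.helpers.onnx_helper import onnx_dtype_name

    print(onnx_dtype_name(7))  # 'INT64'
    print(onnx_dtype_name(1))  # 'FLOAT', not 'EXTERNAL'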
onnx_diagnostic/helpers/rt_helper.py
CHANGED

@@ -10,13 +10,9 @@ from .ort_session import InferenceSessionForTorch


 def name_type_to_onnx_dtype(name: str) -> int:
-        return onnx.TensorProto.FLOAT
-    if name == "tensor(float16)":
-        return onnx.TensorProto.FLOAT16
-    raise AssertionError(f"Unexpected value {name!r}")
+    assert name.startswith("tensor(") and name.endswith(")"), f"Invalid value name={name!r}"
+    look = name[7:-1]
+    return getattr(onnx.TensorProto, look.upper())


 def make_feeds(
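After this rewrite the helper accepts any `tensor(<name>)` string rather than only float and float16, simply upper-casing the inner name and looking it up on `TensorProto`:

    import onnx
    from onnx_diagnostic.helpers.rt_helper import name_type_to_onnx_dtype

    assert name_type_to_onnx_dtype("tensor(float16)") == onnx.TensorProto.FLOAT16
    assert name_type_to_onnx_dtype("tensor(int64)") == onnx.TensorProto.INT64
    assert name_type_to_onnx_dtype("tensor(bfloat16)") == onnx.TensorProto.BFLOAT16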
@@ -153,7 +149,7 @@ def make_empty_cache(
 def generate_and_validate(
     model,
     input_ids: torch.Tensor,
-    eos_token_id: int,
+    eos_token_id: int = 2,
     max_new_tokens: int = 100,
     session: Optional[Union[InferenceSessionForTorch, onnx.ModelProto, str]] = None,
     atol: float = 0.1,
@@ -262,10 +258,10 @@ def generate_and_validate(
 def onnx_generate(
     model_or_path: Union[onnx.ModelProto, str, InferenceSessionForTorch],
     input_ids: torch.Tensor,
-    eos_token_id: int,
+    eos_token_id: int = 2,
     max_new_tokens=100,
     return_session: bool = False,
-) -> Union[torch.Tensor, Tuple[torch.Tensor, InferenceSessionForTorch]]:
+) -> Union[torch.Tensor, Tuple[torch.Tensor, InferenceSessionForTorch, Dict[str, Any]]]:
     """
     Implements a simple method ``generate`` for an ONNX model.
     The function does not expect any ``position_ids`` as input.
@@ -277,7 +273,7 @@ def onnx_generate(
     :param return_session: returns the instance of class
         :class:`InferenceSessionForTorch
        <onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch>`
-        created if necessary
+        created if necessary, the function returns the feeds for the next iteration
     :return: input tokens concatenated with new tokens

     .. runpython::
@@ -353,12 +349,19 @@ def onnx_generate(
     input_shapes = session.input_shapes
     input_names = session.input_names
     input_types = session.input_types
+    has_position_ids = "position_ids" in session.input_names

     assert (
         len(input_names) > 2
         and input_names[:2] == ["input_ids", "attention_mask"]
-        and input_names[2].startswith("past_key_values")
-    ),
+        and input_names[3 if has_position_ids else 2].startswith("past_key_values")
+    ), (
+        f"Only text generation is supported but input_names == {input_names}, "
+        f"has_position_ids={has_position_ids}"
+    )
+    assert (
+        not has_position_ids or input_names[2] == "position_ids"
+    ), f"position_ids must the third input but input_names={input_names}"

     # First call: prefill
     feeds = dict(
@@ -370,6 +373,10 @@ def onnx_generate(
             input_ids.shape[0], input_names[2:], input_shapes[2:], input_types[2:]
         ),
     )
+    if has_position_ids:
+        feeds["position_ids"] = torch.unsqueeze(
+            torch.arange(input_ids.shape[1], dtype=torch.int64, device=input_ids.device), 0
+        )

     outputs = session.run(None, feeds)
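When the exported model declares a `position_ids` input, `onnx_generate` now feeds explicit positions: `0..seq_len-1` for the prefill call above, and the single next position at every decode step (next hunk). A standalone sketch of the two tensors being built:

    import torch

    seq_len = 5
    prefill = torch.unsqueeze(torch.arange(seq_len, dtype=torch.int64), 0)
    decode = torch.unsqueeze(torch.arange(seq_len, seq_len + 1, dtype=torch.int64), 0)
    print(prefill)  # tensor([[0, 1, 2, 3, 4]])
    print(decode)   # tensor([[5]])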
@@ -393,11 +400,21 @@ def onnx_generate(
                 input_ids.shape, dtype=input_ids.dtype, device=input_ids.device
             ),
         )
+        if has_position_ids:
+            feeds["position_ids"] = torch.unsqueeze(
+                torch.arange(
+                    input_ids.shape[1],
+                    input_ids.shape[1] + 1,
+                    dtype=torch.int64,
+                    device=input_ids.device,
+                ),
+                0,
+            )
+        feeds.update(dict(zip(input_names[3 if has_position_ids else 2 :], outputs[1:])))
         outputs = session.run(None, feeds)

     if return_session:
-        return input_ids, session
+        return input_ids, session, feeds
     return input_ids
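With `return_session=True`, 0.8.1 also returns the feeds prepared for the next decoding step, so a caller can resume generation from where the loop stopped. A hedged usage sketch; the model path is a placeholder for an exported decoder-only model:

    import torch
    from onnx_diagnostic.helpers.rt_helper import onnx_generate

    input_ids = torch.randint(0, 1000, (1, 11), dtype=torch.int64)
    ids, session, feeds = onnx_generate(
        "exported_model.onnx",  # hypothetical path
        input_ids,
        eos_token_id=2,
        max_new_tokens=4,
        return_session=True,
    )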
onnx_diagnostic/tasks/text2text_generation.py
CHANGED

@@ -151,6 +151,7 @@ def get_inputs(
         assert (
             add_second_input > 0
         ), f"Not implemented for add_second_input={add_second_input}."
+        res["inputs_prompt"] = dict(input_ids=torch.randint(1000, 30000, (1, 11)))
         res["inputs2"] = get_inputs(
             model=model,
             config=config,
onnx_diagnostic/tasks/text_generation.py
CHANGED

@@ -56,6 +56,74 @@ def reduce_model_config(config: Any) -> Dict[str, Any]:
     return kwargs


+def _get_input_falcon_mamba(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    dummy_max_token_id: int,
+    num_hidden_layers: int,
+    batch_size: int = 2,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
+    dynamic_rope: bool = False,
+    num_key_value_heads: Optional[int] = None,
+    head_dim: Optional[int] = None,
+    cls_cache: Optional[Union[type, str]] = None,
+    **kwargs,  # unused
+):
+    try:
+        from transformers.models.mamba.modeling_mamba import MambaCache
+    except ImportError:
+        from transformers.cache_utils import MambaCache
+
+    assert cls_cache in (
+        "MambaCache",
+        MambaCache,
+    ), f"Unexpected value for cls_cache={cls_cache} and config={config}"
+
+    batch = "batch"
+    seq_length_multiple = 8
+    sequence_length = (
+        (sequence_length + seq_length_multiple) // seq_length_multiple * seq_length_multiple
+    )
+    # sequence_inc = seq_length_multiple
+    sequence_length2 = seq_length_multiple
+
+    shapes = {
+        "input_ids": {0: batch, 1: "sequence_length"},
+        "attention_mask": {
+            0: batch,
+            1: "cache+seq",  # cache_length + seq_length
+        },
+        "cache_position": {
+            0: batch,
+            1: "cache+seq",  # cache_length + seq_length
+        },
+        "cache_params": [{0: batch} for _ in range(num_hidden_layers * 2)],
+    }
+    inputs = dict(
+        input_ids=torch.randint(
+            0, dummy_max_token_id, (batch_size, sequence_length + sequence_length2)
+        ).to(torch.int64),
+        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
+            torch.int64
+        ),
+        cache_position=torch.arange(0, kwargs["conv_kernel"]).to(torch.int64),
+        # .expand((batch_size, -1))
+        cache_params=make_mamba_cache(
+            [
+                (
+                    torch.randn(
+                        batch_size, kwargs["intermediate_size"], kwargs["conv_kernel"]
+                    ),
+                    torch.randn(batch_size, kwargs["intermediate_size"], kwargs["state_size"]),
+                )
+                for i in range(num_hidden_layers)
+            ]
+        ),
+    )
+    return dict(inputs=inputs, dynamic_shapes=shapes)
+
+
 def get_inputs(
     model: torch.nn.Module,
     config: Optional[Any],
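One detail of the helper above: the dummy sequence length is padded to a multiple of `seq_length_multiple` (8) before the FalconMamba inputs are built. The expression always lands on a multiple of 8 that is at least the requested length:

    seq_length_multiple = 8
    for sequence_length in (30, 32, 33):
        rounded = (
            (sequence_length + seq_length_multiple) // seq_length_multiple * seq_length_multiple
        )
        print(sequence_length, "->", rounded)  # 30 -> 32, 32 -> 40, 33 -> 40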
@@ -68,7 +136,7 @@ def get_inputs(
     num_key_value_heads: Optional[int] = None,
     head_dim: Optional[int] = None,
     cls_cache: Optional[Union[type, str]] = None,
-    add_second_input: int =
+    add_second_input: Optional[int] = None,
     **kwargs,  # unused
 ):
     """
@@ -84,6 +152,7 @@ def get_inputs(
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :param cls_cache: cache class, by default it is
         :class:`transformers.cache_utils.DynamicCache`
+    :param add_second_input: adds other kinds of inputs
     :return: dictionary
     """
     batch = "batch"
@@ -91,60 +160,20 @@ def get_inputs(
     cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)

     if config is not None and config.__class__.__name__ == "FalconMambaConfig":
-        ... (inlined FalconMamba shapes/inputs construction, now factored out
-             into _get_input_falcon_mamba above) ...
-        res = dict(inputs=inputs, dynamic_shapes=shapes)
+        res = _get_input_falcon_mamba(
+            model=model,
+            config=config,
+            dummy_max_token_id=dummy_max_token_id,
+            num_hidden_layers=num_hidden_layers,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            sequence_length2=sequence_length2,
+            dynamic_rope=dynamic_rope,
+            num_key_value_heads=num_key_value_heads,
+            head_dim=head_dim,
+            cls_cache=cls_cache,
+            **kwargs,  # unused
+        )
     else:
         if head_dim is None:
             assert config, "head_dim is None, the value cannot be set without a configuration"
@@ -244,6 +273,7 @@ def get_inputs(
     )
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
+        res["inputs_prompt"] = dict(input_ids=torch.randint(1000, 30000, (1, 11)))
        res["inputs2"] = get_inputs(
            model=model,
            config=config,
onnx_diagnostic/torch_export_patches/patches/patch_torch.py
CHANGED

@@ -195,9 +195,12 @@ class patched_ShapeEnv:
         if self.frozen:
             self.counter["ignored_backward_guard"] += 1
             # PATCHED: raised an exception instead of logging.
+            import transformers
+
             raise AssertionError(
                 f"[patched_ShapeEnv] Ignored guard {expr} == {concrete_val}, "
-                f"this could result in accuracy problems"
+                f"this could result in accuracy problems, transformers.__version__="
+                f"{transformers.__version__!r}"
             )

     def _set_replacement(
onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
CHANGED

@@ -1452,7 +1452,7 @@ def patched_sdpa_attention_forward(
             scale=scaling,
             is_causal=True,
             **sdpa_kwargs,
-        ),
+        ).contiguous(),
         lambda query, key, value: torch.nn.functional.scaled_dot_product_attention(
             query,
             key,
@@ -1461,7 +1461,7 @@ def patched_sdpa_attention_forward(
             scale=scaling,
             is_causal=False,
             **sdpa_kwargs,
-        ),
+        ).contiguous(),
         [query, key, value],
     )
     attn_output = attn_output.transpose(1, 2).contiguous()
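Both branches of the patched attention (causal and non-causal) now return contiguous tensors, so the two callables produce outputs with the same memory layout. A minimal sketch of the added call, using standard `torch.nn.functional.scaled_dot_product_attention`; the reason stated above is an inference from the patch, not from its comments:

    import torch

    q = torch.randn(1, 2, 4, 8)
    k = torch.randn(1, 2, 4, 8)
    v = torch.randn(1, 2, 4, 8)
    out = torch.nn.functional.scaled_dot_product_attention(q, k, v, is_causal=True).contiguous()
    print(out.shape, out.is_contiguous())  # torch.Size([1, 2, 4, 8]) True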