onnx-diagnostic 0.7.10__py3-none-any.whl → 0.7.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """

-__version__ = "0.7.10"
+__version__ = "0.7.12"
 __author__ = "Xavier Dupré"
@@ -474,7 +474,7 @@ def get_parser_validate() -> ArgumentParser:
     )
     parser.add_argument(
         "--runtime",
-        choices=["onnxruntime", "torch", "ref"],
+        choices=["onnxruntime", "torch", "ref", "orteval", "orteval10"],
         default="onnxruntime",
         help="onnx runtime to use, `onnxruntime` by default",
     )
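
Note: the two added `orteval` choices extend the runtimes accepted by `--runtime`; argparse rejects anything outside the list. A minimal standalone sketch of that behavior (the parser here is a stand-in, not the package's real one):

    from argparse import ArgumentParser

    p = ArgumentParser()
    p.add_argument(
        "--runtime",
        choices=["onnxruntime", "torch", "ref", "orteval", "orteval10"],
        default="onnxruntime",
    )
    assert p.parse_args([]).runtime == "onnxruntime"
    assert p.parse_args(["--runtime", "orteval"]).runtime == "orteval"
    # p.parse_args(["--runtime", "other"]) would exit with "invalid choice"
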
@@ -542,6 +542,12 @@ def get_parser_validate() -> ArgumentParser:
         "the onnx exporter should use.",
         default="",
     )
+    parser.add_argument(
+        "--ort-logs",
+        default=False,
+        action=BooleanOptionalAction,
+        help="Enables onnxruntime logging when the session is created",
+    )
     return parser


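
Note: `BooleanOptionalAction` (argparse, Python 3.9+) automatically generates the negated flag, so the new option can be toggled both ways. A standalone check:

    from argparse import ArgumentParser, BooleanOptionalAction

    p = ArgumentParser()
    p.add_argument("--ort-logs", default=False, action=BooleanOptionalAction)
    assert p.parse_args([]).ort_logs is False
    assert p.parse_args(["--ort-logs"]).ort_logs is True
    assert p.parse_args(["--no-ort-logs"]).ort_logs is False
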
@@ -575,6 +581,7 @@ def _cmd_validate(argv: List[Any]):
    ):
        print(f"validate - unsupported args: export={args.export!r}, opt={args.opt!r}")
        return
+    patch_dict = args.patch if isinstance(args.patch, dict) else {"patch": args.patch}
    summary, _data = validate_model(
        model_id=args.mid,
        task=args.task,
@@ -585,8 +592,8 @@ def _cmd_validate(argv: List[Any]):
        use_pretrained=args.trained,
        dtype=args.dtype,
        device=args.device,
-        patch=args.patch,
-        rewrite=args.rewrite,
+        patch=patch_dict,
+        rewrite=args.rewrite and patch_dict.get("patch", True),
        stop_if_static=args.stop_if_static,
        optimization=args.opt,
        exporter=args.export,
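
Note: the normalization keeps `--patch` usable as a plain boolean while `validate_model` now receives a dictionary, and rewriting is silently disabled when patching is off. A small sketch of the semantics (values are hypothetical):

    def normalize(patch, rewrite):
        patch_dict = patch if isinstance(patch, dict) else {"patch": patch}
        return patch_dict, rewrite and patch_dict.get("patch", True)

    assert normalize(True, True) == ({"patch": True}, True)
    assert normalize(False, True) == ({"patch": False}, False)
    # a dict without the "patch" key leaves rewriting enabled
    assert normalize({"patch_torch": True}, True) == ({"patch_torch": True}, True)
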
@@ -601,6 +608,7 @@ def _cmd_validate(argv: List[Any]):
        repeat=args.repeat,
        warmup=args.warmup,
        inputs2=args.inputs2,
+        ort_logs=args.ort_logs,
        output_names=(
            None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
        ),
@@ -820,6 +828,8 @@ def get_parser_agg() -> ArgumentParser:
        "n_model_running,n_model_acc01,n_model_acc001,n_model_dynamic,"
        "n_model_pass,n_model_faster,"
        "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
+        "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
+        "n_node_layer_normalization,n_node_layer_normalization23,"
        "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
        "n_node_constant,n_node_shape,n_node_expand,"
        "n_node_function,n_node_initializer,n_node_scatter,"
@@ -4,11 +4,6 @@ import torch
 import transformers
 import transformers.cache_utils

-try:
-    from transformers.models.mamba.modeling_mamba import MambaCache
-except ImportError:
-    from transformers.cache_utils import MambaCache
-

 class CacheKeyValue:
     """
@@ -354,8 +349,15 @@ def make_encoder_decoder_cache(
    )


-def make_mamba_cache(key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]]) -> MambaCache:
+def make_mamba_cache(
+    key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
+) -> "MambaCache":  # noqa: F821
    "Creates a ``MambaCache``."
+    # The import is moved here because it is slow.
+    try:
+        from transformers.models.mamba.modeling_mamba import MambaCache
+    except ImportError:
+        from transformers.cache_utils import MambaCache
    dtype = key_value_pairs[0][0].dtype

    class _config:
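
Note: moving the import inside `make_mamba_cache` defers the slow transformers submodule load until the function is first called, so importing the helper module itself stays fast. A usage sketch (the tensor shapes below are made up; real ones depend on the model):

    import torch

    pairs = [(torch.zeros(2, 8, 16), torch.zeros(2, 8, 16))]
    cache = make_mamba_cache(pairs)  # MambaCache is imported here, on first call
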
@@ -285,7 +285,8 @@ class CubePlot:
        nn = df.shape[1] // n_cols
        nn += int(df.shape[1] % n_cols != 0)
        ratio = float(os.environ.get("FIGSIZEH", "1"))
-        fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * df.shape[0] / 3 * ratio))
+        figsize = (6 * n_cols, nn * (2.5 + df.shape[0] / 15) * ratio)
+        fig, axs = plt.subplots(nn, n_cols, figsize=figsize)
        pos = 0
        imgs = []
        for c in self._make_loop(df.columns, verbose):
@@ -332,10 +333,12 @@ class CubePlot:
        n_cols = len(groups)

        title_suffix = f"\n{title_suffix}" if title_suffix else ""
+        ratio = float(os.environ.get("FIGSIZEH", "1"))
+        figsize = (5 * n_cols, max(len(g) for g in groups) * (2 + df.shape[1] / 2) * ratio)
        fig, axs = plt.subplots(
            df.shape[1],
            n_cols,
-            figsize=(5 * n_cols, max(len(g) for g in groups) * df.shape[1] / 2),
+            figsize=figsize,
            sharex=True,
            sharey="row" if n_cols > 1 else False,
        )
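
Note: both figure heights now combine a constant base per subplot with a slower-growing term, instead of scaling linearly with the row count alone, which keeps tall frames from producing huge figures. A quick comparison for the first hunk, assuming 40 rows, nn = 3 and ratio = 1:

    rows, nn, ratio = 40, 3, 1.0
    old_height = nn * rows / 3 * ratio           # 40.0
    new_height = nn * (2.5 + rows / 15) * ratio  # 15.5
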
@@ -877,7 +880,11 @@ class CubeLogs:
            print(f"[CubeLogs.view] key_columns={key_columns}")
        g = data[[*key_index, *key_columns]].copy()
        g["count"] = 1
-        r = g.groupby([*key_index, *key_columns], dropna=False).sum()
+        r = (
+            g.copy()
+            if not key_index and not key_columns
+            else g.groupby([*key_index, *key_columns], dropna=False).sum()
+        )
        not_unique = r[r["count"] > 1]
        assert not_unique.shape[0] == 0, (
            f"view_def.name={view_def.name!r}, "
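
Note: the guard works around a pandas limitation: `groupby` raises when given an empty list of keys, which happens when a view defines neither index nor column keys. A minimal reproduction:

    import pandas

    g = pandas.DataFrame({"count": [1, 1]})
    try:
        g.groupby([], dropna=False).sum()
    except ValueError as e:
        print(e)  # No group keys passed!
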
@@ -1505,6 +1512,11 @@ class CubeLogsPerformance(CubeLogs):
            "n_model_faster3x",
            "n_model_faster4x",
            "n_node_attention",
+            "n_node_attention23",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
            "n_node_control_flow",
            "n_node_scatter",
            "n_node_function",
@@ -1568,7 +1580,9 @@ class CubeLogsPerformance(CubeLogs):

        def gdf(df, cname, default_value=np.nan):
            if cname in df.columns:
-                return df[cname]
+                if np.isnan(default_value):
+                    return df[cname]
+                return df[cname].fillna(default_value)
            return pandas.Series(default_value, index=df.index)

        def ghas_value(df, cname):
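
Note: `gdf` now also fills NaN values inside an existing column when an explicit default is given; previously the default only mattered for absent columns. The three cases, with the helper copied out for a standalone run:

    import numpy as np
    import pandas

    def gdf(df, cname, default_value=np.nan):
        if cname in df.columns:
            if np.isnan(default_value):
                return df[cname]
            return df[cname].fillna(default_value)
        return pandas.Series(default_value, index=df.index)

    df = pandas.DataFrame({"a": [1.0, np.nan]})
    print(gdf(df, "a").tolist())     # [1.0, nan] -> column returned untouched
    print(gdf(df, "a", 0).tolist())  # [1.0, 0.0] -> NaN filled with the default
    print(gdf(df, "b", 0).tolist())  # [0.0, 0.0] -> missing column, constant series
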
@@ -1676,15 +1690,54 @@ class CubeLogsPerformance(CubeLogs):
                "time_latency",
                gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
            ),
+            n_node_attention23=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__Attention")
+            ),
+            n_node_rotary_embedding23=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__RotaryEmbedding")
+            ),
+            n_node_layer_normalization23=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__LayerNormalization", 0)
+                + gdf(df, "op_onnx__RMSNormalization", 0)
+                + gdf(df, "op_onnx__BatchNormlization", 0)
+                + gdf(df, "op_onnx__InstanceNormlization", 0)
+                + gdf(df, "op_onnx__GroupNormalization", 0),
+            ),
            n_node_attention=lambda df: gpreserve(
                df,
-                "op_onnx_com.microsoft_Attention",
-                gdf(df, "op_onnx_com.microsoft_Attention")
-                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention"),
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_Attention", 0)
+                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PagedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_LongformerAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedSelfAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
+            ),
+            n_node_layer_normalization=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_EmbedLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SkipLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_LayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SkipSimplifiedLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SimplifiedLayerNormalization", 0),
+            ),
+            n_node_rotary_embedding=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_GemmaRotaryEmbedding", 0)
+                + gdf(df, "op_onnx_com.microsoft_RotaryEmbedding", 0),
            ),
            n_node_control_flow=lambda df: gpreserve(
                df,
-                "op_onnx__If",
+                "time_latency_eager",
                (
                    gdf(df, "op_onnx__If", 0)
                    + gdf(df, "op_onnx__Scan", 0)
@@ -1693,7 +1746,7 @@ class CubeLogsPerformance(CubeLogs):
            ),
            n_node_scatter=lambda df: gpreserve(
                df,
-                "op_onnx__ScatterND",
+                "time_latency_eager",
                gdf(df, "op_onnx__ScatterND", 0) + gdf(df, "op_onnx__ScatterElements", 0),
            ),
            n_node_function=lambda df: gpreserve(
@@ -1706,13 +1759,13 @@ class CubeLogsPerformance(CubeLogs):
                df, "onnx_n_initializer", gdf(df, "onnx_n_initializer")
            ),
            n_node_constant=lambda df: gpreserve(
-                df, "op_onnx__Constant", gdf(df, "op_onnx__Constant")
+                df, "time_latency_eager", gdf(df, "op_onnx__Constant")
            ),
            n_node_shape=lambda df: gpreserve(
-                df, "op_onnx__Shape", gdf(df, "op_onnx__Shape")
+                df, "time_latency_eager", gdf(df, "op_onnx__Shape")
            ),
            n_node_expand=lambda df: gpreserve(
-                df, "op_onnx__Expand", gdf(df, "op_onnx__Expand")
+                df, "time_latency_eager", gdf(df, "op_onnx__Expand")
            ),
        )
        assert (
@@ -3,7 +3,6 @@ import numpy as np
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .onnx_helper import dtype_to_tensor_dtype
 from .cache_helper import is_cache_dynamic_registered


@@ -23,6 +22,7 @@ def make_feeds(
    use_numpy: bool = False,
    copy: bool = False,
    check_flatten: bool = True,
+    is_modelbuilder: bool = False,
) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
    """
    Serializes the inputs to produce feeds expected
@@ -35,10 +35,15 @@ def make_feeds(
        by ``OrtValue``
    :param check_flatten: if True, checks the ``torch.utils._pytree.tree_flatten``
        returns the same number of outputs
+    :param is_modelbuilder: if True, the exporter is ModelBuilder and we need to reorder
+        the past_key_values inputs to match the expected order and to drop position_ids
    :return: feeds dictionary
    """
-    # position_ids is a special case because ModelBuilder does not usually use it.
-    # We use types to detect the best inputs.
+    # NOTE: position_ids is a special case because ModelBuilder does not usually use it:
+    # it is fused into the rotary embedding in GQA.
+    if is_modelbuilder and isinstance(inputs, dict):
+        inputs.pop("position_ids", None)  # Remove 'position_ids' if present.
+
    flat = flatten_object(inputs, drop_keys=True)
    assert (
        not check_flatten
@@ -76,39 +81,6 @@ def make_feeds(
        f"\n-- inputs={string_type(inputs, with_shape=True)}"
        f"\n-- names={names}"
    )
-    if len(names) < len(flat) and (
-        isinstance(proto, onnx.ModelProto) or hasattr(proto, "get_inputs")
-    ):
-
-        typed_names = (
-            [(i.name, i.type.tensor_type.elem_type) for i in proto.graph.input]
-            if isinstance(proto, onnx.ModelProto)
-            else [(i.name, name_type_to_onnx_dtype(i.type)) for i in proto.get_inputs()]
-        )
-
-        new_flat = []
-        pos = 0
-        for _name, dtype in typed_names:
-            assert isinstance(
-                dtype, int
-            ), f"Unexpected value for dtype={dtype!r}, type(proto)={type(proto)}"
-            itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            while dtype != itype:
-                pos += 1
-                if pos >= len(flat):
-                    break
-                itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            if pos >= len(flat):
-                break
-            new_flat.append(flat[pos])
-            pos += 1
-        assert len(new_flat) == len(names), (
-            f"Unable to align expected input {names} with the given input, "
-            f"type(proto)={type(proto)}"
-            f"\n-- inputs: {string_type(inputs, with_shape=True)}"
-            f"\n-- typed_names: {typed_names}"
-        )
-        flat = new_flat

    if copy:
        flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
@@ -122,4 +94,49 @@ def make_feeds(
        elif isinstance(i, float):
            i = np.array(i, dtype=np.float32)
        new_flat.append(i)
+
+    # NOTE: ModelBuilder uses a different order for past_key_values;
+    # we need to reorder them to match the expected order.
+    if is_modelbuilder:
+        # We assume "past_key_values" appears in the input names when the
+        # exporter is ModelBuilder.
+        non_past_kv_input_names = [n for n in names if "past_key_values" not in n]
+        past_kv_names = [n for n in names if "past_key_values" in n]
+        reorder_past_kv_names = reorder_modelbuilder_cache_to_torch(past_kv_names)
+        names = non_past_kv_input_names + reorder_past_kv_names
    return dict(zip(names, new_flat))
+
+
+def reorder_modelbuilder_cache_to_torch(past_kv: List[Any]) -> List[Any]:
+    """
+    Reorders the past_kvs for ModelBuilder to match the order expected
+    by PyTorch exported models.
+
+    .. note::
+        This function can take either the names or the actual tensors
+        as long as they are in a list.
+
+    Conceptually,
+
+    From::
+
+        [past_key_values.0.key, past_key_values.0.value,
+         past_key_values.1.key, past_key_values.1.value, ...]
+
+    To::
+
+        [past_key_values.0.key, past_key_values.1.key,
+         ..., past_key_values.0.value, past_key_values.1.value, ...]
+
+    :param past_kv: list of flattened inputs
+    :return: reordered list of flattened inputs
+    """
+    total_len = len(past_kv)
+    if total_len % 2 != 0:
+        raise ValueError("The length of past_key_values should be even.")
+    keys = []
+    values = []
+    for i in range(0, total_len, 2):
+        keys.append(past_kv[i])
+        values.append(past_kv[i + 1])
+    return keys + values
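
Note: a quick check of the reordering on names; the function behaves identically on tensors:

    names = [
        "past_key_values.0.key", "past_key_values.0.value",
        "past_key_values.1.key", "past_key_values.1.value",
    ]
    print(reorder_modelbuilder_cache_to_torch(names))
    # ['past_key_values.0.key', 'past_key_values.1.key',
    #  'past_key_values.0.value', 'past_key_values.1.value']
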
@@ -5,6 +5,8 @@ from . import (
    fill_mask,
    image_classification,
    image_text_to_text,
+    image_to_video,
+    mask_generation,
    mixture_of_expert,
    object_detection,
    sentence_similarity,
@@ -14,7 +16,6 @@ from . import (
    text_to_image,
    text2text_generation,
    zero_shot_image_classification,
-    mask_generation,
)

__TASKS__ = [
@@ -23,6 +24,8 @@ __TASKS__ = [
    fill_mask,
    image_classification,
    image_text_to_text,
+    image_to_video,
+    mask_generation,
    mixture_of_expert,
    object_detection,
    sentence_similarity,
@@ -32,7 +35,6 @@ __TASKS__ = [
    text_to_image,
    text2text_generation,
    zero_shot_image_classification,
-    mask_generation,
]


@@ -0,0 +1,127 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.config_helper import (
+    update_config,
+    check_hasattr,
+    default_num_hidden_layers as nhl,
+)
+
+__TASK__ = "image-to-video"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """Reduces a model size."""
+    if not hasattr(config, "num_hidden_layers") and not hasattr(config, "num_layers"):
+        # We cannot reduce.
+        return {}
+    check_hasattr(config, ("num_hidden_layers", "num_layers"))
+    kwargs = {}
+    if hasattr(config, "num_layers"):
+        kwargs["num_layers"] = min(config.num_layers, nhl())
+    if hasattr(config, "num_hidden_layers"):
+        kwargs["num_hidden_layers"] = min(config.num_hidden_layers, nhl())
+
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    text_embed_dim: int,
+    latent_channels: int,
+    batch_size: int = 2,
+    image_height: int = 704,
+    image_width: int = 1280,
+    latent_frames: int = 1,
+    text_maxlen: int = 512,
+    add_second_input: int = 1,
+    **kwargs,  # unused
+):
+    """
+    Generates inputs for task ``image-to-video``.
+    """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
+    latent_height = image_height // 8
+    latent_width = image_width // 8
+    dtype = torch.float32
+
+    inputs = dict(
+        hidden_states=torch.randn(
+            batch_size,
+            latent_channels,
+            latent_frames,
+            latent_height,
+            latent_width,
+            dtype=dtype,
+        ),
+        timestep=torch.tensor([1.0] * batch_size, dtype=dtype),
+        encoder_hidden_states=torch.randn(
+            batch_size, text_maxlen, text_embed_dim, dtype=dtype
+        ),
+        padding_mask=torch.ones(1, 1, image_height, image_width, dtype=dtype),
+        fps=torch.tensor([16] * batch_size, dtype=dtype),
+        condition_mask=torch.randn(
+            batch_size, 1, latent_frames, latent_height, latent_width, dtype=dtype
+        ),
+    )
+    shapes = dict(
+        hidden_states={
+            0: "batch_size",
+            2: "latent_frames",
+            3: "latent_height",
+            4: "latent_width",
+        },
+        timestep={0: "batch_size"},
+        encoder_hidden_states={0: "batch_size"},
+        padding_mask={0: "batch_size", 2: "height", 3: "width"},
+        fps={0: "batch_size"},
+        condition_mask={
+            0: "batch_size",
+            2: "latent_frames",
+            3: "latent_height",
+            4: "latent_width",
+        },
+    )
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+
+    if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            text_embed_dim=text_embed_dim,
+            latent_channels=latent_channels,
+            batch_size=batch_size,
+            image_height=image_height,
+            image_width=image_width,
+            latent_frames=latent_frames,
+            text_maxlen=text_maxlen,
+            add_second_input=0,
+            **kwargs,
+        )["inputs"]
+    return res
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Inputs kwargs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    if config is not None:
+        check_hasattr(config, "in_channels", "text_embed_dim")
+    kwargs = dict(
+        text_embed_dim=1024 if config is None else config.text_embed_dim,
+        latent_channels=16 if config is None else config.in_channels - 1,
+        batch_size=1,
+        image_height=8 * 50,
+        image_width=8 * 80,
+        latent_frames=1,
+        text_maxlen=512,
+    )
+    return kwargs, get_inputs
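
Note: a usage sketch for the new task module, assuming no configuration is available so `random_input_kwargs` picks the typical dimensions; `model` and `config` are only forwarded by `get_inputs`, so None works here:

    kwargs, fn = random_input_kwargs(None)
    data = fn(model=None, config=None, **kwargs)
    print(sorted(data))  # ['dynamic_shapes', 'inputs', 'inputs2']
    print(data["inputs"]["hidden_states"].shape)
    # torch.Size([1, 16, 1, 50, 80]): batch, latent_channels, frames, h/8, w/8
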
@@ -254,6 +254,17 @@ def torch_export_patches(
    may appear ``AssertionError: Mutating module attribute _seen_tokens during export.``.
    It can be avoided by setting ``strict=False`` when calling :func:`torch.export.export`.
    """
+    if verbose:
+        print(f"[torch_export_patches] patch_sympy={patch_sympy!r}")
+        print(f" . patch_torch={patch_torch!r}")
+        print(f" . patch_transformers={patch_transformers!r}")
+        print(f" . patch_diffusers={patch_diffusers!r}")
+        print(f" . catch_constraints={catch_constraints!r}")
+        print(f" . stop_if_static={stop_if_static!r}")
+        print(f" . patch={patch!r}")
+        print(f" . custom_patches={custom_patches!r}")
+        print(f"[torch_export_patches] dump_rewriting={dump_rewriting!r}")
+
    if rewrite:
        from .patch_module import torch_export_rewrite

@@ -35,6 +35,9 @@ except ImportError:
    from ...ext_test_case import has_transformers
    from ...helpers.torch_helper import is_torchdynamo_exporting

+patch_is_initialized = pv.Version(transformers.__version__) > pv.Version("4.56.99")
+
+
if patch_masking_utils:
    # Introduced in 4.52
    from transformers.masking_utils import (
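
Note: comparing against 4.56.99 rather than 4.57 makes the flag true for 4.57 pre-releases as well, since dev and rc builds of 4.57 sort below 4.57.0 but above 4.56.99:

    import packaging.version as pv

    assert pv.Version("4.57.0") > pv.Version("4.56.99")
    assert pv.Version("4.57.0.dev0") > pv.Version("4.56.99")
    assert not (pv.Version("4.57.0.dev0") > pv.Version("4.57"))
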
@@ -213,6 +216,8 @@ if patch_DynamicLayer:
            new_shape[-2] = 0
            self.keys = torch.empty(new_shape, dtype=self.dtype, device=self.device)
            self.values = torch.empty(new_shape, dtype=self.dtype, device=self.device)
+            if patch_is_initialized:
+                self.is_initialized = True


 def _patch_make_causal_mask(