onnx-diagnostic 0.8.0-py3-none-any.whl → 0.8.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +78 -22
- onnx_diagnostic/export/api.py +35 -5
- onnx_diagnostic/export/control_flow.py +511 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/ext_test_case.py +33 -9
- onnx_diagnostic/helpers/cache_helper.py +217 -203
- onnx_diagnostic/helpers/helper.py +6 -2
- onnx_diagnostic/helpers/log_helper.py +39 -5
- onnx_diagnostic/helpers/memory_peak.py +2 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +55 -3
- onnx_diagnostic/helpers/onnx_helper.py +13 -16
- onnx_diagnostic/helpers/rt_helper.py +579 -15
- onnx_diagnostic/helpers/torch_helper.py +5 -0
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/tasks/text2text_generation.py +1 -0
- onnx_diagnostic/tasks/text_generation.py +84 -54
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +563 -61
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- onnx_diagnostic/torch_models/validate.py +620 -213
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +30 -28
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0
```diff
--- a/onnx_diagnostic/helpers/log_helper.py
+++ b/onnx_diagnostic/helpers/log_helper.py
@@ -901,13 +901,19 @@ class CubeLogs:
             else g.groupby([*key_index, *key_columns], dropna=False).sum()
         )
         not_unique = r[r["count"] > 1]
+        if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
+            filename = os.environ.get("DUPLICATE")
+            subset = data.set_index([*key_index, *key_columns]).merge(
+                not_unique.head(), left_index=True, right_index=True
+            )
+            subset.to_excel(filename)
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
             f"unable to run the pivot with index={sorted(key_index)}, "
             f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
             f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
-            f"not unique={set(data.columns) - unique}"
-            f"\n--\n{not_unique.head(10)}"
+            f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
+            f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
         )
 
         # pivot
```
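The new `DUPLICATE` escape hatch is plain pandas. A minimal sketch of the same pattern on a toy frame (the column names are made up for illustration; writing `.xlsx` requires `openpyxl`):

```python
import os
import pandas as pd

# Toy stand-in for CubeLogs' data: two rows share the same key.
data = pd.DataFrame(
    {"exporter": ["E1", "E1", "E2"], "model": ["m", "m", "m"], "latency": [1.0, 2.0, 3.0]}
)
r = data.groupby(["exporter", "model"]).agg(count=("latency", "count"))
not_unique = r[r["count"] > 1]
if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
    # Join the offending keys back onto the raw rows and dump them
    # to the file named by the environment variable.
    subset = data.set_index(["exporter", "model"]).merge(
        not_unique.head(), left_index=True, right_index=True
    )
    subset.to_excel(os.environ["DUPLICATE"])
```

Run with `DUPLICATE=dups.xlsx` set to get the offending rows on disk instead of only in the assertion message.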
```diff
@@ -1000,8 +1006,12 @@ class CubeLogs:
         keys = set(self.keys_time) - {columns_to_fix}
         select = data[self.keys_time]
         select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
         assert select_agg[columns_to_fix].max() <= 1, (
-            f"Column {columns_to_fix!r} has two distinct values at least for one date"
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
             f"{select_agg[select_agg[columns_to_fix] > 1]}"
         )
 
```
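The early return is not just an optimization: on an empty selection, `groupby(...).count()` is empty, its `max()` is NaN, and `NaN <= 1` is `False`, so the assertion would fire with nothing to report. A small demonstration with placeholder column names:

```python
import pandas as pd

select = pd.DataFrame(columns=["version_date", "version"])  # no rows at all
select_agg = select.groupby(["version_date"]).count()
print(select_agg.shape[0])               # 0 -> nothing to fix
print(select_agg["version"].max())       # nan
print(select_agg["version"].max() <= 1)  # False: the assert would fail spuriously
```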
```diff
@@ -1038,6 +1048,16 @@ class CubeLogs:
             f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
             f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
         )
+        select = res[self.keys_time]
+        select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
+        # assert select_agg[columns_to_fix].max() <= 1, (
+        #     f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+        #     f"max={select_agg[columns_to_fix].max()}\n"
+        #     f"{select_agg[select_agg[columns_to_fix] > 1]}"
+        # )
         return res
 
     def _dropna(
```
```diff
@@ -1169,7 +1189,8 @@ class CubeLogs:
             assuming they should remain stale
         :param sbs: configurations to compare side-by-side, this adds two tabs,
             one gathering raw data about the two configurations, the other one
-            is aggregated by metrics
+            is aggregated by metrics, example:
+            ``=dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
         """
         if verbose:
             print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
```
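For context, a hypothetical call using the documented `sbs` format; `cube` stands for an already-built `CubeLogs` instance, and the `exporter`/`opt` keys are illustrative, not prescribed by the library:

```python
# Hypothetical usage sketch: `cube` is assumed to be a CubeLogs built
# elsewhere; "exporter" and "opt" must be key columns of its data.
cube.to_excel(
    "report.xlsx",
    sbs=dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O")),
)
```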
```diff
@@ -1611,6 +1632,7 @@ class CubeLogsPerformance(CubeLogs):
             "n_node_initializer_small",
             "n_node_layer_normalization",
             "n_node_layer_normalization23",
+            "n_node_random",
             "n_node_reshape",
             "n_node_rotary_embedding",
             "n_node_rotary_embedding23",
```
```diff
@@ -1802,6 +1824,16 @@ class CubeLogsPerformance(CubeLogs):
                 + gdf(df, "op_onnx__InstanceNormlization", 0)
                 + gdf(df, "op_onnx__GroupNormalization", 0),
             ),
+            n_node_random=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__RandomNormal", 0)
+                + gdf(df, "op_onnx__RandomNormalLike", 0)
+                + gdf(df, "op_onnx__RandomUniform", 0)
+                + gdf(df, "op_onnx__RandomUniformLike", 0)
+                + gdf(df, "op_onnx__Multinomial", 0)
+                + gdf(df, "op_onnx__Bernoulli", 0),
+            ),
             n_node_attention=lambda df: gpreserve(
                 df,
                 "time_latency_eager",
```
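`gdf` and `gpreserve` are internal helpers that do not appear in this diff; roughly, the new column sums the per-model counts of the random ONNX operators, defaulting to 0 when a column is missing. A rough pandas sketch of that idea (the `gdf` stand-in below is an assumption, not the library's implementation):

```python
import pandas as pd

# One op_onnx__* column per ONNX operator type, NaN when absent.
df = pd.DataFrame(
    {"op_onnx__RandomNormal": [2.0, float("nan")], "op_onnx__Bernoulli": [float("nan"), 1.0]}
)

def gdf(df, name, default=0):
    # Stand-in: the column if present (NaN -> default), else the default.
    return df[name].fillna(default) if name in df.columns else default

n_node_random = (
    gdf(df, "op_onnx__RandomNormal", 0)
    + gdf(df, "op_onnx__RandomNormalLike", 0)
    + gdf(df, "op_onnx__RandomUniform", 0)
    + gdf(df, "op_onnx__RandomUniformLike", 0)
    + gdf(df, "op_onnx__Multinomial", 0)
    + gdf(df, "op_onnx__Bernoulli", 0)
)
print(n_node_random.tolist())  # [2.0, 1.0]: random nodes per model
```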
```diff
@@ -1965,7 +1997,9 @@ class CubeLogsPerformance(CubeLogs):
         * **cmd:** command lines
         * **raw-short:** raw data without all the unused columns
         """
-        fix_aggregation_change = ["model_speedup_input_set", "model_test_with"]
+        # This does not work.
+        # used to be ["model_speedup_input_set", "model_test_with"]
+        fix_aggregation_change = []  # type: ignore[var-annotated]
         fs = ["suite", "model_suite", "task", "model_name", "model_task"]
         index_cols = self._filter_column(fs, self.keys_time)
         assert index_cols, (
```
```diff
--- a/onnx_diagnostic/helpers/mini_onnx_builder.py
+++ b/onnx_diagnostic/helpers/mini_onnx_builder.py
@@ -52,7 +52,7 @@ def proto_from_array(
 
     tensor = TensorProto()
     tensor.dims.extend(arr_cpu.shape)
-    tensor.name = name
+    tensor.name = name or ""
     itype = dtype_to_tensor_dtype(arr_cpu.dtype)
     assert not hasattr(TensorProto, "INT4") or itype not in {
         TensorProto.INT4,
```
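The `name or ""` change matters because protobuf string fields reject `None`:

```python
from onnx import TensorProto

tensor = TensorProto()
tensor.name = ""        # fine: empty string is the field's default
try:
    tensor.name = None  # what the old line did when no name was given
except TypeError as e:
    print(e)  # protobuf only accepts str/bytes for string fields
```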
```diff
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
     :return: ModelProto
 
     The function raises an error if not supported.
+    An example:
+
+    .. code-block:: python
+
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_onnx_model_from_input_tensors,
+        )
+        import onnx
+
+        proto = create_onnx_model_from_input_tensors(
+            dict(
+                query_states=query_states,
+                key_states=key_states,
+                value_states=value_states,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
+                scaling=self.scaling,
+                attn_output=attn_output,
+            )
+        )
+        onnx.save(proto, "attention_inputs.onnx")
     """
     if switch_low_high is None:
         switch_low_high = sys.byteorder != "big"
```
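The docstring example reuses variables from the model it was lifted from (`query_states`, `self.scaling`, ...). A self-contained variant with dummy values, assuming the function accepts plain floats alongside tensors as the example above suggests:

```python
import onnx
import torch
from onnx_diagnostic.helpers.mini_onnx_builder import (
    create_onnx_model_from_input_tensors,
)

# Dummy inputs standing in for real attention tensors.
proto = create_onnx_model_from_input_tensors(
    dict(query_states=torch.rand(2, 4, 8), scaling=0.125)
)
onnx.save(proto, "attention_inputs.onnx")
```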
```diff
@@ -461,7 +482,17 @@ def _unflatten(
         if spl[-1] == "array":
             return pos + 1, outputs[pos]
         if spl[-1] == "tensor":
-            return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            try:
+                return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            except TypeError:
+                # it should be more robust
+                import ml_dtypes
+
+                if outputs[pos].dtype == ml_dtypes.bfloat16:
+                    return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
+                        torch.bfloat16
+                    )
+                raise
         raise AssertionError(f"Unexpected name {name!r} in {names}")
 
     res: List[Any] = []
```
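The new `except TypeError` branch works around `torch.from_numpy` not understanding `ml_dtypes.bfloat16` arrays; it detours through float64 and casts back. A demonstration of both the failure and the detour (requires `ml_dtypes`):

```python
import ml_dtypes
import numpy as np
import torch

arr = np.array([1.5, 2.5], dtype=ml_dtypes.bfloat16)
try:
    torch.from_numpy(arr)
except TypeError as e:
    print(e)  # bfloat16 is not a numpy dtype torch.from_numpy accepts

# The patched fallback: upcast to float64, convert, cast back.
t = torch.from_numpy(arr.astype(float)).to(torch.bfloat16)
print(t.dtype)  # torch.bfloat16
```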
```diff
@@ -532,6 +563,12 @@ def _unflatten(
             return d
         return ty(res)
 
+    if end and len(res) == 1:
+        if res[0] is None:
+            return next_pos, ty()
+        if isinstance(res[0], tuple) and len(res[0]) == 2 and res[0] == ("dict.", None):
+            return next_pos, ty()
+        return next_pos, _make(ty, res)
     return next_pos, (
         ty() if len(res) == 1 and res[0] in (("dict.", None), None) else _make(ty, res)
     )
```
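This branch restores empty containers: an empty dict or list flattens to a single `None` or `("dict.", None)` placeholder, which must map back to `ty()` rather than a one-element container. A hedged round-trip sketch, assuming empty containers serialize at all, which is what this fix appears to enable:

```python
import onnx
import torch
from onnx_diagnostic.helpers.mini_onnx_builder import (
    create_input_tensors_from_onnx_model,
    create_onnx_model_from_input_tensors,
)

inputs = dict(x=torch.rand(2, 3), past_key_values=[])
onnx.save(create_onnx_model_from_input_tensors(inputs), "empty.onnx")
restored = create_input_tensors_from_onnx_model("empty.onnx")
print(restored["past_key_values"])  # expected: [] rather than [None]
```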
```diff
@@ -557,6 +594,19 @@ def create_input_tensors_from_onnx_model(
     :return: restored data
 
     See example :ref:`l-plot-intermediate-results` for an example.
+
+    .. code-block:: python
+
+        import os
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_input_tensors_from_onnx_model,
+        )
+        from onnx_diagnostic.helpers import string_type
+
+        restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
+        for k, v in restored.items():
+            print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
+
     """
     if engine == "ExtendedReferenceEvaluator":
         from ..reference import ExtendedReferenceEvaluator
```
```diff
@@ -595,6 +645,8 @@ def create_input_tensors_from_onnx_model(
             return float(output[0])
         if name == "tensor":
             return torch.from_numpy(output).to(device)
-
+        assert name.startswith(
+            ("list_", "list.", "dict.", "tuple_", "tuple.")
+        ), f"Unexpected name {name!r} in {names}"
 
     return _unflatten(sep, names, got, device=device)[1]
```
```diff
--- a/onnx_diagnostic/helpers/onnx_helper.py
+++ b/onnx_diagnostic/helpers/onnx_helper.py
@@ -331,7 +331,7 @@ def onnx_dtype_name(itype: int, exc: bool = True) -> str:
         print(onnx_dtype_name(7))
     """
     for k in dir(TensorProto):
-        if k.upper() == k:
+        if k.upper() == k and k != "EXTERNAL":
             v = getattr(TensorProto, k)
             if v == itype:
                 return k
```
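The `k != "EXTERNAL"` filter is needed because `TensorProto` also exposes its `DataLocation` enum members, and `EXTERNAL` shares the value 1 with the `FLOAT` element type; since `dir()` sorts alphabetically, the unfiltered scan resolved 1 to the wrong name:

```python
from onnx import TensorProto

print(TensorProto.EXTERNAL)  # 1 (DataLocation enum member)
print(TensorProto.FLOAT)     # 1 (element type)
# dir(TensorProto) is sorted, so "EXTERNAL" comes before "FLOAT"
# and onnx_dtype_name(1) used to return "EXTERNAL".
```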
```diff
@@ -671,21 +671,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int:  # noqa: F821
     try:
         return oh.np_dtype_to_tensor_dtype(dt)
     except ValueError:
-
-
-
-
-        if
-
-
-
-
-
-
-
-            return TensorProto.FLOAT8E5M2
-        if dt == ml_dtypes.float8_e5m2fnuz:
-            return TensorProto.FLOAT8E5M2FNUZ
+        import ml_dtypes
+
+        if dt == ml_dtypes.bfloat16:
+            return TensorProto.BFLOAT16
+        if dt == ml_dtypes.float8_e4m3fn:
+            return TensorProto.FLOAT8E4M3FN
+        if dt == ml_dtypes.float8_e4m3fnuz:
+            return TensorProto.FLOAT8E4M3FNUZ
+        if dt == ml_dtypes.float8_e5m2:
+            return TensorProto.FLOAT8E5M2
+        if dt == ml_dtypes.float8_e5m2fnuz:
+            return TensorProto.FLOAT8E5M2FNUZ
     if dt == np.float32:
         return TensorProto.FLOAT
     if dt == np.float16:
```