onnx-diagnostic 0.8.0-py3-none-any.whl → 0.8.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +78 -22
- onnx_diagnostic/export/api.py +35 -5
- onnx_diagnostic/export/control_flow.py +511 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/ext_test_case.py +33 -9
- onnx_diagnostic/helpers/cache_helper.py +217 -203
- onnx_diagnostic/helpers/helper.py +6 -2
- onnx_diagnostic/helpers/log_helper.py +39 -5
- onnx_diagnostic/helpers/memory_peak.py +2 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +55 -3
- onnx_diagnostic/helpers/onnx_helper.py +13 -16
- onnx_diagnostic/helpers/rt_helper.py +579 -15
- onnx_diagnostic/helpers/torch_helper.py +5 -0
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/tasks/text2text_generation.py +1 -0
- onnx_diagnostic/tasks/text_generation.py +84 -54
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +563 -61
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- onnx_diagnostic/torch_models/validate.py +620 -213
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +30 -28
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0
```diff
--- a/onnx_diagnostic/helpers/log_helper.py
+++ b/onnx_diagnostic/helpers/log_helper.py
@@ -901,13 +901,19 @@ class CubeLogs:
             else g.groupby([*key_index, *key_columns], dropna=False).sum()
         )
         not_unique = r[r["count"] > 1]
+        if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
+            filename = os.environ.get("DUPLICATE")
+            subset = data.set_index([*key_index, *key_columns]).merge(
+                not_unique.head(), left_index=True, right_index=True
+            )
+            subset.to_excel(filename)
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
             f"unable to run the pivot with index={sorted(key_index)}, "
             f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
             f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
-            f"not unique={set(data.columns) - unique}"
-            f"\n--\n{not_unique.head(10)}"
+            f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
+            f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
         )
 
         # pivot
```
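The new `DUPLICATE` escape hatch is plain pandas. A minimal sketch of the same pattern on a toy frame (the column names are made up for illustration; writing `.xlsx` requires `openpyxl`):

```python
import os
import pandas as pd

# Toy stand-in for CubeLogs' data: two rows share the same key.
data = pd.DataFrame(
    {"exporter": ["E1", "E1", "E2"], "model": ["m", "m", "m"], "latency": [1.0, 2.0, 3.0]}
)
r = data.groupby(["exporter", "model"]).agg(count=("latency", "count"))
not_unique = r[r["count"] > 1]
if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
    # Join the offending keys back onto the raw rows and dump them
    # to the file named by the environment variable.
    subset = data.set_index(["exporter", "model"]).merge(
        not_unique.head(), left_index=True, right_index=True
    )
    subset.to_excel(os.environ["DUPLICATE"])
```

Run with `DUPLICATE=dups.xlsx` set to get the offending rows on disk instead of only in the assertion message.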
```diff
@@ -1000,8 +1006,12 @@ class CubeLogs:
         keys = set(self.keys_time) - {columns_to_fix}
         select = data[self.keys_time]
         select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
         assert select_agg[columns_to_fix].max() <= 1, (
-            f"Column {columns_to_fix!r} has two distinct values at least for one date"
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
             f"{select_agg[select_agg[columns_to_fix] > 1]}"
         )
 
```
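The early return is not just an optimization: on an empty selection, `groupby(...).count()` is empty, its `max()` is NaN, and `NaN <= 1` is `False`, so the assertion would fire with nothing to report. A small demonstration with placeholder column names:

```python
import pandas as pd

select = pd.DataFrame(columns=["version_date", "version"])  # no rows at all
select_agg = select.groupby(["version_date"]).count()
print(select_agg.shape[0])               # 0 -> nothing to fix
print(select_agg["version"].max())       # nan
print(select_agg["version"].max() <= 1)  # False: the assert would fail spuriously
```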
```diff
@@ -1038,6 +1048,16 @@ class CubeLogs:
             f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
             f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
         )
+        select = res[self.keys_time]
+        select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
+        # assert select_agg[columns_to_fix].max() <= 1, (
+        #     f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+        #     f"max={select_agg[columns_to_fix].max()}\n"
+        #     f"{select_agg[select_agg[columns_to_fix] > 1]}"
+        # )
         return res
 
     def _dropna(
```
```diff
@@ -1169,7 +1189,8 @@ class CubeLogs:
             assuming they should remain stale
         :param sbs: configurations to compare side-by-side, this adds two tabs,
             one gathering raw data about the two configurations, the other one
-            is aggregated by metrics
+            is aggregated by metrics, example:
+            ``=dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
         """
         if verbose:
             print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
```
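For context, a hypothetical call using the documented `sbs` format; `cube` stands for an already-built `CubeLogs` instance, and the `exporter`/`opt` keys are illustrative, not prescribed by the library:

```python
# Hypothetical usage sketch: `cube` is assumed to be a CubeLogs built
# elsewhere; "exporter" and "opt" must be key columns of its data.
cube.to_excel(
    "report.xlsx",
    sbs=dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O")),
)
```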
```diff
@@ -1611,6 +1632,7 @@ class CubeLogsPerformance(CubeLogs):
             "n_node_initializer_small",
             "n_node_layer_normalization",
             "n_node_layer_normalization23",
+            "n_node_random",
             "n_node_reshape",
             "n_node_rotary_embedding",
             "n_node_rotary_embedding23",
```
```diff
@@ -1802,6 +1824,16 @@ class CubeLogsPerformance(CubeLogs):
                 + gdf(df, "op_onnx__InstanceNormlization", 0)
                 + gdf(df, "op_onnx__GroupNormalization", 0),
             ),
+            n_node_random=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__RandomNormal", 0)
+                + gdf(df, "op_onnx__RandomNormalLike", 0)
+                + gdf(df, "op_onnx__RandomUniform", 0)
+                + gdf(df, "op_onnx__RandomUniformLike", 0)
+                + gdf(df, "op_onnx__Multinomial", 0)
+                + gdf(df, "op_onnx__Bernoulli", 0),
+            ),
             n_node_attention=lambda df: gpreserve(
                 df,
                 "time_latency_eager",
```
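`gdf` and `gpreserve` are internal helpers that do not appear in this diff; roughly, the new column sums the per-model counts of the random ONNX operators, defaulting to 0 when a column is missing. A rough pandas sketch of that idea (the `gdf` stand-in below is an assumption, not the library's implementation):

```python
import pandas as pd

# One op_onnx__* column per ONNX operator type, NaN when absent.
df = pd.DataFrame(
    {"op_onnx__RandomNormal": [2.0, float("nan")], "op_onnx__Bernoulli": [float("nan"), 1.0]}
)

def gdf(df, name, default=0):
    # Stand-in: the column if present (NaN -> default), else the default.
    return df[name].fillna(default) if name in df.columns else default

n_node_random = (
    gdf(df, "op_onnx__RandomNormal", 0)
    + gdf(df, "op_onnx__RandomNormalLike", 0)
    + gdf(df, "op_onnx__RandomUniform", 0)
    + gdf(df, "op_onnx__RandomUniformLike", 0)
    + gdf(df, "op_onnx__Multinomial", 0)
    + gdf(df, "op_onnx__Bernoulli", 0)
)
print(n_node_random.tolist())  # [2.0, 1.0]: random nodes per model
```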
```diff
@@ -1965,7 +1997,9 @@ class CubeLogsPerformance(CubeLogs):
         * **cmd:** command lines
         * **raw-short:** raw data without all the unused columns
         """
-        fix_aggregation_change = ["model_speedup_input_set", "model_test_with"]
+        # This does not work.
+        # used to be ["model_speedup_input_set", "model_test_with"]
+        fix_aggregation_change = []  # type: ignore[var-annotated]
         fs = ["suite", "model_suite", "task", "model_name", "model_task"]
         index_cols = self._filter_column(fs, self.keys_time)
         assert index_cols, (
```
```diff
--- a/onnx_diagnostic/helpers/mini_onnx_builder.py
+++ b/onnx_diagnostic/helpers/mini_onnx_builder.py
@@ -52,7 +52,7 @@ def proto_from_array(
 
     tensor = TensorProto()
     tensor.dims.extend(arr_cpu.shape)
-    tensor.name = name
+    tensor.name = name or ""
     itype = dtype_to_tensor_dtype(arr_cpu.dtype)
     assert not hasattr(TensorProto, "INT4") or itype not in {
         TensorProto.INT4,
```
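The `name or ""` change matters because protobuf string fields reject `None`:

```python
from onnx import TensorProto

tensor = TensorProto()
tensor.name = ""        # fine: empty string is the field's default
try:
    tensor.name = None  # what the old line did when no name was given
except TypeError as e:
    print(e)  # protobuf only accepts str/bytes for string fields
```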
```diff
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
     :return: ModelProto
 
     The function raises an error if not supported.
+    An example:
+
+    .. code-block:: python
+
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_onnx_model_from_input_tensors,
+        )
+        import onnx
+
+        proto = create_onnx_model_from_input_tensors(
+            dict(
+                query_states=query_states,
+                key_states=key_states,
+                value_states=value_states,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
+                scaling=self.scaling,
+                attn_output=attn_output,
+            )
+        )
+        onnx.save(proto, "attention_inputs.onnx")
     """
     if switch_low_high is None:
         switch_low_high = sys.byteorder != "big"
```
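The docstring example reuses variables from the model it was lifted from (`query_states`, `self.scaling`, ...). A self-contained variant with dummy values, assuming the function accepts plain floats alongside tensors as the example above suggests:

```python
import onnx
import torch
from onnx_diagnostic.helpers.mini_onnx_builder import (
    create_onnx_model_from_input_tensors,
)

# Dummy inputs standing in for real attention tensors.
proto = create_onnx_model_from_input_tensors(
    dict(query_states=torch.rand(2, 4, 8), scaling=0.125)
)
onnx.save(proto, "attention_inputs.onnx")
```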
```diff
@@ -461,7 +482,17 @@ def _unflatten(
         if spl[-1] == "array":
             return pos + 1, outputs[pos]
         if spl[-1] == "tensor":
-            return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            try:
+                return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            except TypeError:
+                # it should be more robust
+                import ml_dtypes
+
+                if outputs[pos].dtype == ml_dtypes.bfloat16:
+                    return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
+                        torch.bfloat16
+                    )
+                raise
         raise AssertionError(f"Unexpected name {name!r} in {names}")
 
     res: List[Any] = []
```
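The new `except TypeError` branch works around `torch.from_numpy` not understanding `ml_dtypes.bfloat16` arrays; it detours through float64 and casts back. A demonstration of both the failure and the detour (requires `ml_dtypes`):

```python
import ml_dtypes
import numpy as np
import torch

arr = np.array([1.5, 2.5], dtype=ml_dtypes.bfloat16)
try:
    torch.from_numpy(arr)
except TypeError as e:
    print(e)  # bfloat16 is not a numpy dtype torch.from_numpy accepts

# The patched fallback: upcast to float64, convert, cast back.
t = torch.from_numpy(arr.astype(float)).to(torch.bfloat16)
print(t.dtype)  # torch.bfloat16
```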
```diff
@@ -532,6 +563,12 @@ def _unflatten(
             return d
         return ty(res)
 
+    if end and len(res) == 1:
+        if res[0] is None:
+            return next_pos, ty()
+        if isinstance(res[0], tuple) and len(res[0]) == 2 and res[0] == ("dict.", None):
+            return next_pos, ty()
+        return next_pos, _make(ty, res)
     return next_pos, (
         ty() if len(res) == 1 and res[0] in (("dict.", None), None) else _make(ty, res)
     )
```
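This branch restores empty containers: an empty dict or list flattens to a single `None` or `("dict.", None)` placeholder, which must map back to `ty()` rather than a one-element container. A hedged round-trip sketch, assuming empty containers serialize at all, which is what this fix appears to enable:

```python
import onnx
import torch
from onnx_diagnostic.helpers.mini_onnx_builder import (
    create_input_tensors_from_onnx_model,
    create_onnx_model_from_input_tensors,
)

inputs = dict(x=torch.rand(2, 3), past_key_values=[])
onnx.save(create_onnx_model_from_input_tensors(inputs), "empty.onnx")
restored = create_input_tensors_from_onnx_model("empty.onnx")
print(restored["past_key_values"])  # expected: [] rather than [None]
```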
```diff
@@ -557,6 +594,19 @@ def create_input_tensors_from_onnx_model(
     :return: restored data
 
     See example :ref:`l-plot-intermediate-results` for an example.
+
+    .. code-block:: python
+
+        import os
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_input_tensors_from_onnx_model,
+        )
+        from onnx_diagnostic.helpers import string_type
+
+        restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
+        for k, v in restored.items():
+            print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
+
     """
     if engine == "ExtendedReferenceEvaluator":
         from ..reference import ExtendedReferenceEvaluator
```
```diff
@@ -595,6 +645,8 @@ def create_input_tensors_from_onnx_model(
             return float(output[0])
         if name == "tensor":
             return torch.from_numpy(output).to(device)
-
+        assert name.startswith(
+            ("list_", "list.", "dict.", "tuple_", "tuple.")
+        ), f"Unexpected name {name!r} in {names}"
 
     return _unflatten(sep, names, got, device=device)[1]
```
```diff
--- a/onnx_diagnostic/helpers/onnx_helper.py
+++ b/onnx_diagnostic/helpers/onnx_helper.py
@@ -331,7 +331,7 @@ def onnx_dtype_name(itype: int, exc: bool = True) -> str:
         print(onnx_dtype_name(7))
     """
     for k in dir(TensorProto):
-        if k.upper() == k:
+        if k.upper() == k and k != "EXTERNAL":
             v = getattr(TensorProto, k)
             if v == itype:
                 return k
```
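The `k != "EXTERNAL"` filter is needed because `TensorProto` also exposes its `DataLocation` enum members, and `EXTERNAL` shares the value 1 with the `FLOAT` element type; since `dir()` sorts alphabetically, the unfiltered scan resolved 1 to the wrong name:

```python
from onnx import TensorProto

print(TensorProto.EXTERNAL)  # 1 (DataLocation enum member)
print(TensorProto.FLOAT)     # 1 (element type)
# dir(TensorProto) is sorted, so "EXTERNAL" comes before "FLOAT"
# and onnx_dtype_name(1) used to return "EXTERNAL".
```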
```diff
@@ -671,21 +671,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int:  # noqa: F821
     try:
         return oh.np_dtype_to_tensor_dtype(dt)
     except ValueError:
-
-
-
-
-        if
-
-
-
-
-
-
-
-            return TensorProto.FLOAT8E5M2
-        if dt == ml_dtypes.float8_e5m2fnuz:
-            return TensorProto.FLOAT8E5M2FNUZ
+        import ml_dtypes
+
+        if dt == ml_dtypes.bfloat16:
+            return TensorProto.BFLOAT16
+        if dt == ml_dtypes.float8_e4m3fn:
+            return TensorProto.FLOAT8E4M3FN
+        if dt == ml_dtypes.float8_e4m3fnuz:
+            return TensorProto.FLOAT8E4M3FNUZ
+        if dt == ml_dtypes.float8_e5m2:
+            return TensorProto.FLOAT8E5M2
+        if dt == ml_dtypes.float8_e5m2fnuz:
+            return TensorProto.FLOAT8E5M2FNUZ
     if dt == np.float32:
         return TensorProto.FLOAT
     if dt == np.float16:
```