onnx-diagnostic 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +78 -22
  3. onnx_diagnostic/export/api.py +35 -5
  4. onnx_diagnostic/export/control_flow.py +511 -0
  5. onnx_diagnostic/export/control_flow_research.py +135 -0
  6. onnx_diagnostic/ext_test_case.py +33 -9
  7. onnx_diagnostic/helpers/cache_helper.py +217 -203
  8. onnx_diagnostic/helpers/helper.py +6 -2
  9. onnx_diagnostic/helpers/log_helper.py +39 -5
  10. onnx_diagnostic/helpers/memory_peak.py +2 -0
  11. onnx_diagnostic/helpers/mini_onnx_builder.py +55 -3
  12. onnx_diagnostic/helpers/onnx_helper.py +13 -16
  13. onnx_diagnostic/helpers/rt_helper.py +579 -15
  14. onnx_diagnostic/helpers/torch_helper.py +5 -0
  15. onnx_diagnostic/tasks/image_text_to_text.py +5 -1
  16. onnx_diagnostic/tasks/text2text_generation.py +1 -0
  17. onnx_diagnostic/tasks/text_generation.py +84 -54
  18. onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
  19. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
  20. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
  21. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
  22. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +563 -61
  23. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
  24. onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
  25. onnx_diagnostic/torch_models/validate.py +620 -213
  26. {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
  27. {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +30 -28
  28. {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
  29. {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
  30. {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0
@@ -901,13 +901,19 @@ class CubeLogs:
901
901
  else g.groupby([*key_index, *key_columns], dropna=False).sum()
902
902
  )
903
903
  not_unique = r[r["count"] > 1]
904
+ if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
905
+ filename = os.environ.get("DUPLICATE")
906
+ subset = data.set_index([*key_index, *key_columns]).merge(
907
+ not_unique.head(), left_index=True, right_index=True
908
+ )
909
+ subset.to_excel(filename)
904
910
  assert not_unique.shape[0] == 0, (
905
911
  f"view_def.name={view_def.name!r}, "
906
912
  f"unable to run the pivot with index={sorted(key_index)}, "
907
913
  f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
908
914
  f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
909
- f"not unique={set(data.columns) - unique}"
910
- f"\n--\n{not_unique.head(10)}"
915
+ f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
916
+ f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
911
917
  )
912
918
 
913
919
  # pivot
@@ -1000,8 +1006,12 @@ class CubeLogs:
1000
1006
  keys = set(self.keys_time) - {columns_to_fix}
1001
1007
  select = data[self.keys_time]
1002
1008
  select_agg = select.groupby(list(keys)).count()
1009
+ if select_agg.shape[0] == 0:
1010
+ # nothing to fix
1011
+ return data
1003
1012
  assert select_agg[columns_to_fix].max() <= 1, (
1004
- f"Column {columns_to_fix!r} has two distinct values at least for one date\n"
1013
+ f"Column {columns_to_fix!r} has two distinct values at least for one date, "
1014
+ f"max={select_agg[columns_to_fix].max()}\n"
1005
1015
  f"{select_agg[select_agg[columns_to_fix] > 1]}"
1006
1016
  )
1007
1017
 
@@ -1038,6 +1048,16 @@ class CubeLogs:
1038
1048
  f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
1039
1049
  f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
1040
1050
  )
1051
+ select = res[self.keys_time]
1052
+ select_agg = select.groupby(list(keys)).count()
1053
+ if select_agg.shape[0] == 0:
1054
+ # nothing to fix
1055
+ return data
1056
+ # assert select_agg[columns_to_fix].max() <= 1, (
1057
+ # f"Column {columns_to_fix!r} has two distinct values at least for one date, "
1058
+ # f"max={select_agg[columns_to_fix].max()}\n"
1059
+ # f"{select_agg[select_agg[columns_to_fix] > 1]}"
1060
+ # )
1041
1061
  return res
1042
1062
 
1043
1063
  def _dropna(
@@ -1169,7 +1189,8 @@ class CubeLogs:
1169
1189
  assuming they should remain stale
1170
1190
  :param sbs: configurations to compare side-by-side, this adds two tabs,
1171
1191
  one gathering raw data about the two configurations, the other one
1172
- is aggregated by metrics
1192
+ is aggregated by metrics, example:
1193
+ ``=dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
1173
1194
  """
1174
1195
  if verbose:
1175
1196
  print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
@@ -1611,6 +1632,7 @@ class CubeLogsPerformance(CubeLogs):
1611
1632
  "n_node_initializer_small",
1612
1633
  "n_node_layer_normalization",
1613
1634
  "n_node_layer_normalization23",
1635
+ "n_node_random",
1614
1636
  "n_node_reshape",
1615
1637
  "n_node_rotary_embedding",
1616
1638
  "n_node_rotary_embedding23",
@@ -1802,6 +1824,16 @@ class CubeLogsPerformance(CubeLogs):
1802
1824
  + gdf(df, "op_onnx__InstanceNormlization", 0)
1803
1825
  + gdf(df, "op_onnx__GroupNormalization", 0),
1804
1826
  ),
1827
+ n_node_random=lambda df: gpreserve(
1828
+ df,
1829
+ "time_latency_eager",
1830
+ gdf(df, "op_onnx__RandomNormal", 0)
1831
+ + gdf(df, "op_onnx__RandomNormalLike", 0)
1832
+ + gdf(df, "op_onnx__RandomUniform", 0)
1833
+ + gdf(df, "op_onnx__RandomUniformLike", 0)
1834
+ + gdf(df, "op_onnx__Multinomial", 0)
1835
+ + gdf(df, "op_onnx__Bernoulli", 0),
1836
+ ),
1805
1837
  n_node_attention=lambda df: gpreserve(
1806
1838
  df,
1807
1839
  "time_latency_eager",
@@ -1965,7 +1997,9 @@ class CubeLogsPerformance(CubeLogs):
1965
1997
  * **cmd:** command lines
1966
1998
  * **raw-short:** raw data without all the unused columns
1967
1999
  """
1968
- fix_aggregation_change = ["model_speedup_input_set", "model_test_with"]
2000
+ # This does not work.
2001
+ # used to be ["model_speedup_input_set", "model_test_with"]
2002
+ fix_aggregation_change = [] # type: ignore[var-annotated]
1969
2003
  fs = ["suite", "model_suite", "task", "model_name", "model_task"]
1970
2004
  index_cols = self._filter_column(fs, self.keys_time)
1971
2005
  assert index_cols, (
@@ -47,6 +47,8 @@ class Monitor:
47
47
 
48
48
  @property
49
49
  def delta_avg(self):
50
+ if self.n_measures == 0:
51
+ return 0
50
52
  return self.average / self.n_measures - self.begin
51
53
 
52
54
  def __repr__(self):
@@ -52,7 +52,7 @@ def proto_from_array(
52
52
 
53
53
  tensor = TensorProto()
54
54
  tensor.dims.extend(arr_cpu.shape)
55
- tensor.name = name
55
+ tensor.name = name or ""
56
56
  itype = dtype_to_tensor_dtype(arr_cpu.dtype)
57
57
  assert not hasattr(TensorProto, "INT4") or itype not in {
58
58
  TensorProto.INT4,
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
422
422
  :return: ModelProto
423
423
 
424
424
  The function raises an error if not supported.
425
+ An example:
426
+
427
+ .. code-block:: python
428
+
429
+ from onnx_diagnostic.helpers.mini_onnx_builder import (
430
+ create_onnx_model_from_input_tensors,
431
+ )
432
+ import onnx
433
+
434
+ proto = create_onnx_model_from_input_tensors(
435
+ dict(
436
+ query_states=query_states,
437
+ key_states=key_states,
438
+ value_states=value_states,
439
+ cu_seqlens=cu_seqlens,
440
+ max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
441
+ scaling=self.scaling,
442
+ attn_output=attn_output,
443
+ )
444
+ )
445
+ onnx.save(proto, "attention_inputs.onnx")
425
446
  """
426
447
  if switch_low_high is None:
427
448
  switch_low_high = sys.byteorder != "big"
@@ -461,7 +482,17 @@ def _unflatten(
461
482
  if spl[-1] == "array":
462
483
  return pos + 1, outputs[pos]
463
484
  if spl[-1] == "tensor":
464
- return pos + 1, torch.from_numpy(outputs[pos]).to(device)
485
+ try:
486
+ return pos + 1, torch.from_numpy(outputs[pos]).to(device)
487
+ except TypeError:
488
+ # it should be more robust
489
+ import ml_dtypes
490
+
491
+ if outputs[pos].dtype == ml_dtypes.bfloat16:
492
+ return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
493
+ torch.bfloat16
494
+ )
495
+ raise
465
496
  raise AssertionError(f"Unexpected name {name!r} in {names}")
466
497
 
467
498
  res: List[Any] = []
@@ -532,6 +563,12 @@ def _unflatten(
532
563
  return d
533
564
  return ty(res)
534
565
 
566
+ if end and len(res) == 1:
567
+ if res[0] is None:
568
+ return next_pos, ty()
569
+ if isinstance(res[0], tuple) and len(res[0]) == 2 and res[0] == ("dict.", None):
570
+ return next_pos, ty()
571
+ return next_pos, _make(ty, res)
535
572
  return next_pos, (
536
573
  ty() if len(res) == 1 and res[0] in (("dict.", None), None) else _make(ty, res)
537
574
  )
@@ -557,6 +594,19 @@ def create_input_tensors_from_onnx_model(
557
594
  :return: restored data
558
595
 
559
596
  See example :ref:`l-plot-intermediate-results` for an example.
597
+
598
+ .. code-block:: python
599
+
600
+ import os
601
+ from onnx_diagnostic.helpers.mini_onnx_builder import (
602
+ create_input_tensors_from_onnx_model,
603
+ )
604
+ from onnx_diagnostic.helpers import string_type
605
+
606
+ restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
607
+ for k, v in restored.items():
608
+ print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
609
+
560
610
  """
561
611
  if engine == "ExtendedReferenceEvaluator":
562
612
  from ..reference import ExtendedReferenceEvaluator
@@ -595,6 +645,8 @@ def create_input_tensors_from_onnx_model(
595
645
  return float(output[0])
596
646
  if name == "tensor":
597
647
  return torch.from_numpy(output).to(device)
598
- raise AssertionError(f"Unexpected name {name!r} in {names}")
648
+ assert name.startswith(
649
+ ("list_", "list.", "dict.", "tuple_", "tuple.")
650
+ ), f"Unexpected name {name!r} in {names}"
599
651
 
600
652
  return _unflatten(sep, names, got, device=device)[1]
@@ -331,7 +331,7 @@ def onnx_dtype_name(itype: int, exc: bool = True) -> str:
331
331
  print(onnx_dtype_name(7))
332
332
  """
333
333
  for k in dir(TensorProto):
334
- if "FLOAT" in k or "INT" in k or "TEXT" in k or "BOOL" in k:
334
+ if k.upper() == k and k != "EXTERNAL":
335
335
  v = getattr(TensorProto, k)
336
336
  if v == itype:
337
337
  return k
@@ -671,21 +671,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int: # noqa: F821
671
671
  try:
672
672
  return oh.np_dtype_to_tensor_dtype(dt)
673
673
  except ValueError:
674
- try:
675
- import ml_dtypes
676
- except ImportError:
677
- ml_dtypes = None # type: ignore
678
- if ml_dtypes is not None:
679
- if dt == ml_dtypes.bfloat16:
680
- return TensorProto.BFLOAT16
681
- if dt == ml_dtypes.float8_e4m3fn:
682
- return TensorProto.FLOAT8E4M3FN
683
- if dt == ml_dtypes.float8_e4m3fnuz:
684
- return TensorProto.FLOAT8E4M3FNUZ
685
- if dt == ml_dtypes.float8_e5m2:
686
- return TensorProto.FLOAT8E5M2
687
- if dt == ml_dtypes.float8_e5m2fnuz:
688
- return TensorProto.FLOAT8E5M2FNUZ
674
+ import ml_dtypes
675
+
676
+ if dt == ml_dtypes.bfloat16:
677
+ return TensorProto.BFLOAT16
678
+ if dt == ml_dtypes.float8_e4m3fn:
679
+ return TensorProto.FLOAT8E4M3FN
680
+ if dt == ml_dtypes.float8_e4m3fnuz:
681
+ return TensorProto.FLOAT8E4M3FNUZ
682
+ if dt == ml_dtypes.float8_e5m2:
683
+ return TensorProto.FLOAT8E5M2
684
+ if dt == ml_dtypes.float8_e5m2fnuz:
685
+ return TensorProto.FLOAT8E5M2FNUZ
689
686
  if dt == np.float32:
690
687
  return TensorProto.FLOAT
691
688
  if dt == np.float16: