onnx-diagnostic 0.7.11__py3-none-any.whl → 0.7.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +5 -2
- onnx_diagnostic/helpers/log_helper.py +65 -12
- onnx_diagnostic/helpers/rt_helper.py +53 -36
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +11 -0
- onnx_diagnostic/torch_models/hghub/hub_api.py +4 -10
- onnx_diagnostic/torch_models/hghub/model_inputs.py +1 -1
- onnx_diagnostic/torch_models/validate.py +223 -77
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/RECORD +13 -13
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED
onnx_diagnostic/_command_lines_parser.py
CHANGED

@@ -581,6 +581,7 @@ def _cmd_validate(argv: List[Any]):
     ):
         print(f"validate - unsupported args: export={args.export!r}, opt={args.opt!r}")
         return
+    patch_dict = args.patch if isinstance(args.patch, dict) else {"patch": args.patch}
     summary, _data = validate_model(
         model_id=args.mid,
         task=args.task,
@@ -591,8 +592,8 @@ def _cmd_validate(argv: List[Any]):
         use_pretrained=args.trained,
         dtype=args.dtype,
         device=args.device,
-        patch=args.patch,
-        rewrite=args.rewrite,
+        patch=patch_dict,
+        rewrite=args.rewrite and patch_dict.get("patch", True),
         stop_if_static=args.stop_if_static,
         optimization=args.opt,
         exporter=args.export,
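With this change, the patch option accepts either a boolean-like value or a dictionary of individual patch switches, and rewriting is disabled whenever patching is turned off. A minimal sketch of the normalization (the value assigned to `args_patch` below is hypothetical):

    # args.patch may be a plain flag or a dict of patch switches
    args_patch = {"patch_transformers": True, "patch": True}
    patch_dict = args_patch if isinstance(args_patch, dict) else {"patch": args_patch}
    rewrite = True and patch_dict.get("patch", True)  # becomes False once patching is off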
@@ -827,6 +828,8 @@ def get_parser_agg() -> ArgumentParser:
         "n_model_running,n_model_acc01,n_model_acc001,n_model_dynamic,"
         "n_model_pass,n_model_faster,"
         "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
+        "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
+        "n_node_layer_normalization,n_node_layer_normalization23,"
         "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
         "n_node_constant,n_node_shape,n_node_expand,"
         "n_node_function,n_node_initializer,n_node_scatter,"
onnx_diagnostic/helpers/log_helper.py
CHANGED

@@ -285,7 +285,8 @@ class CubePlot:
         nn = df.shape[1] // n_cols
         nn += int(df.shape[1] % n_cols != 0)
         ratio = float(os.environ.get("FIGSIZEH", "1"))
-        fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * (2.5 + df.shape[0] / 15) * ratio))
+        figsize = (6 * n_cols, nn * (2.5 + df.shape[0] / 15) * ratio)
+        fig, axs = plt.subplots(nn, n_cols, figsize=figsize)
         pos = 0
         imgs = []
         for c in self._make_loop(df.columns, verbose):
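Both plotting paths now compute the figure size the same way and honor the FIGSIZEH environment variable as a vertical scale factor, e.g.:

    import os
    os.environ["FIGSIZEH"] = "1.5"  # stretch CubePlot figures vertically by 50%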
@@ -332,10 +333,12 @@ class CubePlot:
         n_cols = len(groups)

         title_suffix = f"\n{title_suffix}" if title_suffix else ""
+        ratio = float(os.environ.get("FIGSIZEH", "1"))
+        figsize = (5 * n_cols, max(len(g) for g in groups) * (2 + df.shape[1] / 2) * ratio)
         fig, axs = plt.subplots(
             df.shape[1],
             n_cols,
-            figsize=(5 * n_cols, max(len(g) for g in groups) * (2 + df.shape[1] / 2)),
+            figsize=figsize,
             sharex=True,
             sharey="row" if n_cols > 1 else False,
         )
@@ -877,7 +880,11 @@ class CubeLogs:
             print(f"[CubeLogs.view] key_columns={key_columns}")
         g = data[[*key_index, *key_columns]].copy()
         g["count"] = 1
-        r = g.groupby([*key_index, *key_columns], dropna=False).sum()
+        r = (
+            g.copy()
+            if not key_index and not key_columns
+            else g.groupby([*key_index, *key_columns], dropna=False).sum()
+        )
         not_unique = r[r["count"] > 1]
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
@@ -1505,6 +1512,11 @@ class CubeLogsPerformance(CubeLogs):
             "n_model_faster3x",
             "n_model_faster4x",
             "n_node_attention",
+            "n_node_attention23",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
             "n_node_control_flow",
             "n_node_scatter",
             "n_node_function",
@@ -1568,7 +1580,9 @@ class CubeLogsPerformance(CubeLogs):

         def gdf(df, cname, default_value=np.nan):
             if cname in df.columns:
-                return df[cname].fillna(default_value)
+                if np.isnan(default_value):
+                    return df[cname]
+                return df[cname].fillna(default_value)
             return pandas.Series(default_value, index=df.index)

         def ghas_value(df, cname):
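The fix returns the column untouched when the default is NaN instead of calling `fillna` with NaN. Since `gdf` is local to the method, the following is illustrative only:

    import numpy as np
    import pandas

    df = pandas.DataFrame({"a": [1.0, np.nan]})
    gdf(df, "a")       # column returned unchanged, NaN preserved
    gdf(df, "a", 0.0)  # -> [1.0, 0.0]
    gdf(df, "b", 0.0)  # missing column -> Series of 0.0 aligned on df.index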
@@ -1676,15 +1690,54 @@ class CubeLogsPerformance(CubeLogs):
                 "time_latency",
                 gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
             ),
+            n_node_attention23=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__Attention")
+            ),
+            n_node_rotary_embedding23=lambda df: gpreserve(
+                df, "time_latency_eager", gdf(df, "op_onnx__RotaryEmbedding")
+            ),
+            n_node_layer_normalization23=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx__LayerNormalization", 0)
+                + gdf(df, "op_onnx__RMSNormalization", 0)
+                + gdf(df, "op_onnx__BatchNormlization", 0)
+                + gdf(df, "op_onnx__InstanceNormlization", 0)
+                + gdf(df, "op_onnx__GroupNormalization", 0),
+            ),
             n_node_attention=lambda df: gpreserve(
                 df,
-                "…",
-                gdf(df, "op_onnx_com.microsoft_Attention")
-                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention")
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_Attention", 0)
+                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PagedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_LongformerAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedSelfAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
+            ),
+            n_node_layer_normalization=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_EmbedLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SkipLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_LayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SkipSimplifiedLayerNormalization", 0)
+                + gdf(df, "op_onnx_com.microsoft_SimplifiedLayerNormalization", 0),
+            ),
+            n_node_rotary_embedding=lambda df: gpreserve(
+                df,
+                "time_latency_eager",
+                gdf(df, "op_onnx_com.microsoft_GemmaRotaryEmbedding", 0)
+                + gdf(df, "op_onnx_com.microsoft_RotaryEmbedding", 0),
             ),
             n_node_control_flow=lambda df: gpreserve(
                 df,
-                "…",
+                "time_latency_eager",
                 (
                     gdf(df, "op_onnx__If", 0)
                     + gdf(df, "op_onnx__Scan", 0)
@@ -1693,7 +1746,7 @@ class CubeLogsPerformance(CubeLogs):
             ),
             n_node_scatter=lambda df: gpreserve(
                 df,
-                "…",
+                "time_latency_eager",
                 gdf(df, "op_onnx__ScatterND", 0) + gdf(df, "op_onnx__ScatterElements", 0),
             ),
             n_node_function=lambda df: gpreserve(
@@ -1706,13 +1759,13 @@ class CubeLogsPerformance(CubeLogs):
                 df, "onnx_n_initializer", gdf(df, "onnx_n_initializer")
             ),
             n_node_constant=lambda df: gpreserve(
-                df, "…", gdf(df, "op_onnx__Constant")
+                df, "time_latency_eager", gdf(df, "op_onnx__Constant")
             ),
             n_node_shape=lambda df: gpreserve(
-                df, "…", gdf(df, "op_onnx__Shape")
+                df, "time_latency_eager", gdf(df, "op_onnx__Shape")
             ),
             n_node_expand=lambda df: gpreserve(
-                df, "…", gdf(df, "op_onnx__Expand")
+                df, "time_latency_eager", gdf(df, "op_onnx__Expand")
             ),
         )
         assert (
onnx_diagnostic/helpers/rt_helper.py
CHANGED

@@ -3,7 +3,6 @@ import numpy as np
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .onnx_helper import dtype_to_tensor_dtype
 from .cache_helper import is_cache_dynamic_registered


@@ -23,6 +22,7 @@ def make_feeds(
     use_numpy: bool = False,
     copy: bool = False,
     check_flatten: bool = True,
+    is_modelbuilder: bool = False,
 ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
     """
     Serializes the inputs to produce feeds expected
@@ -35,10 +35,15 @@ def make_feeds(
         by ``OrtValue``
     :param check_flatten: if True, checks the ``torch.utils._pytree.tree_flatten``
         returns the same number of outputs
+    :param is_modelbuilder: if True, the exporter is ModelBuilder, and we need to reorder
+        the past_key_values inputs to match the expected order, and get rid of position_ids.
     :return: feeds dictionary
     """
-    # position_ids is a special case because ModelBuilder does not usually use it
-    # …
+    # NOTE: position_ids is a special case because ModelBuilder does not usually use it,
+    # because it's fused into the rotary embedding in GQA.
+    if is_modelbuilder and isinstance(inputs, dict):
+        inputs.pop("position_ids", None)  # drop 'position_ids' if present, no error otherwise
+
     flat = flatten_object(inputs, drop_keys=True)
     assert (
         not check_flatten
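A hedged usage sketch matching how `validate_model` now builds ONNX Runtime feeds for a ModelBuilder export; the model path and the `inputs` dictionary are placeholders:

    import onnxruntime

    sess = onnxruntime.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    feeds = make_feeds(sess, inputs, use_numpy=True, check_flatten=False, is_modelbuilder=True)
    got = sess.run(None, feeds)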
@@ -76,39 +81,6 @@ def make_feeds(
         f"\n-- inputs={string_type(inputs, with_shape=True)}"
         f"\n-- names={names}"
     )
-    if len(names) < len(flat) and (
-        isinstance(proto, onnx.ModelProto) or hasattr(proto, "get_inputs")
-    ):
-
-        typed_names = (
-            [(i.name, i.type.tensor_type.elem_type) for i in proto.graph.input]
-            if isinstance(proto, onnx.ModelProto)
-            else [(i.name, name_type_to_onnx_dtype(i.type)) for i in proto.get_inputs()]
-        )
-
-        new_flat = []
-        pos = 0
-        for _name, dtype in typed_names:
-            assert isinstance(
-                dtype, int
-            ), f"Unexpected value for dtype={dtype!r}, type(proto)={type(proto)}"
-            itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            while dtype != itype:
-                pos += 1
-                if pos >= len(flat):
-                    break
-                itype = dtype_to_tensor_dtype(flat[pos].dtype)
-            if pos >= len(flat):
-                break
-            new_flat.append(flat[pos])
-            pos += 1
-        assert len(new_flat) == len(names), (
-            f"Unable to align expected input {names} with the given input, "
-            f"type(proto)={type(proto)}"
-            f"\n-- inputs: {string_type(inputs, with_shape=True)}"
-            f"\n-- typed_names: {typed_names}"
-        )
-        flat = new_flat

     if copy:
         flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
@@ -122,4 +94,49 @@ def make_feeds(
         elif isinstance(i, float):
             i = np.array(i, dtype=np.float32)
         new_flat.append(i)
+
+    # NOTE: model builder has a different order for past_key_values,
+    # we need to reorder them to match the expected order.
+    if is_modelbuilder:
+        # We assume "past_key_values" appears in the input names when the
+        # exporter is ModelBuilder.
+        non_past_kv_input_names = [n for n in names if "past_key_values" not in n]
+        past_kv_names = [n for n in names if "past_key_values" in n]
+        reorder_past_kv_names = reorder_modelbuilder_cache_to_torch(past_kv_names)
+        names = non_past_kv_input_names + reorder_past_kv_names
     return dict(zip(names, new_flat))
+
+
+def reorder_modelbuilder_cache_to_torch(past_kv: List[Any]) -> List[Any]:
+    """
+    Reorders the past_kvs for ModelBuilder to match the expected order
+    by PyTorch exported models.
+
+    .. note::
+        This function can take either the names or the actual tensors
+        as long as they are in a list.
+
+    Conceptually,
+
+    From::
+
+        [past_key_values.0.key, past_key_values.0.value,
+         past_key_values.1.key, past_key_values.1.value, ...]
+
+    To::
+
+        [past_key_values.0.key, past_key_values.1.key,
+         ..., past_key_values.0.value, past_key_values.1.value, ...]
+
+    :param past_kv: list of flattened inputs
+    :return: reordered list of flattened inputs
+    """
+    total_len = len(past_kv)
+    if total_len % 2 != 0:
+        raise ValueError("The length of past_key_values should be even.")
+    keys = []
+    values = []
+    for i in range(0, total_len, 2):
+        keys.append(past_kv[i])
+        values.append(past_kv[i + 1])
+    return keys + values
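A quick illustration of the new helper on input names; the expected result follows from the docstring:

    names = [
        "past_key_values.0.key", "past_key_values.0.value",
        "past_key_values.1.key", "past_key_values.1.value",
    ]
    reorder_modelbuilder_cache_to_torch(names)
    # ['past_key_values.0.key', 'past_key_values.1.key',
    #  'past_key_values.0.value', 'past_key_values.1.value']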
onnx_diagnostic/torch_export_patches/onnx_export_errors.py
CHANGED

@@ -254,6 +254,17 @@ def torch_export_patches(
     may appear ``AssertionError: Mutating module attribute _seen_tokens during export.``.
     It can be avoided by setting ``strict=False`` when calling :func:`torch.export.export`.
     """
+    if verbose:
+        print(f"[torch_export_patches] patch_sympy={patch_sympy!r}")
+        print(f"  . patch_torch={patch_torch!r}")
+        print(f"  . patch_transformers={patch_transformers!r}")
+        print(f"  . patch_diffusers={patch_diffusers!r}")
+        print(f"  . catch_constraints={catch_constraints!r}")
+        print(f"  . stop_if_static={stop_if_static!r}")
+        print(f"  . patch={patch!r}")
+        print(f"  . custom_patches={custom_patches!r}")
+        print(f"[torch_export_patches] dump_rewriting={dump_rewriting!r}")
+
     if rewrite:
         from .patch_module import torch_export_rewrite

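A hedged usage sketch: `torch_export_patches` is applied as a context manager around the export, and `model`, `args`, and `ds` are placeholders here:

    import torch
    from onnx_diagnostic.torch_export_patches import torch_export_patches

    with torch_export_patches(patch_transformers=True, verbose=1):
        ep = torch.export.export(model, args, dynamic_shapes=ds)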
onnx_diagnostic/torch_models/hghub/hub_api.py
CHANGED

@@ -289,21 +289,17 @@ def task_from_tags(tags: Union[str, List[str]]) -> str:

 def enumerate_model_list(
     n: int = 50,
-    …
-    library: Optional[str] = None,
-    tags: Optional[Union[str, List[str]]] = None,
+    pipeline_tag: Optional[str] = None,
     search: Optional[str] = None,
     dump: Optional[str] = None,
-    filter: Optional[str] = None,
+    filter: Optional[Union[str, List[str]]] = None,
     verbose: int = 0,
 ):
     """
     Enumerates models coming from :epkg:`huggingface_hub`.

     :param n: number of models to retrieve (-1 for all)
-    :param …
-    :param tags: see :meth:`huggingface_hub.HfApi.list_models`
-    :param library: see :meth:`huggingface_hub.HfApi.list_models`
+    :param pipeline_tag: see :meth:`huggingface_hub.HfApi.list_models`
     :param search: see :meth:`huggingface_hub.HfApi.list_models`
     :param filter: see :meth:`huggingface_hub.HfApi.list_models`
     :param dump: dumps the result in this csv file
@@ -311,9 +307,7 @@ def enumerate_model_list(
     """
     api = HfApi()
     models = api.list_models(
-        …
-        library=library,
-        tags=tags,
+        pipeline_tag=pipeline_tag,
         search=search,
         full=True,
         filter=filter,
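Hedged usage with the new signature; the argument values are illustrative:

    for info in enumerate_model_list(n=10, pipeline_tag="text-generation", search="llama", verbose=1):
        print(info)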
onnx_diagnostic/torch_models/hghub/model_inputs.py
CHANGED

@@ -189,7 +189,7 @@ def get_untrained_model_with_inputs(
             f"subfolder={subfolder!r}"
         )
         model = transformers.AutoModel.from_pretrained(
-            model_id, subfolder=subfolder, trust_remote_code=True, **mkwargs
+            model_id, subfolder=subfolder or "", trust_remote_code=True, **mkwargs
         )
         if verbose:
             print(
onnx_diagnostic/torch_models/validate.py
CHANGED

@@ -3,7 +3,7 @@ import inspect
 import os
 import pprint
 import sys
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 import time
 import numpy as np
 import onnx
@@ -11,7 +11,7 @@ import torch
 from ..export import CoupleInputsDynamicShapes
 from ..helpers import max_diff, string_type, string_diff
 from ..helpers.helper import flatten_object
-from ..helpers.rt_helper import make_feeds
+from ..helpers.rt_helper import make_feeds, reorder_modelbuilder_cache_to_torch
 from ..helpers.torch_helper import to_any, torch_deepcopy
 from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 from ..tasks import random_input_kwargs
@@ -112,6 +112,7 @@ def _make_folder_name(
     device: Optional[Union[str, torch.device]] = None,
     subfolder: Optional[str] = None,
     opset: Optional[int] = None,
+    drop_inputs: Optional[List[str]] = None,
 ) -> str:
     "Creates a filename unique based on the given options."
     els = [model_id.replace("/", "_")]
@@ -137,6 +138,9 @@ def _make_folder_name(
         els.append(sdev)
     if opset is not None:
         els.append(f"op{opset}")
+    if drop_inputs:
+        ii = "-".join(f"{s[0]}{s[-1]}" for s in drop_inputs)
+        els.append(f"I-{ii.upper()}")
     return "-".join(els)
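Each dropped input contributes its first and last characters to the folder name, for instance:

    ii = "-".join(f"{s[0]}{s[-1]}" for s in ["position_ids", "attention_mask"])
    f"I-{ii.upper()}"  # -> 'I-PS-AK'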
@@ -264,14 +268,18 @@ def shrink_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
     return new_cfg


-def _preprocess_model_id(…):
+def _preprocess_model_id(
+    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
+) -> Tuple[str, Optional[str], bool, bool]:
     if subfolder or "//" not in model_id:
-        return model_id, subfolder
+        return model_id, subfolder, same_as_pretrained, use_pretrained
     spl = model_id.split("//")
+    if spl[-1] == "pretrained":
+        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
     if spl[-1] in {"transformer", "vae"}:
         # known subfolder
-        return "//".join(spl[:-1]), spl[-1]
-    return model_id, subfolder
+        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
+    return model_id, subfolder, same_as_pretrained, use_pretrained


 def validate_model(
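The `//pretrained` suffix is new: besides stripping the suffix it forces `same_as_pretrained` and `use_pretrained`. Illustrative calls (the model ids are hypothetical):

    _preprocess_model_id("some-org/some-diffuser//transformer", None, False, False)
    # -> ("some-org/some-diffuser", "transformer", False, False)
    _preprocess_model_id("some-org/some-llm//pretrained", None, False, False)
    # -> ("some-org/some-llm", "", True, True)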
@@ -384,13 +392,15 @@ def validate_model(
         if ``runtime == 'ref'``,
         ``orteval10`` increases the verbosity.
     """
-    model_id, subfolder = _preprocess_model_id(model_id, subfolder)
+    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+        model_id,
+        subfolder,
+        same_as_pretrained=same_as_pretrained,
+        use_pretrained=use_pretrained,
+    )
+    default_patch = dict(patch_transformers=True, patch_diffusers=True, patch=True)
     if isinstance(patch, bool):
-        patch_kwargs = (
-            dict(patch_transformers=True, patch_diffusers=True, patch=True)
-            if patch
-            else dict(patch=False)
-        )
+        patch_kwargs = default_patch if patch else dict(patch=False)
     elif isinstance(patch, str):
         patch_kwargs = {"patch": True, **{p: True for p in patch.split(",")}}  # noqa: C420
     else:
@@ -399,11 +409,13 @@ def validate_model(
     if "patch" not in patch_kwargs:
         if any(patch_kwargs.values()):
             patch_kwargs["patch"] = True
+    elif len(patch) == 1 and patch.get("patch", False):
+        patch_kwargs.update(default_patch)

     assert not rewrite or patch_kwargs.get("patch", False), (
         f"rewrite={rewrite}, patch={patch}, patch_kwargs={patch_kwargs} "
         f"patch must be True to enable rewriting, "
-        f"if --…"
+        f"if --patch=0 was specified on the command line, rewrites are disabled."
     )
     summary = version_summary()
     summary.update(
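Putting the two hunks together, the `patch` argument is normalized roughly as follows; this is a standalone re-implementation for illustration, not the library API:

    def _normalize_patch(patch):
        default_patch = dict(patch_transformers=True, patch_diffusers=True, patch=True)
        if isinstance(patch, bool):
            return default_patch if patch else dict(patch=False)
        if isinstance(patch, str):
            return {"patch": True, **{p: True for p in patch.split(",")}}
        patch_kwargs = dict(patch)
        if "patch" not in patch_kwargs:
            if any(patch_kwargs.values()):
                patch_kwargs["patch"] = True
        elif len(patch) == 1 and patch.get("patch", False):
            patch_kwargs.update(default_patch)
        return patch_kwargs

    # A dict holding only {"patch": True} now expands to the full defaults:
    assert _normalize_patch({"patch": True}) == dict(
        patch_transformers=True, patch_diffusers=True, patch=True
    )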
@@ -441,6 +453,7 @@ def validate_model(
         device=device,
         subfolder=subfolder,
         opset=opset,
+        drop_inputs=drop_inputs,
     )
     dump_folder = os.path.join(dump_folder, folder_name)
     if not os.path.exists(dump_folder):
@@ -536,6 +549,11 @@ def validate_model(
     if verbose:
         print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}")

+    # ModelBuilder sometimes needs different treatment, so we record
+    # the exporter for later use.
+    # For example, its past_kv ordering differs from a
+    # flattened cache object.
+    data["exporter"] = exporter
     data["input_options"] = iop
     data["model_options"] = mop
     data["model_dump_folder"] = dump_folder
@@ -836,6 +854,8 @@ def validate_model(
     )
     summary.update(summary_valid)

+    _compute_final_statistics(summary)
+
     if verbose:
         print("[validate_model] -- done (final)")
     if dump_stats:
@@ -848,15 +868,24 @@
 def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
     """Computes some statistics on the model itself."""
     onx = onnx.load(onnx_filename, load_external_data=False)
+    cache_functions = {(f.domain, f.name): f for f in onx.functions}
+    local_domains = set(f.domain for f in onx.functions)

     def node_iter(proto):
         if isinstance(proto, onnx.ModelProto):
-            yield from node_iter(proto.graph)
             for f in proto.functions:
                 yield from node_iter(f)
+            yield from node_iter(proto.graph)
         elif isinstance(proto, (onnx.FunctionProto, onnx.GraphProto)):
             for node in proto.node:
                 yield node
+
+                # Let's inline the function
+                key = node.domain, node.op_type
+                if key in cache_functions:
+                    yield from node_iter(cache_functions[key])
+
+                # Let's continue
                 for att in node.attribute:
                     if att.type == onnx.AttributeProto.GRAPH:
                         yield from node_iter(att.g)
@@ -874,6 +903,11 @@ def compute_statistics(onnx_filename: str) -> Dict[str, Union[float, int]]:
             n_nodes += 1
             if proto.op_type != "Constant":
                 n_nodes_nocst += 1
+            if proto.domain in local_domains:
+                key = "n_node_local_function"
+                if key not in counts:
+                    counts[key] = 0
+                counts[key] += 1
         else:
             key = f"n_node_initializer_{proto.data_type}"

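A standalone sketch of the same counting idea on a loaded model; unlike the code above, it only walks the top-level graph and does not recurse into subgraphs or inlined functions (the model path is hypothetical):

    import onnx

    onx = onnx.load("model.onnx", load_external_data=False)
    local_domains = {f.domain for f in onx.functions}
    n_local = sum(1 for node in onx.graph.node if node.domain in local_domains)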
@@ -960,6 +994,26 @@ def _validate_do_run_exported_program(data, summary, verbose, quiet):
     )


+_cache_export_times = []
+_main_export_function = torch.export.export
+
+
+def _torch_export_export(*args, _export=_main_export_function, **kwargs):
+    begin = time.perf_counter()
+    res = _export(*args, **kwargs)
+    duration = time.perf_counter() - begin
+    _cache_export_times.append(duration)
+    return res
+
+
+def _restore_torch_export_export(summary):
+    torch.export.export = _main_export_function
+    if _cache_export_times:
+        summary["time_torch_export_export"] = sum(_cache_export_times)
+        summary["time_torch_export_export_n"] = len(_cache_export_times)
+    _cache_export_times.clear()
+
+
 def call_exporter(
     data: Dict[str, Any],
     exporter: str,
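The wrapper is installed at the top of `call_exporter` (next hunk) and removed after each branch returns, so every nested `torch.export.export` call is timed. The pattern in isolation:

    torch.export.export = _torch_export_export  # monkey-patch: every call is timed
    try:
        ...  # run an exporter that may invoke torch.export.export several times
    finally:
        summary = {}
        _restore_torch_export_export(summary)  # restores the original function
        # summary now holds time_torch_export_export / time_torch_export_export_n
        # whenever at least one export happened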
@@ -985,6 +1039,9 @@ def call_exporter(
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
+    _cache_export_times.clear()
+    torch.export.export = _torch_export_export
+
     if exporter == "export" or exporter.startswith("export-"):
         # torch export
         summary, data = call_torch_export_export(
@@ -995,6 +1052,7 @@ def call_exporter(
             optimization=optimization,
             do_run=do_run,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     if exporter.startswith("onnx-"):
         # torch export
@@ -1006,6 +1064,7 @@ def call_exporter(
             optimization=optimization,
             output_names=output_names,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     if exporter == "custom" or exporter.startswith("custom"):
         # torch export
@@ -1018,6 +1077,7 @@ def call_exporter(
             dump_folder=dump_folder,
             output_names=output_names,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     if exporter == "modelbuilder":
         # torch export
@@ -1029,6 +1089,7 @@ def call_exporter(
             optimization=optimization,
             output_names=output_names,
         )
+        _restore_torch_export_export(summary)
         return summary, data
     raise NotImplementedError(
         f"export with {exporter!r} and optimization={optimization!r} not implemented yet, "
@@ -1322,7 +1383,13 @@ def validate_onnx_model(
         print(
             f"[validate_onnx_model] inputs={string_type(data[k_input], with_shape=True)}"
         )
-    feeds = make_feeds(sess, data[k_input], use_numpy=True, check_flatten=False)
+    feeds = make_feeds(
+        sess,
+        data[k_input],
+        use_numpy=True,
+        check_flatten=False,
+        is_modelbuilder=data["exporter"] == "modelbuilder",
+    )
     if verbose:
         print(f"[validate_onnx_model] ort inputs={string_type(feeds, with_shape=True)}")
     summary[_mk(f"onnx_ort_inputs{suffix}")] = string_type(feeds, with_shape=True)
@@ -1342,6 +1409,13 @@ def validate_onnx_model(
         repeat=repeat,
         warmup=warmup,
     )
+    # NOTE: modelbuilder has a different order on past_kv outputs
+    if data["exporter"] == "modelbuilder":
+        logits = got[:1]
+        past_key_values = got[1:]
+        reorder_past_key_values = reorder_modelbuilder_cache_to_torch(past_key_values)
+        got = logits + reorder_past_key_values
+
     if f"ERR_{_mk(f'time_onnx_ort_run{suffix}')}" in summary:
         return summary, data

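ModelBuilder emits outputs as [logits, k0, v0, k1, v1, ...] while torch-exported models use [logits, k0, k1, ..., v0, v1, ...]; the same helper realigns them, e.g.:

    got = list(sess.run(None, feeds))
    got = got[:1] + reorder_modelbuilder_cache_to_torch(got[1:])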
@@ -1382,7 +1456,7 @@ def call_torch_export_onnx(
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
-    available = {None, "", "ir", "os_ort"}
+    available = {None, "", "ir", "os_ort", "ir+default"}
     assert (
         optimization in available
     ), f"unexpected value for optimization={optimization}, available={available}"
@@ -1472,11 +1546,31 @@ def call_torch_export_onnx(
         print(epo)
         print("[call_torch_export_onnx] -- End of ONNXProgram")

-    if optimization in {"ir", "os_ort"}:
+    if optimization in {"ir", "os_ort", "ir+default"}:
         if verbose:
             print(f"[call_torch_export_onnx] starts optimization={optimization!r}...")
         if optimization == "ir":
             label, f_optim = "export_onnx_opt_ir", (lambda epo=epo: epo.optimize())
+        elif optimization == "ir+default":
+            import onnxscript
+            from experimental_experiment.xbuilder import GraphBuilder, OptimizationOptions
+
+            def _ir_default_opt(epo):
+                onnxscript.optimizer.optimize_ir(epo.model)
+                onx = epo.model_proto
+                # not very efficient
+                gr = GraphBuilder(
+                    onx,
+                    infer_shapes_options=True,
+                    optimization_options=OptimizationOptions(patterns="default"),
+                )
+                cont = gr.to_onnx(large_model=True)
+                epo.model = cont.to_ir()
+
+            label, f_optim = "export_onnx_opt_ir_default", (
+                lambda epo=epo: _ir_default_opt(epo)
+            )
+
         else:
             import onnxscript
             import onnxscript.rewriter.ort_fusions as ort_fusions
@@ -1567,6 +1661,97 @@ def call_torch_export_model_builder(
     return summary, data


+def process_statistics(data: Sequence[Dict[str, float]]) -> Dict[str, Any]:
+    """
+    Processes statistics coming from the exporters.
+    It takes a sequence of dictionaries (like a data frame)
+    and extracts some metrics.
+    """
+
+    def _simplify(p):
+        for s in [
+            "remove_unused",
+            "constant_folding",
+            "remove_identity",
+            "remove_duplicated_initializer",
+            "dynamic_dimension_naming",
+            "inline",
+            "check",
+            "build_graph_for_pattern",
+            "pattern_optimization",
+        ]:
+            if s in p or s.replace("_", "-") in p:
+                return s
+        if p.startswith(("apply_", "match_")):
+            return p
+        return "other"
+
+    def _add(d, a, v, use_max=False):
+        if v:
+            if a not in d:
+                d[a] = v
+            elif use_max:
+                d[a] = max(d[a], v)
+            else:
+                d[a] += v
+
+    counts: Dict[str, Any] = {}
+    applied_pattern_time: Dict[str, Any] = {}
+    applied_pattern_n: Dict[str, Any] = {}
+    matching_pattern_time: Dict[str, Any] = {}
+    matching_pattern_n: Dict[str, Any] = {}
+
+    for obs in data:
+        pattern = _simplify(obs["pattern"])
+        _add(counts, "opt_nodes_added", obs.get("added", 0))
+        _add(counts, "opt_nodes_removed", obs.get("removed", 0))
+        _add(counts, "opt_time_steps", obs.get("time_in", 0))
+        _add(counts, "opt_n_steps", 1)
+        _add(
+            counts,
+            "opt_n_iteration",
+            max(counts.get("opt_n_iteration", 0), obs.get("iteration", 0)),
+            use_max=True,
+        )
+
+        if pattern.startswith("apply_"):
+            _add(counts, "opt_n_applied_patterns", 1)
+            _add(counts, "opt_time_applied_patterns", obs.get("time_in", 0))
+            _add(applied_pattern_time, pattern, obs.get("time_in", 0))
+            _add(applied_pattern_n, pattern, 1)
+        elif pattern.startswith("match_"):
+            _add(counts, "opt_n_matching_patterns", 1)
+            _add(counts, "opt_time_matching_patterns", obs.get("time_in", 0))
+            _add(matching_pattern_time, pattern, obs.get("time_in", 0))
+            _add(matching_pattern_n, pattern, 1)
+        else:
+            _add(counts, f"opt_time_{pattern}", obs.get("time_in", 0))
+            _add(counts, f"opt_n_{pattern}", 1)
+            _add(counts, f"opt_nodes_added_{pattern}", obs.get("added", 0))
+            _add(counts, f"opt_nodes_removed_{pattern}", obs.get("removed", 0))
+
+    if applied_pattern_time:
+        longest = max((v, k) for k, v in applied_pattern_time.items())
+        counts["opt_top_time_applied_pattern"], counts["opt_top_time_applied_pattern_arg"] = (
+            longest
+        )
+        longest = max((v, k) for k, v in applied_pattern_n.items())
+        counts["opt_top_n_applied_pattern"], counts["opt_top_n_applied_pattern_arg"] = longest
+
+    if matching_pattern_time:
+        longest = max((v, k) for k, v in matching_pattern_time.items())
+        (
+            counts["opt_top_time_matching_pattern"],
+            counts["opt_top_time_matching_pattern_arg"],
+        ) = longest
+        longest = max((v, k) for k, v in matching_pattern_n.items())
+        counts["opt_top_n_matching_pattern"], counts["opt_top_n_matching_pattern_arg"] = (
+            longest
+        )
+    counts["onnx_opt_optimized"] = 1
+    return counts
+
+
 def call_torch_export_custom(
     data: Dict[str, Any],
     exporter: str,
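A hedged example of the kind of per-step records `process_statistics` consumes and what it derives; the input rows are made up:

    stats = process_statistics([
        {"pattern": "apply_AttentionPattern", "time_in": 0.010, "added": 1, "removed": 3, "iteration": 2},
        {"pattern": "match_AttentionPattern", "time_in": 0.002, "iteration": 2},
        {"pattern": "remove_unused", "time_in": 0.005, "removed": 7, "iteration": 2},
    ])
    assert stats["opt_nodes_removed"] == 10
    assert stats["opt_top_n_applied_pattern_arg"] == "apply_AttentionPattern"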
@@ -1696,67 +1881,10 @@ def call_torch_export_custom(
     if "ERR_export_onnx_c" in summary:
         return summary, data

-    new_stat = {}
+    new_stat: Dict[str, Any] = {k: v for k, v in opt_stats.items() if k.startswith("time_")}
+    new_stat.update({k[5:]: v for k, v in opt_stats.items() if k.startswith("stat_time_")})
     if "optimization" in opt_stats:
-        …
-        max_iter = 0
-        applied = {}
-        matched = set()
-        n_applied = 0
-        by_pattern = {}
-        by_pattern_n = {}
-        by_iter = {}
-        cst_added, cst_removed, cst_time_in = 0, 0, 0.0
-
-        for obs in opt_stats["optimization"]:
-            pattern = obs["pattern"]
-            if pattern == "constant_folding":
-                cst_added += obs.get("added", 0)
-                cst_removed += obs.get("removed", 0)
-                cst_time_in += obs.get("time_in", 0)
-            if pattern not in by_pattern:
-                by_pattern[pattern] = 0
-                by_pattern_n[pattern] = 0
-                by_iter[pattern] = 0
-            time_in += obs.get("time_in", 0)
-            added += obs.get("added", 0)
-            removed += obs.get("removed", 0)
-            max_iter = max(max_iter, obs.get("iteration", 0))
-            by_pattern[pattern] += obs.get("time_in", 0)
-            by_pattern_n[pattern] += obs.get("added", 0) - obs.get("removed", 0)
-            if not pattern.startswith("match"):
-                by_iter[pattern] = max(by_iter[pattern], obs.get("iteration", 0))
-            p = obs["pattern"]
-            if p.startswith("match_"):
-                matched.add(p)
-            elif p.startswith("apply_"):
-                key = f"op_opt_{p}"
-                key2 = f"op_opt_maxiter_{p}"
-                if key not in applied:
-                    applied[key] = 1
-                    applied[key2] = obs["iteration"]
-                else:
-                    applied[key] += 1
-                    applied[key2] = max(obs["iteration"], applied[key2])
-                n_applied += 1
-
-        new_stat.update(
-            dict(
-                onnx_opt_optimized=1,
-                op_opt_all_time_in=time_in,
-                op_opt_all_added=added,
-                op_opt_all_removed=removed,
-                op_opt_max_iter=max_iter,
-                op_opt_unique_matched=len(matched),
-                op_opt_unique_applied=len(applied),
-                op_opt_n_applied=n_applied,
-                time_export_optimization=time_in,
-                op_opt_export_optimization=time_in,
-                op_opt_cst_time_in=cst_time_in,
-                op_opt_cst_added=cst_added,
-                op_opt_cst_removed=cst_removed,
-            )
-        )
+        new_stat.update(process_statistics(opt_stats["optimization"]))

     summary.update(new_stat)
     assert epo is not None, "no onnx export was found"
@@ -1875,3 +2003,21 @@ def run_ort_fusion(
         f"opt_ort_{model_type}_duration": duration,
         f"opt_ort_{model_type}_duration_save": d,
     }, {f"opt_ort_{model_type}": output_path}
+
+
+def _compute_final_statistics(summary: Dict[str, Any]):
+    """
+    Updates the statistics in place. It adds:
+
+    - speedup
+    """
+    stats = {}
+    if (
+        "time_run_latency" in summary
+        and "time_run_onnx_ort_latency" in summary
+        and summary["time_run_onnx_ort_latency"] > 0
+    ):
+        stats["stat_estimated_speedup_ort"] = (
+            summary["time_run_latency"] / summary["time_run_onnx_ort_latency"]
+        )
+    summary.update(stats)
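For example, a model whose eager latency is twice the ONNX Runtime latency gets a speedup of 2:

    summary = {"time_run_latency": 0.084, "time_run_onnx_ort_latency": 0.042}
    _compute_final_statistics(summary)
    # summary["stat_estimated_speedup_ort"] == 2.0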
{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: onnx-diagnostic
-Version: 0.7.11
+Version: 0.7.12
 Summary: Investigate ONNX models
 Home-page: https://github.com/sdpython/onnx-diagnostic
 Author: Xavier Dupré
@@ -25,7 +25,7 @@ Description-Content-Type: text/x-rst
 License-File: LICENSE.txt
 Requires-Dist: numpy
 Requires-Dist: onnx>=1.16.0
-Requires-Dist: onnxruntime>=1.…
+Requires-Dist: onnxruntime>=1.23
 Requires-Dist: optree
 Requires-Dist: torch>=2.8
 Requires-Dist: torch_geometric
{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-onnx_diagnostic/__init__.py,sha256=…
+onnx_diagnostic/__init__.py,sha256=dcCB9tAfK6HWFqGTvBN7m6WdJ5DFFu0P3gcwcKdA7MI,174
 onnx_diagnostic/__main__.py,sha256=YmyV_Aq_ianDlHyKLHMa6h8YK3ZmFPpLVHLKjM91aCk,79
-onnx_diagnostic/_command_lines_parser.py,sha256=…
+onnx_diagnostic/_command_lines_parser.py,sha256=wleBwnoCDyAWRYRREUSGkwAJKw2YI4Td_7ydxmdOXfI,33457
 onnx_diagnostic/api.py,sha256=BhCl_yCd78N7TlVtPOHjeYv1QBEy39TjZ647rcHqLh0,345
 onnx_diagnostic/doc.py,sha256=t3RELgfooYnVMAi0JSpggWkQEgUsREz8NmRvn0TnLI8,2829
 onnx_diagnostic/ext_test_case.py,sha256=emfQGiQSz5FVDhyJ1Acsv_Tast7tWl426TjtpNqxDBU,43558
@@ -17,13 +17,13 @@ onnx_diagnostic/helpers/config_helper.py,sha256=H2mOcMXfrcolFnt8EuqmRFkpQ3YdNRDf
 onnx_diagnostic/helpers/doc_helper.py,sha256=pl5MZd3_FaE8BqQnqoBuSBxoNCFcd2OJd3eITUSku5c,5897
 onnx_diagnostic/helpers/graph_helper.py,sha256=hevQT5a7_QuriVPQcbT5qe18n99Doyl5h3-qshx1-uk,14093
 onnx_diagnostic/helpers/helper.py,sha256=mRQ-wo9P30m0Z0_v3EfEDwK_dZFTUhIVKo-5ut9DPW8,63194
-onnx_diagnostic/helpers/log_helper.py,sha256=…
+onnx_diagnostic/helpers/log_helper.py,sha256=SKzxJ6DdP9uq4e2feA2nqd2Rreq4G-ujKZFUELfycP0,85674
 onnx_diagnostic/helpers/memory_peak.py,sha256=OT6mz0muBbBZY0pjgW2_eCk_lOtFRo-5w4jFo2Z6Kok,6380
 onnx_diagnostic/helpers/mini_onnx_builder.py,sha256=FgK-Kws1WpSYdYJCPyONwQYY3AjbgUHimZlaYyiNUfE,21286
 onnx_diagnostic/helpers/model_builder_helper.py,sha256=tJi4VkP0TS2yyDSxQPNu9WRoSnPCAjr6L0J49X2LdXk,12810
 onnx_diagnostic/helpers/onnx_helper.py,sha256=oxl3x0EQowGP9kfz8aKDqnJZcvYY8FeZLsfoLJDiSUg,39826
 onnx_diagnostic/helpers/ort_session.py,sha256=UgUUeUslDxEFBc6w6f3HMq_a7bn4TBlItmojqWquSj4,29281
-onnx_diagnostic/helpers/rt_helper.py,sha256=…
+onnx_diagnostic/helpers/rt_helper.py,sha256=E9fQ76lcLJqcOCNsAeZBdxmmEO_FH0oSIlFRU2gnQ6U,5229
 onnx_diagnostic/helpers/torch_helper.py,sha256=e0KkSTdoZthc5Yuf9e8XVGAx-lqOYy4DeRRe-N4QUYQ,33478
 onnx_diagnostic/reference/__init__.py,sha256=rLZsxOlnb7-81F2CzepGnZLejaROg4JvgFaGR9FwVQA,208
 onnx_diagnostic/reference/evaluator.py,sha256=RzNzjFDeMe-4X51Tb22N6aagazY5ktNq-mRmPcfY5EU,8848
@@ -90,7 +90,7 @@ onnx_diagnostic/tasks/text_generation.py,sha256=hV-oK1bWjtepxkA491Va_0CWrELZbfP4
 onnx_diagnostic/tasks/text_to_image.py,sha256=mOS3Ruosi3hzRMxXLDN7ZkAbi7NnQb7MWwQP_okGVHs,2962
 onnx_diagnostic/tasks/zero_shot_image_classification.py,sha256=jJCMWuOqGv5ahCfjrcqxuYCJFhTgHV5KUf2yyv2yxYA,4624
 onnx_diagnostic/torch_export_patches/__init__.py,sha256=0SaZedwznm1hQUCvXZsGZORV5vby954wEExr5faepGg,720
-onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=…
+onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=TUDY6sRf2Si-t7rK_hdKiFqSP2gjJbPpIGgnW2Mt5eA,28686
 onnx_diagnostic/torch_export_patches/onnx_export_serialization.py,sha256=klvqiMjccwGhiRnLRVbwTi5WWkMfvtnOV5ycirPcAdA,11354
 onnx_diagnostic/torch_export_patches/patch_expressions.py,sha256=vr4tt61cbDnaaaduzMj4UBZ8OUtr6GfDpIWwOYqjWzs,3213
 onnx_diagnostic/torch_export_patches/patch_inputs.py,sha256=3ySY1nAzINSS1hAzTycwfdbPas8G5CDL2MjnaAHBkMU,7825
@@ -106,12 +106,12 @@ onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py,sha256=drq3
 onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py,sha256=mcmZGekzQlLgE_o3SdKlRgCx4ewwyyAuNWZ9CaN_zrI,9317
 onnx_diagnostic/torch_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_models/llms.py,sha256=soyg4yC87ptGoeulJhKqw5opGmuLvH1pn_ZDXZ4Jr8E,90
-onnx_diagnostic/torch_models/validate.py,sha256=…
+onnx_diagnostic/torch_models/validate.py,sha256=oDPnZDFpiPx7s0we4usaD4pQpJEgqnKYjW-L-TM8Bsw,76395
 onnx_diagnostic/torch_models/hghub/__init__.py,sha256=vi1Q7YHdddj1soiBN42MSvJdFqe2_KUoWafHISjwOu8,58
-onnx_diagnostic/torch_models/hghub/hub_api.py,sha256=…
+onnx_diagnostic/torch_models/hghub/hub_api.py,sha256=rFbiPNLET-KdBpnv-p0nKgwHX6d7C_Z0s9zZ86_92kQ,14307
 onnx_diagnostic/torch_models/hghub/hub_data.py,sha256=8V_pAgACPLPsLRYUododg7MSL6str-T3tBEGY4OaeYQ,8724
 onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py,sha256=3yH1pQbCYNDmRMNUCwMFf5ELnAa35ubTKD2JRF5y9Ls,287515
-onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=…
+onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=NgKFt3fwM5PYUOWwApKphiAWfQyJk3rjGXHr4kkSRiE,13707
 onnx_diagnostic/torch_models/hghub/model_specific.py,sha256=j50Nu7wddJMoqmD4QzMbNdFDUUgUmSBKRzPDH55TlUQ,2498
 onnx_diagnostic/torch_models/untrained/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_models/untrained/llm_phi2.py,sha256=JbGZmW41MPJcQgqaJc9R2G00nI79nI-lABN-ffA1lmY,4037
@@ -119,8 +119,8 @@ onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py,sha256=QXw_Bs2SzfeiQMf-tm
 onnx_diagnostic/torch_onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 onnx_diagnostic/torch_onnx/runtime_info.py,sha256=1g9F_Jf9AAgYQU4stbsrFXwQl-30mWlQrFbQ7val8Ps,9268
 onnx_diagnostic/torch_onnx/sbs.py,sha256=fN799L_G1c2RKEuNcKt_MnQri5dwD4OzeCkBBFFoUBI,16865
-onnx_diagnostic-0.7.11.dist-info/licenses/LICENSE.txt,sha256=…
-onnx_diagnostic-0.7.11.dist-info/METADATA,sha256=…
-onnx_diagnostic-0.7.11.dist-info/WHEEL,sha256=…
-onnx_diagnostic-0.7.11.dist-info/top_level.txt,sha256=…
-onnx_diagnostic-0.7.11.dist-info/RECORD,,
+onnx_diagnostic-0.7.12.dist-info/licenses/LICENSE.txt,sha256=Vv6TXglX6Rc0d-f8aREhayhT-6PMQXEyOmI2NKlUCMc,1045
+onnx_diagnostic-0.7.12.dist-info/METADATA,sha256=aQ02curD3P5PXXiaUBlf6pLkpoqMR_F6721HDpsxhLE,7435
+onnx_diagnostic-0.7.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+onnx_diagnostic-0.7.12.dist-info/top_level.txt,sha256=KwNkXewmcobM3ZT1DJLVWH6ebJzA5qKg7cWqKfpGNT4,16
+onnx_diagnostic-0.7.12.dist-info/RECORD,,

{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/WHEEL
File without changes

{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/licenses/LICENSE.txt
File without changes

{onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.12.dist-info}/top_level.txt
File without changes