onnx-diagnostic 0.7.12__py3-none-any.whl → 0.7.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +7 -2
- onnx_diagnostic/export/dynamic_shapes.py +11 -2
- onnx_diagnostic/helpers/helper.py +11 -5
- onnx_diagnostic/helpers/log_helper.py +53 -17
- onnx_diagnostic/helpers/mini_onnx_builder.py +17 -0
- onnx_diagnostic/helpers/model_builder_helper.py +1 -0
- onnx_diagnostic/helpers/rt_helper.py +2 -1
- onnx_diagnostic/helpers/torch_helper.py +31 -7
- onnx_diagnostic/reference/torch_evaluator.py +2 -2
- onnx_diagnostic/tasks/data/__init__.py +13 -0
- onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx +0 -0
- onnx_diagnostic/tasks/image_text_to_text.py +256 -141
- onnx_diagnostic/tasks/text_generation.py +30 -0
- onnx_diagnostic/torch_export_patches/eval/__init__.py +184 -151
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +20 -5
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +52 -20
- onnx_diagnostic/torch_export_patches/patch_inputs.py +10 -6
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +540 -10
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +269 -4
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +36 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +55 -5
- onnx_diagnostic/torch_models/validate.py +116 -50
- onnx_diagnostic/torch_onnx/sbs.py +2 -1
- {onnx_diagnostic-0.7.12.dist-info → onnx_diagnostic-0.7.14.dist-info}/METADATA +11 -31
- {onnx_diagnostic-0.7.12.dist-info → onnx_diagnostic-0.7.14.dist-info}/RECORD +29 -27
- {onnx_diagnostic-0.7.12.dist-info → onnx_diagnostic-0.7.14.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.12.dist-info → onnx_diagnostic-0.7.14.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.12.dist-info → onnx_diagnostic-0.7.14.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED
|
@@ -400,12 +400,17 @@ def get_parser_validate() -> ArgumentParser:
|
|
|
400
400
|
|
|
401
401
|
position_ids is usually not needed, they can be removed by adding:
|
|
402
402
|
|
|
403
|
-
|
|
403
|
+
--drop position_ids
|
|
404
404
|
|
|
405
405
|
The behaviour may be modified compare the original configuration,
|
|
406
406
|
the following argument can be rope_scaling to dynamic:
|
|
407
407
|
|
|
408
|
-
|
|
408
|
+
--mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
|
|
409
|
+
|
|
410
|
+
You can profile the command line by running:
|
|
411
|
+
|
|
412
|
+
pyinstrument -m onnx_diagnostic validate ...
|
|
413
|
+
pyinstrument -r html -o profile.html -m onnx_diagnostic validate ...
|
|
409
414
|
"""
|
|
410
415
|
),
|
|
411
416
|
formatter_class=RawTextHelpFormatter,
|
|
@@ -56,6 +56,14 @@ class CoupleInputsDynamicShapes:
|
|
|
56
56
|
self.kwargs = kwargs
|
|
57
57
|
self.dynamic_shapes = dynamic_shapes
|
|
58
58
|
self.args_names = args_names
|
|
59
|
+
if not self.kwargs and isinstance(self.dynamic_shapes, dict):
|
|
60
|
+
# This assumes the dictionary for the dynamic shapes is ordered
|
|
61
|
+
# the same way the args are. The input names are not known.
|
|
62
|
+
assert len(self.dynamic_shapes) == len(self.args), (
|
|
63
|
+
f"Length mismatch, kwargs is empty, len(dynamic_shapes)="
|
|
64
|
+
f"{len(self.dynamic_shapes)}, len(args)={len(self.args)}"
|
|
65
|
+
)
|
|
66
|
+
self.dynamic_shapes = tuple(self.dynamic_shapes.values())
|
|
59
67
|
|
|
60
68
|
def __str__(self) -> str:
|
|
61
69
|
return "\n".join(
|
|
@@ -232,8 +240,9 @@ class CoupleInputsDynamicShapes:
|
|
|
232
240
|
"""
|
|
233
241
|
if not self.args:
|
|
234
242
|
assert isinstance(self.kwargs, dict) and isinstance(self.dynamic_shapes, dict), (
|
|
235
|
-
f"Type mismatch, args={string_type(self.args)}
|
|
236
|
-
f"
|
|
243
|
+
f"Type mismatch, args={string_type(self.args)}, "
|
|
244
|
+
f"kwargs={string_type(self.kwargs)} and dynamic_shapes="
|
|
245
|
+
f"{string_type(self.dynamic_shapes)} should have the same type."
|
|
237
246
|
)
|
|
238
247
|
res = self._generic_walker_step(
|
|
239
248
|
processor,
|
|
@@ -397,7 +397,7 @@ def string_type(
|
|
|
397
397
|
return "AUTO"
|
|
398
398
|
if verbose:
|
|
399
399
|
print(f"[string_type] Y7:{type(obj)}")
|
|
400
|
-
return str(obj)
|
|
400
|
+
return str(obj).replace("DimHint(DYNAMIC)", "DYNAMIC").replace("DimHint(AUTO)", "AUTO")
|
|
401
401
|
|
|
402
402
|
if isinstance(obj, bool):
|
|
403
403
|
if with_min_max:
|
|
@@ -516,8 +516,10 @@ def string_type(
|
|
|
516
516
|
print(f"[string_type] V2:{type(obj)}")
|
|
517
517
|
return "OV(NOTENSOR)"
|
|
518
518
|
if with_min_max:
|
|
519
|
+
from .torch_helper import to_numpy
|
|
520
|
+
|
|
519
521
|
try:
|
|
520
|
-
t = obj
|
|
522
|
+
t = to_numpy(obj)
|
|
521
523
|
except Exception:
|
|
522
524
|
# pass unable to convert into numpy (bfloat16, ...)
|
|
523
525
|
if verbose:
|
|
@@ -939,7 +941,7 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any:
|
|
|
939
941
|
return flatten_object(list(x.values()), drop_keys=drop_keys)
|
|
940
942
|
return flatten_object(list(x.items()), drop_keys=drop_keys)
|
|
941
943
|
|
|
942
|
-
if x.__class__.__name__ in {"DynamicCache", "StaticCache"}:
|
|
944
|
+
if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
|
|
943
945
|
from .cache_helper import CacheKeyValue
|
|
944
946
|
|
|
945
947
|
kc = CacheKeyValue(x)
|
|
@@ -1233,9 +1235,13 @@ def max_diff(
|
|
|
1233
1235
|
|
|
1234
1236
|
if isinstance(expected, np.ndarray) or isinstance(got, np.ndarray):
|
|
1235
1237
|
if isinstance(expected, torch.Tensor):
|
|
1236
|
-
|
|
1238
|
+
from .torch_helper import to_numpy
|
|
1239
|
+
|
|
1240
|
+
expected = to_numpy(expected)
|
|
1237
1241
|
if isinstance(got, torch.Tensor):
|
|
1238
|
-
|
|
1242
|
+
from .torch_helper import to_numpy
|
|
1243
|
+
|
|
1244
|
+
got = to_numpy(got)
|
|
1239
1245
|
if verbose >= 6:
|
|
1240
1246
|
print(f"[max_diff] tensor: {string_type(expected)} ? {string_type(got)}")
|
|
1241
1247
|
|
|
@@ -1167,7 +1167,7 @@ class CubeLogs:
|
|
|
1167
1167
|
df.to_excel(
|
|
1168
1168
|
writer,
|
|
1169
1169
|
sheet_name=name,
|
|
1170
|
-
freeze_panes=(df.columns.nlevels +
|
|
1170
|
+
freeze_panes=(df.columns.nlevels + 1, df.index.nlevels),
|
|
1171
1171
|
)
|
|
1172
1172
|
f_highlights[name] = tview.f_highlight
|
|
1173
1173
|
if tview.plots:
|
|
@@ -1210,7 +1210,7 @@ class CubeLogs:
|
|
|
1210
1210
|
for k, v in sbs.items():
|
|
1211
1211
|
print(f"[CubeLogs.to_excel] sbs {k}: {v}")
|
|
1212
1212
|
name = "∧".join(sbs)
|
|
1213
|
-
sbs_raw, sbs_agg = self.sbs(sbs)
|
|
1213
|
+
sbs_raw, sbs_agg, sbs_col = self.sbs(sbs)
|
|
1214
1214
|
if verbose:
|
|
1215
1215
|
print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
|
|
1216
1216
|
print(
|
|
@@ -1222,7 +1222,7 @@ class CubeLogs:
|
|
|
1222
1222
|
writer,
|
|
1223
1223
|
sheet_name=name,
|
|
1224
1224
|
freeze_panes=(
|
|
1225
|
-
sbs_raw.columns.nlevels +
|
|
1225
|
+
sbs_raw.columns.nlevels + 1,
|
|
1226
1226
|
sbs_raw.index.nlevels,
|
|
1227
1227
|
),
|
|
1228
1228
|
)
|
|
@@ -1230,10 +1230,18 @@ class CubeLogs:
|
|
|
1230
1230
|
writer,
|
|
1231
1231
|
sheet_name=f"{name}-AGG",
|
|
1232
1232
|
freeze_panes=(
|
|
1233
|
-
sbs_agg.columns.nlevels +
|
|
1233
|
+
sbs_agg.columns.nlevels + 1,
|
|
1234
1234
|
sbs_agg.index.nlevels,
|
|
1235
1235
|
),
|
|
1236
1236
|
)
|
|
1237
|
+
sbs_col.to_excel(
|
|
1238
|
+
writer,
|
|
1239
|
+
sheet_name=f"{name}-COL",
|
|
1240
|
+
freeze_panes=(
|
|
1241
|
+
sbs_col.columns.nlevels + 1,
|
|
1242
|
+
sbs_col.index.nlevels,
|
|
1243
|
+
),
|
|
1244
|
+
)
|
|
1237
1245
|
|
|
1238
1246
|
if plots:
|
|
1239
1247
|
from openpyxl.drawing.image import Image
|
|
@@ -1314,7 +1322,7 @@ class CubeLogs:
|
|
|
1314
1322
|
|
|
1315
1323
|
def sbs(
|
|
1316
1324
|
self, configs: Dict[str, Dict[str, Any]], column_name: str = "CONF"
|
|
1317
|
-
) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
|
|
1325
|
+
) -> Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]:
|
|
1318
1326
|
"""
|
|
1319
1327
|
Creates a side-by-side for two configurations.
|
|
1320
1328
|
Every configuration a dictionary column:value which filters in
|
|
@@ -1325,7 +1333,7 @@ class CubeLogs:
|
|
|
1325
1333
|
:param configs: example
|
|
1326
1334
|
``dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
|
|
1327
1335
|
:param column_name: column to add with the name of the configuration
|
|
1328
|
-
:return: data
|
|
1336
|
+
:return: data, aggregated data, data with a row per model
|
|
1329
1337
|
"""
|
|
1330
1338
|
assert (
|
|
1331
1339
|
len(configs) >= 2
|
|
@@ -1433,6 +1441,8 @@ class CubeLogs:
|
|
|
1433
1441
|
_mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
|
|
1434
1442
|
_mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
|
|
1435
1443
|
_mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
|
|
1444
|
+
_mkc(m, f"{n1}*({n1}∧{n2})"): si * (~sinan & ~sjnan).astype(float),
|
|
1445
|
+
_mkc(m, f"{n2}*({n1}∧{n2})"): sj * (~sinan & ~sjnan).astype(float),
|
|
1436
1446
|
}
|
|
1437
1447
|
)
|
|
1438
1448
|
nas.columns.names = view_res.columns.names
|
|
@@ -1452,13 +1462,11 @@ class CubeLogs:
|
|
|
1452
1462
|
}
|
|
1453
1463
|
flat = view_res.groupby(self.time).agg(aggs)
|
|
1454
1464
|
flat = flat.stack("METRICS", future_stack=True)
|
|
1455
|
-
return res, flat
|
|
1465
|
+
return res, flat, view_res.T.sort_index().T
|
|
1456
1466
|
|
|
1457
1467
|
|
|
1458
1468
|
class CubeLogsPerformance(CubeLogs):
|
|
1459
|
-
"""
|
|
1460
|
-
Processes logs coming from experiments.
|
|
1461
|
-
"""
|
|
1469
|
+
"""Processes logs coming from experiments."""
|
|
1462
1470
|
|
|
1463
1471
|
def __init__(
|
|
1464
1472
|
self,
|
|
@@ -1511,20 +1519,25 @@ class CubeLogsPerformance(CubeLogs):
|
|
|
1511
1519
|
"n_model_faster2x",
|
|
1512
1520
|
"n_model_faster3x",
|
|
1513
1521
|
"n_model_faster4x",
|
|
1522
|
+
"n_model_faster5x",
|
|
1514
1523
|
"n_node_attention",
|
|
1515
1524
|
"n_node_attention23",
|
|
1516
|
-
"
|
|
1517
|
-
"
|
|
1518
|
-
"n_node_layer_normalization",
|
|
1519
|
-
"n_node_layer_normalization23",
|
|
1525
|
+
"n_node_causal_mask",
|
|
1526
|
+
"n_node_constant",
|
|
1520
1527
|
"n_node_control_flow",
|
|
1521
|
-
"
|
|
1528
|
+
"n_node_expand",
|
|
1522
1529
|
"n_node_function",
|
|
1530
|
+
"n_node_gqa",
|
|
1523
1531
|
"n_node_initializer",
|
|
1524
1532
|
"n_node_initializer_small",
|
|
1525
|
-
"
|
|
1533
|
+
"n_node_layer_normalization",
|
|
1534
|
+
"n_node_layer_normalization23",
|
|
1535
|
+
"n_node_reshape",
|
|
1536
|
+
"n_node_rotary_embedding",
|
|
1537
|
+
"n_node_rotary_embedding23",
|
|
1538
|
+
"n_node_scatter",
|
|
1539
|
+
"n_node_sequence",
|
|
1526
1540
|
"n_node_shape",
|
|
1527
|
-
"n_node_expand",
|
|
1528
1541
|
"onnx_n_nodes_no_cst",
|
|
1529
1542
|
"peak_gpu_torch",
|
|
1530
1543
|
"peak_gpu_nvidia",
|
|
@@ -1690,6 +1703,11 @@ class CubeLogsPerformance(CubeLogs):
|
|
|
1690
1703
|
"time_latency",
|
|
1691
1704
|
gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
|
|
1692
1705
|
),
|
|
1706
|
+
n_model_faster5x=lambda df: gpreserve(
|
|
1707
|
+
df,
|
|
1708
|
+
"time_latency",
|
|
1709
|
+
gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 4.98,
|
|
1710
|
+
),
|
|
1693
1711
|
n_node_attention23=lambda df: gpreserve(
|
|
1694
1712
|
df, "time_latency_eager", gdf(df, "op_onnx__Attention")
|
|
1695
1713
|
),
|
|
@@ -1720,6 +1738,11 @@ class CubeLogsPerformance(CubeLogs):
|
|
|
1720
1738
|
+ gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
|
|
1721
1739
|
+ gdf(df, "op_onnx_com.microsoft_SparseAttention", 0),
|
|
1722
1740
|
),
|
|
1741
|
+
n_node_gqa=lambda df: gpreserve(
|
|
1742
|
+
df,
|
|
1743
|
+
"time_latency_eager",
|
|
1744
|
+
gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0),
|
|
1745
|
+
),
|
|
1723
1746
|
n_node_layer_normalization=lambda df: gpreserve(
|
|
1724
1747
|
df,
|
|
1725
1748
|
"time_latency_eager",
|
|
@@ -1764,9 +1787,22 @@ class CubeLogsPerformance(CubeLogs):
|
|
|
1764
1787
|
n_node_shape=lambda df: gpreserve(
|
|
1765
1788
|
df, "time_latency_eager", gdf(df, "op_onnx__Shape")
|
|
1766
1789
|
),
|
|
1790
|
+
n_node_reshape=lambda df: gpreserve(
|
|
1791
|
+
df, "time_latency_eager", gdf(df, "op_onnx__Reshape")
|
|
1792
|
+
),
|
|
1767
1793
|
n_node_expand=lambda df: gpreserve(
|
|
1768
1794
|
df, "time_latency_eager", gdf(df, "op_onnx__Expand")
|
|
1769
1795
|
),
|
|
1796
|
+
n_node_causal_mask=lambda df: gpreserve(
|
|
1797
|
+
df,
|
|
1798
|
+
"time_latency_eager",
|
|
1799
|
+
gdf(df, "op_onnx__CausalMask", 0),
|
|
1800
|
+
),
|
|
1801
|
+
n_node_sequence=lambda df: gpreserve(
|
|
1802
|
+
df,
|
|
1803
|
+
"time_latency_eager",
|
|
1804
|
+
gdf(df, "op_onnx__SequenceAt", 0) + gdf(df, "op_onnx__SplitToSequence", 0),
|
|
1805
|
+
),
|
|
1770
1806
|
)
|
|
1771
1807
|
assert (
|
|
1772
1808
|
formula in lambdas
|
|
@@ -381,6 +381,23 @@ def _flatten_iterator(obj: Any, sep: str) -> Iterator:
|
|
|
381
381
|
else:
|
|
382
382
|
for p, o in _flatten_iterator(getattr(obj, att), sep):
|
|
383
383
|
yield f"DynamicCache_{att}{sep}{p}", o
|
|
384
|
+
elif obj.__class__.__name__ == "StaticCache":
|
|
385
|
+
# transformers
|
|
386
|
+
import transformers
|
|
387
|
+
from .cache_helper import CacheKeyValue
|
|
388
|
+
|
|
389
|
+
assert isinstance(
|
|
390
|
+
obj, transformers.cache_utils.StaticCache
|
|
391
|
+
), f"Unexpected type {type(obj)}"
|
|
392
|
+
obj = CacheKeyValue(obj)
|
|
393
|
+
atts = ["key_cache", "value_cache"]
|
|
394
|
+
for i, att in enumerate(atts):
|
|
395
|
+
if i == len(atts) - 1:
|
|
396
|
+
for p, o in _flatten_iterator(getattr(obj, att), sep):
|
|
397
|
+
yield f"StaticCache._{att}{sep}{p}", o
|
|
398
|
+
else:
|
|
399
|
+
for p, o in _flatten_iterator(getattr(obj, att), sep):
|
|
400
|
+
yield f"StaticCache_{att}{sep}{p}", o
|
|
384
401
|
else:
|
|
385
402
|
raise NotImplementedError(f"Unexpected type {type(obj)}")
|
|
386
403
|
|
|
@@ -203,6 +203,7 @@ def create_model_builder(
|
|
|
203
203
|
"ChatGLMModel": builder.ChatGLMModel,
|
|
204
204
|
"Ernie4_5_ForCausalLM": builder.ErnieModel,
|
|
205
205
|
"GemmaForCausalLM": builder.Gemma2Model,
|
|
206
|
+
"Gemma2ForCausalLM": builder.Gemma2Model,
|
|
206
207
|
"Gemma3ForCausalLM": builder.Gemma3Model,
|
|
207
208
|
"Gemma3ForConditionalGeneration": builder.Gemma3Model,
|
|
208
209
|
"GraniteForCausalLM": builder.GraniteModel,
|
|
@@ -3,6 +3,7 @@ import numpy as np
|
|
|
3
3
|
import onnx
|
|
4
4
|
import torch
|
|
5
5
|
from .helper import string_type, flatten_object
|
|
6
|
+
from .torch_helper import to_numpy
|
|
6
7
|
from .cache_helper import is_cache_dynamic_registered
|
|
7
8
|
|
|
8
9
|
|
|
@@ -56,7 +57,7 @@ def make_feeds(
|
|
|
56
57
|
f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True)}"
|
|
57
58
|
)
|
|
58
59
|
if use_numpy:
|
|
59
|
-
flat = [t
|
|
60
|
+
flat = [to_numpy(t) if isinstance(t, torch.Tensor) else t for t in flat]
|
|
60
61
|
names = (
|
|
61
62
|
[i.name for i in proto.graph.input]
|
|
62
63
|
if isinstance(proto, onnx.ModelProto)
|
|
@@ -5,7 +5,7 @@ import os
|
|
|
5
5
|
import sys
|
|
6
6
|
import warnings
|
|
7
7
|
from collections.abc import Iterable
|
|
8
|
-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
8
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
|
9
9
|
import numpy as np
|
|
10
10
|
import onnx
|
|
11
11
|
from onnx.external_data_helper import load_external_data_for_tensor, uses_external_data
|
|
@@ -283,9 +283,11 @@ def steal_forward(
|
|
|
283
283
|
],
|
|
284
284
|
fprint: Callable = string_type,
|
|
285
285
|
dump_file: Optional[str] = None,
|
|
286
|
+
dump_drop: Optional[Set[str]] = None,
|
|
286
287
|
submodules: bool = False,
|
|
287
288
|
verbose: int = 0,
|
|
288
289
|
storage_limit: int = 2**27,
|
|
290
|
+
save_as_external_data: bool = True,
|
|
289
291
|
**kwargs,
|
|
290
292
|
):
|
|
291
293
|
"""
|
|
@@ -303,6 +305,9 @@ def steal_forward(
|
|
|
303
305
|
:param dump_file: dumps stolen inputs and outputs in an onnx model,
|
|
304
306
|
they can be restored with :func:`create_input_tensors_from_onnx_model
|
|
305
307
|
<onnx_diagnostic.helpers.mini_onnx_builder.create_input_tensors_from_onnx_model>`
|
|
308
|
+
:param dump_drop: to drop some inputs too big (only if dump_file is specified)
|
|
309
|
+
:param save_as_external_data: True by default, but maybe better to have everything
|
|
310
|
+
in a single file if possible
|
|
306
311
|
:param submodules: if True and model is a module, the list extended with all the submodules
|
|
307
312
|
the module contains
|
|
308
313
|
:param verbose: verbosity
|
|
@@ -411,6 +416,15 @@ def steal_forward(
|
|
|
411
416
|
if verbose:
|
|
412
417
|
size = torch_tensor_size(storage)
|
|
413
418
|
print(f"-- gather stored {len(storage)} objects, size={size // 2 ** 20} Mb")
|
|
419
|
+
if dump_drop:
|
|
420
|
+
for k, v in storage.items():
|
|
421
|
+
if k[-1] == "I":
|
|
422
|
+
_args, kwargs = v
|
|
423
|
+
ii = set(kwargs) & dump_drop
|
|
424
|
+
if ii:
|
|
425
|
+
for i in ii:
|
|
426
|
+
print("---", i)
|
|
427
|
+
del kwargs[i]
|
|
414
428
|
proto = create_onnx_model_from_input_tensors(storage)
|
|
415
429
|
if verbose:
|
|
416
430
|
print("-- dumps stored objects")
|
|
@@ -420,7 +434,7 @@ def steal_forward(
|
|
|
420
434
|
onnx.save(
|
|
421
435
|
proto,
|
|
422
436
|
dump_file,
|
|
423
|
-
save_as_external_data=
|
|
437
|
+
save_as_external_data=save_as_external_data,
|
|
424
438
|
all_tensors_to_one_file=True,
|
|
425
439
|
location=location,
|
|
426
440
|
)
|
|
@@ -464,10 +478,10 @@ def is_torchdynamo_exporting() -> bool:
|
|
|
464
478
|
return False
|
|
465
479
|
|
|
466
480
|
|
|
467
|
-
def to_numpy(tensor: "torch.Tensor"): # noqa: F821
|
|
481
|
+
def to_numpy(tensor: "torch.Tensor") -> np.ndarray: # noqa: F821
|
|
468
482
|
"""Converts a :class:`torch.Tensor` to :class:`numpy.ndarray`."""
|
|
469
483
|
try:
|
|
470
|
-
return tensor.numpy()
|
|
484
|
+
return tensor.detach().cpu().numpy()
|
|
471
485
|
except TypeError:
|
|
472
486
|
# We try with ml_dtypes
|
|
473
487
|
pass
|
|
@@ -476,7 +490,7 @@ def to_numpy(tensor: "torch.Tensor"): # noqa: F821
|
|
|
476
490
|
|
|
477
491
|
conv = {torch.bfloat16: ml_dtypes.bfloat16}
|
|
478
492
|
assert tensor.dtype in conv, f"Unsupported type {tensor.dtype}, not in {conv}"
|
|
479
|
-
return tensor.to(torch.float32).numpy().astype(conv[tensor.dtype])
|
|
493
|
+
return tensor.detach().to(torch.float32).cpu().numpy().astype(conv[tensor.dtype])
|
|
480
494
|
|
|
481
495
|
|
|
482
496
|
def replace_string_by_dynamic(dynamic_shapes: Any) -> Any:
|
|
@@ -765,7 +779,12 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
|
|
|
765
779
|
|
|
766
780
|
|
|
767
781
|
def torch_deepcopy(value: Any) -> Any:
|
|
768
|
-
"""
|
|
782
|
+
"""
|
|
783
|
+
Makes a deep copy.
|
|
784
|
+
|
|
785
|
+
:param value: any value
|
|
786
|
+
:return: a deep copy
|
|
787
|
+
"""
|
|
769
788
|
if value is None:
|
|
770
789
|
return None
|
|
771
790
|
if isinstance(value, (int, float, str)):
|
|
@@ -794,9 +813,14 @@ def torch_deepcopy(value: Any) -> Any:
|
|
|
794
813
|
from .cache_helper import CacheKeyValue
|
|
795
814
|
|
|
796
815
|
ca = CacheKeyValue(value)
|
|
816
|
+
if len(ca.key_cache) == 0:
|
|
817
|
+
# Use of deepcopy.
|
|
818
|
+
import copy
|
|
819
|
+
|
|
820
|
+
return copy.deepcopy(value)
|
|
797
821
|
return make_static_cache(
|
|
798
822
|
torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))),
|
|
799
|
-
max_cache_len=value.max_cache_len,
|
|
823
|
+
max_cache_len=max([value.max_cache_len, *[t.shape[2] for t in ca.key_cache]]),
|
|
800
824
|
)
|
|
801
825
|
if value.__class__.__name__ == "HybridCache":
|
|
802
826
|
from .cache_helper import CacheKeyValue
|
|
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import onnx
|
|
5
5
|
import torch
|
|
6
|
-
from ..helpers.torch_helper import to_tensor
|
|
6
|
+
from ..helpers.torch_helper import to_tensor, to_numpy
|
|
7
7
|
from ..torch_onnx.runtime_info import first_used_last_used, RuntimeValue
|
|
8
8
|
from .report_results_comparison import ReportResultComparison
|
|
9
9
|
from . import torch_ops
|
|
@@ -578,7 +578,7 @@ class TorchOnnxEvaluator:
|
|
|
578
578
|
print(f"- clean {o}")
|
|
579
579
|
|
|
580
580
|
if use_numpy:
|
|
581
|
-
return [None if a is None else a
|
|
581
|
+
return [None if a is None else to_numpy(a) for a in fres]
|
|
582
582
|
return fres
|
|
583
583
|
|
|
584
584
|
def run_with_values(
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_data(name: str):
|
|
5
|
+
"""Returns data stored in this folder."""
|
|
6
|
+
filename = os.path.join(os.path.dirname(__file__), name)
|
|
7
|
+
assert os.path.exists(
|
|
8
|
+
filename
|
|
9
|
+
), f"Unable to find a file with {name!r}, looked for {filename!r}"
|
|
10
|
+
|
|
11
|
+
from ...helpers.mini_onnx_builder import create_input_tensors_from_onnx_model
|
|
12
|
+
|
|
13
|
+
return create_input_tensors_from_onnx_model(filename)
|
|
Binary file
|