onnx_diagnostic-0.6.0-py3-none-any.whl → onnx_diagnostic-0.6.2-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry; it is provided for informational purposes only.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +18 -0
- onnx_diagnostic/api.py +15 -0
- onnx_diagnostic/ext_test_case.py +3 -1
- onnx_diagnostic/helpers/args_helper.py +1 -1
- onnx_diagnostic/helpers/doc_helper.py +143 -0
- onnx_diagnostic/helpers/helper.py +6 -5
- onnx_diagnostic/helpers/model_builder_helper.py +24 -8
- onnx_diagnostic/helpers/rt_helper.py +5 -1
- onnx_diagnostic/helpers/torch_helper.py +2 -0
- onnx_diagnostic/reference/__init__.py +1 -0
- onnx_diagnostic/reference/torch_evaluator.py +648 -0
- onnx_diagnostic/reference/torch_ops/__init__.py +55 -0
- onnx_diagnostic/reference/torch_ops/_op_run.py +335 -0
- onnx_diagnostic/reference/torch_ops/access_ops.py +94 -0
- onnx_diagnostic/reference/torch_ops/binary_ops.py +108 -0
- onnx_diagnostic/reference/torch_ops/controlflow_ops.py +121 -0
- onnx_diagnostic/reference/torch_ops/generator_ops.py +36 -0
- onnx_diagnostic/reference/torch_ops/nn_ops.py +196 -0
- onnx_diagnostic/reference/torch_ops/other_ops.py +106 -0
- onnx_diagnostic/reference/torch_ops/reduce_ops.py +130 -0
- onnx_diagnostic/reference/torch_ops/sequence_ops.py +65 -0
- onnx_diagnostic/reference/torch_ops/shape_ops.py +121 -0
- onnx_diagnostic/reference/torch_ops/unary_ops.py +86 -0
- onnx_diagnostic/tasks/__init__.py +22 -1
- onnx_diagnostic/tasks/image_classification.py +2 -2
- onnx_diagnostic/tasks/text_generation.py +3 -3
- onnx_diagnostic/torch_export_patches/eval/__init__.py +106 -37
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +12 -25
- onnx_diagnostic/torch_export_patches/patch_module_helper.py +130 -16
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +88 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +142 -0
- onnx_diagnostic/torch_models/test_helper.py +133 -16
- onnx_diagnostic/torch_onnx/runtime_info.py +289 -0
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/RECORD +39 -23
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/WHEEL +1 -1
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/top_level.txt +0 -0
onnx_diagnostic/_command_lines_parser.py
CHANGED
@@ -352,6 +352,12 @@ def get_parser_validate() -> ArgumentParser:
         action=BooleanOptionalAction,
         help="validate the trained model (requires downloading)",
     )
+    parser.add_argument(
+        "--runtime",
+        choices=["onnxruntime", "torch", "ref"],
+        default="onnxruntime",
+        help="onnx runtime to use, onnxruntime by default",
+    )
     parser.add_argument(
         "-o",
         "--dump-folder",
@@ -399,6 +405,15 @@ def get_parser_validate() -> ArgumentParser:
         "example: --mop attn_implementation=eager",
         action=_ParseDict,
     )
+    parser.add_argument(
+        "--repeat",
+        default=1,
+        type=int,
+        help="number of times to run the model to measure inference time",
+    )
+    parser.add_argument(
+        "--warmup", default=0, type=int, help="number of times to run the model to do warmup"
+    )
     return parser


@@ -453,6 +468,9 @@ def _cmd_validate(argv: List[Any]):
         model_options=args.mop,
         subfolder=args.subfolder,
         opset=args.opset,
+        runtime=args.runtime,
+        repeat=args.repeat,
+        warmup=args.warmup,
     )
     print("")
     print("-- summary --")
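Together, these flags let the validate subcommand choose the runtime and time the model. A hypothetical invocation (the subcommand name is inferred from get_parser_validate/_cmd_validate above; the required model-selection arguments are omitted):

    python -m onnx_diagnostic validate --runtime torch --repeat 10 --warmup 2 ...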
onnx_diagnostic/api.py
ADDED
@@ -0,0 +1,15 @@
+from typing import Any
+
+
+class TensorLike:
+    """Mocks a tensor."""
+
+    @property
+    def dtype(self) -> Any:
+        "Must be overwritten."
+        raise NotImplementedError("dtype must be overwritten.")
+
+    @property
+    def shape(self) -> Any:
+        "Must be overwritten."
+        raise NotImplementedError("shape must be overwritten.")
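TensorLike only fixes the duck-typed surface (dtype and shape) that the runtimes below rely on. A minimal illustrative subclass, not part of the package:

    import torch
    from onnx_diagnostic.api import TensorLike


    class TorchTensorLike(TensorLike):
        "Illustrative wrapper exposing the two required properties."

        def __init__(self, t: torch.Tensor):
            self._t = t

        @property
        def dtype(self):
            return self._t.dtype

        @property
        def shape(self):
            return self._t.shape


    x = TorchTensorLike(torch.zeros(2, 3))
    print(x.dtype, x.shape)  # torch.float32 torch.Size([2, 3])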
onnx_diagnostic/ext_test_case.py
CHANGED
@@ -907,11 +907,13 @@ class ExtTestCase(unittest.TestCase):
         except AssertionError as e:
             expected_max = numpy.abs(expected).max()
             expected_value = numpy.abs(value).max()
+            te = expected.astype(int) if expected.dtype == numpy.bool_ else expected
+            tv = value.astype(int) if value.dtype == numpy.bool_ else value
             rows = [
                 f"{msg}\n{e}" if msg else str(e),
                 f"expected max value={expected_max}",
                 f"expected computed value={expected_value}\n",
-                f"ratio={expected / value}\ndiff={expected - value}",
+                f"ratio={te / tv}\ndiff={te - tv}",
             ]
             raise AssertionError("\n".join(rows))  # noqa: B904
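The new te/tv casts exist because NumPy rejects the - operator on boolean arrays, so building the failure report would itself raise. A quick illustration of the failure mode being avoided:

    import numpy

    a = numpy.array([True, False])
    b = numpy.array([True, True])
    # a - b would raise TypeError: numpy boolean subtract is not supported
    print(a.astype(int) - b.astype(int))  # [ 0 -1]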
onnx_diagnostic/helpers/doc_helper.py
ADDED
@@ -0,0 +1,143 @@
+from typing import Dict, Optional, Tuple
+import onnx
+import onnx.helper as oh
+import torch
+from ..reference.torch_ops import OpRunKernel, OpRunTensor
+from .torch_helper import onnx_dtype_to_torch_dtype, torch_dtype_to_onnx_dtype
+from .ort_session import InferenceSessionForTorch
+
+
+class LayerNormalizationOrt(OpRunKernel):
+    "LayerNormalization with onnxruntime"
+
+    @classmethod
+    def device_dependent(cls) -> bool:
+        "Needs device."
+        return False
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version=None,
+        device: Optional[torch.device] = None,
+        verbose=0,
+    ):
+        super().__init__(node, version, verbose=verbose)
+        self.axis = self.get_attribute_int(node, "axis", -1)
+        self.epsilon = self.get_attribute_float(node, "epsilon", 1e-5)
+        self.device = device
+        self.stash_type = onnx_dtype_to_torch_dtype(
+            self.get_attribute_int(node, "stash_type", onnx.TensorProto.FLOAT)  # type: ignore[arg-type]
+        )
+        self.compute_std = len(node.output) > 1
+        assert not self.compute_std, (
+            f"This kernel implementation only work when only one output "
+            f"is required but {node.output} were."
+        )
+        self._cache: Dict[Tuple[int, int], onnx.ModelProto] = {}
+        self.is_cpu = torch.device("cpu") == self.device
+
+    def _make_model(self, itype: int, rank: int, has_bias: bool) -> onnx.ModelProto:
+        shape = [*["d{i}" for i in range(rank - 1)], "last"]
+        layer_model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node(
+                        "LayerNormalization",
+                        ["X", "W", "B"] if has_bias else ["X", "W"],
+                        ["Z"],
+                        axis=self.axis,
+                        epsilon=self.epsilon,
+                    )
+                ],
+                "dummy",
+                (
+                    [
+                        oh.make_tensor_value_info("X", itype, shape),
+                        oh.make_tensor_value_info("W", itype, ["last"]),
+                        oh.make_tensor_value_info("B", itype, ["last"]),
+                    ]
+                    if has_bias
+                    else [
+                        oh.make_tensor_value_info("X", itype, shape),
+                        oh.make_tensor_value_info("W", itype, ["last"]),
+                    ]
+                ),
+                [oh.make_tensor_value_info("Z", itype, shape)],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
+        self._provider = provider
+        return InferenceSessionForTorch(layer_model, providers=[provider])
+
+    def run(self, x, scale, bias=None):
+        itype = torch_dtype_to_onnx_dtype(x.dtype)
+        rank = len(x.shape)
+        key = itype, rank
+        if key not in self._cache:
+            self._cache[key] = self._make_model(itype, rank, bias is not None)
+        sess = self._cache[key]
+        if self.verbose:
+            print(f"[LayerNormalizationOrt] running on {self._provider!r}")
+        feeds = dict(X=x.tensor, W=scale.tensor)
+        if bias is not None:
+            feeds["B"] = bias.tensor
+        got = sess.run(None, feeds)[0]
+        return OpRunTensor(got)
+
+
+class MatMulOrt(OpRunKernel):
+    "MatMul with onnxruntime"
+
+    @classmethod
+    def device_dependent(cls) -> bool:
+        "Needs device."
+        return False
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version=None,
+        device: Optional[torch.device] = None,
+        verbose=0,
+    ):
+        super().__init__(node, version, verbose=verbose)
+        self.device = device
+        self._cache: Dict[Tuple[int, int, int], onnx.ModelProto] = {}
+        self.is_cpu = torch.device("cpu") == self.device
+
+    def _make_model(self, itype: int, ranka: int, rankb: int) -> onnx.ModelProto:
+        shapea = ["a{i}" for i in range(ranka)]
+        shapeb = ["b{i}" for i in range(rankb)]
+        shapec = ["c{i}" for i in range(max(ranka, rankb))]
+        model = oh.make_model(
+            oh.make_graph(
+                [oh.make_node("MatMul", ["A", "B"], ["C"])],
+                "dummy",
+                [
+                    oh.make_tensor_value_info("A", itype, shapea),
+                    oh.make_tensor_value_info("B", itype, shapeb),
+                ],
+                [oh.make_tensor_value_info("C", itype, shapec)],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
+        self._provider = provider
+        return InferenceSessionForTorch(model, providers=[provider])
+
+    def run(self, a, b):
+        itype = torch_dtype_to_onnx_dtype(a.dtype)
+        ranka, rankb = len(a.shape), len(b.shape)
+        key = itype, ranka, rankb
+        if key not in self._cache:
+            self._cache[key] = self._make_model(itype, ranka, rankb)
+        sess = self._cache[key]
+        if self.verbose:
+            print(f"[MatMulOrt] running on {self._provider!r}")
+        feeds = dict(A=a.tensor, B=b.tensor)
+        got = sess.run(None, feeds)[0]
+        return OpRunTensor(got)
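Both kernels follow the same pattern: lazily build a one-node ONNX model per (dtype, rank) key, cache it, and execute it with onnxruntime through InferenceSessionForTorch. A rough usage sketch, under the assumption that OpRunTensor can be constructed directly from a torch.Tensor (as the run methods above suggest):

    import onnx.helper as oh
    import torch
    from onnx_diagnostic.helpers.doc_helper import MatMulOrt
    from onnx_diagnostic.reference.torch_ops import OpRunTensor

    node = oh.make_node("MatMul", ["A", "B"], ["C"])
    kernel = MatMulOrt(node, device=torch.device("cpu"))
    # The one-node model is built and cached on the first call.
    c = kernel.run(OpRunTensor(torch.randn(2, 4)), OpRunTensor(torch.randn(4, 3)))
    print(c.tensor.shape)  # expected: torch.Size([2, 3])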
onnx_diagnostic/helpers/helper.py
CHANGED
@@ -698,7 +698,8 @@ def string_type(
         print(f"[string_type] CONFIG:{type(obj)}")
         s = str(obj.to_diff_dict()).replace("\n", "").replace(" ", "")
         return f"{obj.__class__.__name__}(**{s})"
-
+    if obj.__class__.__name__ in {"TorchModelContainer", "InferenceSession"}:
+        return f"{obj.__class__.__name__}(...)"
     if verbose:
         print(f"[string_type] END:{type(obj)}")
     raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}")
@@ -1306,11 +1307,11 @@ def max_diff(
         rdiff = diff / (exp_cpu.abs() + 1e-3)
         if diff.numel() > 0:
             abs_diff, rel_diff, sum_diff, n_diff, nan_diff = (
-                float(diff.max()),
-                float(rdiff.max()),
-                float(diff.sum()),
+                float(diff.max().detach()),
+                float(rdiff.max().detach()),
+                float(diff.sum().detach()),
                 float(diff.numel()),
-                float(ndiff.sum()),
+                float(ndiff.sum().detach()),
             )
             argm = tuple(map(int, torch.unravel_index(diff.argmax(), diff.shape)))
         elif got_cpu.numel() == exp_cpu.numel():
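The new string_type branch means sessions and model containers are summarized as ClassName(...) instead of tripping the final AssertionError. Roughly (the import path follows the file layout; the tiny Identity model is only there to build a real InferenceSession):

    import onnx
    import onnx.helper as oh
    import onnxruntime
    from onnx_diagnostic.helpers.helper import string_type

    model = oh.make_model(
        oh.make_graph(
            [oh.make_node("Identity", ["X"], ["Y"])],
            "g",
            [oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [1])],
            [oh.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1])],
        ),
        opset_imports=[oh.make_opsetid("", 18)],
    )
    sess = onnxruntime.InferenceSession(
        model.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    print(string_type(sess))  # expected: InferenceSession(...)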
onnx_diagnostic/helpers/model_builder_helper.py
CHANGED
@@ -237,18 +237,24 @@ def create_model_builder(
         "OlmoForCausalLM": builder.OLMoModel,
         "PhiForCausalLM": builder.PhiModel,
         "Phi3ForCausalLM": (
-            lambda config, *
+            lambda config, *args: (
+                (
+                    builder.Phi3MiniModel
+                    if config.max_position_embeddings
+                    == config.original_max_position_embeddings
+                    else builder.Phi3MiniLongRoPEModel
+                )(config, *args)
             )
         ),
         "PhiMoEForCausalLM": builder.Phi3MoELongRoPEModel,
         "Phi3SmallForCausalLM": (
-            lambda config, *
+            lambda config, *args: (
+                (
+                    builder.Phi3SmallModel
+                    if config.max_position_embeddings
+                    == config.original_max_position_embeddings
+                    else builder.Phi3SmallLongRoPEModel
+                )(config, *args)
             )
         ),
         "Phi3VForCausalLM": builder.Phi3VModel,
@@ -317,7 +323,17 @@ def create_model_builder(
     )
 
     cls = arch_map[config.architectures[0]]
+
+    # ModelBuilder does not like None values for some parameters.
+    remove = set()
+    for c in ["head_dim"]:
+        if hasattr(config, c) and getattr(config, c) is None:
+            remove.add(c)
+    for c in remove:
+        delattr(config, c)
+
     onnx_model = cls(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+
     if post:
         post(onnx_model)
     _make_model(onnx_model, model, verbose=verbose)
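The pre-instantiation cleanup simply deletes config attributes that are present but None, since ModelBuilder cannot handle them. The same pattern in isolation, with SimpleNamespace standing in for a transformers config:

    from types import SimpleNamespace

    config = SimpleNamespace(head_dim=None, num_attention_heads=16)
    remove = set()
    for c in ["head_dim"]:
        if hasattr(config, c) and getattr(config, c) is None:
            remove.add(c)
    for c in remove:
        delattr(config, c)
    print(hasattr(config, "head_dim"))  # False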
onnx_diagnostic/helpers/rt_helper.py
CHANGED
@@ -55,7 +55,11 @@ def make_feeds(
     names = (
         [i.name for i in proto.graph.input]
         if isinstance(proto, onnx.ModelProto)
-        else (
+        else (
+            [i.name for i in proto.get_inputs()]
+            if hasattr(proto, "get_inputs")
+            else (proto.input_names if hasattr(proto, "input_names") else proto)
+        )
     )
     assert (
         isinstance(names, list)
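make_feeds now accepts three shapes of proto: an onnx.ModelProto (graph inputs), anything exposing onnxruntime's get_inputs(), and objects carrying an input_names attribute, falling back to proto itself. The cascade, unrolled into a standalone (hypothetical) helper:

    import onnx


    def resolve_input_names(proto):
        # Mirrors the cascade in make_feeds above.
        if isinstance(proto, onnx.ModelProto):
            return [i.name for i in proto.graph.input]
        if hasattr(proto, "get_inputs"):  # e.g. onnxruntime.InferenceSession
            return [i.name for i in proto.get_inputs()]
        return proto.input_names if hasattr(proto, "input_names") else proto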
onnx_diagnostic/helpers/torch_helper.py
CHANGED
@@ -878,6 +878,8 @@ def to_tensor(tensor: onnx.TensorProto, base_dir: str = "") -> torch.Tensor:
 
     if tensor.HasField("raw_data"):
         raw_data = tensor.raw_data
+        if len(raw_data) == 0:
+            return torch.tensor([], dtype=torch_dtype).reshape(dims)
         if sys.byteorder == "big":
             # Convert endian from little to big
             raw_data = torch.frombuffer(raw_data, dtype=torch_dtype).byteswap().tobytes()
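The added guard returns an empty, correctly shaped tensor before the raw-byte decoding path runs, avoiding edge cases with zero-length buffers. An illustrative round-trip (make_tensor with raw bytes is standard onnx.helper API; torch_dtype and dims correspond to the tensor's declared dtype and dims in the function above):

    import onnx
    import onnx.helper as oh
    import torch

    t = oh.make_tensor("empty", onnx.TensorProto.FLOAT, dims=[0, 3], vals=b"", raw=True)
    # What the new guard produces for such an initializer:
    out = torch.tensor([], dtype=torch.float32).reshape(list(t.dims))
    print(out.shape)  # torch.Size([0, 3])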