onnx-diagnostic 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +18 -0
  3. onnx_diagnostic/api.py +15 -0
  4. onnx_diagnostic/ext_test_case.py +3 -1
  5. onnx_diagnostic/helpers/args_helper.py +1 -1
  6. onnx_diagnostic/helpers/doc_helper.py +143 -0
  7. onnx_diagnostic/helpers/helper.py +6 -5
  8. onnx_diagnostic/helpers/model_builder_helper.py +24 -8
  9. onnx_diagnostic/helpers/rt_helper.py +5 -1
  10. onnx_diagnostic/helpers/torch_helper.py +2 -0
  11. onnx_diagnostic/reference/__init__.py +1 -0
  12. onnx_diagnostic/reference/torch_evaluator.py +648 -0
  13. onnx_diagnostic/reference/torch_ops/__init__.py +55 -0
  14. onnx_diagnostic/reference/torch_ops/_op_run.py +335 -0
  15. onnx_diagnostic/reference/torch_ops/access_ops.py +94 -0
  16. onnx_diagnostic/reference/torch_ops/binary_ops.py +108 -0
  17. onnx_diagnostic/reference/torch_ops/controlflow_ops.py +121 -0
  18. onnx_diagnostic/reference/torch_ops/generator_ops.py +36 -0
  19. onnx_diagnostic/reference/torch_ops/nn_ops.py +196 -0
  20. onnx_diagnostic/reference/torch_ops/other_ops.py +106 -0
  21. onnx_diagnostic/reference/torch_ops/reduce_ops.py +130 -0
  22. onnx_diagnostic/reference/torch_ops/sequence_ops.py +65 -0
  23. onnx_diagnostic/reference/torch_ops/shape_ops.py +121 -0
  24. onnx_diagnostic/reference/torch_ops/unary_ops.py +86 -0
  25. onnx_diagnostic/tasks/__init__.py +22 -1
  26. onnx_diagnostic/tasks/image_classification.py +2 -2
  27. onnx_diagnostic/tasks/text_generation.py +3 -3
  28. onnx_diagnostic/torch_export_patches/eval/__init__.py +106 -37
  29. onnx_diagnostic/torch_export_patches/eval/model_cases.py +12 -25
  30. onnx_diagnostic/torch_export_patches/patch_module_helper.py +130 -16
  31. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +88 -0
  32. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +142 -0
  33. onnx_diagnostic/torch_models/test_helper.py +133 -16
  34. onnx_diagnostic/torch_onnx/runtime_info.py +289 -0
  35. {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/METADATA +1 -1
  36. {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/RECORD +39 -23
  37. {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/WHEEL +1 -1
  38. {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/licenses/LICENSE.txt +0 -0
  39. {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py CHANGED
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
 
-__version__ = "0.6.0"
+__version__ = "0.6.2"
 __author__ = "Xavier Dupré"
onnx_diagnostic/_command_lines_parser.py CHANGED
@@ -352,6 +352,12 @@ def get_parser_validate() -> ArgumentParser:
         action=BooleanOptionalAction,
         help="validate the trained model (requires downloading)",
     )
+    parser.add_argument(
+        "--runtime",
+        choices=["onnxruntime", "torch", "ref"],
+        default="onnxruntime",
+        help="onnx runtime to use, onnxruntime by default",
+    )
     parser.add_argument(
         "-o",
         "--dump-folder",
@@ -399,6 +405,15 @@ def get_parser_validate() -> ArgumentParser:
         "example: --mop attn_implementation=eager",
         action=_ParseDict,
     )
+    parser.add_argument(
+        "--repeat",
+        default=1,
+        type=int,
+        help="number of times to run the model to measure inference time",
+    )
+    parser.add_argument(
+        "--warmup", default=0, type=int, help="number of warmup runs before measuring"
+    )
     return parser


@@ -453,6 +468,9 @@ def _cmd_validate(argv: List[Any]):
         model_options=args.mop,
         subfolder=args.subfolder,
         opset=args.opset,
+        runtime=args.runtime,
+        repeat=args.repeat,
+        warmup=args.warmup,
     )
     print("")
     print("-- summary --")
onnx_diagnostic/api.py ADDED
@@ -0,0 +1,15 @@
+from typing import Any
+
+
+class TensorLike:
+    """Mocks a tensor."""
+
+    @property
+    def dtype(self) -> Any:
+        "Must be overwritten."
+        raise NotImplementedError("dtype must be overwritten.")
+
+    @property
+    def shape(self) -> Any:
+        "Must be overwritten."
+        raise NotImplementedError("shape must be overwritten.")
onnx_diagnostic/ext_test_case.py CHANGED
@@ -907,11 +907,13 @@ class ExtTestCase(unittest.TestCase):
         except AssertionError as e:
             expected_max = numpy.abs(expected).max()
             expected_value = numpy.abs(value).max()
+            te = expected.astype(int) if expected.dtype == numpy.bool_ else expected
+            tv = value.astype(int) if value.dtype == numpy.bool_ else value
             rows = [
                 f"{msg}\n{e}" if msg else str(e),
                 f"expected max value={expected_max}",
                 f"expected computed value={expected_value}\n",
-                f"ratio={expected / value}\ndiff={expected - value}",
+                f"ratio={te / tv}\ndiff={te - tv}",
             ]
             raise AssertionError("\n".join(rows))  # noqa: B904
 
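The new te/tv casts exist because numpy forbids - on boolean arrays, so the diff= part of the error report crashed instead of printing when two boolean tensors mismatched. A quick demonstration of the underlying behavior:

import numpy

expected = numpy.array([True, False])
value = numpy.array([True, True])
try:
    expected - value
except TypeError as e:
    print(e)  # numpy boolean subtract, the `-` operator, is not supported...

# The patch's workaround: cast to int before building the report.
print(expected.astype(int) - value.astype(int))  # [ 0 -1]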
onnx_diagnostic/helpers/args_helper.py CHANGED
@@ -113,7 +113,7 @@ def get_parsed_args(
     )
 
     res = parser.parse_args(args=new_args)
-    update = {}
+    update: Dict[str, Union[int, float]] = {}
     for k, v in res.__dict__.items():
         try:
             vi = int(v)
onnx_diagnostic/helpers/doc_helper.py ADDED
@@ -0,0 +1,143 @@
+from typing import Dict, Optional, Tuple
+import onnx
+import onnx.helper as oh
+import torch
+from ..reference.torch_ops import OpRunKernel, OpRunTensor
+from .torch_helper import onnx_dtype_to_torch_dtype, torch_dtype_to_onnx_dtype
+from .ort_session import InferenceSessionForTorch
+
+
+class LayerNormalizationOrt(OpRunKernel):
+    "LayerNormalization with onnxruntime"
+
+    @classmethod
+    def device_dependent(cls) -> bool:
+        "Needs device."
+        return False
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version=None,
+        device: Optional[torch.device] = None,
+        verbose=0,
+    ):
+        super().__init__(node, version, verbose=verbose)
+        self.axis = self.get_attribute_int(node, "axis", -1)
+        self.epsilon = self.get_attribute_float(node, "epsilon", 1e-5)
+        self.device = device
+        self.stash_type = onnx_dtype_to_torch_dtype(
+            self.get_attribute_int(node, "stash_type", onnx.TensorProto.FLOAT)  # type: ignore[arg-type]
+        )
+        self.compute_std = len(node.output) > 1
+        assert not self.compute_std, (
+            f"This kernel implementation only works when a single output "
+            f"is required but outputs {node.output} were requested."
+        )
+        self._cache: Dict[Tuple[int, int], InferenceSessionForTorch] = {}
+        self.is_cpu = torch.device("cpu") == self.device
+
+    def _make_model(self, itype: int, rank: int, has_bias: bool) -> InferenceSessionForTorch:
+        shape = [*[f"d{i}" for i in range(rank - 1)], "last"]
+        layer_model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node(
+                        "LayerNormalization",
+                        ["X", "W", "B"] if has_bias else ["X", "W"],
+                        ["Z"],
+                        axis=self.axis,
+                        epsilon=self.epsilon,
+                    )
+                ],
+                "dummy",
+                (
+                    [
+                        oh.make_tensor_value_info("X", itype, shape),
+                        oh.make_tensor_value_info("W", itype, ["last"]),
+                        oh.make_tensor_value_info("B", itype, ["last"]),
+                    ]
+                    if has_bias
+                    else [
+                        oh.make_tensor_value_info("X", itype, shape),
+                        oh.make_tensor_value_info("W", itype, ["last"]),
+                    ]
+                ),
+                [oh.make_tensor_value_info("Z", itype, shape)],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
+        self._provider = provider
+        return InferenceSessionForTorch(layer_model, providers=[provider])
+
+    def run(self, x, scale, bias=None):
+        itype = torch_dtype_to_onnx_dtype(x.dtype)
+        rank = len(x.shape)
+        key = itype, rank
+        if key not in self._cache:
+            self._cache[key] = self._make_model(itype, rank, bias is not None)
+        sess = self._cache[key]
+        if self.verbose:
+            print(f"[LayerNormalizationOrt] running on {self._provider!r}")
+        feeds = dict(X=x.tensor, W=scale.tensor)
+        if bias is not None:
+            feeds["B"] = bias.tensor
+        got = sess.run(None, feeds)[0]
+        return OpRunTensor(got)
+
+
+class MatMulOrt(OpRunKernel):
+    "MatMul with onnxruntime"
+
+    @classmethod
+    def device_dependent(cls) -> bool:
+        "Needs device."
+        return False
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version=None,
+        device: Optional[torch.device] = None,
+        verbose=0,
+    ):
+        super().__init__(node, version, verbose=verbose)
+        self.device = device
+        self._cache: Dict[Tuple[int, int, int], InferenceSessionForTorch] = {}
+        self.is_cpu = torch.device("cpu") == self.device
+
+    def _make_model(self, itype: int, ranka: int, rankb: int) -> InferenceSessionForTorch:
+        shapea = [f"a{i}" for i in range(ranka)]
+        shapeb = [f"b{i}" for i in range(rankb)]
+        shapec = [f"c{i}" for i in range(max(ranka, rankb))]
+        model = oh.make_model(
+            oh.make_graph(
+                [oh.make_node("MatMul", ["A", "B"], ["C"])],
+                "dummy",
+                [
+                    oh.make_tensor_value_info("A", itype, shapea),
+                    oh.make_tensor_value_info("B", itype, shapeb),
+                ],
+                [oh.make_tensor_value_info("C", itype, shapec)],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
+        self._provider = provider
+        return InferenceSessionForTorch(model, providers=[provider])
+
+    def run(self, a, b):
+        itype = torch_dtype_to_onnx_dtype(a.dtype)
+        ranka, rankb = len(a.shape), len(b.shape)
+        key = itype, ranka, rankb
+        if key not in self._cache:
+            self._cache[key] = self._make_model(itype, ranka, rankb)
+        sess = self._cache[key]
+        if self.verbose:
+            print(f"[MatMulOrt] running on {self._provider!r}")
+        feeds = dict(A=a.tensor, B=b.tensor)
+        got = sess.run(None, feeds)[0]
+        return OpRunTensor(got)
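Both kernels share one pattern: lazily build a single-node ONNX model keyed by element type and input rank(s), and cache the resulting session. A standalone sketch of the MatMul case, using a plain onnxruntime.InferenceSession in place of the package's InferenceSessionForTorch:

import numpy as np
import onnx
import onnx.helper as oh
import onnxruntime

# One-node MatMul model with symbolic dims, as _make_model builds it for ranks (2, 2).
model = oh.make_model(
    oh.make_graph(
        [oh.make_node("MatMul", ["A", "B"], ["C"])],
        "dummy",
        [
            oh.make_tensor_value_info("A", onnx.TensorProto.FLOAT, ["a0", "a1"]),
            oh.make_tensor_value_info("B", onnx.TensorProto.FLOAT, ["b0", "b1"]),
        ],
        [oh.make_tensor_value_info("C", onnx.TensorProto.FLOAT, ["c0", "c1"])],
    ),
    ir_version=9,
    opset_imports=[oh.make_opsetid("", 18)],
)
sess = onnxruntime.InferenceSession(
    model.SerializeToString(), providers=["CPUExecutionProvider"]
)
a = np.random.rand(2, 3).astype(np.float32)
b = np.random.rand(3, 4).astype(np.float32)
(c,) = sess.run(None, {"A": a, "B": b})
assert c.shape == (2, 4)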
onnx_diagnostic/helpers/helper.py CHANGED
@@ -698,7 +698,8 @@ def string_type(
             print(f"[string_type] CONFIG:{type(obj)}")
         s = str(obj.to_diff_dict()).replace("\n", "").replace(" ", "")
         return f"{obj.__class__.__name__}(**{s})"
-
+    if obj.__class__.__name__ in {"TorchModelContainer", "InferenceSession"}:
+        return f"{obj.__class__.__name__}(...)"
     if verbose:
         print(f"[string_type] END:{type(obj)}")
     raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}")
@@ -1306,11 +1307,11 @@ def max_diff(
         rdiff = diff / (exp_cpu.abs() + 1e-3)
         if diff.numel() > 0:
             abs_diff, rel_diff, sum_diff, n_diff, nan_diff = (
-                float(diff.max()),
-                float(rdiff.max()),
-                float(diff.sum()),
+                float(diff.max().detach()),
+                float(rdiff.max().detach()),
+                float(diff.sum().detach()),
                 float(diff.numel()),
-                float(ndiff.sum()),
+                float(ndiff.sum().detach()),
            )
             argm = tuple(map(int, torch.unravel_index(diff.argmax(), diff.shape)))
         elif got_cpu.numel() == exp_cpu.numel():
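For context, the statistics being wrapped, computed in isolation; the added .detach() drops autograd tracking from each reduced scalar before the Python float() conversion:

import torch

exp = torch.tensor([1.0, 2.0], requires_grad=True)
got = torch.tensor([1.0, 2.5])
diff = (got - exp).abs()
rdiff = diff / (exp.abs() + 1e-3)
abs_diff = float(diff.max().detach())   # 0.5
rel_diff = float(rdiff.max().detach())  # ~0.25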
onnx_diagnostic/helpers/model_builder_helper.py CHANGED
@@ -237,18 +237,24 @@ def create_model_builder(
         "OlmoForCausalLM": builder.OLMoModel,
         "PhiForCausalLM": builder.PhiModel,
         "Phi3ForCausalLM": (
-            lambda config, *_: (
-                builder.Phi3MiniModel
-                if config.max_position_embeddings == config.original_max_position_embeddings
-                else builder.Phi3MiniLongRoPEModel
+            lambda config, *args: (
+                (
+                    builder.Phi3MiniModel
+                    if config.max_position_embeddings
+                    == config.original_max_position_embeddings
+                    else builder.Phi3MiniLongRoPEModel
+                )(config, *args)
             )
         ),
         "PhiMoEForCausalLM": builder.Phi3MoELongRoPEModel,
         "Phi3SmallForCausalLM": (
-            lambda config, *_: (
-                builder.Phi3SmallModel
-                if config.max_position_embeddings == config.original_max_position_embeddings
-                else builder.Phi3SmallLongRoPEModel
+            lambda config, *args: (
+                (
+                    builder.Phi3SmallModel
+                    if config.max_position_embeddings
+                    == config.original_max_position_embeddings
+                    else builder.Phi3SmallLongRoPEModel
+                )(config, *args)
             )
         ),
         "Phi3VForCausalLM": builder.Phi3VModel,
@@ -317,7 +323,17 @@ def create_model_builder(
     )
 
     cls = arch_map[config.architectures[0]]
+
+    # ModelBuilder does not like None values for some parameters.
+    remove = set()
+    for c in ["head_dim"]:
+        if hasattr(config, c) and getattr(config, c) is None:
+            remove.add(c)
+    for c in remove:
+        delattr(config, c)
+
     onnx_model = cls(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+
     if post:
         post(onnx_model)
     _make_model(onnx_model, model, verbose=verbose)
onnx_diagnostic/helpers/rt_helper.py CHANGED
@@ -55,7 +55,11 @@ def make_feeds(
     names = (
         [i.name for i in proto.graph.input]
         if isinstance(proto, onnx.ModelProto)
-        else ([i.name for i in proto.get_inputs()] if hasattr(proto, "get_inputs") else proto)
+        else (
+            [i.name for i in proto.get_inputs()]
+            if hasattr(proto, "get_inputs")
+            else (proto.input_names if hasattr(proto, "input_names") else proto)
+        )
     )
     assert (
         isinstance(names, list)
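After this change make_feeds accepts a fourth source of input names. Restated as a standalone helper (resolve_input_names is illustrative, not a package function):

import onnx

def resolve_input_names(proto):
    "Illustrative mirror of the name resolution in make_feeds."
    if isinstance(proto, onnx.ModelProto):
        return [i.name for i in proto.graph.input]   # raw ONNX model
    if hasattr(proto, "get_inputs"):
        return [i.name for i in proto.get_inputs()]  # InferenceSession-like object
    if hasattr(proto, "input_names"):
        return proto.input_names                     # wrapper exposing input_names
    return proto                                     # assumed to already be a list of names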
onnx_diagnostic/helpers/torch_helper.py CHANGED
@@ -878,6 +878,8 @@ def to_tensor(tensor: onnx.TensorProto, base_dir: str = "") -> torch.Tensor:
 
     if tensor.HasField("raw_data"):
         raw_data = tensor.raw_data
+        if len(raw_data) == 0:
+            return torch.tensor([], dtype=torch_dtype).reshape(dims)
         if sys.byteorder == "big":
             # Convert endian from little to big
             raw_data = torch.frombuffer(raw_data, dtype=torch_dtype).byteswap().tobytes()
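The guard covers zero-element initializers, whose raw_data is an empty byte string that the buffer-decoding paths below cannot digest; it short-circuits with an empty tensor of the right dtype and shape. A sketch of the repaired round-trip (assumes the wheel is installed):

import onnx
import onnx.helper as oh
from onnx_diagnostic.helpers.torch_helper import to_tensor

# A zero-element initializer stored as (empty) raw bytes.
proto = oh.make_tensor("empty", onnx.TensorProto.FLOAT, dims=[0], vals=b"", raw=True)
t = to_tensor(proto)
assert tuple(t.shape) == (0,)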
onnx_diagnostic/reference/__init__.py CHANGED
@@ -1,2 +1,3 @@
 from .evaluator import ExtendedReferenceEvaluator
 from .ort_evaluator import OnnxruntimeEvaluator
+from .torch_evaluator import TorchOnnxEvaluator
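TorchOnnxEvaluator is the torch-backed runtime behind the new --runtime torch option above. A hedged usage sketch, assuming it follows the same run(None, feeds) convention as the two existing evaluators:

import onnx
import onnx.helper as oh
import torch
from onnx_diagnostic.reference import TorchOnnxEvaluator

model = oh.make_model(
    oh.make_graph(
        [oh.make_node("Add", ["X", "Y"], ["Z"])],
        "demo",
        [
            oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, ["n"]),
            oh.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, ["n"]),
        ],
        [oh.make_tensor_value_info("Z", onnx.TensorProto.FLOAT, ["n"])],
    ),
    opset_imports=[oh.make_opsetid("", 18)],
)
sess = TorchOnnxEvaluator(model)
(z,) = sess.run(None, {"X": torch.ones(3), "Y": torch.ones(3)})
print(z)  # tensor([2., 2., 2.])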