onnx_diagnostic-0.6.0-py3-none-any.whl → onnx_diagnostic-0.6.2-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry; it is provided for informational purposes only.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +18 -0
- onnx_diagnostic/api.py +15 -0
- onnx_diagnostic/ext_test_case.py +3 -1
- onnx_diagnostic/helpers/args_helper.py +1 -1
- onnx_diagnostic/helpers/doc_helper.py +143 -0
- onnx_diagnostic/helpers/helper.py +6 -5
- onnx_diagnostic/helpers/model_builder_helper.py +24 -8
- onnx_diagnostic/helpers/rt_helper.py +5 -1
- onnx_diagnostic/helpers/torch_helper.py +2 -0
- onnx_diagnostic/reference/__init__.py +1 -0
- onnx_diagnostic/reference/torch_evaluator.py +648 -0
- onnx_diagnostic/reference/torch_ops/__init__.py +55 -0
- onnx_diagnostic/reference/torch_ops/_op_run.py +335 -0
- onnx_diagnostic/reference/torch_ops/access_ops.py +94 -0
- onnx_diagnostic/reference/torch_ops/binary_ops.py +108 -0
- onnx_diagnostic/reference/torch_ops/controlflow_ops.py +121 -0
- onnx_diagnostic/reference/torch_ops/generator_ops.py +36 -0
- onnx_diagnostic/reference/torch_ops/nn_ops.py +196 -0
- onnx_diagnostic/reference/torch_ops/other_ops.py +106 -0
- onnx_diagnostic/reference/torch_ops/reduce_ops.py +130 -0
- onnx_diagnostic/reference/torch_ops/sequence_ops.py +65 -0
- onnx_diagnostic/reference/torch_ops/shape_ops.py +121 -0
- onnx_diagnostic/reference/torch_ops/unary_ops.py +86 -0
- onnx_diagnostic/tasks/__init__.py +22 -1
- onnx_diagnostic/tasks/image_classification.py +2 -2
- onnx_diagnostic/tasks/text_generation.py +3 -3
- onnx_diagnostic/torch_export_patches/eval/__init__.py +106 -37
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +12 -25
- onnx_diagnostic/torch_export_patches/patch_module_helper.py +130 -16
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +88 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +142 -0
- onnx_diagnostic/torch_models/test_helper.py +133 -16
- onnx_diagnostic/torch_onnx/runtime_info.py +289 -0
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/RECORD +39 -23
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/WHEEL +1 -1
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.6.0.dist-info → onnx_diagnostic-0.6.2.dist-info}/top_level.txt +0 -0
onnx_diagnostic/_command_lines_parser.py
CHANGED
@@ -352,6 +352,12 @@ def get_parser_validate() -> ArgumentParser:
         action=BooleanOptionalAction,
         help="validate the trained model (requires downloading)",
     )
+    parser.add_argument(
+        "--runtime",
+        choices=["onnxruntime", "torch", "ref"],
+        default="onnxruntime",
+        help="onnx runtime to use, onnxruntime by default",
+    )
     parser.add_argument(
         "-o",
         "--dump-folder",
@@ -399,6 +405,15 @@ def get_parser_validate() -> ArgumentParser:
         "example: --mop attn_implementation=eager",
         action=_ParseDict,
     )
+    parser.add_argument(
+        "--repeat",
+        default=1,
+        type=int,
+        help="number of times to run the model to measure inference time",
+    )
+    parser.add_argument(
+        "--warmup", default=0, type=int, help="number of times to run the model to do warmup"
+    )
     return parser


@@ -453,6 +468,9 @@ def _cmd_validate(argv: List[Any]):
         model_options=args.mop,
         subfolder=args.subfolder,
         opset=args.opset,
+        runtime=args.runtime,
+        repeat=args.repeat,
+        warmup=args.warmup,
     )
     print("")
     print("-- summary --")
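Together, these flags let the validate subcommand choose the runtime and time the model. A hypothetical invocation (the subcommand name is inferred from get_parser_validate/_cmd_validate above; the required model-selection arguments are omitted):

    python -m onnx_diagnostic validate --runtime torch --repeat 10 --warmup 2 ...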
onnx_diagnostic/api.py
ADDED
@@ -0,0 +1,15 @@
+from typing import Any
+
+
+class TensorLike:
+    """Mocks a tensor."""
+
+    @property
+    def dtype(self) -> Any:
+        "Must be overwritten."
+        raise NotImplementedError("dtype must be overwritten.")
+
+    @property
+    def shape(self) -> Any:
+        "Must be overwritten."
+        raise NotImplementedError("shape must be overwritten.")
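TensorLike only fixes the duck-typed surface (dtype and shape) that the runtimes below rely on. A minimal illustrative subclass, not part of the package:

    import torch
    from onnx_diagnostic.api import TensorLike


    class TorchTensorLike(TensorLike):
        "Illustrative wrapper exposing the two required properties."

        def __init__(self, t: torch.Tensor):
            self._t = t

        @property
        def dtype(self):
            return self._t.dtype

        @property
        def shape(self):
            return self._t.shape


    x = TorchTensorLike(torch.zeros(2, 3))
    print(x.dtype, x.shape)  # torch.float32 torch.Size([2, 3])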
onnx_diagnostic/ext_test_case.py
CHANGED
@@ -907,11 +907,13 @@ class ExtTestCase(unittest.TestCase):
         except AssertionError as e:
             expected_max = numpy.abs(expected).max()
             expected_value = numpy.abs(value).max()
+            te = expected.astype(int) if expected.dtype == numpy.bool_ else expected
+            tv = value.astype(int) if value.dtype == numpy.bool_ else value
             rows = [
                 f"{msg}\n{e}" if msg else str(e),
                 f"expected max value={expected_max}",
                 f"expected computed value={expected_value}\n",
-                f"ratio={expected / value}\ndiff={expected - value}",
+                f"ratio={te / tv}\ndiff={te - tv}",
             ]
             raise AssertionError("\n".join(rows))  # noqa: B904
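The new te/tv casts exist because NumPy rejects the - operator on boolean arrays, so building the failure report would itself raise. A quick illustration of the failure mode being avoided:

    import numpy

    a = numpy.array([True, False])
    b = numpy.array([True, True])
    # a - b would raise TypeError: numpy boolean subtract is not supported
    print(a.astype(int) - b.astype(int))  # [ 0 -1]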
onnx_diagnostic/helpers/doc_helper.py
ADDED
@@ -0,0 +1,143 @@
+from typing import Dict, Optional, Tuple
+import onnx
+import onnx.helper as oh
+import torch
+from ..reference.torch_ops import OpRunKernel, OpRunTensor
+from .torch_helper import onnx_dtype_to_torch_dtype, torch_dtype_to_onnx_dtype
+from .ort_session import InferenceSessionForTorch
+
+
+class LayerNormalizationOrt(OpRunKernel):
+    "LayerNormalization with onnxruntime"
+
+    @classmethod
+    def device_dependent(cls) -> bool:
+        "Needs device."
+        return False
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version=None,
+        device: Optional[torch.device] = None,
+        verbose=0,
+    ):
+        super().__init__(node, version, verbose=verbose)
+        self.axis = self.get_attribute_int(node, "axis", -1)
+        self.epsilon = self.get_attribute_float(node, "epsilon", 1e-5)
+        self.device = device
+        self.stash_type = onnx_dtype_to_torch_dtype(
+            self.get_attribute_int(node, "stash_type", onnx.TensorProto.FLOAT)  # type: ignore[arg-type]
+        )
+        self.compute_std = len(node.output) > 1
+        assert not self.compute_std, (
+            f"This kernel implementation only work when only one output "
+            f"is required but {node.output} were."
+        )
+        self._cache: Dict[Tuple[int, int], onnx.ModelProto] = {}
+        self.is_cpu = torch.device("cpu") == self.device
+
+    def _make_model(self, itype: int, rank: int, has_bias: bool) -> onnx.ModelProto:
+        shape = [*["d{i}" for i in range(rank - 1)], "last"]
+        layer_model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node(
+                        "LayerNormalization",
+                        ["X", "W", "B"] if has_bias else ["X", "W"],
+                        ["Z"],
+                        axis=self.axis,
+                        epsilon=self.epsilon,
+                    )
+                ],
+                "dummy",
+                (
+                    [
+                        oh.make_tensor_value_info("X", itype, shape),
+                        oh.make_tensor_value_info("W", itype, ["last"]),
+                        oh.make_tensor_value_info("B", itype, ["last"]),
+                    ]
+                    if has_bias
+                    else [
+                        oh.make_tensor_value_info("X", itype, shape),
+                        oh.make_tensor_value_info("W", itype, ["last"]),
+                    ]
+                ),
+                [oh.make_tensor_value_info("Z", itype, shape)],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
+        self._provider = provider
+        return InferenceSessionForTorch(layer_model, providers=[provider])
+
+    def run(self, x, scale, bias=None):
+        itype = torch_dtype_to_onnx_dtype(x.dtype)
+        rank = len(x.shape)
+        key = itype, rank
+        if key not in self._cache:
+            self._cache[key] = self._make_model(itype, rank, bias is not None)
+        sess = self._cache[key]
+        if self.verbose:
+            print(f"[LayerNormalizationOrt] running on {self._provider!r}")
+        feeds = dict(X=x.tensor, W=scale.tensor)
+        if bias is not None:
+            feeds["B"] = bias.tensor
+        got = sess.run(None, feeds)[0]
+        return OpRunTensor(got)
+
+
+class MatMulOrt(OpRunKernel):
+    "MatMul with onnxruntime"
+
+    @classmethod
+    def device_dependent(cls) -> bool:
+        "Needs device."
+        return False
+
+    def __init__(
+        self,
+        node: onnx.NodeProto,
+        version=None,
+        device: Optional[torch.device] = None,
+        verbose=0,
+    ):
+        super().__init__(node, version, verbose=verbose)
+        self.device = device
+        self._cache: Dict[Tuple[int, int, int], onnx.ModelProto] = {}
+        self.is_cpu = torch.device("cpu") == self.device
+
+    def _make_model(self, itype: int, ranka: int, rankb: int) -> onnx.ModelProto:
+        shapea = ["a{i}" for i in range(ranka)]
+        shapeb = ["b{i}" for i in range(rankb)]
+        shapec = ["c{i}" for i in range(max(ranka, rankb))]
+        model = oh.make_model(
+            oh.make_graph(
+                [oh.make_node("MatMul", ["A", "B"], ["C"])],
+                "dummy",
+                [
+                    oh.make_tensor_value_info("A", itype, shapea),
+                    oh.make_tensor_value_info("B", itype, shapeb),
+                ],
+                [oh.make_tensor_value_info("C", itype, shapec)],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
+        self._provider = provider
+        return InferenceSessionForTorch(model, providers=[provider])
+
+    def run(self, a, b):
+        itype = torch_dtype_to_onnx_dtype(a.dtype)
+        ranka, rankb = len(a.shape), len(b.shape)
+        key = itype, ranka, rankb
+        if key not in self._cache:
+            self._cache[key] = self._make_model(itype, ranka, rankb)
+        sess = self._cache[key]
+        if self.verbose:
+            print(f"[MatMulOrt] running on {self._provider!r}")
+        feeds = dict(A=a.tensor, B=b.tensor)
+        got = sess.run(None, feeds)[0]
+        return OpRunTensor(got)
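Both kernels follow the same pattern: lazily build a one-node ONNX model per (dtype, rank) key, cache it, and execute it with onnxruntime through InferenceSessionForTorch. A rough usage sketch, under the assumption that OpRunTensor can be constructed directly from a torch.Tensor (as the run methods above suggest):

    import onnx.helper as oh
    import torch
    from onnx_diagnostic.helpers.doc_helper import MatMulOrt
    from onnx_diagnostic.reference.torch_ops import OpRunTensor

    node = oh.make_node("MatMul", ["A", "B"], ["C"])
    kernel = MatMulOrt(node, device=torch.device("cpu"))
    # The one-node model is built and cached on the first call.
    c = kernel.run(OpRunTensor(torch.randn(2, 4)), OpRunTensor(torch.randn(4, 3)))
    print(c.tensor.shape)  # expected: torch.Size([2, 3])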
onnx_diagnostic/helpers/helper.py
CHANGED
@@ -698,7 +698,8 @@ def string_type(
         print(f"[string_type] CONFIG:{type(obj)}")
         s = str(obj.to_diff_dict()).replace("\n", "").replace(" ", "")
         return f"{obj.__class__.__name__}(**{s})"
-
+    if obj.__class__.__name__ in {"TorchModelContainer", "InferenceSession"}:
+        return f"{obj.__class__.__name__}(...)"
     if verbose:
         print(f"[string_type] END:{type(obj)}")
     raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}")
@@ -1306,11 +1307,11 @@ def max_diff(
         rdiff = diff / (exp_cpu.abs() + 1e-3)
         if diff.numel() > 0:
             abs_diff, rel_diff, sum_diff, n_diff, nan_diff = (
-                float(diff.max()),
-                float(rdiff.max()),
-                float(diff.sum()),
+                float(diff.max().detach()),
+                float(rdiff.max().detach()),
+                float(diff.sum().detach()),
                 float(diff.numel()),
-                float(ndiff.sum()),
+                float(ndiff.sum().detach()),
             )
             argm = tuple(map(int, torch.unravel_index(diff.argmax(), diff.shape)))
         elif got_cpu.numel() == exp_cpu.numel():
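The new string_type branch means sessions and model containers are summarized as ClassName(...) instead of tripping the final AssertionError. Roughly (the import path follows the file layout; the tiny Identity model is only there to build a real InferenceSession):

    import onnx
    import onnx.helper as oh
    import onnxruntime
    from onnx_diagnostic.helpers.helper import string_type

    model = oh.make_model(
        oh.make_graph(
            [oh.make_node("Identity", ["X"], ["Y"])],
            "g",
            [oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [1])],
            [oh.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1])],
        ),
        opset_imports=[oh.make_opsetid("", 18)],
    )
    sess = onnxruntime.InferenceSession(
        model.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    print(string_type(sess))  # expected: InferenceSession(...)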
onnx_diagnostic/helpers/model_builder_helper.py
CHANGED
@@ -237,18 +237,24 @@ def create_model_builder(
         "OlmoForCausalLM": builder.OLMoModel,
         "PhiForCausalLM": builder.PhiModel,
         "Phi3ForCausalLM": (
-            lambda config, *
+            lambda config, *args: (
+                (
+                    builder.Phi3MiniModel
+                    if config.max_position_embeddings
+                    == config.original_max_position_embeddings
+                    else builder.Phi3MiniLongRoPEModel
+                )(config, *args)
             )
         ),
         "PhiMoEForCausalLM": builder.Phi3MoELongRoPEModel,
         "Phi3SmallForCausalLM": (
-            lambda config, *
+            lambda config, *args: (
+                (
+                    builder.Phi3SmallModel
+                    if config.max_position_embeddings
+                    == config.original_max_position_embeddings
+                    else builder.Phi3SmallLongRoPEModel
+                )(config, *args)
             )
         ),
         "Phi3VForCausalLM": builder.Phi3VModel,
@@ -317,7 +323,17 @@ def create_model_builder(
     )
 
     cls = arch_map[config.architectures[0]]
+
+    # ModelBuilder does not like None values for some parameters.
+    remove = set()
+    for c in ["head_dim"]:
+        if hasattr(config, c) and getattr(config, c) is None:
+            remove.add(c)
+    for c in remove:
+        delattr(config, c)
+
     onnx_model = cls(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+
     if post:
         post(onnx_model)
     _make_model(onnx_model, model, verbose=verbose)
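The pre-instantiation cleanup simply deletes config attributes that are present but None, since ModelBuilder cannot handle them. The same pattern in isolation, with SimpleNamespace standing in for a transformers config:

    from types import SimpleNamespace

    config = SimpleNamespace(head_dim=None, num_attention_heads=16)
    remove = set()
    for c in ["head_dim"]:
        if hasattr(config, c) and getattr(config, c) is None:
            remove.add(c)
    for c in remove:
        delattr(config, c)
    print(hasattr(config, "head_dim"))  # False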
onnx_diagnostic/helpers/rt_helper.py
CHANGED
@@ -55,7 +55,11 @@ def make_feeds(
     names = (
         [i.name for i in proto.graph.input]
         if isinstance(proto, onnx.ModelProto)
-        else (
+        else (
+            [i.name for i in proto.get_inputs()]
+            if hasattr(proto, "get_inputs")
+            else (proto.input_names if hasattr(proto, "input_names") else proto)
+        )
     )
     assert (
         isinstance(names, list)
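make_feeds now accepts three shapes of proto: an onnx.ModelProto (graph inputs), anything exposing onnxruntime's get_inputs(), and objects carrying an input_names attribute, falling back to proto itself. The cascade, unrolled into a standalone (hypothetical) helper:

    import onnx


    def resolve_input_names(proto):
        # Mirrors the cascade in make_feeds above.
        if isinstance(proto, onnx.ModelProto):
            return [i.name for i in proto.graph.input]
        if hasattr(proto, "get_inputs"):  # e.g. onnxruntime.InferenceSession
            return [i.name for i in proto.get_inputs()]
        return proto.input_names if hasattr(proto, "input_names") else proto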
onnx_diagnostic/helpers/torch_helper.py
CHANGED
@@ -878,6 +878,8 @@ def to_tensor(tensor: onnx.TensorProto, base_dir: str = "") -> torch.Tensor:
 
     if tensor.HasField("raw_data"):
         raw_data = tensor.raw_data
+        if len(raw_data) == 0:
+            return torch.tensor([], dtype=torch_dtype).reshape(dims)
         if sys.byteorder == "big":
             # Convert endian from little to big
             raw_data = torch.frombuffer(raw_data, dtype=torch_dtype).byteswap().tobytes()
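The added guard returns an empty, correctly shaped tensor before the raw-byte decoding path runs, avoiding edge cases with zero-length buffers. An illustrative round-trip (make_tensor with raw bytes is standard onnx.helper API; torch_dtype and dims correspond to the tensor's declared dtype and dims in the function above):

    import onnx
    import onnx.helper as oh
    import torch

    t = oh.make_tensor("empty", onnx.TensorProto.FLOAT, dims=[0, 3], vals=b"", raw=True)
    # What the new guard produces for such an initializer:
    out = torch.tensor([], dtype=torch.float32).reshape(list(t.dims))
    print(out.shape)  # torch.Size([0, 3])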