onnx-diagnostic 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +412 -12
  3. onnx_diagnostic/export/api.py +111 -8
  4. onnx_diagnostic/export/control_flow.py +48 -345
  5. onnx_diagnostic/export/control_flow_onnx.py +528 -0
  6. onnx_diagnostic/export/control_flow_research.py +12 -7
  7. onnx_diagnostic/export/onnx_plug.py +531 -0
  8. onnx_diagnostic/ext_test_case.py +163 -48
  9. onnx_diagnostic/helpers/cache_helper.py +1 -1
  10. onnx_diagnostic/helpers/dot_helper.py +222 -0
  11. onnx_diagnostic/helpers/helper.py +108 -37
  12. onnx_diagnostic/helpers/mini_onnx_builder.py +3 -1
  13. onnx_diagnostic/helpers/model_builder_helper.py +27 -0
  14. onnx_diagnostic/helpers/onnx_helper.py +531 -6
  15. onnx_diagnostic/helpers/ort_session.py +45 -19
  16. onnx_diagnostic/helpers/torch_fx_graph_helper.py +164 -0
  17. onnx_diagnostic/helpers/torch_helper.py +131 -8
  18. onnx_diagnostic/reference/ort_evaluator.py +228 -46
  19. onnx_diagnostic/tasks/feature_extraction.py +15 -14
  20. onnx_diagnostic/tasks/summarization.py +72 -137
  21. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_attention.py +236 -0
  22. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_cache_utils.py +50 -0
  23. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_causal_mask.py +89 -0
  24. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_dynamic_cache.py +177 -0
  25. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_gemma3.py +54 -0
  26. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_generation_mixin.py +486 -0
  27. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_idefics.py +156 -0
  28. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +173 -0
  29. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2.py +99 -0
  30. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +735 -0
  31. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen3.py +106 -0
  32. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +412 -0
  33. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_sam_mask_decoder.py +132 -0
  34. onnx_diagnostic/torch_export_patches/patches/patch_helper.py +28 -0
  35. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +64 -2608
  36. onnx_diagnostic/torch_models/code_sample.py +2 -1
  37. onnx_diagnostic/torch_models/hghub/model_inputs.py +34 -7
  38. onnx_diagnostic/torch_models/validate.py +64 -2
  39. onnx_diagnostic/torch_onnx/runtime_info.py +1 -24
  40. onnx_diagnostic/torch_onnx/sbs.py +969 -312
  41. onnx_diagnostic/torch_onnx/sbs_dataclasses.py +535 -0
  42. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/METADATA +1 -1
  43. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/RECORD +46 -27
  44. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/WHEEL +0 -0
  45. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/licenses/LICENSE.txt +0 -0
  46. {onnx_diagnostic-0.8.2.dist-info → onnx_diagnostic-0.8.4.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
1
+ from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
2
2
  import numpy as np
3
3
  from onnx import (
4
4
  AttributeProto,
@@ -6,6 +6,7 @@ from onnx import (
6
6
  FunctionProto,
7
7
  ModelProto,
8
8
  NodeProto,
9
+ TensorProto,
9
10
  TypeProto,
10
11
  ValueInfoProto,
11
12
  helper as oh,
@@ -16,7 +17,14 @@ from onnx import (
16
17
  from onnx.defs import onnx_opset_version
17
18
  import onnxruntime
18
19
  from ..helpers import string_type
19
- from ..helpers.onnx_helper import pretty_onnx, dtype_to_tensor_dtype, to_array_extended
20
+ from ..helpers.onnx_helper import (
21
+ get_hidden_inputs,
22
+ dtype_to_tensor_dtype,
23
+ np_dtype_to_tensor_dtype,
24
+ to_array_extended,
25
+ pretty_onnx,
26
+ )
27
+ from ..helpers.torch_helper import onnx_dtype_to_torch_dtype, torch_dtype_to_onnx_dtype
20
28
  from ..helpers.ort_session import (
21
29
  InferenceSessionForTorch,
22
30
  InferenceSessionForNumpy,
@@ -31,6 +39,54 @@ PROTO = (FunctionProto, ModelProto, GraphProto, NodeProto)
31
39
  Proto = Union[FunctionProto, ModelProto, GraphProto, NodeProto]
32
40
 
33
41
 
42
+ class OnnxList(list):
43
+ """Defines a list for the runtime."""
44
+
45
+ def __init__(self, itype: Union[list, int]):
46
+ super().__init__()
47
+ if isinstance(itype, int):
48
+ self.itype = itype
49
+ self.dtype = onnx_dtype_to_torch_dtype(itype)
50
+ else:
51
+ assert itype, "The list cannot be created with an empty list."
52
+ self.itype = (
53
+ np_dtype_to_tensor_dtype(itype[0].dtype)
54
+ if isinstance(itype[0], np.ndarray)
55
+ else torch_dtype_to_onnx_dtype(itype[0].dtype)
56
+ )
57
+ self.extend(itype)
58
+ self.dtype = itype[0].dtype
59
+ self.shape = "OnnxList"
60
+
61
+ def get_device(self):
62
+ "Returns the device of the first tensor."
63
+ assert len(self) > 0, "Cannot access the device for an empty list."
64
+ return self[0].get_device() if hasattr(self[0], "get_device") else -1
65
+
66
+ def numpy(self):
67
+ "Creates a new list with all tensors on numpy or self it is already the case."
68
+ if all(isinstance(v, np.ndarray) for v in self):
69
+ return self
70
+ return OnnxList([v.detach().cpu().numpy() for v in self])
71
+
72
+ def to(self, tensor_like) -> "OnnxList":
73
+ "Creates a new list with all tensors on numpy or pytorch depending on `tensor_like`."
74
+ if isinstance(tensor_like, np.ndarray):
75
+ return self
76
+ import torch
77
+
78
+ return OnnxList(
79
+ [
80
+ torch.from_numpy(t).to(tensor_like.device) if isinstance(t, np.ndarray) else t
81
+ for t in self
82
+ ]
83
+ )
84
+
85
+ def clone(self) -> "OnnxList":
86
+ "Clone (torch)."
87
+ return OnnxList([t.clone() for t in self]) if len(self) > 0 else OnnxList(self.itype)
88
+
89
+
34
90
  class OnnxruntimeEvaluator:
35
91
  """
36
92
  This class loads an onnx model and the executes one by one the nodes
@@ -54,6 +110,9 @@ class OnnxruntimeEvaluator:
54
110
  :param whole: if True, do not split node by node
55
111
  :param torch_or_numpy: force the use of one of them, True for torch,
56
112
  False for numpy, None to let the class choose
113
+ :param dump_onnx_model: dumps the temporary onnx model created if whole is True
114
+ :param function_kwargs: a FunctionProto may have parameters,
115
+ this contains the values of them
57
116
  """
58
117
 
59
118
  def __init__(
@@ -77,6 +136,8 @@ class OnnxruntimeEvaluator:
77
136
  opsets: Optional[Union[int, Dict[str, int]]] = None,
78
137
  whole: bool = False,
79
138
  torch_or_numpy: Optional[bool] = None,
139
+ function_kwargs: Optional[Dict[str, Any]] = None,
140
+ dump_onnx_model: Optional[str] = None,
80
141
  ):
81
142
  if isinstance(proto, str):
82
143
  self.proto: Proto = load(proto)
@@ -90,6 +151,9 @@ class OnnxruntimeEvaluator:
90
151
  assert isinstance(
91
152
  self.proto, PROTO
92
153
  ), f"Unexpected type for self.proto {type(self.proto)}"
154
+ assert (
155
+ whole or not dump_onnx_model
156
+ ), f"whole must be True for dump_onnx_model={dump_onnx_model!r}"
93
157
 
94
158
  self._cache: Dict[
95
159
  Any, Tuple[Proto, Union["OnnxruntimeEvaluator", _InferenceSession]] # noqa: UP037
@@ -109,6 +173,8 @@ class OnnxruntimeEvaluator:
109
173
  use_training_api=use_training_api,
110
174
  )
111
175
  self.to_tensor_or_array = to_array_extended if not torch_or_numpy else to_tensor
176
+ self.function_kwargs = function_kwargs
177
+ self.dump_onnx_model = dump_onnx_model
112
178
 
113
179
  self.verbose = verbose
114
180
  self.torch_or_numpy = torch_or_numpy
@@ -199,6 +265,8 @@ class OnnxruntimeEvaluator:
199
265
  def _log_arg(self, a: Any) -> Any:
200
266
  if isinstance(a, (str, int, float)):
201
267
  return a
268
+ if isinstance(a, OnnxList):
269
+ return string_type(a)
202
270
  device = f"D{a.get_device()}:" if hasattr(a, "detach") else ""
203
271
  if hasattr(a, "shape"):
204
272
  prefix = "A:" if hasattr(a, "astype") else "T:"
@@ -221,6 +289,12 @@ class OnnxruntimeEvaluator:
221
289
  def _is_local_function(self, node: NodeProto) -> bool:
222
290
  return (node.domain, node.op_type) in self.local_functions
223
291
 
292
+ def _run_init(self, feed_inputs):
293
+ if self.sess_ is None:
294
+ assert self.proto, "self.proto is empty"
295
+ _, self.sess_ = self._get_sess(self.proto, list(feed_inputs.values()))
296
+ return self.sess_
297
+
224
298
  def run(
225
299
  self,
226
300
  outputs: Optional[List[str]],
@@ -244,9 +318,7 @@ class OnnxruntimeEvaluator:
244
318
  """
245
319
  if self.rt_nodes_ is None:
246
320
  # runs a whole
247
- if self.sess_ is None:
248
- assert self.proto, "self.proto is empty"
249
- _, self.sess_ = self._get_sess(self.proto, list(feed_inputs.values()))
321
+ self._run_init(feed_inputs)
250
322
  assert self.sess_, "mypy not happy"
251
323
  return self.sess_.run(outputs, feed_inputs)
252
324
  if outputs is None:
@@ -273,14 +345,16 @@ class OnnxruntimeEvaluator:
273
345
  if node.op_type == "If" and node.domain == "":
274
346
  outputs = self._run_if(node, inputs, results)
275
347
  elif node.op_type in {"Scan", "Loop"} and node.domain == "":
276
- outputs = self._run_scan(node, inputs, results)
348
+ outputs = self._run_scan_or_loop(node, inputs, results)
277
349
  elif self._is_local_function(node):
278
350
  outputs = self._run_local(node, inputs, results)
279
351
  else:
280
352
  outputs = self._run(node, inputs, results)
281
- for name, value in zip(node.output, outputs):
282
- if name == "":
283
- continue
353
+ node_output = [o for o in node.output if o]
354
+ assert len(node_output) == len(
355
+ outputs
356
+ ), f"Length mismatch between node output={node.output} and outputs={outputs}"
357
+ for name, value in zip(node_output, outputs):
284
358
  self._log(2, " + %s: %s", name, value) # type: ignore[arg-type]
285
359
  assert isinstance(name, str), f"unexpected type for name {type(name)}"
286
360
  results[name] = value
@@ -355,11 +429,12 @@ class OnnxruntimeEvaluator:
355
429
  nodes: Sequence[NodeProto],
356
430
  vinputs: Sequence[ValueInfoProto],
357
431
  voutputs: Sequence[ValueInfoProto],
432
+ functions: Optional[Sequence[FunctionProto]] = None,
358
433
  ) -> ModelProto:
359
434
  onx = oh.make_model(
360
435
  oh.make_graph(nodes, "-", vinputs, voutputs),
361
436
  ir_version=getattr(self.proto, "ir_version", self.ir_version),
362
- functions=getattr(self.proto, "functions", None),
437
+ functions=[*getattr(self.proto, "functions", []), *(functions or [])],
363
438
  )
364
439
  del onx.opset_import[:]
365
440
  if hasattr(self.proto, "opset_import"):
@@ -373,51 +448,61 @@ class OnnxruntimeEvaluator:
373
448
  )
374
449
  else:
375
450
  onx.opset_import.append(oh.make_opsetid("", onnx_opset_version()))
451
+ opsets = {d.domain: d.version for d in onx.opset_import}
452
+ add = {}
453
+ for node in self.enumerate_nodes(onx.graph.node):
454
+ if node.domain and node.domain not in opsets and node.domain not in add:
455
+ add[node.domain] = 1
456
+ onx.opset_import.extend([oh.make_opsetid(k, v) for k, v in add.items()])
376
457
 
377
458
  # That helps fixing bugs.
378
459
  onx = shi.infer_shapes(onx)
379
460
  return onx
380
461
 
381
- @classmethod
382
- def _get_hidden_inputs(self, graph: GraphProto) -> Set[str]:
383
- """
384
- Returns the hidden inputs (inputs coming from an upper context)
385
- used by a subgraph.
386
- """
387
- hidden = set()
388
- memo = set(i.name for i in graph.initializer)
389
- memo |= set(i.name for i in graph.sparse_initializer)
390
- for node in graph.node:
391
- for i in node.input:
392
- if i not in memo:
393
- hidden.add(i)
394
- for att in node.attribute:
395
- if att.type == AttributeProto.GRAPH and att.g:
396
- hid = self._get_hidden_inputs(att.g)
397
- less = set(h for h in hid if h not in memo)
398
- hidden |= less
399
- memo |= set(node.output)
400
- return hidden
462
+ def _make_model_outputs(
463
+ self, node: NodeProto, inputs: List[ValueInfoProto]
464
+ ) -> Tuple[List[NodeProto], List[ValueInfoProto]]:
465
+ return [], [oh.make_value_info(o, TypeProto()) for o in node.output if o]
466
+
467
+ def enumerate_nodes(self, nodes: List[NodeProto]) -> Iterator[NodeProto]:
468
+ "Enumerates nodes recursively."
469
+ for node in nodes:
470
+ if node.op_type in {"Scan", "If", "Loop"}:
471
+ for att in node.attribute:
472
+ if att.type == AttributeProto.GRAPH:
473
+ yield from self.enumerate_nodes(att.g.node)
474
+ yield node
401
475
 
402
476
  @classmethod
403
- def _get_hidden_node_inputs(self, node: NodeProto) -> Set[str]:
404
- """Calls multiple _get_hidden_inputs on every attribute."""
477
+ def _get_hidden_node_inputs(cls, node: NodeProto) -> Set[str]:
478
+ """Calls multiple get_hidden_inputs on every attribute."""
405
479
  if node.op_type not in {"Loop", "Scan", "If"}:
406
480
  return set()
407
481
  hidden = set()
408
482
  for att in node.attribute:
409
483
  if att.type == AttributeProto.GRAPH:
410
- hidden |= self._get_hidden_inputs(att.g)
484
+ hidden |= get_hidden_inputs(att.g)
411
485
  return hidden - (hidden & set(node.input))
412
486
 
413
487
  def _get_sess(
414
488
  self, node: Union[ModelProto, NodeProto], inputs: List[Any]
415
489
  ) -> Tuple[ModelProto, _InferenceSession]:
490
+ on_cpu = None
416
491
  if isinstance(node, ModelProto):
417
492
  onx = node
418
493
  else:
494
+ functions = []
495
+ if isinstance(node, FunctionProto):
496
+ functions.append(node)
497
+ node = oh.make_node(
498
+ node.name,
499
+ list(node.input),
500
+ list(node.output),
501
+ domain=node.domain,
502
+ **(self.function_kwargs or {}),
503
+ )
419
504
  assert isinstance(node, NodeProto), f"Unexpected type {type(node)} for node"
420
- if node.op_type == "Constant":
505
+ if node.op_type == "Constant" and node.domain == "":
421
506
  # We force the type to be a boolean.
422
507
  ref = ExtendedReferenceEvaluator(node)
423
508
  cst = ref.run(None, {})[0]
@@ -427,6 +512,19 @@ class OnnxruntimeEvaluator:
427
512
  node.output[0], dtype_to_tensor_dtype(cst.dtype), cst.shape
428
513
  )
429
514
  ]
515
+ prenodes = [] # type: ignore[var-annotated]
516
+ elif node.op_type == "ConcatFromSequence" and node.domain == "":
517
+ # We force the type to be a boolean.
518
+ vinputs = [
519
+ oh.make_value_info(
520
+ node.input[0],
521
+ type_proto=oh.make_sequence_type_proto(
522
+ oh.make_tensor_type_proto(elem_type=inputs[0].itype, shape=None)
523
+ ),
524
+ )
525
+ ]
526
+ voutputs = [oh.make_tensor_value_info(node.output[0], inputs[0].itype, None)]
527
+ prenodes = [] # type: ignore[var-annotated]
430
528
  else:
431
529
  unique_names = set()
432
530
  vinputs = []
@@ -440,18 +538,35 @@ class OnnxruntimeEvaluator:
440
538
  vinputs.append(value)
441
539
 
442
540
  # no need to run shape inference
443
- voutputs = [oh.make_value_info(o, TypeProto()) for o in node.output]
541
+ prenodes, voutputs = self._make_model_outputs(node, vinputs)
444
542
 
445
- onx = self._make_model_proto([node], vinputs, voutputs)
543
+ onx = self._make_model_proto(
544
+ [*prenodes, node], vinputs, voutputs, functions=functions
545
+ )
546
+ if node.op_type in {"Shape", "Size"}:
547
+ on_cpu = True
446
548
 
549
+ if self.dump_onnx_model:
550
+ onnx_save(
551
+ onx, self.dump_onnx_model, save_as_external_data=len(onx.graph.node) > 100
552
+ )
447
553
  cls = (
448
554
  InferenceSessionForNumpy
449
555
  if any(isinstance(i, np.ndarray) for i in inputs)
450
556
  and (not isinstance(self.torch_or_numpy, bool) or not self.torch_or_numpy)
451
557
  else InferenceSessionForTorch
452
558
  )
559
+ if (
560
+ "providers" not in self.session_kwargs or not self.session_kwargs["providers"]
561
+ ) and any(hasattr(t, "is_cuda") and t.is_cuda for t in inputs):
562
+ sess_kwargs = self.session_kwargs.copy()
563
+ sess_kwargs["providers"] = ["CUDAExecutionProvider"]
564
+ else:
565
+ sess_kwargs = self.session_kwargs or {}
566
+ if on_cpu and "CUDAExecutionProvider" in (sess_kwargs.get("providers", []) or []):
567
+ sess_kwargs["cpu_outputs"] = True
453
568
  try:
454
- sess = cls(onx, **self.session_kwargs)
569
+ sess = cls(onx, **sess_kwargs)
455
570
  except (
456
571
  onnxruntime.capi.onnxruntime_pybind11_state.Fail,
457
572
  onnxruntime.capi.onnxruntime_pybind11_state.InvalidGraph,
@@ -473,15 +588,29 @@ class OnnxruntimeEvaluator:
473
588
  if i == "" or i in unique_names:
474
589
  continue
475
590
  unique_names.add(i)
476
- value = oh.make_tensor_value_info(i, dtype_to_tensor_dtype(it.dtype), it.shape)
591
+ if isinstance(it, OnnxList):
592
+ value = oh.make_value_info(
593
+ i,
594
+ type_proto=oh.make_sequence_type_proto(
595
+ oh.make_tensor_type_proto(
596
+ elem_type=dtype_to_tensor_dtype(it.dtype), shape=None
597
+ )
598
+ ),
599
+ )
600
+ else:
601
+ value = oh.make_tensor_value_info(i, dtype_to_tensor_dtype(it.dtype), it.shape)
477
602
  vinputs.append(value)
478
603
 
479
- reduced_set = self._get_hidden_inputs(g)
604
+ reduced_set = get_hidden_inputs(g)
480
605
  for i, v in context.items():
481
606
  if i in reduced_set and i not in unique_names:
482
607
  unique_names.add(i)
483
608
  value = oh.make_tensor_value_info(i, dtype_to_tensor_dtype(v.dtype), v.shape)
484
609
  vinputs.append(value)
610
+ assert len(reduced_set & set(context)) == len(reduced_set), (
611
+ f"Missing hidden inputs {sorted(reduced_set)} from context={sorted(context)} "
612
+ f"(len(inputs)={len([i for i in inputs if i])}) for node {pretty_onnx(node)}"
613
+ )
485
614
  return vinputs
486
615
 
487
616
  def _get_sess_if(
@@ -530,6 +659,14 @@ class OnnxruntimeEvaluator:
530
659
 
531
660
  def _run(self, node: NodeProto, inputs: List[Any], results: Dict[str, Any]) -> List[Any]:
532
661
  """Runs a node."""
662
+ if node.op_type[0] == "S":
663
+ if node.op_type == "SequenceEmpty":
664
+ dtype = TensorProto.FLOAT
665
+ for att in node.attribute:
666
+ if att.name == "dtype":
667
+ dtype = att.i
668
+ return [OnnxList(itype=dtype)]
669
+
533
670
  types = [(None if a is None else (a.dtype, a.shape)) for a in inputs]
534
671
  key = (id(node), *types)
535
672
  if key in self._cache:
@@ -538,13 +675,31 @@ class OnnxruntimeEvaluator:
538
675
  onx, sess = self._get_sess(node, inputs)
539
676
  self._cache[key] = onx, sess
540
677
 
541
- feeds = dict(zip(node.input, inputs))
542
- if "" in feeds:
543
- feeds[""] = np.array([0], dtype=np.float32)
544
-
678
+ feeds = {}
679
+ for i, val in zip(node.input, inputs):
680
+ if i == "":
681
+ assert (
682
+ val is None
683
+ ), f"input name={i!r} but val={string_type(val, with_shape=True)}"
684
+ continue
685
+ feeds[i] = val
545
686
  assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}"
687
+
688
+ if node.op_type[0] == "C":
689
+ if node.op_type == "ConcatFromSequence":
690
+ res = sess.sess.run(None, self.feeds_to_numpy(feeds)) # type: ignore[union-attr]
691
+ if isinstance(inputs[0][0], np.ndarray):
692
+ return list(res)
693
+ import torch
694
+
695
+ return [torch.from_numpy(r).to(inputs[0][0].device) for r in res]
696
+
546
697
  outputs = list(sess.run(None, feeds))
547
698
  assert isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}"
699
+ assert not any(type(v) is list for v in outputs), (
700
+ f"One output type is a list, this should not be allowed, "
701
+ f"node.op_type={node.op_type}, feeds={string_type(feeds, with_shape=True)}"
702
+ )
548
703
  return outputs
549
704
 
550
705
  def _run_if(
@@ -570,7 +725,7 @@ class OnnxruntimeEvaluator:
570
725
  assert isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}"
571
726
  return outputs
572
727
 
573
- def _get_sess_scan(
728
+ def _get_sess_scan_or_loop(
574
729
  self, node: NodeProto, branch: str, inputs: List[Any], context: Dict[str, Any]
575
730
  ) -> Tuple[ModelProto, "OnnxruntimeEvaluator"]:
576
731
  g = None
@@ -605,10 +760,26 @@ class OnnxruntimeEvaluator:
605
760
  )
606
761
  return onx, sess
607
762
 
608
- def _run_scan(
763
+ def feeds_to_numpy(self, feeds):
764
+ new_feeds = {}
765
+ for k, v in feeds.items():
766
+ if hasattr(v, "detach"):
767
+ new_feeds[k] = v.detach().cpu().numpy()
768
+ elif isinstance(v, OnnxList):
769
+ new_feeds[k] = v.numpy()
770
+ else:
771
+ new_feeds[k] = v
772
+ return new_feeds
773
+
774
+ def _run_scan_or_loop(
609
775
  self, node: NodeProto, inputs: List[Any], results: Dict[str, Any]
610
776
  ) -> List[Any]:
611
777
  """Runs a node Scan."""
778
+ assert not any(type(i) is list for i in inputs), (
779
+ f"One input is a list but it should an OnnxList, "
780
+ f"node.op_type={node.op_type!r}, node.input={node.input}, "
781
+ f"inputs={string_type(inputs, with_shape=True)}"
782
+ )
612
783
  feeds = dict(zip(node.input, inputs))
613
784
  feeds.update(results)
614
785
  name = "body"
@@ -616,10 +787,21 @@ class OnnxruntimeEvaluator:
616
787
  if key in self._cache:
617
788
  sess = self._cache[key][1]
618
789
  else:
619
- self._cache[key] = _onx, sess = self._get_sess_scan(node, name, inputs, results)
790
+ self._cache[key] = _onx, sess = self._get_sess_scan_or_loop(
791
+ node, name, inputs, results
792
+ )
620
793
 
621
794
  assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}"
622
795
  feeds = {name: results[name] for name in sess.input_names}
796
+ if node.op_type == "Loop" and any(isinstance(v, OnnxList) for v in feeds.values()):
797
+ # This operator uses sequence. onnxruntime does not play well with sequence.
798
+ sess._run_init(feeds) # type: ignore[union-attr]
799
+ outputs = sess.sess_.sess.run(None, self.feeds_to_numpy(feeds)) # type: ignore[union-attr]
800
+ return [
801
+ (OnnxList(v).to(feeds[node.input[0]]) if isinstance(v, list) else v)
802
+ for v in outputs
803
+ ]
804
+
623
805
  outputs = sess.run(None, feeds)
624
806
  assert isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}"
625
807
  return outputs
@@ -1,10 +1,6 @@
1
1
  from typing import Any, Callable, Dict, Optional, Tuple
2
2
  import torch
3
- from ..helpers.config_helper import (
4
- update_config,
5
- check_hasattr,
6
- default_num_hidden_layers as nhl,
7
- )
3
+ from ..helpers.config_helper import update_config, check_hasattr
8
4
  from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
9
5
 
10
6
 
@@ -13,8 +9,9 @@ __TASK__ = "feature-extraction"
13
9
 
14
10
  def reduce_model_config(config: Any) -> Dict[str, Any]:
15
11
  """Reduces a model size."""
16
- check_hasattr(config, "num_hidden_layers")
17
- kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, nhl()))
12
+ check_hasattr(config, "vocab_size")
13
+ # Bart architecture does not like too much that the number of layers is changed.
14
+ kwargs = dict(vocab_size=2056)
18
15
  update_config(config, kwargs)
19
16
  return kwargs
20
17
 
@@ -25,7 +22,8 @@ def get_inputs(
25
22
  batch_size: int,
26
23
  sequence_length: int,
27
24
  dummy_max_token_id: int,
28
- sequence_length2: int = 3,
25
+ past_length: int = 30,
26
+ past_length2: int = 4,
29
27
  decoder_attention_heads: Optional[int] = None,
30
28
  encoder_attention_heads: Optional[int] = None,
31
29
  encoder_ffn_dim: Optional[int] = None,
@@ -73,13 +71,13 @@ def get_inputs(
73
71
  torch.randn(
74
72
  batch_size,
75
73
  encoder_attention_heads,
76
- sequence_length,
74
+ past_length,
77
75
  encoder_ffn_dim,
78
76
  ),
79
77
  torch.randn(
80
78
  batch_size,
81
79
  encoder_attention_heads,
82
- sequence_length,
80
+ past_length,
83
81
  encoder_ffn_dim,
84
82
  ),
85
83
  )
@@ -92,13 +90,13 @@ def get_inputs(
92
90
  torch.randn(
93
91
  batch_size,
94
92
  decoder_attention_heads,
95
- sequence_length2,
93
+ past_length2,
96
94
  decoder_ffn_dim,
97
95
  ),
98
96
  torch.randn(
99
97
  batch_size,
100
98
  decoder_attention_heads,
101
- sequence_length2,
99
+ past_length2,
102
100
  decoder_ffn_dim,
103
101
  ),
104
102
  )
@@ -124,7 +122,8 @@ def get_inputs(
124
122
  batch_size=batch_size + 1,
125
123
  sequence_length=sequence_length + add_second_input,
126
124
  dummy_max_token_id=dummy_max_token_id,
127
- sequence_length2=sequence_length2,
125
+ past_length=past_length,
126
+ past_length2=past_length2,
128
127
  decoder_attention_heads=decoder_attention_heads,
129
128
  encoder_attention_heads=encoder_attention_heads,
130
129
  encoder_ffn_dim=encoder_ffn_dim,
@@ -146,7 +145,9 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
146
145
  check_hasattr(config, "vocab_size")
147
146
  kwargs = dict(
148
147
  batch_size=2,
149
- sequence_length=30,
148
+ sequence_length=12,
149
+ past_length=30,
150
+ past_length2=4,
150
151
  dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
151
152
  )
152
153
  for att in [