onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +387 -12
- onnx_diagnostic/export/api.py +118 -5
- onnx_diagnostic/export/control_flow.py +214 -0
- onnx_diagnostic/export/control_flow_onnx.py +528 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/export/onnx_plug.py +396 -0
- onnx_diagnostic/ext_test_case.py +118 -25
- onnx_diagnostic/helpers/cache_helper.py +218 -204
- onnx_diagnostic/helpers/dot_helper.py +210 -0
- onnx_diagnostic/helpers/helper.py +92 -26
- onnx_diagnostic/helpers/log_helper.py +26 -4
- onnx_diagnostic/helpers/mini_onnx_builder.py +57 -3
- onnx_diagnostic/helpers/model_builder_helper.py +27 -0
- onnx_diagnostic/helpers/onnx_helper.py +115 -16
- onnx_diagnostic/helpers/ort_session.py +37 -11
- onnx_diagnostic/helpers/rt_helper.py +547 -0
- onnx_diagnostic/helpers/torch_fx_graph_helper.py +164 -0
- onnx_diagnostic/helpers/torch_helper.py +108 -6
- onnx_diagnostic/reference/ort_evaluator.py +233 -28
- onnx_diagnostic/tasks/feature_extraction.py +15 -14
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/tasks/summarization.py +72 -137
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_attention.py +235 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_cache_utils.py +50 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_causal_mask.py +89 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_dynamic_cache.py +177 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_gemma3.py +54 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_generation_mixin.py +486 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_idefics.py +156 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +173 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2.py +99 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +680 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen3.py +106 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +412 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_sam_mask_decoder.py +132 -0
- onnx_diagnostic/torch_export_patches/patches/patch_helper.py +28 -0
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +65 -2107
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- onnx_diagnostic/torch_models/validate.py +50 -1
- onnx_diagnostic/torch_onnx/sbs.py +963 -312
- onnx_diagnostic/torch_onnx/sbs_dataclasses.py +491 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/RECORD +51 -30
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.3.dist-info}/top_level.txt +0 -0

onnx_diagnostic/helpers/onnx_helper.py

@@ -3,7 +3,7 @@ import json
 import os
 import sys
 import warnings
-from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 import numpy.typing as npt
 import onnx
@@ -15,6 +15,7 @@ from onnx import (
     GraphProto,
     ModelProto,
     NodeProto,
+    OperatorSetIdProto,
     TensorProto,
     ValueInfoProto,
     load as onnx_load,
@@ -671,21 +672,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int:  # noqa: F821
     try:
         return oh.np_dtype_to_tensor_dtype(dt)
     except ValueError:
-
-
-
-
-        if
-
-
-
-
-
-
-
-        return TensorProto.FLOAT8E5M2
-        if dt == ml_dtypes.float8_e5m2fnuz:
-            return TensorProto.FLOAT8E5M2FNUZ
+        import ml_dtypes
+
+        if dt == ml_dtypes.bfloat16:
+            return TensorProto.BFLOAT16
+        if dt == ml_dtypes.float8_e4m3fn:
+            return TensorProto.FLOAT8E4M3FN
+        if dt == ml_dtypes.float8_e4m3fnuz:
+            return TensorProto.FLOAT8E4M3FNUZ
+        if dt == ml_dtypes.float8_e5m2:
+            return TensorProto.FLOAT8E5M2
+        if dt == ml_dtypes.float8_e5m2fnuz:
+            return TensorProto.FLOAT8E5M2FNUZ
     if dt == np.float32:
         return TensorProto.FLOAT
     if dt == np.float16:
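
The fallback now imports `ml_dtypes` lazily and maps its extended float types onto the matching `TensorProto` enums. A minimal usage sketch, assuming `ml_dtypes` is installed and that the helper lives in `onnx_diagnostic.helpers.onnx_helper` as the file list above indicates:

import numpy as np
import ml_dtypes
from onnx import TensorProto
from onnx_diagnostic.helpers.onnx_helper import np_dtype_to_tensor_dtype

# bfloat16 is not a plain numpy dtype; when onnx's own converter rejects it,
# the ml_dtypes branch shown in the hunk above resolves it instead
assert np_dtype_to_tensor_dtype(np.dtype(ml_dtypes.bfloat16)) == TensorProto.BFLOAT16
assert np_dtype_to_tensor_dtype(np.dtype(np.float32)) == TensorProto.FLOAT
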
@@ -1198,3 +1196,104 @@ def shadowing_names(
     existing |= not_empty
     created |= not_empty
     return shadow, post_shadow, created
+
+
+def extract_subset_of_nodes(
+    model: ModelProto,
+    name: str,
+    node_index: Optional[int] = None,
+    cut_points: Optional[Set[str]] = None,
+) -> List[NodeProto]:
+    """
+    Extracts the minimal subgraph which can produce the output ``name``
+    knowing ``cut_points``.
+
+    :param model: original model
+    :param name: result name
+    :param node_index: index of the node producing ``name`` if known, otherwise searched for
+    :param cut_points: the known results; defaults to the graph inputs and initializers
+    :return: minimal list of nodes
+    """
+    if node_index is None:
+        for i, node in enumerate(model.graph.node):
+            if name in node.output:
+                node_index = i
+                break
+    assert (
+        node_index is not None
+        and node_index < len(model.graph.node)
+        and name in model.graph.node[node_index].output
+    ), f"node_index is still empty or wrong for result {name!r}"
+    if cut_points is None:
+        cut_points = {n.name for n in model.graph.input} | {
+            n.name for n in model.graph.initializer
+        }
+    elif model.graph.initializer:
+        cut_points = cut_points | {n.name for n in model.graph.initializer}
+
+    node = model.graph.node[node_index]
+    selected = {node_index}
+    current_node_index = node_index
+    current_input_index = 0
+    intermediate = {name}
+    inputs = set(k for k in node.input if k)
+    while not (inputs <= cut_points) and current_node_index >= 0:
+        node = model.graph.node[current_node_index]
+        if current_input_index == 0:
+            needs = [o for o in node.output if o in intermediate and o not in cut_points]
+            if needs:
+                selected.add(current_node_index)
+            else:
+                current_node_index -= 1
+                continue
+        res = node.input[current_input_index]
+        if res not in cut_points:
+            intermediate.add(res)
+        current_input_index += 1
+        if current_input_index >= len(node.input):
+            current_node_index -= 1
+            current_input_index = 0
+
+    return [model.graph.node[i] for i in sorted(selected)]
+
+
+def make_submodel(
+    nodes: List[NodeProto],
+    ir_version: int,
+    opset_imports: List[OperatorSetIdProto],
+    output_names: List[str],
+    type_rank_fn: Callable[[str], Tuple[int, int]],
+) -> ModelProto:
+    """
+    Creates a model with the given list of nodes.
+    It computes the minimal list of inputs needed for this model.
+    The function assumes the nodes are sorted.
+    It does not yet handle subgraphs.
+
+    :param nodes: list of nodes
+    :param ir_version: ir version
+    :param opset_imports: opset imports
+    :param output_names: desired outputs
+    :param type_rank_fn: function returning the type and the rank of a result
+    :return: model proto
+    """
+
+    def _mkv_(name, itype, irank):
+        return oh.make_tensor_value_info(name, itype, [f"{name}_d{i}" for i in range(irank)])
+
+    not_known: Set[str] = set()
+    for node in nodes[::-1]:
+        not_known -= set(node.output)
+        not_known |= set(node.input)
+
+    model = oh.make_model(
+        oh.make_graph(
+            nodes,
+            "submodel",
+            [_mkv_(n, *type_rank_fn(n)) for n in sorted(not_known)],
+            [_mkv_(n, *type_rank_fn(n)) for n in sorted(output_names)],
+        ),
+        ir_version=ir_version,
+        opset_imports=opset_imports,
+    )
+    return model
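
Together the two new helpers slice a model around one intermediate result: `extract_subset_of_nodes` walks backwards from the node producing `name` until every needed input is a known cut point, and `make_submodel` wraps the selected nodes with freshly typed inputs. A hedged end-to-end sketch (the toy model and the constant `type_rank_fn` are illustrative assumptions, not part of the package):

import numpy as np
import onnx.helper as oh
from onnx import TensorProto, checker, numpy_helper
from onnx_diagnostic.helpers.onnx_helper import extract_subset_of_nodes, make_submodel

# toy model: y = (x + 1) * (x + 1), plus a Neg node unrelated to "y"
model = oh.make_model(
    oh.make_graph(
        [
            oh.make_node("Add", ["x", "one"], ["t"]),
            oh.make_node("Mul", ["t", "t"], ["y"]),
            oh.make_node("Neg", ["x"], ["unused"]),
        ],
        "demo",
        [oh.make_tensor_value_info("x", TensorProto.FLOAT, ["N"])],
        [oh.make_tensor_value_info("y", TensorProto.FLOAT, ["N"])],
        [numpy_helper.from_array(np.array([1.0], dtype=np.float32), name="one")],
    ),
    opset_imports=[oh.make_opsetid("", 18)],
)

# walks backwards from the Mul node producing "y"; the unrelated Neg is dropped
nodes = extract_subset_of_nodes(model, "y")

# every result in this toy model is a rank-1 float, so type_rank_fn is constant
sub = make_submodel(
    nodes,
    ir_version=model.ir_version,
    opset_imports=list(model.opset_import),
    output_names=["y"],
    type_rank_fn=lambda name: (TensorProto.FLOAT, 1),
)
checker.check_model(sub)  # "x" and "one" become the submodel inputs
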

onnx_diagnostic/helpers/ort_session.py

@@ -108,7 +108,10 @@ class _InferenceSession:
                 session_options,
                 providers=providers,
             )
-        except
+        except (
+            onnxruntime.capi.onnxruntime_pybind11_state.Fail,
+            onnxruntime.capi.onnxruntime_pybind11_state.InvalidGraph,
+        ) as e:
             if isinstance(sess, onnx.ModelProto):
                 debug_path = "_debug_InferenceSession_last_failure.onnx"
                 onnx.save(
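
The broadened clause keeps the existing debugging behaviour: when session creation fails, the ModelProto is dumped to disk before the error propagates. A self-contained sketch of that pattern (the helper name is hypothetical; the exception types and debug file name come from the hunk above):

import onnx
import onnxruntime

def make_session_or_dump(model: onnx.ModelProto, providers=("CPUExecutionProvider",)):
    """Creates an InferenceSession, saving the model on failure for inspection."""
    try:
        return onnxruntime.InferenceSession(
            model.SerializeToString(), providers=list(providers)
        )
    except (
        onnxruntime.capi.onnxruntime_pybind11_state.Fail,
        onnxruntime.capi.onnxruntime_pybind11_state.InvalidGraph,
    ) as e:
        # same debug file name as in the hunk above
        onnx.save(model, "_debug_InferenceSession_last_failure.onnx")
        raise e
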
@@ -134,7 +137,13 @@ class _InferenceSession:
 
         self.sess = sess
         self.input_names = [i.name for i in sess.get_inputs()]
+        assert (
+            "" not in self.input_names
+        ), f"Input name cannot be empty but input_names={self.input_names}"
         self.output_names = [i.name for i in sess.get_outputs()]
+        assert (
+            "" not in self.output_names
+        ), f"Output name cannot be empty but output_names={self.output_names}"
         self.input_shapes = [i.shape for i in sess.get_inputs()]
         self.output_shapes = [i.shape for i in sess.get_outputs()]
         self.input_types = [i.type for i in sess.get_inputs()]
@@ -338,6 +347,7 @@ class InferenceSessionForTorch(_InferenceSession):
     :param optimized_model_filepath: see :class:`onnxruntime.SessionOptions`
     :param disable_aot_function_inlining: see :class:`onnxruntime.SessionOptions`
     :param use_training_api: use onnxruntime-training API
+    :param cpu_outputs: if True, force the outputs to be on CPU
     """
 
     def __init__(
@@ -353,6 +363,7 @@ class InferenceSessionForTorch(_InferenceSession):
         optimized_model_filepath: Optional[str] = None,
         disable_aot_function_inlining: Optional[bool] = None,
         use_training_api: Optional[bool] = None,
+        cpu_outputs: bool = False,
     ):
         super().__init__(
             sess,
@@ -367,6 +378,7 @@ class InferenceSessionForTorch(_InferenceSession):
             disable_aot_function_inlining=disable_aot_function_inlining,
             use_training_api=use_training_api,
         )
+        self.cpu_outputs = cpu_outputs
 
     def _get_ortvalues_from_torch_tensors(
         self, tensors: Tuple[torch.Tensor, ...], n_outputs: int
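
The new attribute simply records whether outputs should be pinned to CPU; it is consumed when output devices are chosen in the next hunk. A short construction sketch (the model path is a placeholder and the remaining constructor arguments keep their defaults):

from onnx_diagnostic.helpers.ort_session import InferenceSessionForTorch

# keep outputs on CPU even when the inputs (and the run itself) live on CUDA
wrapped = InferenceSessionForTorch("model.onnx", cpu_outputs=True)
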
@@ -490,23 +502,37 @@ class InferenceSessionForTorch(_InferenceSession):
         feeds is a dictionary of :class:`torch.Tensor`.
         The output device is CPU even if the outputs are on CUDA.
         """
-
+        input_names = []
+        values = ORTC.OrtValueVector()
+        device = -1
         for k, v in feeds.items():
+            assert k != "", f"Input cannot be empty but feeds names={list(feeds)}"
+            device = max(device, v.get_device())
             assert hasattr(v, "__dlpack__"), f"class {type(v)} should be serialized"
             if not v.is_contiguous():
                 v = v.contiguous()
             if v.dtype == torch.bool:
-
-
-                new_feeds[k] = ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type(
-                    v.detach().numpy(), onnx.TensorProto.BOOL
-                )
+                v = v.to(torch.uint8)
+                v = ORTC.OrtValue.from_dlpack(v.__dlpack__(), True)
             else:
-
+                v = ORTC.OrtValue.from_dlpack(v.detach().__dlpack__(), False)
+            input_names.append(k)
+            values.push_back(v)
         if self.nvtx:
-            self.torch.cuda.nvtx.range_push("
-
-
+            self.torch.cuda.nvtx.range_push("run_with_ortvaluevector")
+
+        # ort_outputs = self.sess._sess.run_with_ort_values(
+        #     new_feeds, output_names or self.output_names, self.run_options
+        # )
+        ort_outputs = ORTC.OrtValueVector()
+        out_names = output_names or self.output_names
+        self.sess._sess.run_with_ortvaluevector(
+            self.run_options,
+            input_names,
+            values,
+            out_names,
+            ort_outputs,
+            [DEVICES[-1 if self.cpu_outputs else device] for o in out_names],
         )
         if self.nvtx:
             self.torch.cuda.nvtx.range_pop()