onnx-diagnostic 0.6.3__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +87 -77
  3. onnx_diagnostic/doc.py +22 -0
  4. onnx_diagnostic/ext_test_case.py +1 -1
  5. onnx_diagnostic/helpers/cache_helper.py +59 -0
  6. onnx_diagnostic/helpers/config_helper.py +8 -4
  7. onnx_diagnostic/helpers/helper.py +30 -3
  8. onnx_diagnostic/helpers/log_helper.py +585 -0
  9. onnx_diagnostic/helpers/mini_onnx_builder.py +4 -1
  10. onnx_diagnostic/helpers/model_builder_helper.py +54 -73
  11. onnx_diagnostic/helpers/torch_helper.py +18 -2
  12. onnx_diagnostic/reference/__init__.py +1 -0
  13. onnx_diagnostic/reference/ort_evaluator.py +29 -4
  14. onnx_diagnostic/reference/report_results_comparison.py +95 -0
  15. onnx_diagnostic/reference/torch_evaluator.py +21 -0
  16. onnx_diagnostic/tasks/automatic_speech_recognition.py +3 -0
  17. onnx_diagnostic/tasks/feature_extraction.py +3 -0
  18. onnx_diagnostic/tasks/fill_mask.py +3 -0
  19. onnx_diagnostic/tasks/image_classification.py +7 -1
  20. onnx_diagnostic/tasks/image_text_to_text.py +3 -0
  21. onnx_diagnostic/tasks/mixture_of_expert.py +3 -0
  22. onnx_diagnostic/tasks/object_detection.py +3 -0
  23. onnx_diagnostic/tasks/sentence_similarity.py +3 -0
  24. onnx_diagnostic/tasks/summarization.py +3 -0
  25. onnx_diagnostic/tasks/text2text_generation.py +3 -0
  26. onnx_diagnostic/tasks/text_classification.py +3 -0
  27. onnx_diagnostic/tasks/text_generation.py +90 -43
  28. onnx_diagnostic/tasks/zero_shot_image_classification.py +3 -0
  29. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +78 -25
  30. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +37 -0
  31. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +365 -17
  32. onnx_diagnostic/torch_models/hghub/hub_api.py +20 -4
  33. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +209 -0
  34. onnx_diagnostic/torch_models/hghub/model_inputs.py +3 -0
  35. onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py +23 -50
  36. onnx_diagnostic/torch_models/{test_helper.py → validate.py} +158 -103
  37. {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.0.dist-info}/METADATA +2 -2
  38. {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.0.dist-info}/RECORD +41 -39
  39. {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.0.dist-info}/WHEEL +0 -0
  40. {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  41. {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.0.dist-info}/top_level.txt +0 -0
onnx_diagnostic/helpers/model_builder_helper.py
@@ -3,9 +3,9 @@ import os
 import requests
 import sys
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from urllib.parse import urlparse
-from onnx import helper, save_model, external_data_helper, ModelProto
+from onnx import ModelProto, TensorProto

 CACHE_SUBDIR = "onnx-diagnostic"

onnx_diagnostic/helpers/model_builder_helper.py
@@ -114,87 +114,58 @@ def _make_model(self, model, verbose: int = 0):
         self.make_lm_head(module)


-def save_model_builder(self, out_dir: Optional[str] = "", verbose: int = 0) -> ModelProto:
+def save_model_builder(
+    self, out_dir: Optional[str] = "", verbose: int = 0
+) -> Union[str, ModelProto]:
     """
     Saves a model created by function :func:`create_model_builder`.
     If out_dir is empty or not specified, the function still returns the
     generated model.
     """
-    if verbose:
-        print(f"[save_model_builder] Saving ONNX model in {out_dir}")
-
-    # Create ONNX model
-    model = helper.make_model(
-        opset_imports=[
-            self.clear_field(
-                helper.make_operatorsetid("", 21 if self.quant_attrs["use_qdq"] else 14),
-                "domain",
-            ),
-            helper.make_operatorsetid("com.microsoft", 1),
-        ],
-        ir_version=7,
-        producer_name="onnxruntime-genai",
-        producer_version="0.0.0",
-        graph=self.make_graph(
-            name="main_graph",
-            inputs=self.inputs,
-            outputs=self.outputs,
-            initializer=self.initializers,
-            value_info=self.value_infos,
-            nodes=self.nodes,
-        ),
-    )
-
-    # Load external data into ONNX model
-    external_data_helper.load_external_data_for_model(model, self.cache_dir)
-
-    # Delete external data files on disk before re-saving
-    for path in os.listdir(self.cache_dir):
-        if path.endswith(".bin"):
-            os.remove(os.path.join(self.cache_dir, path))
+    import onnx_ir

-    # Delete temporary cache dir if empty
-    # if len(os.listdir(self.cache_dir)) == 0:
-    #     os.rmdir(self.cache_dir)
+    if verbose:
+        print(f"[save_model_builder] Saving ONNX model in {out_dir!r}")

-    # Quantize ONNX model to desired precision
+    # Skip quantizing `MatMul` in `DequantizeLinear --> Transpose --> MatMul` path
     already_quantized_in_qdq_format = (
         self.quant_type is not None and self.quant_attrs["use_qdq"]
-    )  # Skip quantizing `MatMul` in `DequantizeLinear --> Transpose --> MatMul` path
-    if self.onnx_dtype == "int4" and not already_quantized_in_qdq_format:
-        model = self.to_int4(model)
+    )
+    model = (
+        self.to_int4()
+        if self.onnx_dtype in {onnx_ir.DataType.INT4, onnx_ir.DataType.UINT4}
+        and not already_quantized_in_qdq_format
+        else self.model
+    )
+    model.graph.sort()
+    if not out_dir:
+        return onnx_ir.to_proto(model)

-    # Save ONNX model with only one external data file and delete any existing duplicate copies
-    if out_dir:
-        out_path = os.path.join(out_dir, self.filename)
-        data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")
-        if os.path.exists(out_path):
-            if verbose:
-                print(f"[save_model_builder] Overwriting {out_path!r}")
-            os.remove(out_path)
-        if os.path.exists(data_path):
-            if verbose:
-                print(f"[save_model_builder] Overwriting {data_path!r}")
-            os.remove(data_path)
+    out_path = os.path.join(out_dir, self.filename)
+    data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")

-    if out_dir:
-        location = os.path.basename(data_path)
-        if os.path.exists(location):
-            os.remove(location)
+    # Save ONNX model with only one external data file and delete any existing duplicate copies
+    out_path = os.path.join(out_dir, self.filename)
+    data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")
+    if os.path.exists(out_path):
         if verbose:
-            print(f"[save_model_builder] out_path={out_path!r}")
-            print(f"[save_model_builder] location={location!r}")
-        save_model(
-            model,
-            out_path,
-            save_as_external_data=True,
-            all_tensors_to_one_file=True,
-            location=location,
-            size_threshold=1024,
-            convert_attribute=False,
-        )
-        return None
-    return model
+            print(f"[save_model_builder] Overwriting {out_path!r}")
+        os.remove(out_path)
+    if os.path.exists(data_path):
+        if verbose:
+            print(f"[save_model_builder] Overwriting {data_path!r}")
+        os.remove(data_path)
+
+    onnx_ir.save(
+        model,
+        out_path,
+        external_data=os.path.basename(data_path),
+        size_threshold_bytes=2**10,
+    )
+    if verbose:
+        print(f"[save_model_builder] saved in {out_dir!r}")
+
+    return out_path


 def create_model_builder(
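Note: the saving logic above now delegates to onnx_ir instead of onnx.helper/save_model. A minimal standalone sketch of the new behaviour, using only the onnx_ir calls that appear in the hunk (the model and file names are placeholders):

    import os
    import onnx_ir

    def save_with_one_data_file(model: "onnx_ir.Model", out_dir: str, filename: str) -> str:
        # writes <out_dir>/<filename> plus a single <filename>.data file next to it
        out_path = os.path.join(out_dir, filename)
        data_path = out_path + ".data"
        for p in (out_path, data_path):
            if os.path.exists(p):
                os.remove(p)  # stale copies would otherwise shadow the new weights
        onnx_ir.save(
            model,
            out_path,
            external_data=os.path.basename(data_path),  # one file for all large tensors
            size_threshold_bytes=2**10,                 # tensors above 1 KiB go external
        )
        return out_path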
onnx_diagnostic/helpers/model_builder_helper.py
@@ -335,13 +306,23 @@ def create_model_builder(
     for c in remove:
         delattr(config, c)

-    onnx_model = cls(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
+    convert = {
+        "fp32": TensorProto.FLOAT,
+        "fp16": TensorProto.FLOAT16,
+        "bfp16": TensorProto.BFLOAT16,
+    }
+    assert (
+        precision in convert
+    ), f"Unexpected value for precision={precision!r}, should be in {convert}"
+    onnx_model = cls(
+        config, io_dtype, convert[precision], execution_provider, cache_dir, extra_options
+    )

     if post:
         post(onnx_model)
     _make_model(onnx_model, model, verbose=verbose)

-    assert onnx_model.nodes, (
+    assert onnx_model.model, (
         f"No node in the model, io_dtype={io_dtype!r}, "
         f"precision={precision!r}, execution_provider={execution_provider!r}, "
         f"extra_options={extra_options!r}, cache_dir={cache_dir!r}, "
onnx_diagnostic/helpers/torch_helper.py
@@ -16,6 +16,7 @@ from .cache_helper import (
     make_encoder_decoder_cache,
     make_sliding_window_cache,
     make_mamba_cache,
+    make_static_cache,
 )
 from .mini_onnx_builder import create_onnx_model_from_input_tensors
 from .onnx_helper import (
@@ -288,7 +289,8 @@
     """
     The necessary modification to steal a forward method and print out inputs
     and outputs using :func:`onnx_diagnostic.helpers.string_type`.
-    See example :ref:`l-plot-tiny-llm-export`.
+    See example :ref:`l-plot-tiny-llm-export` or
+    :ref:`l-plot-intermediate-results`.

     :param model: a model or a list of models to monitor,
         every model can also be a tuple(name, model); the name is displayed as well.
onnx_diagnostic/helpers/torch_helper.py
@@ -410,12 +412,15 @@ def steal_forward(
         proto = create_onnx_model_from_input_tensors(storage)
         if verbose:
             print("-- dumps stored objects")
+        location = f"{os.path.split(dump_file)[-1]}.data"
+        if os.path.exists(location):
+            os.remove(location)
         onnx.save(
             proto,
             dump_file,
             save_as_external_data=True,
             all_tensors_to_one_file=True,
-            location=f"{os.path.split(dump_file)[-1]}.data",
+            location=location,
         )
         if verbose:
             print("-- done dump stored objects")
onnx_diagnostic/helpers/torch_helper.py
@@ -723,6 +728,15 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
                 )
             )
         )
+    if value.__class__.__name__ == "StaticCache":
+        return make_static_cache(
+            list(
+                zip(
+                    [t.to(to_value) for t in value.key_cache],
+                    [t.to(to_value) for t in value.value_cache],
+                )
+            )
+        )
     if value.__class__.__name__ == "EncoderDecoderCache":
         return make_encoder_decoder_cache(
             to_any(value.self_attention_cache, to_value),
@@ -769,6 +783,8 @@ def torch_deepcopy(value: Any) -> Any:
         return make_dynamic_cache(
             torch_deepcopy(list(zip(value.key_cache, value.value_cache)))
         )
+    if value.__class__.__name__ == "StaticCache":
+        return make_static_cache(torch_deepcopy(list(zip(value.key_cache, value.value_cache))))
     if value.__class__.__name__ == "SlidingWindowCache":
         return make_sliding_window_cache(
             torch_deepcopy(list(zip(value.key_cache, value.value_cache)))
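Both helpers rebuild a transformers StaticCache from (key, value) tensor pairs through make_static_cache, dispatching on the class name so the cache classes never need to be imported eagerly. A hedged usage sketch, assuming make_static_cache accepts the list-of-pairs form used above (shapes are placeholders):

    import torch
    from onnx_diagnostic.helpers.cache_helper import make_static_cache

    # one (key, value) pair per layer: [batch, heads, cache_len, head_dim]
    pairs = [(torch.randn(1, 2, 8, 4), torch.randn(1, 2, 8, 4)) for _ in range(3)]
    cache = make_static_cache(pairs)

    # what to_any(cache, "cpu") does tensor by tensor before rebuilding the cache
    moved = list(
        zip(
            [t.to("cpu") for t in cache.key_cache],
            [t.to("cpu") for t in cache.value_cache],
        )
    )
    cache_cpu = make_static_cache(moved)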
onnx_diagnostic/reference/__init__.py
@@ -1,3 +1,4 @@
 from .evaluator import ExtendedReferenceEvaluator
 from .ort_evaluator import OnnxruntimeEvaluator
 from .torch_evaluator import TorchOnnxEvaluator
+from .report_results_comparison import ReportResultComparison

onnx_diagnostic/reference/ort_evaluator.py
@@ -22,8 +22,11 @@ from ..helpers.ort_session import (
     InferenceSessionForNumpy,
     _InferenceSession,
 )
+from ..helpers.torch_helper import to_tensor
+from .report_results_comparison import ReportResultComparison
 from .evaluator import ExtendedReferenceEvaluator

+
 PROTO = (FunctionProto, ModelProto, GraphProto, NodeProto)
 Proto = Union[FunctionProto, ModelProto, GraphProto, NodeProto]

onnx_diagnostic/reference/ort_evaluator.py
@@ -49,6 +52,8 @@ class OnnxruntimeEvaluator:
    :param ir_version: ir version to use when unknown
    :param opsets: opsets to use when unknown
    :param whole: if True, do not split node by node
+   :param torch_or_numpy: force the use of one of them, True for torch,
+       False for numpy, None to let the class choose
    """

    def __init__(
@@ -71,6 +76,7 @@ class OnnxruntimeEvaluator:
        ir_version: int = 10,
        opsets: Optional[Union[int, Dict[str, int]]] = None,
        whole: bool = False,
+       torch_or_numpy: Optional[bool] = None,
    ):
        if isinstance(proto, str):
            self.proto: Proto = load(proto)
@@ -102,8 +108,10 @@ class OnnxruntimeEvaluator:
            disable_aot_function_inlining=disable_aot_function_inlining,
            use_training_api=use_training_api,
        )
+       self.to_tensor_or_array = to_array_extended if not torch_or_numpy else to_tensor

        self.verbose = verbose
+       self.torch_or_numpy = torch_or_numpy
        self.sess_: Optional[_InferenceSession] = None
        if whole:
            self.nodes: Optional[List[NodeProto]] = None
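The new torch_or_numpy flag is threaded through every sub-session created by the evaluator (see the hunks below) so a single choice applies to initializers and intermediate results alike. A usage sketch ("model.onnx" is a placeholder; the constructor accepts a path per the loading branch above):

    from onnx_diagnostic.reference import OnnxruntimeEvaluator

    # None (default): the evaluator picks based on the inputs it receives
    # True: keep everything as torch tensors; False: keep everything as numpy arrays
    ev = OnnxruntimeEvaluator("model.onnx", torch_or_numpy=True)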
onnx_diagnostic/reference/ort_evaluator.py
@@ -122,7 +130,10 @@ class OnnxruntimeEvaluator:
                )
            )
        self.rt_inits_ = (
-           {init.name: to_array_extended(init) for init in self.proto.graph.initializer}
+           {
+               init.name: self.to_tensor_or_array(init)
+               for init in self.proto.graph.initializer
+           }
            if hasattr(self.proto, "graph")
            else {}
        )
@@ -190,13 +201,14 @@ class OnnxruntimeEvaluator:
            return a
        device = f"D{a.get_device()}:" if hasattr(a, "detach") else ""
        if hasattr(a, "shape"):
+           prefix = "A:" if hasattr(a, "astype") else "T:"
            if self.verbose < 4:  # noqa: PLR2004
-               return f"{device}{a.dtype}:{a.shape} in [{a.min()}, {a.max()}]"
+               return f"{prefix}{device}{a.dtype}:{a.shape} in [{a.min()}, {a.max()}]"
            elements = a.ravel().tolist()
            if len(elements) > 10:  # noqa: PLR2004
                elements = elements[:10]
-               return f"{device}{a.dtype}:{a.shape}:{','.join(map(str, elements))}..."
-           return f"{device}{a.dtype}:{a.shape}:{elements}"
+               return f"{prefix}{device}{a.dtype}:{a.shape}:{','.join(map(str, elements))}..."
+           return f"{prefix}{device}{a.dtype}:{a.shape}:{elements}"
        if hasattr(a, "append"):
            return ", ".join(map(self._log_arg, a))
        return a
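The new log prefix tells arrays and tensors apart by duck typing: numpy arrays have an astype method, torch tensors do not. A two-line check of that assumption:

    import numpy as np
    import torch

    assert hasattr(np.zeros(2), "astype")          # logged as "A:..."
    assert not hasattr(torch.zeros(2), "astype")   # logged as "T:..."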
onnx_diagnostic/reference/ort_evaluator.py
@@ -214,6 +226,7 @@ class OnnxruntimeEvaluator:
        outputs: Optional[List[str]],
        feed_inputs: Dict[str, Any],
        intermediate: bool = False,
+       report_cmp: Optional[ReportResultComparison] = None,
    ) -> Union[Dict[str, Any], List[Any]]:
        """
        Runs the model.
@@ -222,6 +235,10 @@
        :param outputs: required outputs or None for all
        :param feed_inputs: inputs
        :param intermediate: returns all outputs instead of the last ones
+       :param report_cmp: used as a reference,
+           every intermediate result is compared to every existing one;
+           if not empty, it is an instance of
+           :class:`onnx_diagnostic.reference.ReportResultComparison`
        :return: outputs, as a list if return_all is False,
            as a dictionary if return_all is True
        """
@@ -267,6 +284,10 @@
                self._log(2, " + %s: %s", name, value)  # type: ignore[arg-type]
                assert isinstance(name, str), f"unexpected type for name {type(name)}"
                results[name] = value
+           if report_cmp:
+               reported = report_cmp.report(dict(zip(node.output, outputs)))
+               if self.verbose > 1:
+                   print(f" -- report {len(reported)} comparisons")
            if not intermediate:
                self._clean_unused_inplace(i_node, node, results)

@@ -426,6 +447,7 @@
        cls = (
            InferenceSessionForNumpy
            if any(isinstance(i, np.ndarray) for i in inputs)
+           and (not isinstance(self.torch_or_numpy, bool) or not self.torch_or_numpy)
            else InferenceSessionForTorch
        )
        try:
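The session choice above compresses to a small predicate; restating it standalone makes the three cases easier to read (a sketch, not library code):

    def uses_numpy_session(has_numpy_input: bool, torch_or_numpy) -> bool:
        # mirrors the condition selecting InferenceSessionForNumpy above
        return has_numpy_input and (
            not isinstance(torch_or_numpy, bool) or not torch_or_numpy
        )

    assert uses_numpy_session(True, None) is True    # unset: follow the inputs
    assert uses_numpy_session(True, False) is True   # numpy forced
    assert uses_numpy_session(True, True) is False   # torch forced
    assert uses_numpy_session(False, None) is False  # torch inputs stay torch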
onnx_diagnostic/reference/ort_evaluator.py
@@ -486,6 +508,7 @@
            verbose=self.verbose,
            ir_version=self.ir_version,
            opsets=self.opsets,
+           torch_or_numpy=self.torch_or_numpy,
            **self.session_kwargs,
        )
        return onx, sess
@@ -500,6 +523,7 @@
            verbose=self.verbose,
            ir_version=self.ir_version,
            opsets=self.opsets,
+           torch_or_numpy=self.torch_or_numpy,
            **self.session_kwargs,
        )
        return ev.proto, sess
@@ -575,6 +599,7 @@
            verbose=self.verbose,
            ir_version=self.ir_version,
            opsets=self.opsets,
+           torch_or_numpy=self.torch_or_numpy,
            whole=True,
            **self.session_kwargs,
        )
onnx_diagnostic/reference/report_results_comparison.py (new file)
@@ -0,0 +1,95 @@
+from typing import Any, Dict, List, Tuple, Union
+
+
+ReportKeyNameType = Union[str, Tuple[str, int, str]]
+ReportKeyValueType = Tuple[int, Tuple[int, ...]]
+
+
+class ReportResultComparison:
+    """
+    Holds tensors a runtime can use as a reference to compare
+    intermediate results.
+    See :meth:`onnx_diagnostic.reference.TorchOnnxEvaluator.run`.
+
+    :param tensors: tensors
+    """
+
+    def __init__(self, tensors: Dict[ReportKeyNameType, "torch.Tensor"]):  # noqa: F821
+        from ..helpers.onnx_helper import dtype_to_tensor_dtype
+        from ..helpers import max_diff, string_type
+
+        assert all(
+            hasattr(v, "shape") and hasattr(v, "dtype") for v in tensors.values()
+        ), f"One of the tensors is not: {string_type(tensors, with_shape=True)}"
+        self.dtype_to_tensor_dtype = dtype_to_tensor_dtype
+        self.max_diff = max_diff
+        self.tensors = tensors
+        self._build_mapping()
+
+    def key(self, tensor: "torch.Tensor") -> ReportKeyValueType:  # noqa: F821
+        "Returns a key for a tensor, (onnx dtype, shape)."
+        return self.dtype_to_tensor_dtype(tensor.dtype), tuple(map(int, tensor.shape))
+
+    def _build_mapping(self):
+        mapping = {}
+        for k, v in self.tensors.items():
+            key = self.key(v)
+            if key not in mapping:
+                mapping[key] = []
+            mapping[key].append(k)
+        self.mapping = mapping
+        self.clear()
+
+    def clear(self):
+        """Clears the last report."""
+        self.report_cmp = {}
+        self.unique_run_names = set()
+
+    @property
+    def value(
+        self,
+    ) -> Dict[Tuple[Tuple[int, str], ReportKeyNameType], Dict[str, Union[float, str]]]:
+        "Returns the report."
+        return self.report_cmp
+
+    @property
+    def data(self) -> List[Dict[str, Any]]:
+        "Returns data which can be consumed by a dataframe."
+        rows = []
+        for k, v in self.value.items():
+            (i_run, run_name), ref_name = k
+            d = dict(run_index=i_run, run_name=run_name, ref_name=ref_name)
+            d.update(v)
+            rows.append(d)
+        return rows
+
+    def report(
+        self, outputs: Dict[str, "torch.Tensor"]  # noqa: F821
+    ) -> List[Tuple[Tuple[int, str], ReportKeyNameType, Dict[str, Union[float, str]]]]:
+        """
+        For every tensor in outputs, compares it to every tensor held by
+        this class if it shares the same type and shape. The function returns
+        the results of the comparison. The function also collects the results
+        into a dictionary the user can retrieve later.
+        """
+        res: List[Tuple[Tuple[int, str], ReportKeyNameType, Dict[str, Union[float, str]]]] = []
+        for name, tensor in outputs.items():
+            i_run = len(self.unique_run_names)
+            self.unique_run_names.add(name)
+            key = self.key(tensor)
+            if key not in self.mapping:
+                continue
+            cache: Dict["torch.device", "torch.Tensor"] = {}  # noqa: F821, UP037
+            for held_key in self.mapping[key]:
+                t2 = self.tensors[held_key]
+                if hasattr(t2, "device") and hasattr(tensor, "device"):
+                    if t2.device in cache:
+                        t = cache[t2.device]
+                    else:
+                        cache[t2.device] = t = tensor.to(t2.device)
+                    diff = self.max_diff(t, t2)
+                else:
+                    diff = self.max_diff(tensor, t2)
+                res.append((i_run, name, held_key, diff))  # type: ignore[arg-type]
+                self.report_cmp[(i_run, name), held_key] = diff
+        return res
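A hedged usage sketch of the new class: reference tensors are indexed by (onnx dtype, shape), so any intermediate result with the same key is compared to them and the discrepancies are collected (names and shapes are placeholders):

    import torch
    from onnx_diagnostic.reference import ReportResultComparison

    reference = ReportResultComparison({"expected.layer0": torch.randn(2, 4)})
    reported = reference.report({"node_7_out": torch.randn(2, 4)})  # same dtype/shape: compared
    rows = reference.data  # one dict per comparison, e.g. pandas.DataFrame(rows)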
onnx_diagnostic/reference/torch_evaluator.py
@@ -5,6 +5,7 @@ import onnx
 import torch
 from ..helpers.torch_helper import to_tensor
 from ..torch_onnx.runtime_info import first_used_last_used, RuntimeValue
+from .report_results_comparison import ReportResultComparison
 from . import torch_ops


@@ -455,12 +456,17 @@ class TorchOnnxEvaluator:
        self,
        outputs: Optional[List[str]],
        feeds: Union[Dict[str, torch.Tensor], Dict[str, np.ndarray]],
+       report_cmp: Optional[ReportResultComparison] = None,
    ) -> Union[List[Optional[torch.Tensor]], List[Optional[np.ndarray]]]:
        """
        Runs the ONNX model.

        :param outputs: outputs required
        :param feeds: inputs
+       :param report_cmp: used as a reference,
+           every intermediate result is compared to every existing one;
+           if not empty, it is an instance of
+           :class:`onnx_diagnostic.reference.ReportResultComparison`
        :return: output tensors.
        """
        use_numpy = any(isinstance(t, np.ndarray) for t in feeds.values())
@@ -532,6 +538,21 @@ class TorchOnnxEvaluator:
                    f"+R {kernel.output[0]}: "
                    f"{self.runtime_info[kernel.output[0]].string_type()}"
                )
+           if report_cmp:
+               reported = report_cmp.report(
+                   dict(
+                       zip(
+                           kernel.output,
+                           (
+                               tuple((r.tensor if r else None) for r in res)  # type: ignore[attr-defined]
+                               if isinstance(res, tuple)
+                               else ((res.tensor if res else None),)  # type: ignore[attr-defined]
+                           ),
+                       )
+                   )
+               )
+               if self.verbose > 1:
+                   print(f" -- report {len(reported)} comparisons")

            # free intermediate results
            for name in self.last_used[it]:
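Wiring the two pieces together: pass a ReportResultComparison to run and read the collected report afterwards. A sketch assuming TorchOnnxEvaluator accepts a model path the way OnnxruntimeEvaluator does ("model.onnx" and the feeds are placeholders):

    import torch
    from onnx_diagnostic.reference import ReportResultComparison, TorchOnnxEvaluator

    ev = TorchOnnxEvaluator("model.onnx")
    report = ReportResultComparison({"expected": torch.randn(1, 8)})
    ev.run(None, {"input": torch.randn(1, 8)}, report_cmp=report)
    print(report.data)  # every matching intermediate result was compared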
onnx_diagnostic/tasks/automatic_speech_recognition.py
@@ -69,6 +69,9 @@ def get_inputs(
         use_cache:bool,return_dict:bool
     )
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"

onnx_diagnostic/tasks/feature_extraction.py
@@ -35,6 +35,9 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "sequence_length"
     shapes = {

onnx_diagnostic/tasks/fill_mask.py
@@ -35,6 +35,9 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "sequence_length"
     shapes = {

onnx_diagnostic/tasks/image_classification.py
@@ -48,11 +48,14 @@ def get_inputs(
     :param input_height: input height
     :return: dictionary
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     assert isinstance(
         input_width, int
     ), f"Unexpected type for input_width {type(input_width)}{config}"
     assert isinstance(
-        input_width, int
+        input_height, int
     ), f"Unexpected type for input_height {type(input_height)}{config}"

     shapes = {
@@ -67,6 +70,9 @@ def get_inputs(
             -1, 1
         ),
     )
+    if model.__class__.__name__ == "ViTForImageClassification":
+        inputs["interpolate_pos_encoding"] = True
+        shapes["interpolate_pos_encoding"] = None  # type: ignore[assignment]
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         res["inputs2"] = get_inputs(

onnx_diagnostic/tasks/image_text_to_text.py
@@ -52,6 +52,9 @@ def get_inputs(
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :return: dictionary
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
     cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)

onnx_diagnostic/tasks/mixture_of_expert.py
@@ -61,6 +61,9 @@ def get_inputs(
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :return: dictionary
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     assert not add_second_input, "add_second_input=True not yet implemented"
     raise NotImplementedError(f"get_inputs not yet implemented for task {__TASK__!r}.")

onnx_diagnostic/tasks/object_detection.py
@@ -41,6 +41,9 @@ def get_inputs(
     :param input_height: input height
     :return: dictionary
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     assert isinstance(
         input_width, int
     ), f"Unexpected type for input_width {type(input_width)}{config}"

onnx_diagnostic/tasks/sentence_similarity.py
@@ -35,6 +35,9 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"
     shapes = {

onnx_diagnostic/tasks/summarization.py
@@ -62,6 +62,9 @@ def get_inputs(
         decoder_input_ids:T7s1x1,
         encoder_outputs:dict(last_hidden_state:T1s1x16x512)
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
     cache_length = "cache_length_key"  # torch.export.Dim("cache_length", min=1, max=4096)

onnx_diagnostic/tasks/text2text_generation.py
@@ -64,6 +64,9 @@ def get_inputs(
         decoder_input_ids:T7s1x1,
         encoder_outputs:dict(last_hidden_state:T1s1x16x512)
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
     cache_length = "cache_length_key"  # torch.export.Dim("cache_length", min=1, max=4096)

onnx_diagnostic/tasks/text_classification.py
@@ -35,6 +35,9 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("sequence_length", min=1, max=1024)
     shapes = {
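Every task module received the same three-line guard; it fails fast when a caller asks for a cache class the task does not support yet. A minimal reproduction of its behaviour (get_inputs_stub is a hypothetical stand-in for any task's get_inputs):

    def get_inputs_stub(**kwargs):
        # same guard as added to every task's get_inputs
        assert (
            "cls_cache" not in kwargs
        ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."

    get_inputs_stub()  # passes
    try:
        get_inputs_stub(cls_cache="StaticCache")
    except AssertionError as e:
        print(e)  # Not yet implemented for cls_cache='StaticCache'.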