onnx-diagnostic 0.7.16__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +78 -22
  3. onnx_diagnostic/export/api.py +124 -0
  4. onnx_diagnostic/export/dynamic_shapes.py +2 -1
  5. onnx_diagnostic/export/shape_helper.py +47 -70
  6. onnx_diagnostic/ext_test_case.py +11 -0
  7. onnx_diagnostic/helpers/cache_helper.py +38 -7
  8. onnx_diagnostic/helpers/fake_tensor_helper.py +224 -104
  9. onnx_diagnostic/helpers/helper.py +27 -33
  10. onnx_diagnostic/helpers/log_helper.py +109 -5
  11. onnx_diagnostic/helpers/memory_peak.py +2 -0
  12. onnx_diagnostic/helpers/mini_onnx_builder.py +1 -1
  13. onnx_diagnostic/helpers/model_builder_helper.py +132 -2
  14. onnx_diagnostic/helpers/onnx_helper.py +1 -1
  15. onnx_diagnostic/helpers/ort_session.py +4 -0
  16. onnx_diagnostic/helpers/rt_helper.py +393 -43
  17. onnx_diagnostic/helpers/torch_helper.py +20 -1
  18. onnx_diagnostic/tasks/__init__.py +7 -0
  19. onnx_diagnostic/tasks/automatic_speech_recognition.py +2 -8
  20. onnx_diagnostic/tasks/feature_extraction.py +2 -8
  21. onnx_diagnostic/tasks/image_text_to_text.py +10 -8
  22. onnx_diagnostic/tasks/summarization.py +2 -8
  23. onnx_diagnostic/tasks/text2text_generation.py +3 -8
  24. onnx_diagnostic/tasks/text_generation.py +86 -65
  25. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +718 -438
  26. onnx_diagnostic/torch_export_patches/patch_details.py +340 -0
  27. onnx_diagnostic/torch_export_patches/patch_inputs.py +1 -1
  28. onnx_diagnostic/torch_export_patches/patch_module.py +9 -36
  29. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +12 -6
  30. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +162 -24
  31. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +140 -104
  32. onnx_diagnostic/torch_models/untrained/llm_phi2.py +1 -4
  33. onnx_diagnostic/torch_models/validate.py +626 -228
  34. {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/METADATA +1 -1
  35. {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/RECORD +38 -36
  36. {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/WHEEL +0 -0
  37. {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/licenses/LICENSE.txt +0 -0
  38. {onnx_diagnostic-0.7.16.dist-info → onnx_diagnostic-0.8.1.dist-info}/top_level.txt +0 -0
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
3
3
  Functions, classes to dig into a model when this one is right, slow, wrong...
4
4
  """
5
5
 
6
- __version__ = "0.7.16"
6
+ __version__ = "0.8.1"
7
7
  __author__ = "Xavier Dupré"
@@ -265,7 +265,7 @@ def get_parser_config() -> ArgumentParser:
265
265
  "--mop",
266
266
  metavar="KEY=VALUE",
267
267
  nargs="*",
268
- help="Additional model options, use to change some parameters of the model, "
268
+ help="Additional model options, used to change some parameters of the model, "
269
269
  "example:\n --mop attn_implementation=sdpa or --mop attn_implementation=eager",
270
270
  action=_ParseDict,
271
271
  )
@@ -442,11 +442,17 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
442
442
  default=True,
443
443
  action=_BoolOrParseDictPatch,
444
444
  nargs="*",
445
- help="Applies patches before exporting, it can be a boolean "
446
- "to enable to disable the patches or be more finetuned. It is possible to "
447
- "disable patch for torch by adding "
448
- '--patch "patch_sympy=False" --patch "patch_torch=False", '
449
- "default is True.",
445
+ help=textwrap.dedent(
446
+ """
447
+ Applies patches before exporting, it can be a boolean
448
+ to enable or disable the patches or be more finetuned
449
+ (default is True). It is possible to disable patch for torch
450
+ by adding:
451
+ --patch "patch_sympy=False" --patch "patch_torch=False"
452
+ """.strip(
453
+ "\n"
454
+ )
455
+ ),
450
456
  )
451
457
  parser.add_argument(
452
458
  "--rewrite",
@@ -476,10 +482,16 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
476
482
  "--inputs2",
477
483
  default=1,
478
484
  type=int,
479
- help="Validates or exports the model on a second set of inputs\n"
480
- "to check the exported model supports dynamism. The values is used "
481
- "as an increment to the first set of inputs. A high value may trick "
482
- "a different behavior in the model and missed by the exporter.",
485
+ help=textwrap.dedent(
486
+ """
487
+ Validates or exports the model on a second set of inputs
488
+ to check the exported model supports dynamism. The value is used
489
+ as an increment to the first set of inputs. A high value may trigger
490
+ a different behavior in the model that is missed by the exporter.
491
+ """.strip(
492
+ "\n"
493
+ )
494
+ ),
483
495
  )
484
496
  parser.add_argument(
485
497
  "--runtime",
@@ -512,9 +524,15 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
512
524
  parser.add_argument(
513
525
  "--ortfusiontype",
514
526
  required=False,
515
- help="Applies onnxruntime fusion, this parameter should contain the\n"
516
- "model type or multiple values separated by `|`. `ALL` can be used\n"
517
- "to run them all.",
527
+ help=textwrap.dedent(
528
+ """
529
+ Applies onnxruntime fusion, this parameter should contain the
530
+ model type or multiple values separated by `|`. `ALL` can be used
531
+ to run them all.
532
+ """.strip(
533
+ "\n"
534
+ )
535
+ ),
518
536
  )
519
537
  parser.add_argument("-v", "--verbose", default=0, type=int, help="verbosity")
520
538
  parser.add_argument("--dtype", help="Changes dtype if necessary.")
@@ -523,18 +541,32 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
523
541
  "--iop",
524
542
  metavar="KEY=VALUE",
525
543
  nargs="*",
526
- help="Additional input options, use to change the default"
527
- "inputs use to export, example:\n --iop cls_cache=SlidingWindowCache"
528
- "\n --iop cls_cache=StaticCache",
544
+ help=textwrap.dedent(
545
+ """
546
+ Additional input options, used to change the default
547
+ inputs used to export. Examples:
548
+ --iop cls_cache=SlidingWindowCache
549
+ --iop cls_cache=StaticCache
550
+ """.strip(
551
+ "\n"
552
+ )
553
+ ),
529
554
  action=_ParseDict,
530
555
  )
531
556
  parser.add_argument(
532
557
  "--mop",
533
558
  metavar="KEY=VALUE",
534
559
  nargs="*",
535
- help="Additional model options, use to change some parameters of the model, "
536
- "example:\n --mop attn_implementation=sdpa --mop attn_implementation=eager\n "
537
- "--mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\"",
560
+ help=textwrap.dedent(
561
+ """
562
+ Additional model options, used to change some parameters
563
+ of the model. Example:
564
+ --mop attn_implementation=sdpa --mop attn_implementation=eager
565
+ --mop "rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}"
566
+ """.strip(
567
+ "\n"
568
+ )
569
+ ),
538
570
  action=_ParseDict,
539
571
  )
540
572
  if name == "validate":
@@ -566,9 +598,32 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser:
566
598
  parser.add_argument(
567
599
  "--quiet-input-sets",
568
600
  default="",
569
- help="Avoids raising an exception when an input sets does not work with "
570
- "the exported model.\nExample: --quiet-input-sets=inputs,inputs22",
601
+ help=textwrap.dedent(
602
+ """
603
+ Avoids raising an exception when an input sets does not work with
604
+ the exported model. Example:
605
+ --quiet-input-sets=inputs,inputs22
606
+ """.strip(
607
+ "\n"
608
+ )
609
+ ),
571
610
  )
611
+ parser.add_argument(
612
+ "--expop",
613
+ metavar="KEY=VALUE",
614
+ nargs="*",
615
+ help=textwrap.dedent(
616
+ """
617
+ Additional exporter options, used to change some parameters
618
+ of the model. Examples:
619
+ --expop report=True
620
+ --expop report=True --expop verify=True
621
+ """.strip(
622
+ "\n"
623
+ )
624
+ ),
625
+ action=_ParseDict,
626
+ )
572
627
  return parser
573
628
 
574
629
 
@@ -634,6 +689,7 @@ def _cmd_validate(argv: List[Any]):
634
689
  output_names=(
635
690
  None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
636
691
  ),
692
+ exporter_options=args.expop,
637
693
  )
638
694
  print("")
639
695
  print("-- summary --")
@@ -940,7 +996,7 @@ def get_parser_agg() -> ArgumentParser:
940
996
  "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
941
997
  "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
942
998
  "n_node_gqa,n_node_layer_normalization,n_node_layer_normalization23,"
943
- "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
999
+ "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,n_node_random,"
944
1000
  "n_node_constant,n_node_shape,n_node_expand,"
945
1001
  "n_node_function,n_node_initializer,n_node_scatter,"
946
1002
  "time_export_unbiased,onnx_n_nodes_no_cst,n_node_initializer_small",
@@ -0,0 +1,124 @@
1
+ from typing import Any, Dict, List, Sequence, Optional, Tuple, Union
2
+ import torch
3
+
4
+
5
+ def to_onnx(
6
+ mod: Union["torch.nn.Module", "torch.fx.GraphModule"], # noqa: F821
7
+ args: Optional[Sequence["torch.Tensor"]] = None, # noqa: F821
8
+ kwargs: Optional[Dict[str, "torch.Tensor"]] = None, # noqa: F821
9
+ input_names: Optional[Sequence[str]] = None,
10
+ target_opset: Optional[Union[int, Dict[str, int]]] = None,
11
+ verbose: int = 0,
12
+ dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
13
+ filename: Optional[str] = None,
14
+ output_names: Optional[List[str]] = None,
15
+ output_dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
16
+ exporter: str = "onnx-dynamo",
17
+ ) -> Any:
18
+ """
19
+ Common API for exporters. By default, the models are optimized to use the
20
+ most efficient kernels implemented in :epkg:`onnxruntime`.
21
+
22
+ :param mod: torch model
23
+ :param args: unnamed arguments
24
+ :param kwargs: named arguments
25
+ :param input_names: input names for the onnx model (optional)
26
+ :param target_opset: opset to target, if not specified, each converter
27
+ keeps its default value
28
+ :param verbose: verbosity level
29
+ :param dynamic_shapes: dynamic shapes, usually a nested structure
30
+ included a dictionary for each tensor
31
+ :param filename: output filename
32
+ :param output_names: to change the output of the onnx model
33
+ :param output_dynamic_shapes: to overwrite the dynamic shapes names
34
+ :param exporter: exporter to use (``onnx-dynamo``, ``modelbuilder``, ``custom``)
35
+ :return: the output of the selected exporter, usually a structure including
36
+ an onnx model
37
+
38
+ A simple example:
39
+
40
+ .. code-block:: python
41
+
42
+ to_onnx(
43
+ model,
44
+ kwargs=inputs,
45
+ dynamic_shapes=ds,
46
+ exporter=exporter,
47
+ filename=filename,
48
+ )
49
+ """
50
+ if exporter == "custom":
51
+ from experimental_experiment.torch_interpreter import to_onnx as _to_onnx
52
+ from experimental_experiment.xbuilder import OptimizationOptions
53
+
54
+ return _to_onnx(
55
+ mod,
56
+ args=args,
57
+ kwargs=kwargs,
58
+ input_names=input_names,
59
+ output_names=output_names,
60
+ target_opset=target_opset,
61
+ verbose=verbose,
62
+ filename=filename,
63
+ dynamic_shapes=dynamic_shapes,
64
+ large_model=True,
65
+ output_dynamic_shapes=output_dynamic_shapes,
66
+ options=OptimizationOptions(patterns="default+onnxruntime"),
67
+ )
68
+ if exporter in ("dynamo", "onnx-dynamo"):
69
+ import onnxscript.rewriter.ort_fusions as ort_fusions
70
+
71
+ assert (
72
+ not output_dynamic_shapes
73
+ ), f"output_dynamic_shapes not supported for exporter={exporter!r}"
74
+ epo = torch.onnx.export(
75
+ mod,
76
+ args=args or tuple(),
77
+ kwargs=kwargs,
78
+ input_names=input_names,
79
+ output_names=output_names,
80
+ opset_version=target_opset,
81
+ dynamic_shapes=dynamic_shapes,
82
+ dynamo=True,
83
+ )
84
+ ort_fusions.optimize_for_ort(epo.model)
85
+ epo.save(filename)
86
+ return epo
87
+
88
+ if exporter == "modelbuilder":
89
+ import os
90
+ from ..helpers import flatten_object, string_type
91
+ from ..helpers.model_builder_helper import create_model_builder, save_model_builder
92
+
93
+ assert filename, f"filename must be specified for exporter={exporter!r}"
94
+ assert (
95
+ not output_dynamic_shapes
96
+ ), f"output_dynamic_shapes not supported for exporter={exporter!r}"
97
+ assert hasattr(mod, "config"), f"configuration is missing in model class {type(mod)}"
98
+ assert not args, f"only kwargs can be defined with exporter={exporter!r}"
99
+ assert list(kwargs) == ["input_ids", "attention_mask", "past_key_values"], ( # type: ignore[arg-type]
100
+ f"Only a specified set of inputs is supported for exporter={exporter!r}, "
101
+ f"but it is {list(kwargs)}" # type: ignore[arg-type]
102
+ )
103
+ flat_inputs = flatten_object(kwargs, drop_keys=True)
104
+ first = flat_inputs[0]
105
+ first_float = [
106
+ t
107
+ for t in flat_inputs
108
+ if t.dtype in {torch.float32, torch.double, torch.float16, torch.bfloat16}
109
+ ]
110
+ assert first_float, (
111
+ f"Unable to find a float tensor in the inputs "
112
+ f"{string_type(kwargs, with_shape=True)}"
113
+ )
114
+ onx = create_model_builder(
115
+ mod.config,
116
+ mod,
117
+ precision=str(first_float[0].dtype).split(".")[-1],
118
+ execution_provider="cuda" if first.is_cuda else "cpu",
119
+ cache_dir=os.path.dirname(filename),
120
+ )
121
+ save_model_builder(onx, os.path.dirname(filename))
122
+ return onx
123
+
124
+ raise ValueError(f"Unknown exporter={exporter!r}")
@@ -1,4 +1,5 @@
1
1
  import inspect
2
+ import itertools
2
3
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
3
4
  import numpy as np
4
5
  import torch
@@ -934,7 +935,7 @@ class ModelInputs:
934
935
  auto=auto if isinstance(auto, bool) else f"{auto}_{i}vdc",
935
936
  )
936
937
  )
937
- return [key_cache, value_cache]
938
+ return list(itertools.chain.from_iterable(zip(key_cache, value_cache)))
938
939
 
939
940
  raise NotImplementedError(
940
941
  f"Unable to build dynamic shapes for type {set_types.pop()}: "
@@ -1,6 +1,5 @@
1
1
  from typing import Any, Dict, List, Set, Optional, Tuple, Union
2
2
  from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
3
- from ..helpers.fake_tensor_helper import fake_reshape
4
3
  from .dynamic_shapes import ModelInputs
5
4
 
6
5
 
@@ -203,14 +202,49 @@ def guess_dynamic_shapes_from_inputs(
203
202
 
204
203
 
205
204
  def make_fake_with_dynamic_dimensions(
206
- x: Any,
207
- dynamic_shapes: Any,
208
- fake_mode: Optional["FakeTensorMode"] = None, # noqa: F821
209
- ) -> Tuple[Any, "FakeTensorMode"]: # noqa: F821
205
+ x: Any, dynamic_shapes: Any, context: Optional["FakeTensorContext"] = None # noqa: F821
206
+ ) -> Tuple[Any, "FakeTensorContext"]: # noqa: F821
210
207
  """
211
208
  Replaces all tensors by fake tensor respecting the same
212
209
  constraints as the following dynamic shapes.
213
210
  This uses function :func:`onnx_diagnostic.helpers.fake_tensor_helper.make_fake`.
211
+ Parameter ``context`` is used to reuse the same object when the dynamic
212
+ dimension is given the same name as another one.
213
+
214
+ A simple tensor:
215
+
216
+ .. runpython::
217
+ :showcode:
218
+
219
+ import torch
220
+ from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
221
+ from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions
222
+
223
+ inputs, _ = make_fake_with_dynamic_dimensions(
224
+ torch.rand((2, 3, 4, 5), dtype=torch.float32),
225
+ {0: "batch", 2: "cache_length"},
226
+ )
227
+ print(inputs)
228
+
229
+ Two tensors:
230
+
231
+ .. runpython::
232
+ :showcode:
233
+
234
+ import torch
235
+ from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
236
+ from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions
237
+
238
+ inputs, _ = make_fake_with_dynamic_dimensions(
239
+ (
240
+ torch.rand((2, 3, 4, 5), dtype=torch.float32),
241
+ torch.rand((2, 3, 4, 5), dtype=torch.float32),
242
+ ),
243
+ ({0: "batch", 2: "cache_length"}, {0: "batch", 2: "cache_length"}),
244
+ )
245
+ print(inputs)
246
+
247
+ With a cache:
214
248
 
215
249
  .. runpython::
216
250
  :showcode:
@@ -243,8 +277,10 @@ def make_fake_with_dynamic_dimensions(
243
277
  "attention_mask": {0: "batch", 1: "cache+seq"},
244
278
  "position_ids": {0: "batch", 1: "seq_length"},
245
279
  "past_key_values": [
246
- [{0: "batch", 2: "cache_length"}, {0: "batch", 2: "cache_length"}],
247
- [{0: "batch", 2: "cache_length"}, {0: "batch", 2: "cache_length"}],
280
+ {0: "batch", 2: "cache_length"},
281
+ {0: "batch", 2: "cache_length"},
282
+ {0: "batch", 2: "cache_length"},
283
+ {0: "batch", 2: "cache_length"},
248
284
  ],
249
285
  },
250
286
  )
@@ -252,68 +288,9 @@ def make_fake_with_dynamic_dimensions(
252
288
  """
253
289
  if x is None:
254
290
  return None, None
255
- if fake_mode is None:
256
- from torch.fx.experimental.symbolic_shapes import ShapeEnv
257
- from torch._subclasses.fake_tensor import FakeTensorMode
291
+ if context is None:
292
+ from ..helpers.fake_tensor_helper import FakeTensorContext
258
293
 
259
- shape_env = ShapeEnv()
260
- fake_mode = FakeTensorMode(shape_env=shape_env)
294
+ context = FakeTensorContext()
261
295
 
262
- if isinstance(x, (list, tuple)):
263
- return (
264
- x.__class__(
265
- [
266
- make_fake_with_dynamic_dimensions(
267
- i, fake_mode=fake_mode, dynamic_shapes=ds
268
- )[0]
269
- for i, ds in zip(x, dynamic_shapes)
270
- ]
271
- ),
272
- fake_mode,
273
- )
274
- if isinstance(x, dict):
275
- return {
276
- k: make_fake_with_dynamic_dimensions(
277
- v, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[k]
278
- )[0]
279
- for k, v in x.items()
280
- }, fake_mode
281
-
282
- if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
283
- assert hasattr(x, "layers"), (
284
- f"Une more recent version of transformers (>=4.55), "
285
- f"'layers' not found in class {type(x)}"
286
- )
287
- assert (
288
- isinstance(dynamic_shapes, list) and len(dynamic_shapes) == 2
289
- ), f"Unexpected dynamic_shapes={dynamic_shapes} for a DynamicCache"
290
- for il, layer in enumerate(x.layers):
291
- assert hasattr(layer, "keys") and hasattr(layer, "values"), (
292
- f"Une more recent version of transformers (>=4.55), 'layers' "
293
- f"not found in class {type(layer)} ({dir(layer)})"
294
- )
295
- layer.keys = make_fake_with_dynamic_dimensions(
296
- layer.keys, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[0][il]
297
- )[0]
298
- layer.values = make_fake_with_dynamic_dimensions(
299
- layer.values, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[1][il]
300
- )[0]
301
- return x, fake_mode
302
- if x.__class__.__name__ == "EncoderDecoderCache":
303
- make_fake_with_dynamic_dimensions(
304
- x.self_attention_cache, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[0]
305
- )
306
- make_fake_with_dynamic_dimensions(
307
- x.cross_attention_cache, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[1]
308
- )
309
- return x, fake_mode
310
- if hasattr(x, "shape"):
311
- t = fake_reshape(x, dynamic_shapes, fake_mode=fake_mode)
312
- assert t.device == x.device, f"device mismatch {x.device} -> {t.device}"
313
- assert t.dtype == x.dtype, f"dtype mismatch {x.dtype} -> {t.dtype}"
314
- return t, fake_mode
315
- from ..helpers import string_type
316
-
317
- raise TypeError(
318
- f"Unexpected type {type(x)} for x, content is {string_type(x, with_shape=True)}"
319
- )
296
+ return context.make_fake_with_dynamic_dimensions(x, dynamic_shapes), context
@@ -630,6 +630,17 @@ def has_onnxruntime_training(push_back_batch: bool = False):
630
630
  return True
631
631
 
632
632
 
633
+ def has_onnxruntime_genai():
634
+ """Tells if onnxruntime_genai is installed."""
635
+ try:
636
+ import onnxruntime_genai # noqa: F401
637
+
638
+ return True
639
+ except ImportError:
640
+ # onnxruntime not training
641
+ return False
642
+
643
+
633
644
  def requires_onnxruntime_training(
634
645
  push_back_batch: bool = False, ortmodule: bool = False, msg: str = ""
635
646
  ) -> Callable:
@@ -1,4 +1,4 @@
1
- from typing import Any, Callable, List, Optional, Tuple
1
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
2
2
  import packaging.version as pv
3
3
  import torch
4
4
  import transformers
@@ -46,9 +46,14 @@ class CacheKeyValue:
46
46
  raise NotImplementedError(f"type(cache)={type(cache)}")
47
47
 
48
48
  def make_dynamic_cache(self):
49
- """Do the reverse operation."""
49
+ """Does the reverse operation."""
50
50
  return make_dynamic_cache(list(zip(self.key_cache, self.value_cache)))
51
51
 
52
+ @property
53
+ def n_layers(self) -> int:
54
+ """Returns the number of layers."""
55
+ return len(self.key_cache) if self.key_cache else 0
56
+
52
57
 
53
58
  def flatten_unflatten_for_dynamic_shapes(
54
59
  obj: Any,
@@ -134,10 +139,31 @@ def is_cache_dynamic_registered(fast: bool = False) -> bool:
134
139
  return len(cache2.key_cache) == len(cache.value_cache)
135
140
 
136
141
 
142
+ def make_dynamic_shapes_kv_cache(
143
+ cache: transformers.cache_utils.Cache, shape_of_one: Dict[int, Any]
144
+ ) -> List[Dict[int, Any]]:
145
+ """
146
+ Returns the dynamic shapes for key-value cache
147
+
148
+ :param cache: a cache
149
+ :param shape_of_one: shape of one element
150
+ :return: dynamic shapes
151
+ """
152
+ return [shape_of_one for _ in range(CacheKeyValue(cache).n_layers * 2)]
153
+
154
+
155
+ def _preprocess_key_value_pairs(
156
+ key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
157
+ ) -> List[Tuple[torch.Tensor, torch.Tensor]]:
158
+ if not key_value_pairs or isinstance(key_value_pairs[0], tuple):
159
+ return key_value_pairs
160
+ return list(zip(key_value_pairs[::2], key_value_pairs[1::2]))
161
+
162
+
137
163
  if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
138
164
 
139
165
  def make_dynamic_cache(
140
- key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
166
+ key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
141
167
  ) -> transformers.cache_utils.DynamicCache:
142
168
  """
143
169
  Creates an instance of :class:`transformers.cache_utils.DynamicCache`.
@@ -173,6 +199,7 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
173
199
  ``transformers>=4.56``. Before that version, only FakeTensor with static dimensions
174
200
  are supported.
175
201
  """
202
+ key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
176
203
  if (
177
204
  key_value_pairs
178
205
  and isinstance(key_value_pairs[0][0], torch._subclasses.fake_tensor.FakeTensor)
@@ -212,7 +239,7 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
212
239
  else:
213
240
 
214
241
  def make_dynamic_cache(
215
- key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
242
+ key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
216
243
  ) -> transformers.cache_utils.DynamicCache:
217
244
  """
218
245
  Creates an instance of :class:`transformers.cache_utils.DynamicCache`.
@@ -244,6 +271,7 @@ else:
244
271
  )
245
272
  print(string_type(past_key_values, with_shape=True))
246
273
  """
274
+ key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
247
275
  cache = transformers.cache_utils.DynamicCache(len(key_value_pairs)) # type: ignore
248
276
  for i, (key, value) in enumerate(key_value_pairs):
249
277
  cache.update(key, value, i)
@@ -251,7 +279,7 @@ else:
251
279
 
252
280
 
253
281
  def make_static_cache(
254
- key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
282
+ key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
255
283
  max_cache_len: Optional[int] = None,
256
284
  ) -> transformers.cache_utils.DynamicCache:
257
285
  """
@@ -284,6 +312,7 @@ def make_static_cache(
284
312
  )
285
313
  print(string_type(past_key_values, with_shape=True))
286
314
  """
315
+ key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
287
316
 
288
317
  class _config:
289
318
  def __init__(self):
@@ -426,9 +455,10 @@ def make_mamba_cache(
426
455
 
427
456
 
428
457
  def make_sliding_window_cache(
429
- key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
458
+ key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
430
459
  ) -> transformers.cache_utils.SlidingWindowCache:
431
460
  "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
461
+ key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
432
462
 
433
463
  class _config:
434
464
  def __init__(self):
@@ -481,7 +511,7 @@ def make_sliding_window_cache(
481
511
 
482
512
 
483
513
  def make_hybrid_cache(
484
- key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
514
+ key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
485
515
  max_cache_len: Optional[int] = None,
486
516
  max_batch_size: Optional[int] = None,
487
517
  sliding_window: Optional[int] = None,
@@ -566,6 +596,7 @@ def make_hybrid_cache(
566
596
  self.key_cache.append(new_layer_key_cache)
567
597
  self.value_cache.append(new_layer_value_cache)
568
598
  """
599
+ key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
569
600
  layer_types = None
570
601
  if key_value_pairs:
571
602
  assert (