PyPI - onnx-diagnostic - Versions diffs - 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl - Mend

onnx-diagnostic 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

onnx_diagnostic/__init__.py CHANGED Viewed

@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
-__version__ = "0.7.0"
+__version__ = "0.7.2"
 __author__ = "Xavier Dupré"

onnx_diagnostic/_command_lines_parser.py CHANGED Viewed

@@ -333,7 +333,24 @@ def get_parser_validate() -> ArgumentParser:
             of supported tasks.
             """
         ),
-        epilog="If the model id is specified, one untrained version of it is instantiated.",
+        epilog=textwrap.dedent(
+            """
+            If the model id is specified, one untrained version of it is instantiated.
+            Examples:
+            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+                --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
+                --dtype float16 --device cuda --patch --export onnx-dynamo --opt ir
+            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+                --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
+                --dtype float16 --device cuda --patch --export custom --opt default
+            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+                --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
+                --dtype float16 --device cuda --export modelbuilder
+            """
+        ),
         formatter_class=RawTextHelpFormatter,
     )
     parser.add_argument("-m", "--mid", type=str, help="model id, usually <author>/<name>")
@@ -372,6 +389,12 @@ def get_parser_validate() -> ArgumentParser:
         type=int,
         help="Raises an exception if a dynamic dimension becomes static.",
     )
+    parser.add_argument(
+        "--same-as-trained",
+        default=False,
+        action=BooleanOptionalAction,
+        help="Validates a model identical to the trained model but not trained.",
+    )
     parser.add_argument(
         "--trained",
         default=False,
@@ -487,7 +510,8 @@ def _cmd_validate(argv: List[Any]):
             do_run=args.run,
             verbose=args.verbose,
             quiet=args.quiet,
-            trained=args.trained,
+            same_as_pretrained=args.same_as_trained,
+            use_pretrained=args.trained,
             dtype=args.dtype,
             device=args.device,
             patch=args.patch,
@@ -609,6 +633,178 @@ def _cmd_stats(argv: List[Any]):
         print("done.")
+def get_parser_agg() -> ArgumentParser:
+    parser = ArgumentParser(
+        prog="agg",
+        description=textwrap.dedent(
+            """
+            Aggregates statistics coming from benchmarks.
+            Every run is a row. Every row is indexed by some keys,
+            and produces values. Every row has a date.
+            """
+        ),
+        epilog=textwrap.dedent(
+            """
+            examples:\n
+                python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1
+            """
+        ),
+        formatter_class=RawTextHelpFormatter,
+    )
+    parser.add_argument("output", help="output excel file")
+    parser.add_argument(
+        "inputs",
+        nargs="+",
+        help="input csv or zip files, at least 1, it can be a name, or search path",
+    )
+    parser.add_argument(
+        "--filter", default="rawdata_.*.csv", help="filter for input files inside zip files"
+    )
+    parser.add_argument(
+        "--recent",
+        default=True,
+        action=BooleanOptionalAction,
+        help="Keeps only the most recent experiment for the same of keys.",
+    )
+    parser.add_argument(
+        "--keep-last-date",
+        default=False,
+        action=BooleanOptionalAction,
+        help="Rewrite all dates to the last one to simplifies the analysis, "
+        "this assume changing the date does not add ambiguity, if any, option "
+        "--recent should be added.",
+    )
+    parser.add_argument(
+        "--raw",
+        default=True,
+        action=BooleanOptionalAction,
+        help="Keeps the raw data in a sheet.",
+    )
+    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
+    parser.add_argument(
+        "-k",
+        "--keys",
+        default="^version_.*,^model_.*,device,opt_patterns,suite,memory_peak,"
+        "machine,exporter,dynamic,rtopt,dtype,device,architecture",
+        help="List of columns to consider as keys, "
+        "multiple values are separated by `,`\n"
+        "regular expressions are allowed",
+    )
+    parser.add_argument(
+        "--drop-keys",
+        default="",
+        help="Drops keys from the given list. Something it is faster "
+        "to remove one than to select all the remaining ones.",
+    )
+    parser.add_argument(
+        "-w",
+        "--values",
+        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*,^onnx_.*,^op_onnx_.*,^peak_gpu_.*",
+        help="List of columns to consider as values, "
+        "multiple values are separated by `,`\n"
+        "regular expressions are allowed",
+    )
+    parser.add_argument(
+        "-i", "--ignored", default="^version_.*", help="List of columns to ignore"
+    )
+    parser.add_argument(
+        "-f",
+        "--formula",
+        default="speedup,bucket[speedup],ERR1,n_models,n_model_eager,"
+        "n_model_running,n_model_acc01,n_model_acc001,n_model_dynamic,"
+        "n_model_pass,n_model_faster,"
+        "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
+        "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
+        "n_node_constant,n_node_shape,n_node_expand,"
+        "n_node_function,n_node_initializer,n_node_scatter,"
+        "time_export_unbiased,onnx_n_nodes_no_cst,n_node_initializer_small",
+        help="Columns to compute after the aggregation was done.",
+    )
+    parser.add_argument(
+        "--views",
+        default="agg-suite,agg-all,disc,speedup,time,time_export,err,cmd,"
+        "bucket-speedup,raw-short,counts,peak-gpu,onnx",
+        help="Views to add to the output files.",
+    )
+    parser.add_argument(
+        "--csv",
+        default="raw-short",
+        help="Views to dump as csv files.",
+    )
+    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
+    parser.add_argument(
+        "--filter-in",
+        default="",
+        help="adds a filter to filter in data, syntax is\n"
+        '``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
+    )
+    parser.add_argument(
+        "--filter-out",
+        default="",
+        help="adds a filter to filter out data, syntax is\n"
+        '``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
+    )
+    return parser
+def _cmd_agg(argv: List[Any]):
+    from .helpers.log_helper import (
+        CubeLogsPerformance,
+        open_dataframe,
+        enumerate_csv_files,
+        filter_data,
+    )
+    parser = get_parser_agg()
+    args = parser.parse_args(argv[1:])
+    reg = re.compile(args.filter)
+    csv = list(
+        enumerate_csv_files(
+            args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
+        )
+    )
+    assert csv, f"No csv files in {args.inputs}, args.filter={args.filter!r}, csv={csv}"
+    if args.verbose:
+        from tqdm import tqdm
+        loop = tqdm(csv)
+    else:
+        loop = csv
+    dfs = []
+    for c in loop:
+        df = open_dataframe(c)
+        assert (
+            args.time in df.columns
+        ), f"Missing time column {args.time!r} in {c!r}\n{df.head()}\n{sorted(df.columns)}"
+        dfs.append(filter_data(df, filter_in=args.filter_in, filter_out=args.filter_out))
+    drop_keys = set(args.drop_keys.split(","))
+    cube = CubeLogsPerformance(
+        dfs,
+        time=args.time,
+        keys=[a for a in args.keys.split(",") if a and a not in drop_keys],
+        values=[a for a in args.values.split(",") if a],
+        ignored=[a for a in args.ignored.split(",") if a],
+        recent=args.recent,
+        formulas={k: k for k in args.formula.split(",")},
+        keep_last_date=args.keep_last_date,
+    )
+    cube.load(verbose=max(args.verbose - 1, 0))
+    if args.verbose:
+        print(f"Dumps final file into {args.output!r}")
+    cube.to_excel(
+        args.output,
+        {k: k for k in args.views.split(",")},
+        verbose=args.verbose,
+        csv=args.csv.split(","),
+        raw=args.raw,
+    )
+    if args.verbose:
+        print(f"Wrote {args.output!r}")
 def get_main_parser() -> ArgumentParser:
     parser = ArgumentParser(
         prog="onnx_diagnostic",
@@ -619,19 +815,29 @@ def get_main_parser() -> ArgumentParser:
             Type 'python -m onnx_diagnostic <cmd> --help'
             to get help for a specific command.
+            agg        - aggregates statistics from multiple files
             config     - prints a configuration for a model id
             find       - find node consuming or producing a result
             lighten    - makes an onnx model lighter by removing the weights,
-            unlighten  - restores an onnx model produces by the previous experiment
             print      - prints the model on standard output
-            validate   - validate a model
             stats      - produces statistics on a model
+            unlighten  - restores an onnx model produces by the previous experiment
+            validate   - validate a model
             """
         ),
     )
     parser.add_argument(
         "cmd",
-        choices=["config", "find", "lighten", "print", "stats", "unlighten", "validate"],
+        choices=[
+            "agg",
+            "config",
+            "find",
+            "lighten",
+            "print",
+            "stats",
+            "unlighten",
+            "validate",
+        ],
         help="Selects a command.",
     )
     return parser
@@ -646,6 +852,7 @@ def main(argv: Optional[List[Any]] = None):
         config=_cmd_config,
         validate=_cmd_validate,
         stats=_cmd_stats,
+        agg=_cmd_agg,
     )
     if argv is None:
@@ -667,6 +874,7 @@ def main(argv: Optional[List[Any]] = None):
                 config=get_parser_config,
                 validate=get_parser_validate,
                 stats=get_parser_stats,
+                agg=get_parser_agg,
             )
             cmd = argv[0]
             if cmd not in parsers:

onnx_diagnostic/export/dynamic_shapes.py CHANGED Viewed

@@ -630,9 +630,12 @@ class ModelInputs:
         method_name: str = "forward",
         name: str = "main",
     ):
-        assert isinstance(model, torch.nn.Module) or inspect.ismodule(
-            model
-        ), f"unexpected type for model={type(model)}, it must be a torch.nn.Module"
+        assert (
+            model is None or isinstance(model, torch.nn.Module) or inspect.ismodule(model)
+        ), (
+            f"unexpected type for model={type(model)}, "
+            f"it must be a torch.nn.Module or None"
+        )
         assert name, (
             f"name={name!r} cannot be empty this string is used to "
             f"display meaningful error messages"
@@ -641,26 +644,42 @@ class ModelInputs:
         self.model = model
         self.level = level
         self.method_name = method_name
-        self.forward = getattr(model, method_name)
-        self.signature = inspect.signature(self.forward)
+        self.forward = getattr(model, method_name) if model is not None else None
+        self.signature = inspect.signature(self.forward) if self.forward else None
         # information about the signature
-        self.forward_parameter_names = set(
-            p.name
-            for p in self.signature.parameters.values()
-            if p.kind not in {p.VAR_POSITIONAL, p.VAR_KEYWORD}
+        self.forward_parameter_names = (
+            set(
+                p.name
+                for p in self.signature.parameters.values()
+                if p.kind not in {p.VAR_POSITIONAL, p.VAR_KEYWORD}
+            )
+            if self.signature
+            else None
+        )
+        self.forward_ordered_parameter_names = (
+            list(self.signature.parameters) if self.signature else None
+        )
+        self.forward_positioned_parameter_names = (
+            [
+                p.name
+                for p in self.signature.parameters.values()
+                if p.kind in (p.VAR_POSITIONAL, p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+            ]
+            if self.signature
+            else None
+        )
+        names = (
+            [p.name for p in self.signature.parameters.values() if p.kind == p.VAR_POSITIONAL]
+            if self.signature
+            else None
         )
-        self.forward_ordered_parameter_names = list(self.signature.parameters)
-        self.forward_positioned_parameter_names = [
-            p.name
-            for p in self.signature.parameters.values()
-            if p.kind in (p.VAR_POSITIONAL, p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-        ]
-        names = [
-            p.name for p in self.signature.parameters.values() if p.kind == p.VAR_POSITIONAL
-        ]
         self.forward_args = names[0] if names else None
-        names = [p.name for p in self.signature.parameters.values() if p.kind == p.VAR_KEYWORD]
+        names = (
+            [p.name for p in self.signature.parameters.values() if p.kind == p.VAR_KEYWORD]
+            if self.signature
+            else None
+        )
         self.forward_kwargs = names[0] if names else None
         self.forward_custom_op_schema = None
         self.forward_need_serialization = False
@@ -711,6 +730,7 @@ class ModelInputs:
     @property
     def true_model_name(self) -> str:
         "Returns class name or module name."
+        assert self.model is not None, "model was None when the class was initialized."
         return (
             self.model.__class__.__name__
             if isinstance(self.model, torch.nn.Module)
@@ -942,7 +962,7 @@ class ModelInputs:
                 )
             )
         names = s2.pop()
-        for name in names:
+        for i, name in enumerate(names):
             assert name not in {"_diag", "verbose"}, (
                 f"{self.full_name}: unexpected parameter {name!r}, names={names}"
                 f"\ninputs[0]={string_type(self.inputs[0], with_shape=True)}"
@@ -968,6 +988,14 @@ class ModelInputs:
         with the corresponding dynamic shapes.
         *kwargs*, *dynamic_shapes* are modified inplace.
         """
+        assert (
+            self.signature is not None
+            and self.forward_parameter_names is not None
+            and self.forward_ordered_parameter_names is not None
+        ), (
+            "model was None when the class was initialized, "
+            "cannot move args to kwargs without the signature."
+        )
         sig = self.signature
         arg_dyn, kw_dyn = dynamic_shapes
         for i, p in enumerate(sig.parameters):

onnx_diagnostic/export/shape_helper.py ADDED Viewed

@@ -0,0 +1,126 @@
+from typing import Any, Dict, List, Set, Tuple, Union
+from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
+from .dynamic_shapes import ModelInputs
+def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
+    """
+    Returns the dynamic shapes for the given inputs.
+    All dimensions are considered as dynamic.
+    ``dim_prefix`` can be a string (the function uses it as a prefix),
+    or ``torch.export.Dim.AUTO`` or ``torch.export.Dim.DYNAMIC``.
+    .. runpython::
+        :showcode:
+        import pprint
+        import torch
+        from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
+        from onnx_diagnostic.export.shape_helper import all_dynamic_shape_from_inputs
+        bsize, nheads, slen, dim = 2, 1, 30, 96
+        inputs = dict(
+            input_ids=torch.randint(15, size=(2, 3), dtype=torch.int64),
+            attention_mask=torch.randint(1, size=(2, 33), dtype=torch.int64),
+            position_ids=torch.arange(3, dtype=torch.int64),
+            past_key_values=make_dynamic_cache(
+                [(torch.randn(bsize, nheads, slen, dim),
+                  torch.randn(bsize, nheads, slen, dim))]
+            ),
+        )
+        ds = all_dynamic_shape_from_inputs(inputs)
+        pprint.pprint(ds)
+    """
+    if isinstance(dim_prefix, str):
+        prefixes: Set[str] = set()
+        def tensor_to_shape(tensor):
+            n = len(prefixes)
+            p = f"{dim_prefix}_{n}"
+            prefixes.add(p)
+            return {i: f"{p}_{i}" for i in range(tensor.ndim)}
+    else:
+        def tensor_to_shape(tensor):
+            return {i: dim_prefix for i in range(tensor.ndim)}  # noqa: C420
+    return flatten_unflatten_for_dynamic_shapes(
+        inputs, change_function=tensor_to_shape, use_dict=True
+    )
+def guess_dynamic_shapes_from_inputs(
+    inputs: List[Any], auto: Union[bool, str] = False
+) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
+    """
+    Guesses which dimension is dimension from a set of inputs.
+    Every dimension having different values over multiple sets
+    of inputs. Every dimension not changing remains static.
+    :param inputs: a list of input sets
+    :param auto: True for ``torch.export.Dim.AUTO``,
+        False for ``torch.export.Dim.DYNAMIC``,
+        a string to get a unique string for every dynamic dimension
+    :return: args and kwargs
+    .. runpython::
+        :showcode:
+        import pprint
+        import torch
+        from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
+        from onnx_diagnostic.export.shape_helper import guess_dynamic_shapes_from_inputs
+        bsize, nheads, slen, dim = 2, 1, 30, 96
+        inputs1 = dict(
+            input_ids=torch.randint(15, size=(2, 3), dtype=torch.int64),
+            attention_mask=torch.randint(1, size=(2, 33), dtype=torch.int64),
+            position_ids=torch.arange(3, dtype=torch.int64),
+            past_key_values=make_dynamic_cache(
+                [
+                    (
+                        torch.randn(bsize, nheads, slen, dim),
+                        torch.randn(bsize, nheads, slen, dim),
+                    ),
+                ]
+            ),
+        )
+        bsize, nheads, slen, dim = 3, 1, 33, 96
+        inputs2 = dict(
+            input_ids=torch.randint(15, size=(3, 4), dtype=torch.int64),
+            attention_mask=torch.randint(1, size=(3, 34), dtype=torch.int64),
+            position_ids=torch.arange(4, dtype=torch.int64),
+            past_key_values=make_dynamic_cache(
+                [
+                    (
+                        torch.randn(bsize, nheads, slen, dim),
+                        torch.randn(bsize, nheads, slen, dim),
+                    ),
+                ]
+            ),
+        )
+        ds = guess_dynamic_shapes_from_inputs([inputs1, inputs2], auto="d")
+        pprint.pprint(ds)
+    This function returns something equivalent to function
+    :class:`torch.export.dynamic_shapes.AdditionalInputs` but this
+    one needs a model.
+    .. runpython::
+        :showcode:
+        import pprint
+        import torch
+        from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
+        from onnx_diagnostic.export.shape_helper import guess_dynamic_shapes_from_inputs
+        from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
+        data = get_untrained_model_with_inputs("arnir0/Tiny-LLM", add_second_input=True)
+        ds = torch.export.dynamic_shapes.AdditionalInputs()
+        ds.add((), data["inputs"])
+        ds.add((), data["inputs2"])
+        pprint.pprint(ds.dynamic_shapes(data["model"], (), data["inputs"]))
+    """
+    mi = ModelInputs(None, inputs)
+    return mi.guess_dynamic_shapes(auto=auto)

onnx_diagnostic/ext_test_case.py CHANGED Viewed

@@ -756,6 +756,18 @@ class ExtTestCase(unittest.TestCase):
         "Adds a todo printed when all test are run."
         cls._todos.append((f, msg))
+    @classmethod
+    def ort(cls):
+        import onnxruntime
+        return onnxruntime
+    @classmethod
+    def to_onnx(self, *args, **kwargs):
+        from experimental_experiment.torch_interpreter import to_onnx
+        return to_onnx(*args, **kwargs)
     def print_model(self, model: "ModelProto"):  # noqa: F821
         "Prints a ModelProto"
         from onnx_diagnostic.helpers.onnx_helper import pretty_onnx
@@ -917,6 +929,15 @@ class ExtTestCase(unittest.TestCase):
             ]
             raise AssertionError("\n".join(rows))  # noqa: B904
+    def assertEqualDataFrame(self, d1, d2, **kwargs):
+        """
+        Checks that two dataframes are equal.
+        Calls :func:`pandas.testing.assert_frame_equal`.
+        """
+        from pandas.testing import assert_frame_equal
+        assert_frame_equal(d1, d2, **kwargs)
     def assertEqualTrue(self, value: Any, msg: str = ""):
         if value is True:
             return
@@ -967,6 +988,16 @@ class ExtTestCase(unittest.TestCase):
                 atol=atol,
                 rtol=rtol,
             )
+        elif expected.__class__.__name__ == "StaticCache":
+            self.assertEqual(type(expected), type(value), msg=msg)
+            self.assertEqual(expected.max_cache_len, value.max_cache_len)
+            atts = ["key_cache", "value_cache"]
+            self.assertEqualAny(
+                {k: expected.__dict__.get(k, None) for k in atts},
+                {k: value.__dict__.get(k, None) for k in atts},
+                atol=atol,
+                rtol=rtol,
+            )
         elif expected.__class__.__name__ == "EncoderDecoderCache":
             self.assertEqual(type(expected), type(value), msg=msg)
             atts = ["self_attention_cache", "cross_attention_cache"]

onnx_diagnostic/helpers/cache_helper.py CHANGED Viewed

@@ -1,11 +1,15 @@
-from typing import Any, List, Tuple
+from typing import Any, Callable, List, Optional, Tuple
 import packaging.version as pv
 import torch
 import transformers
 import transformers.cache_utils
-def flatten_unflatten_for_dynamic_shapes(obj: Any, use_dict: bool = False) -> Any:
+def flatten_unflatten_for_dynamic_shapes(
+    obj: Any,
+    use_dict: bool = False,
+    change_function: Optional[Callable[[torch.Tensor], Any]] = None,
+) -> Any:
     """
     Returns the object in a different structure similar to what
     the definition of the dynamic shapes should use.
@@ -15,11 +19,13 @@ def flatten_unflatten_for_dynamic_shapes(obj: Any, use_dict: bool = False) -> An
         :func:`torch.export.export` only considers the values,
         the context gives the dictionary keys but it is not expressed
         in the dynamic shapes, these specifications seems to be different
-        for the strict and non strict mode.
+        for the strict and non strict mode. It also preserves tuple.
+    :param change_function: to modifies the tensor in the structure itself,
+        like replace them by a shape
     :return: the serialized object
     """
     if isinstance(obj, torch.Tensor):
-        return obj
+        return change_function(obj) if change_function else obj
     flat, spec = torch.utils._pytree.tree_flatten(obj)
     start = 0
     end = 0
@@ -27,12 +33,17 @@ def flatten_unflatten_for_dynamic_shapes(obj: Any, use_dict: bool = False) -> An
     for subspec in spec.children_specs:
         end += subspec.num_leaves
         value = subspec.unflatten(flat[start:end])
-        value = flatten_unflatten_for_dynamic_shapes(value, use_dict=use_dict)
+        value = flatten_unflatten_for_dynamic_shapes(
+            value, use_dict=use_dict, change_function=change_function
+        )
         subtrees.append(value)
         start = end
-    if use_dict and (spec.type is dict or spec.context):
-        # This a dictionary.
-        return dict(zip(spec.context, subtrees))
+    if use_dict:
+        if spec.type is dict or spec.context:
+            # This a dictionary.
+            return dict(zip(spec.context, subtrees))
+        if spec.type is tuple:
+            return tuple(subtrees)
     # This is a list.
     return subtrees
@@ -143,10 +154,12 @@ else:
 def make_static_cache(
     key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
+    max_cache_len: Optional[int] = None,
 ) -> transformers.cache_utils.DynamicCache:
     """
     Creates an instance of :class:`transformers.cache_utils.StaticCache`.
     :param key_value_pairs: list of pairs of (key, values)
+    :param max_cache_len: max_cache_length or something inferred from the vector
     :return: :class:`transformers.cache_utils.StaticCache`
     Example:
@@ -168,7 +181,8 @@ def make_static_cache(
                     torch.randn(bsize, nheads, slen, dim),
                 )
                 for i in range(n_layers)
-            ]
+            ],
+            max_cache_len=10,
         )
         print(string_type(past_key_values, with_shape=True))
     """
@@ -179,24 +193,32 @@ def make_static_cache(
             self.num_attention_heads = key_value_pairs[0][0].shape[1]
             self.num_hidden_layers = len(key_value_pairs)
+    assert max_cache_len is not None, (
+        f"max_cache_len={max_cache_len} cannot be setup "
+        f"automatically yet from shape {key_value_pairs[0][0].shape}"
+    )
+    torch._check(
+        max_cache_len >= key_value_pairs[0][0].shape[2],
+        (
+            f"max_cache_len={max_cache_len} cannot be smaller "
+            f"shape[2]={key_value_pairs[0][0].shape[2]} in shape "
+            f"{key_value_pairs[0][0].shape}"
+        ),
+    )
     cache = transformers.cache_utils.StaticCache(
         _config(),
         max_batch_size=key_value_pairs[0][0].shape[0],
         device=key_value_pairs[0][0].device,
         dtype=key_value_pairs[0][0].dtype,
-        max_cache_len=key_value_pairs[0][0].shape[2],
+        max_cache_len=max_cache_len,
     )
     for i in range(len(key_value_pairs)):
-        assert cache.key_cache[i].shape == key_value_pairs[i][0].shape, (
-            f"Shape mismatch, expected {cache.key_cache[i].shape}, "
-            f"got {key_value_pairs[i][0].shape}"
-        )
-        cache.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
-        assert cache.value_cache[i].shape == key_value_pairs[i][1].shape, (
-            f"Shape mismatch, expected {cache.value_cache[i].shape}, "
-            f"got {key_value_pairs[i][1].shape}"
-        )
-        cache.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
+        assert (
+            key_value_pairs[i][0].shape == key_value_pairs[i][1].shape
+        ), f"Shape mismatch {key_value_pairs[i][0].shape} != {key_value_pairs[i][1].shape}"
+        d = key_value_pairs[i][1].shape[2]
+        cache.key_cache[i][:, :, :d, :] = key_value_pairs[i][0]
+        cache.value_cache[i][:, :, :d, :] = key_value_pairs[i][1]
     return cache

onnx-diagnostic 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

onnx-diagnostic 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl