PyPI - onnx-diagnostic - Versions diffs - 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl - Mend

onnx-diagnostic 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

onnx_diagnostic/__init__.py CHANGED Viewed

@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
-__version__ = "0.7.14"
+__version__ = "0.7.16"
 __author__ = "Xavier Dupré"

onnx_diagnostic/_command_lines_parser.py CHANGED Viewed

@@ -371,30 +371,34 @@ class _BoolOrParseDictPatch(argparse.Action):
         setattr(namespace, self.dest, d)
-def get_parser_validate() -> ArgumentParser:
+def get_parser_validate(name: str = "validate") -> ArgumentParser:
     parser = ArgumentParser(
-        prog="validate",
+        prog=name,
         description=textwrap.dedent(
             """
-            Prints out dummy inputs for a particular task or a model id.
-            If both mid and task are empty, the command line displays the list
-            of supported tasks.
+            Validates a model for a particular task given the model id.
+            It exports the model and then validates it by computing the discrepancies
+            on different input sets.
+            """
+            if name == "validate"
+            else """
+            Creates a script to export  a model for a particular task given the model id.
             """
         ),
         epilog=textwrap.dedent(
-            """
+            f"""
             If the model id is specified, one untrained version of it is instantiated.
             Examples:
-            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+            python -m onnx_diagnostic {name} -m microsoft/Phi-4-mini-reasoning \\
                 --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
                 --dtype float16 --device cuda --patch --export onnx-dynamo --opt ir
-            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+            python -m onnx_diagnostic {name} -m microsoft/Phi-4-mini-reasoning \\
                 --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
                 --dtype float16 --device cuda --patch --export custom --opt default
-            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+            python -m onnx_diagnostic {name} -m microsoft/Phi-4-mini-reasoning \\
                 --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
                 --dtype float16 --device cuda --export modelbuilder
@@ -405,12 +409,12 @@ def get_parser_validate() -> ArgumentParser:
             The behaviour may be modified compare the original configuration,
             the following argument can be rope_scaling to dynamic:
-                --mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\""
+                --mop \"rope_scaling={{'rope_type': 'dynamic', 'factor': 10.0}}\""
             You can profile the command line by running:
-                pyinstrument -m onnx_diagnostic validate ...
-                pyinstrument -r html -o profile.html -m onnx_diagnostic validate ...
+                pyinstrument -m onnx_diagnostic {name} ...
+                pyinstrument -r html -o profile.html -m onnx_diagnostic {name} ...
             """
         ),
         formatter_class=RawTextHelpFormatter,
@@ -460,19 +464,19 @@ def get_parser_validate() -> ArgumentParser:
         "--same-as-trained",
         default=False,
         action=BooleanOptionalAction,
-        help="Validates a model identical to the trained model but not trained.",
+        help="Validates or exports a model identical to the trained model but not trained.",
     )
     parser.add_argument(
         "--trained",
         default=False,
         action=BooleanOptionalAction,
-        help="Validates the trained model (requires downloading).",
+        help="Validates or exports the trained model (requires downloading).",
     )
     parser.add_argument(
         "--inputs2",
         default=1,
         type=int,
-        help="Validates the model on a second set of inputs\n"
+        help="Validates or exports the model on a second set of inputs\n"
         "to check the exported model supports dynamism. The values is used "
         "as an increment to the first set of inputs. A high value may trick "
         "a different behavior in the model and missed by the exporter.",
@@ -504,13 +508,14 @@ def get_parser_validate() -> ArgumentParser:
         "--subfolder",
         help="Subfolder where to find the model and the configuration.",
     )
-    parser.add_argument(
-        "--ortfusiontype",
-        required=False,
-        help="Applies onnxruntime fusion, this parameter should contain the\n"
-        "model type or multiple values separated by `|`. `ALL` can be used\n"
-        "to run them all.",
-    )
+    if name == "validate":
+        parser.add_argument(
+            "--ortfusiontype",
+            required=False,
+            help="Applies onnxruntime fusion, this parameter should contain the\n"
+            "model type or multiple values separated by `|`. `ALL` can be used\n"
+            "to run them all.",
+        )
     parser.add_argument("-v", "--verbose", default=0, type=int, help="verbosity")
     parser.add_argument("--dtype", help="Changes dtype if necessary.")
     parser.add_argument("--device", help="Changes the device if necessary.")
@@ -532,27 +537,38 @@ def get_parser_validate() -> ArgumentParser:
         "--mop \"rope_scaling={'rope_type': 'dynamic', 'factor': 10.0}\"",
         action=_ParseDict,
     )
-    parser.add_argument(
-        "--repeat",
-        default=1,
-        type=int,
-        help="number of times to run the model to measures inference time",
-    )
-    parser.add_argument(
-        "--warmup", default=0, type=int, help="number of times to run the model to do warmup"
-    )
+    if name == "validate":
+        parser.add_argument(
+            "--repeat",
+            default=1,
+            type=int,
+            help="number of times to run the model to measures inference time",
+        )
+        parser.add_argument(
+            "--warmup",
+            default=0,
+            type=int,
+            help="number of times to run the model to do warmup",
+        )
     parser.add_argument(
         "--outnames",
         help="This comma separated list defines the output names "
         "the onnx exporter should use.",
         default="",
     )
-    parser.add_argument(
-        "--ort-logs",
-        default=False,
-        action=BooleanOptionalAction,
-        help="Enables onnxruntime logging when the session is created",
-    )
+    if name == "validate":
+        parser.add_argument(
+            "--ort-logs",
+            default=False,
+            action=BooleanOptionalAction,
+            help="Enables onnxruntime logging when the session is created",
+        )
+        parser.add_argument(
+            "--quiet-input-sets",
+            default="",
+            help="Avoids raising an exception when an input sets does not work with "
+            "the exported model.\nExample: --quiet-input-sets=inputs,inputs22",
+        )
     return parser
@@ -614,6 +630,7 @@ def _cmd_validate(argv: List[Any]):
             warmup=args.warmup,
             inputs2=args.inputs2,
             ort_logs=args.ort_logs,
+            quiet_input_sets=set(args.quiet_input_sets.split(",")),
             output_names=(
                 None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
             ),
@@ -624,6 +641,94 @@ def _cmd_validate(argv: List[Any]):
             print(f":{k},{v};")
+def _cmd_export_sample(argv: List[Any]):
+    from .helpers import string_type
+    from .torch_models.validate import get_inputs_for_task, _make_folder_name
+    from .torch_models.code_sample import code_sample
+    from .tasks import supported_tasks
+    parser = get_parser_validate("exportsample")
+    args = parser.parse_args(argv[1:])
+    if not args.task and not args.mid:
+        print("-- list of supported tasks:")
+        print("\n".join(supported_tasks()))
+    elif not args.mid:
+        data = get_inputs_for_task(args.task)
+        if args.verbose:
+            print(f"task: {args.task}")
+        max_length = max(len(k) for k in data["inputs"]) + 1
+        print("-- inputs")
+        for k, v in data["inputs"].items():
+            print(f"  + {k.ljust(max_length)}: {string_type(v, with_shape=True)}")
+        print("-- dynamic_shapes")
+        for k, v in data["dynamic_shapes"].items():
+            print(f"  + {k.ljust(max_length)}: {string_type(v)}")
+    else:
+        # Let's skip any invalid combination if known to be unsupported
+        if (
+            "onnx" not in (args.export or "")
+            and "custom" not in (args.export or "")
+            and (args.opt or "")
+        ):
+            print(f"code-sample - unsupported args: export={args.export!r}, opt={args.opt!r}")
+            return
+        patch_dict = args.patch if isinstance(args.patch, dict) else {"patch": args.patch}
+        code = code_sample(
+            model_id=args.mid,
+            task=args.task,
+            do_run=args.run,
+            verbose=args.verbose,
+            quiet=args.quiet,
+            same_as_pretrained=args.same_as_trained,
+            use_pretrained=args.trained,
+            dtype=args.dtype,
+            device=args.device,
+            patch=patch_dict,
+            rewrite=args.rewrite and patch_dict.get("patch", True),
+            stop_if_static=args.stop_if_static,
+            optimization=args.opt,
+            exporter=args.export,
+            dump_folder=args.dump_folder,
+            drop_inputs=None if not args.drop else args.drop.split(","),
+            input_options=args.iop,
+            model_options=args.mop,
+            subfolder=args.subfolder,
+            opset=args.opset,
+            runtime=args.runtime,
+            output_names=(
+                None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
+            ),
+        )
+        if args.dump_folder:
+            os.makedirs(args.dump_folder, exist_ok=True)
+            name = (
+                _make_folder_name(
+                    model_id=args.mid,
+                    exporter=args.export,
+                    optimization=args.opt,
+                    dtype=args.dtype,
+                    device=args.device,
+                    subfolder=args.subfolder,
+                    opset=args.opset,
+                    drop_inputs=None if not args.drop else args.drop.split(","),
+                    same_as_pretrained=args.same_as_trained,
+                    use_pretrained=args.trained,
+                    task=args.task,
+                ).replace("/", "-")
+                + ".py"
+            )
+            fullname = os.path.join(args.dump_folder, name)
+            if args.verbose:
+                print(f"-- prints code in {fullname!r}")
+            print("--")
+            with open(fullname, "w") as f:
+                f.write(code)
+            if args.verbose:
+                print("-- done")
+        else:
+            print(code)
 def get_parser_stats() -> ArgumentParser:
     parser = ArgumentParser(
         prog="stats",
@@ -834,7 +939,7 @@ def get_parser_agg() -> ArgumentParser:
         "n_model_pass,n_model_faster,"
         "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
         "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
-        "n_node_layer_normalization,n_node_layer_normalization23,"
+        "n_node_gqa,n_node_layer_normalization,n_node_layer_normalization23,"
         "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
         "n_node_constant,n_node_shape,n_node_expand,"
         "n_node_function,n_node_initializer,n_node_scatter,"
@@ -953,14 +1058,15 @@ def get_main_parser() -> ArgumentParser:
             Type 'python -m onnx_diagnostic <cmd> --help'
             to get help for a specific command.
-            agg        - aggregates statistics from multiple files
-            config     - prints a configuration for a model id
-            find       - find node consuming or producing a result
-            lighten    - makes an onnx model lighter by removing the weights,
-            print      - prints the model on standard output
-            stats      - produces statistics on a model
-            unlighten  - restores an onnx model produces by the previous experiment
-            validate   - validate a model
+            agg          - aggregates statistics from multiple files
+            config       - prints a configuration for a model id
+            exportsample - produces a code to export a model
+            find         - find node consuming or producing a result
+            lighten      - makes an onnx model lighter by removing the weights,
+            print        - prints the model on standard output
+            stats        - produces statistics on a model
+            unlighten    - restores an onnx model produces by the previous experiment
+            validate     - validate a model
             """
         ),
     )
@@ -969,6 +1075,7 @@ def get_main_parser() -> ArgumentParser:
         choices=[
             "agg",
             "config",
+            "exportsample",
             "find",
             "lighten",
             "print",
@@ -991,6 +1098,7 @@ def main(argv: Optional[List[Any]] = None):
         validate=_cmd_validate,
         stats=_cmd_stats,
         agg=_cmd_agg,
+        exportsample=_cmd_export_sample,
     )
     if argv is None:
@@ -1013,13 +1121,14 @@ def main(argv: Optional[List[Any]] = None):
                 validate=get_parser_validate,
                 stats=get_parser_stats,
                 agg=get_parser_agg,
+                exportsample=lambda: get_parser_validate("exportsample"),  # type: ignore[operator]
             )
             cmd = argv[0]
             if cmd not in parsers:
                 raise ValueError(
                     f"Unknown command {cmd!r}, it should be in {list(sorted(parsers))}."
                 )
-            parser = parsers[cmd]()
+            parser = parsers[cmd]()  # type: ignore[operator]
             parser.parse_args(argv[1:])
         raise RuntimeError("The programme should have exited before.")

onnx_diagnostic/export/dynamic_shapes.py CHANGED Viewed

@@ -8,17 +8,17 @@ from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
 DYNAMIC_SHAPES = Tuple[Tuple[Any, ...], Dict[str, Any]]
-def flatten_dynamic_shapes(ds: Any) -> Any:
+def _flatten_dynamic_shapes(ds: Any) -> Any:
     """Flattens the dynamic shapes."""
     if isinstance(ds, list):
-        return _flat_list([flatten_dynamic_shapes(t) for t in ds])
+        return _flat_list([_flatten_dynamic_shapes(t) for t in ds])
     if isinstance(ds, tuple):
-        return tuple(_flat_list([flatten_dynamic_shapes(t) for t in ds]))
+        return tuple(_flat_list([_flatten_dynamic_shapes(t) for t in ds]))
     if isinstance(ds, dict):
         if all(isinstance(i, int) for i in ds):
             # That's a dynamic shape
             return ds
-        return _flat_list([flatten_dynamic_shapes(t) for t in ds.values()])
+        return _flat_list([_flatten_dynamic_shapes(t) for t in ds.values()])
     raise AssertionError(f"Not implemented for {type(ds)}: {ds}")
@@ -226,7 +226,7 @@ class CoupleInputsDynamicShapes:
         for i, d in enumerate(inputs.shape):
             if i in ds and not isinstance(ds[i], int):
                 # dynamic then
-                if d in {0, 1}:
+                if isinstance(d, int) and d in {0, 1}:
                     # export issues for sure
                     issues[i] = f"d=[{d}]"
         return issues if issues else None
@@ -380,7 +380,7 @@ class CoupleInputsDynamicShapes:
         flat, spec = torch.utils._pytree.tree_flatten(inputs)
         if all(isinstance(t, torch.Tensor) for t in flat):
             # We need to flatten dynamic shapes as well
-            ds = flatten_dynamic_shapes(ds)
+            ds = _flatten_dynamic_shapes(ds)
         res = cls._generic_walker_step(
             processor, flat, ds, flatten_unflatten=flatten_unflatten
         )

onnx_diagnostic/export/shape_helper.py CHANGED Viewed

@@ -1,9 +1,10 @@
-from typing import Any, Dict, List, Set, Tuple, Union
+from typing import Any, Dict, List, Set, Optional, Tuple, Union
 from ..helpers.cache_helper import flatten_unflatten_for_dynamic_shapes
+from ..helpers.fake_tensor_helper import fake_reshape
 from .dynamic_shapes import ModelInputs
-def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
+def all_dynamic_shapes_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
     """
     Returns the dynamic shapes for the given inputs.
     All dimensions are considered as dynamic.
@@ -18,7 +19,7 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
         import pprint
         import torch
         from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
-        from onnx_diagnostic.export.shape_helper import all_dynamic_shape_from_inputs
+        from onnx_diagnostic.export.shape_helper import all_dynamic_shapes_from_inputs
         from onnx_diagnostic.torch_export_patches import torch_export_patches
         bsize, nheads, slen, dim = 2, 1, 30, 96
@@ -32,7 +33,7 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
             ),
         )
         with torch_export_patches(patch_transformers=True):
-            ds = all_dynamic_shape_from_inputs(inputs)
+            ds = all_dynamic_shapes_from_inputs(inputs)
         pprint.pprint(ds)
     For this function to work, patches must be enabled if :epkg:`transformers`
@@ -50,7 +51,7 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
             make_sliding_window_cache,
             make_static_cache,
         )
-        from onnx_diagnostic.export.shape_helper import all_dynamic_shape_from_inputs
+        from onnx_diagnostic.export.shape_helper import all_dynamic_shapes_from_inputs
         from onnx_diagnostic.torch_export_patches import torch_export_patches
         caches = [
@@ -104,7 +105,7 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
         with torch_export_patches(patch_transformers=True):
             for cache in caches:
                 print(f"-- {cache.__class__.__name__}")
-                pprint.pprint(all_dynamic_shape_from_inputs(cache))
+                pprint.pprint(all_dynamic_shapes_from_inputs(cache))
     """
     if isinstance(dim_prefix, str):
         prefixes: Set[str] = set()
@@ -199,3 +200,120 @@ def guess_dynamic_shapes_from_inputs(
     """
     mi = ModelInputs(None, inputs)
     return mi.guess_dynamic_shapes(auto=auto)
+def make_fake_with_dynamic_dimensions(
+    x: Any,
+    dynamic_shapes: Any,
+    fake_mode: Optional["FakeTensorMode"] = None,  # noqa: F821
+) -> Tuple[Any, "FakeTensorMode"]:  # noqa: F821
+    """
+    Replaces all tensors by fake tensor respecting the same
+    constraints as the following dynamic shapes.
+    This uses function :func:`onnx_diagnostic.helpers.fake_tensor_helper.make_fake`.
+    .. runpython::
+        :showcode:
+        import pprint
+        import torch
+        from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
+        from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions
+        inputs, _ = make_fake_with_dynamic_dimensions(
+            dict(
+                input_ids=torch.randint(30360, size=(2, 3), dtype=torch.int64),
+                attention_mask=torch.randint(1, size=(2, 33), dtype=torch.int64),
+                position_ids=torch.randint(32, size=(2, 3), dtype=torch.int64),
+                past_key_values=make_dynamic_cache(
+                    [
+                        (
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                        ),
+                        (
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                        ),
+                    ]
+                ),
+            ),
+            dynamic_shapes={
+                "input_ids": {0: "batch", 1: "seq_length"},
+                "attention_mask": {0: "batch", 1: "cache+seq"},
+                "position_ids": {0: "batch", 1: "seq_length"},
+                "past_key_values": [
+                    [{0: "batch", 2: "cache_length"}, {0: "batch", 2: "cache_length"}],
+                    [{0: "batch", 2: "cache_length"}, {0: "batch", 2: "cache_length"}],
+                ],
+            },
+        )
+        pprint.pprint(inputs)
+    """
+    if x is None:
+        return None, None
+    if fake_mode is None:
+        from torch.fx.experimental.symbolic_shapes import ShapeEnv
+        from torch._subclasses.fake_tensor import FakeTensorMode
+        shape_env = ShapeEnv()
+        fake_mode = FakeTensorMode(shape_env=shape_env)
+    if isinstance(x, (list, tuple)):
+        return (
+            x.__class__(
+                [
+                    make_fake_with_dynamic_dimensions(
+                        i, fake_mode=fake_mode, dynamic_shapes=ds
+                    )[0]
+                    for i, ds in zip(x, dynamic_shapes)
+                ]
+            ),
+            fake_mode,
+        )
+    if isinstance(x, dict):
+        return {
+            k: make_fake_with_dynamic_dimensions(
+                v, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[k]
+            )[0]
+            for k, v in x.items()
+        }, fake_mode
+    if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
+        assert hasattr(x, "layers"), (
+            f"Une more recent version of transformers (>=4.55), "
+            f"'layers' not found in class {type(x)}"
+        )
+        assert (
+            isinstance(dynamic_shapes, list) and len(dynamic_shapes) == 2
+        ), f"Unexpected dynamic_shapes={dynamic_shapes} for a DynamicCache"
+        for il, layer in enumerate(x.layers):
+            assert hasattr(layer, "keys") and hasattr(layer, "values"), (
+                f"Une more recent version of transformers (>=4.55), 'layers' "
+                f"not found in class {type(layer)} ({dir(layer)})"
+            )
+            layer.keys = make_fake_with_dynamic_dimensions(
+                layer.keys, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[0][il]
+            )[0]
+            layer.values = make_fake_with_dynamic_dimensions(
+                layer.values, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[1][il]
+            )[0]
+        return x, fake_mode
+    if x.__class__.__name__ == "EncoderDecoderCache":
+        make_fake_with_dynamic_dimensions(
+            x.self_attention_cache, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[0]
+        )
+        make_fake_with_dynamic_dimensions(
+            x.cross_attention_cache, fake_mode=fake_mode, dynamic_shapes=dynamic_shapes[1]
+        )
+        return x, fake_mode
+    if hasattr(x, "shape"):
+        t = fake_reshape(x, dynamic_shapes, fake_mode=fake_mode)
+        assert t.device == x.device, f"device mismatch {x.device} -> {t.device}"
+        assert t.dtype == x.dtype, f"dtype mismatch {x.dtype} -> {t.dtype}"
+        return t, fake_mode
+    from ..helpers import string_type
+    raise TypeError(
+        f"Unexpected type {type(x)} for x, content is {string_type(x, with_shape=True)}"
+    )

onnx_diagnostic/ext_test_case.py CHANGED Viewed

@@ -979,7 +979,11 @@ class ExtTestCase(unittest.TestCase):
             else:
                 for e, g in zip(expected, value):
                     self.assertEqualAny(e, g, msg=msg, atol=atol, rtol=rtol)
-        elif expected.__class__.__name__ in ("DynamicCache", "SlidingWindowCache"):
+        elif expected.__class__.__name__ in (
+            "DynamicCache",
+            "SlidingWindowCache",
+            "HybridCache",
+        ):
             self.assertEqual(type(expected), type(value), msg=msg)
             atts = ["key_cache", "value_cache"]
             self.assertEqualAny(

onnx-diagnostic 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

onnx-diagnostic 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl