onnx-diagnostic 0.8.6__py3-none-any.whl → 0.8.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +108 -3
- onnx_diagnostic/ci_models/ci_helpers.py +12 -7
- onnx_diagnostic/ci_models/export_phi4_mm.py +1062 -0
- onnx_diagnostic/ci_models/export_qwen25_vl.py +12 -4
- onnx_diagnostic/export/api.py +1 -0
- onnx_diagnostic/export/cf_simple_loop_for.py +195 -10
- onnx_diagnostic/ext_test_case.py +9 -2
- onnx_diagnostic/helpers/bench_run.py +1 -1
- onnx_diagnostic/helpers/log_helper.py +1 -3
- onnx_diagnostic/helpers/optim_helper.py +116 -0
- onnx_diagnostic/tasks/image_text_to_text.py +15 -5
- onnx_diagnostic/tasks/text2text_generation.py +84 -48
- onnx_diagnostic/tasks/text_generation.py +3 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +28 -2
- onnx_diagnostic/torch_export_patches/patch_expressions.py +4 -1
- onnx_diagnostic/torch_export_patches/patch_module.py +31 -23
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_funnel.py +80 -0
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +12 -1
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +15 -0
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +22 -24
- onnx_diagnostic/torch_models/hghub/hub_api.py +11 -0
- onnx_diagnostic/torch_models/hghub/hub_data.py +9 -1
- onnx_diagnostic/torch_models/hghub/model_inputs.py +24 -19
- {onnx_diagnostic-0.8.6.dist-info → onnx_diagnostic-0.8.7.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.6.dist-info → onnx_diagnostic-0.8.7.dist-info}/RECORD +29 -26
- {onnx_diagnostic-0.8.6.dist-info → onnx_diagnostic-0.8.7.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.6.dist-info → onnx_diagnostic-0.8.7.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.6.dist-info → onnx_diagnostic-0.8.7.dist-info}/top_level.txt +0 -0
onnx_diagnostic/ci_models/export_qwen25_vl.py
CHANGED

@@ -8,10 +8,10 @@ Requirements
 ::
 
     git+https://github.com/sdpython/experimental-experiment.git # optional
-    huggingface_hub
+    huggingface_hub
    onnx-diagnostic>=0.8.6
     onnxruntime>=1.23
-    torch>=2.
+    torch>=2.10 # weekly is better
     tqdm
     transformers>=4.57
 
@@ -59,6 +59,7 @@ It is possible to overwrite this by setting environment variable
 import os
 import sys
 import time
+import warnings
 from typing import Any, Dict, List, Tuple
 from .ci_helpers import (
     check_for_discrepancies_and_log_everything_into_a_json_file,
@@ -97,7 +98,6 @@ def get_untrained_model(model_id: str, second_input: bool, verbose: int) -> Dict
         },
         # "_attn_implementation": "flash_attention_2",
         "_attn_implementation": "sdpa",
-        "dtype": "float16",
     }
 
     config_reduction = _config_reduction
@@ -281,6 +281,10 @@ def main(
         ).eval()
         data = dict(model=model)
         config = model.config
+        if not hasattr(config, "bos_token_id") or not config.bos_token_id:
+            config.bos_token_id = 151643
+        if not hasattr(config, "eos_token_id") or not config.eos_token_id:
+            config.eos_token_id = 151645
     else:
         print("-- random model")
         data = get_untrained_model(model_id, second_input=second_input, verbose=1)
@@ -298,7 +302,11 @@ def main(
     print(f"-- config._attn_implementation={model.config._attn_implementation}")
     print(f"-- model.dtype={model.dtype}")
     print(f"-- model.device={model.device}")
-
+    try:
+        processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
+    except OSError as e:
+        warnings.warn(f"Unable to access internet due to {e!r}", ResourceWarning, stacklevel=0)
+        return
     print(f"-- processor={type(processor)}")
 
     export_inputs, other_inputs = None, None
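The bos/eos fallback added above hard-codes the well-known Qwen special token
ids. A small hypothetical repro of that guard (``PretrainedConfig`` stands in
for any config missing the attributes; the ids are the documented Qwen ones)::

    from transformers import PretrainedConfig

    config = PretrainedConfig()
    if not getattr(config, "bos_token_id", None):
        config.bos_token_id = 151643  # Qwen "<|endoftext|>"
    if not getattr(config, "eos_token_id", None):
        config.eos_token_id = 151645  # Qwen "<|im_end|>"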
onnx_diagnostic/export/cf_simple_loop_for.py
CHANGED

@@ -11,6 +11,7 @@ from torch._higher_order_ops.utils import (
     unique_graph_id,
     validate_subgraph_args_types,
 )
+import torch._dynamo.variables.higher_order_ops as hop
 from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode, track_tensor_tree
 from torch.utils._python_dispatch import _get_current_dispatch_mode
 
@@ -97,14 +98,18 @@ def _simple_loop_for_fn(
                 f"Unexpected number of results {len(r)} for function {body_fn}, "
                 f"expected {len(res[-1])}"
             )
+            assert all(isinstance(t, torch.Tensor) for t in r), (
+                f"Unexpected type {[type(_) for _ in r]} returned by function {body_fn}, "
+                f"it must be a tuple of Tensor or a Tensor."
+            )
             res.append(r)
         else:
             assert isinstance(r, torch.Tensor), (
-                f"Unexpected type {r}
-                f"it must be a tuple or a Tensor."
+                f"Unexpected type {type(r)} coming from function {body_fn}, "
+                f"it must be a tuple of Tensor or a Tensor."
             )
             assert not res or len(res[-1]) == 1, (
-                f"Unexpected number of results {len(r)}
+                f"Unexpected number of results {len(r)} coming from function {body_fn}, "
                 f"expected {len(res[-1])}"
             )
             res.append((r,))
@@ -126,8 +131,6 @@ def _simple_loop_for_fn(
     )
 
 
-# from torch._functorch.utils import exposed_in
-# @exposed_in("torch")
 def _simple_loop_for(
     n_iter: Union[int, torch.Tensor],
     body_fn: Callable,
@@ -159,7 +162,7 @@ def _simple_loop_for(
 
     if torch.compiler.is_dynamo_compiling():
         return simple_loop_for_op(
-            n_iter, body_fn,
+            n_iter, body_fn, operands, concatenation_dims=concatenation_dims
         )
 
     if isinstance(n_iter, (bool, int, float)):
@@ -181,8 +184,10 @@ def _simple_loop_for(
 
     with setup_compilation_env() as _backend:
         return _loop_for_op_wrapper(n_iter, body_fn, operands, concatenation_dims)
-        #
-        #
+        # This is needed to support a function body that uses module weights or
+        # is defined as a class method. This is yet to be implemented.
+        # cpl = torch.compile(_loop_for_op_wrapper, backend=_backend, fullgraph=True)
+        # return cpl(n_iter, body_fn, operands, concatenation_dims)
 
 
 def trace_simple_loop_for(
@@ -236,9 +241,15 @@ def loop_for_op_dense(n_iter, body_fn, operands, concatenation_dims=None):
     )
     mode = _get_current_dispatch_mode()
     assert mode is None, "Mode should never be enabled for CPU/CUDA key"
-
-
+    is_fake = isinstance(n_iter, torch._subclasses.fake_tensor.FakeTensor)
+    res = _simple_loop_for_fn(n_iter, body_fn, operands, concatenation_dims=concatenation_dims)
+    assert is_fake or not any(
+        isinstance(r, torch._subclasses.fake_tensor.FakeTensor) for r in res
+    ), (
+        f"One result is a fake tensor but the inputs were not, type(n_iter)={type(n_iter)}, "
+        f"operands: {[type(_) for _ in operands]}, res: {[type(_) for _ in res]}"
     )
+    return res
 
 
 @simple_loop_for_op.py_impl(ProxyTorchDispatchMode)
@@ -267,6 +278,180 @@ simple_loop_for_op.fallthrough(torch._C.DispatchKey.AutogradCPU)
 simple_loop_for_op.fallthrough(torch._C.DispatchKey.AutogradCUDA)
 
 
+class SimpleLoopForHigherOrderVariable(hop.TorchHigherOrderOperatorVariable):
+    """
+    Replicates the same pattern found for other higher order operators.
+    This enables recursive compilation and the use of modules inside a function.
+    """
+
+    _HOP_NAME = "simple_loop_for"
+    _ALLOW_FALLBACK_TO_EAGER = False
+    supports_input_mutation = False
+    supports_aliasing = False
+
+    def _call_function(
+        self,
+        tx: torch._dynamo.symbolic_convert.InstructionTranslator,
+        args: list[hop.VariableTracker],
+        kwargs: dict[str, hop.VariableTracker],
+    ) -> hop.VariableTracker:
+        """Main function."""
+        args, kwargs = hop.LazyVariableTracker.realize_all((args, kwargs))
+
+        for i, k in enumerate(["n_iter", "body_fn", "operands", "concatenated_dims"]):
+            if v := kwargs.pop(k, None):
+                assert i == len(args), "did not provide the right number of non-keyword args"
+                args.append(v)
+
+        if len(args) != 4 or kwargs:
+            hop.unimplemented(
+                gb_type="simple_loop_for: improper args/kwargs",
+                context=f"args: {args}, kwargs: {kwargs}",
+                explanation=f"simple_loop_for expects 4 positional arguments (got {len(args)}) "
+                f"and no keyword arguments (got {len(kwargs)})",
+                hints=[*hop.graph_break_hints.USER_ERROR],
+            )
+
+        # unpack the four positional arguments
+        n_iter, body_fn, operands, _concatenated_dims = args
+        assert type(n_iter) is not hop.ConstantVariable, (
+            f"n_iter is a {type(n_iter)}. When n_iter is constant, "
+            f"simple_loop_for unrolls the loop. A SymInt should be used."
+        )
+
+        # predicate
+        if type(n_iter.realize()) not in (
+            hop.ConstantVariable,
+            hop.TensorVariable,
+            hop.SymNodeVariable,
+        ):
+            hop.unimplemented(
+                gb_type="simple_loop_for: improper predicate",
+                context=str(n_iter),
+                explanation=(
+                    f"Expected `n_iter` to be an int or an integer "
+                    f"tensor with a single item "
+                    f"but got {str(type(n_iter))} with original python type "
+                    f"{str(n_iter.python_type())}."
+                ),
+                hints=[*hop.graph_break_hints.USER_ERROR],
+            )
+
+        # operands
+        if not isinstance(operands, (hop.ListVariable, hop.TupleVariable)):
+            hop.unimplemented(
+                gb_type="simple_loop_for: improper operands",
+                context=str(operands),
+                explanation="Expected `operands` to be a list/tuple "
+                f"but got {operands.python_type()}.",
+                hints=[*hop.graph_break_hints.USER_ERROR],
+            )
+
+        operands_seq = operands.unpack_var_sequence(tx)
+        if not hop.only_consist_of(
+            operands, (hop.TensorVariable, hop.ConstantVariable, hop.SymNodeVariable)
+        ):
+            hop.unimplemented(
+                gb_type="simple_loop_for: improper operands contents",
+                context=str(operands),
+                explanation=(
+                    "Expected `operands` to be a list/tuple of pytrees "
+                    "that only consists of tensor leaves."
+                ),
+                hints=[*hop.graph_break_hints.USER_ERROR],
+            )
+
+        # branches
+        hop._check_supported_callable_arg(tx, body_fn, "body_fn")
+
+        def speculate_body():
+            (
+                (ret_val, ret_spec),
+                ret_graph,
+                ret_lifted_freevars,
+            ) = hop.speculate_subgraph(
+                tx,
+                args[1],
+                (args[0], *operands_seq),
+                {},
+                self._HOP_NAME,
+                source_target=self.value,
+                should_flatten_outputs=True,
+                # TODO - removing consts from control flow ops need more work
+                remove_consts_from_outputs=False,
+                supports_input_mutation=self.supports_input_mutation,
+                supports_aliasing=self.supports_aliasing,
+            )
+
+            # need to ensure we increase epoch so we don't memoize unbacked bindings
+            # across different subgraphs which can interfere with runtime assertion
+            # generation.
+            tx.fake_mode.epoch += 1
+
+            if not hop.only_consist_of(ret_val, (hop.TensorVariable, hop.ConstantVariable)):
+                hop.unimplemented(
+                    gb_type="simple_loop_for: unsupported branch return type",
+                    context=str(ret_val),
+                    explanation=(
+                        "Expected branches to return a possibly nested "
+                        "pytree of tensors or constant ints."
+                    ),
+                    hints=[*hop.graph_break_hints.USER_ERROR],
+                )
+            for ret in ret_val.unpack_var_sequence(tx):
+                if ret.is_python_constant() and not isinstance(ret.as_python_constant(), int):
+                    hop.unimplemented(
+                        gb_type=(
+                            "simple_loop_for: unsupported branch return type "
+                            "(constant non-int)"
+                        ),
+                        context=str(ret_val),
+                        explanation="Constants returned from branches must be ints.",
+                        hints=[*hop.graph_break_hints.USER_ERROR],
+                    )
+            return ret_val, ret_spec, ret_graph, ret_lifted_freevars
+
+        body_r, body_spec, body_graph, body_lifted_freevars = speculate_body()
+        body_nn_modules = dict(tx.output.nn_modules)
+
+        same_spec = body_spec.treespec.as_python_constant()
+        if same_spec is not NotImplemented and not same_spec:
+            hop.unimplemented(
+                gb_type="simple_loop_for: differing branch outputs",
+                context=(
+                    f"body_spec: {body_spec.treespec}, "
+                    f"same_spec: {same_spec}"
+                ),
+                explanation="Expected branches to return the same pytree structure.",
+                hints=[*hop.graph_break_hints.USER_ERROR],
+            )
+
+        body_name = tx.output.install_subgraph(
+            "loop_body", torch.fx.GraphModule(body_nn_modules, body_graph)
+        )
+        body_node = hop.make_attr(tx, body_name)
+        p_args = (
+            n_iter.as_proxy(),
+            body_node,
+            # append the lifted free variables to the operands
+            operands.as_proxy() + tuple(body_lifted_freevars.keys()),
+        )
+
+        return hop._call_function_and_unflatten_output(
+            tx,
+            simple_loop_for,
+            p_args,
+            {},
+            None,
+            body_spec,
+            body_r,
+        )
+
+
+hop._hop_name_to_variable_class["simple_loop_for"] = SimpleLoopForHigherOrderVariable
+
+
+# @torch._functorch.utils.exposed_in("torch")
 def simple_loop_for(
     n_iter: Union[int, torch.Tensor],
     body_fn: Callable,
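The new ``SimpleLoopForHigherOrderVariable`` registers ``simple_loop_for`` with
Dynamo, so a compiled graph keeps the loop as a single higher-order op instead
of unrolling it. A hedged usage sketch (the concatenation semantics and the
keyword name ``concatenation_dims`` are inferred from this diff, not from the
package documentation)::

    import torch
    from onnx_diagnostic.export.cf_simple_loop_for import simple_loop_for

    def body_fn(i, x):
        # one tensor per iteration; results are assumed to be concatenated
        # along the dimension listed in concatenation_dims
        return (x * i).reshape((1, -1))

    x = torch.arange(4.0)
    n = torch.tensor(5)  # a tensor (or SymInt) avoids unrolling the loop
    y = simple_loop_for(n, body_fn, (x,), concatenation_dims=[0])
    # expected shape under the assumed semantics: (5, 4)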
onnx_diagnostic/ext_test_case.py
CHANGED

@@ -1267,6 +1267,7 @@ class ExtTestCase(unittest.TestCase):
         :class:`onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch`
         """
         from .helpers import string_type, string_diff, max_diff
+        from .helpers.torch_helper import torch_deepcopy
         from .helpers.rt_helper import make_feeds
         from .helpers.ort_session import InferenceSessionForTorch
 
@@ -1283,6 +1284,12 @@ class ExtTestCase(unittest.TestCase):
             model_file = proto
             name = proto
             proto = onnx.load(name)
+        elif hasattr(proto, "save"):
+            name = f"{test_name}.onnx"
+            proto.save(name)
+            proto = onnx.load(name)
+        elif hasattr(proto, "model_proto"):
+            proto = proto.model_proto
         elif not self.unit_test_going():
             assert isinstance(
                 proto, onnx.ModelProto
@@ -1341,9 +1348,9 @@
         if copy_inputs:
             expected = [
                 (
-                    model(*
+                    model(*torch_deepcopy(inp))
                     if isinstance(inp, tuple)
-                    else model(**
+                    else model(**torch_deepcopy(inp))
                 )
                 for inp in inputs
             ]
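The switch to ``torch_deepcopy`` above protects the reference run: a model that
mutates its inputs in place (a KV cache, for instance) would otherwise corrupt
the feeds reused for the ONNX session. A minimal sketch of the assumed
semantics::

    import torch
    from onnx_diagnostic.helpers.torch_helper import torch_deepcopy

    inp = {"x": torch.zeros((2, 3))}
    copy = torch_deepcopy(inp)  # deep copy aware of tensors and cache classes
    copy["x"] += 1              # mutating the copy...
    assert inp["x"].sum().item() == 0  # ...leaves the original feeds intact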
onnx_diagnostic/helpers/log_helper.py
CHANGED

@@ -1921,9 +1921,7 @@ class CubeLogsPerformance(CubeLogs):
             return lambdas[formula]
 
         if formula == "onnx_n_nodes_no_cst":
-            return lambda df: gdf(df, "onnx_n_nodes", 0) - gdf(
-                df, "op_onnx__Constant", 0
-            ).fillna(0)
+            return lambda df: gdf(df, "onnx_n_nodes", 0) - gdf(df, "op_onnx__Constant", 0)
         if formula == "peak_gpu_torch":
             return lambda df: gdf(df, "mema_gpu_5_after_export") - gdf(df, "mema_gpu_4_reset")
         if formula == "peak_gpu_nvidia":
onnx_diagnostic/helpers/optim_helper.py
ADDED

@@ -0,0 +1,116 @@
+from typing import Optional, Union
+import pprint
+import onnx
+
+
+def optimize_model(
+    algorithm: str,
+    model: Union[onnx.ModelProto, str],
+    output: Optional[str] = None,
+    processor: Optional[str] = None,
+    infer_shapes: bool = True,
+    remove_shape_info: bool = False,
+    verbose: int = 1,
+):
+    """
+    Optimizes an onnx model by fusing nodes. It looks for patterns in the graph
+    and replaces them with the corresponding fused nodes. It also does basic
+    optimization such as removing identity nodes or unused nodes.
+
+    :param algorithm: algorithm to choose
+    :param model: model to optimize as a proto or a filename
+    :param output: if not empty, the optimized model is saved
+    :param processor: optimizations are done for this processor
+    :param infer_shapes: infer shapes before optimizing, this might not be
+        available for all algorithms
+    :param remove_shape_info: remove shape information before saving the model
+    :param verbose: verbosity level
+    :return: optimized model
+
+    The goal is to make the model faster.
+    Argument patterns defines the patterns to apply or the set of patterns.
+    It is possible to show statistics or to remove a particular pattern.
+    Here are some environment variables which can be used to trigger
+    these displays.
+
+    Available for algorithms ``default`` and ``default+onnxruntime``:
+
+    - ``DROPPATTERN=<pattern1,pattern2,...>``: do not apply
+      those patterns when optimizing a model
+    - ``DUMPPATTERNS=<folder>``: dumps all matched and applied nodes when a pattern is applied
+    - ``PATTERN=<pattern1,pattern2,...>``: increase verbosity
+      for specific patterns to understand why one pattern was not applied,
+      this shows which line is rejecting a pattern if it seems one pattern was missed
+    """
+    if isinstance(model, str):
+        if verbose:
+            print(f"[optimize_model] load {model!r}")
+        proto = onnx.load(model)
+        if verbose:
+            print("[optimize_model] done loading.")
+    else:
+        proto = model
+
+    if verbose:
+        print(f"[optimize_model] optimize with {algorithm!r}")
+    if algorithm in {"default", "default+onnxruntime"}:
+        from experimental_experiment.xoptim import get_pattern_list
+        from experimental_experiment.xbuilder import GraphBuilder, OptimizationOptions
+
+        pats = get_pattern_list(algorithm)
+
+        gr = GraphBuilder(
+            proto,
+            infer_shapes_options=infer_shapes,
+            optimization_options=OptimizationOptions(
+                patterns=pats,
+                verbose=verbose,
+                remove_unused=True,
+                constant_folding=True,
+                remove_identity=True,
+                max_iter=max(100, len(proto.graph.node) // 2),
+                processor=processor or "CPU",
+            ),
+        )
+        if verbose:
+            print(f"[optimize_model] starts optimizing with {len(pats)} patterns")
+            print(f"[optimize_model] model has {len(proto.graph.node)} nodes")
+        opt_onx, report = gr.to_onnx(optimize=True, return_optimize_report=True)
+        if verbose:
+            print("[optimize_model] optimization report")
+            pprint.pprint(report)
+            print("[optimize_model] done")
+
+    elif algorithm == "slim":
+        import onnxslim
+
+        opt_onx = onnxslim.slim(proto, no_shape_infer=not infer_shapes)
+    elif algorithm in {"ir", "os_ort"}:
+        import onnx_ir
+        import onnxscript.optimizer
+        from onnxscript.rewriter.ort_fusions import optimize_for_ort
+
+        model_ir = onnx_ir.from_proto(proto)
+        if algorithm == "ir":
+            onnxscript.optimizer.optimize(model_ir)
+        else:
+            optimize_for_ort(model_ir)
+        opt_onx = onnx_ir.serde.serialize_model(model_ir)
+
+    del proto
+    if verbose:
+        print(f"[optimize_model] done optimizing, model has {len(opt_onx.graph.node)} nodes")
+    if remove_shape_info:
+        if verbose:
+            print(f"[optimize_model] remove shape information {len(opt_onx.graph.value_info)}")
+        del opt_onx.graph.value_info[:]
+        if verbose:
+            print("[optimize_model] done removing shape info")
+
+    if output:
+        if verbose:
+            print(f"[optimize_model] save file into {output!r}")
+        onnx.save(opt_onx, output, save_as_external_data=True)
+        if verbose:
+            print("[optimize_model] done saving")
+    return opt_onx
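A minimal usage sketch of the new helper (the file names are placeholders)::

    import onnx
    from onnx_diagnostic.helpers.optim_helper import optimize_model

    proto = onnx.load("model.onnx")  # placeholder path
    optimized = optimize_model(
        "slim",  # also: "default", "default+onnxruntime", "ir", "os_ort"
        proto,
        output="model.opt.onnx",  # written with save_as_external_data=True
        verbose=0,
    )
    print(f"{len(optimized.graph.node)} nodes after optimization")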
onnx_diagnostic/tasks/image_text_to_text.py
CHANGED

@@ -13,6 +13,10 @@ from .data import get_data
 __TASK__ = "image-text-to-text"
 
 
+def should_have_vision_config(config):
+    return config.architectures != ["FuyuForCausalLM"]
+
+
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
     kwargs: Dict[str, Any] = {}
@@ -477,7 +481,8 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
             "hidden_size",
             "pad_token_id",
         )
-
+        if should_have_vision_config(config):
+            check_hasattr(config, "vision_config", ("image_token_index", "image_token_id"))
         text_config = True
     else:
         check_hasattr(
@@ -491,7 +496,8 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
             "vision_config",
         )
         text_config = False
-
+    if should_have_vision_config(config):
+        check_hasattr(config.vision_config, ("num_channels", "in_chans", "in_channels"))
     kwargs = dict(
         head_dim=(
             16
@@ -552,17 +558,21 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         ),
         width=(
             224
-            if config is None
+            if config is None
+            or not should_have_vision_config(config)
+            or not hasattr(config.vision_config, "image_size")
             else config.vision_config.image_size
         ),
         height=(
             224
-            if config is None
+            if config is None
+            or not should_have_vision_config(config)
+            or not hasattr(config.vision_config, "image_size")
             else config.vision_config.image_size
         ),
         num_channels=(
             3
-            if config is None
+            if config is None or not should_have_vision_config(config)
             else _pick(config.vision_config, "num_channels", "in_chans", "in_channels")
         ),
         pad_token_id=(