onnx-diagnostic 0.8.5__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,352 @@
1
+ import contextlib
2
+ from typing import Callable, List, Optional, Sequence, Tuple, Union
3
+ import torch
4
+ from torch._C import DispatchKey
5
+ from torch._ops import HigherOrderOperator
6
+ from torch._subclasses.fake_tensor import FakeTensorMode
7
+ import torch.utils._pytree as pytree
8
+ from torch._higher_order_ops.utils import (
9
+ check_input_alias_and_mutation_return_outputs,
10
+ reenter_make_fx,
11
+ unique_graph_id,
12
+ validate_subgraph_args_types,
13
+ )
14
+ from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode, track_tensor_tree
15
+ from torch.utils._python_dispatch import _get_current_dispatch_mode
16
+
17
+
18
+ class SimpleLoopForOp(HigherOrderOperator):
19
+ """Higher order op for :func:`simple_loop_for`."""
20
+
21
+ def __init__(self):
22
+ super().__init__("simple_loop_for")
23
+
24
+ def __call__(self, n_iter, body_fn, operands, concatenation_dims=None):
25
+ validate_subgraph_args_types(operands)
26
+ return super().__call__(n_iter, body_fn, operands, concatenation_dims)
27
+
28
+ def gen_schema(self, n_iter, body_fn, operands, concatenation_dims):
29
+ from torch._higher_order_ops.schema import HopSchemaGenerator
30
+ from torch._higher_order_ops.utils import materialize_as_graph
31
+
32
+ body_gm: torch.fx.GraphModule = materialize_as_graph( # type: ignore[annotation-unchecked]
33
+ body_fn, (torch.tensor(0, dtype=torch.int64), *operands)
34
+ )
35
+ (
36
+ _,
37
+ _,
38
+ _,
39
+ body_mutated_inputs,
40
+ body_outputs,
41
+ ) = check_input_alias_and_mutation_return_outputs(body_gm)
42
+ mutated_inputs = body_mutated_inputs
43
+
44
+ schema_gen = HopSchemaGenerator(self)
45
+ schema_gen.add_arg("n_iter", n_iter)
46
+ schema_gen.add_arg("body_fn", body_gm)
47
+ for idx, arg in enumerate(operands):
48
+ schema_gen.add_arg(f"operand{idx}", arg, is_mutated=idx in mutated_inputs)
49
+
50
+ for out in body_outputs:
51
+ schema_gen.add_output(out)
52
+ assert concatenation_dims is None or len(concatenation_dims) == len(body_outputs), (
53
+ f"concatenation_dims={concatenation_dims} but its length should be equal to "
54
+ f"the number of outputs ({len(body_outputs)})"
55
+ )
56
+ schema_gen.add_schema_tree_spec(n_iter, body_fn, operands, concatenation_dims)
57
+ return schema_gen.gen_schema()
58
+
59
+
60
+ simple_loop_for_op = SimpleLoopForOp()
61
+
62
+
63
+ def _simple_loop_for_fn(
64
+ n_iter: torch.Tensor,
65
+ body_fn: Callable,
66
+ operands: Tuple[torch.Tensor, ...] = (),
67
+ concatenation_dims: Optional[Sequence[int]] = None,
68
+ ) -> Tuple[torch.Tensor, ...]:
69
+ """
70
+ Python implementation of the loop.
71
+
72
+ :param n_iter: number of iterations
73
+ :param body_fn: function implementing the body
74
+ :param operands: arguments to the loop body
75
+ :param concatenation_dims: dimensions used to concatenate the lists produced by the loop
76
+ :return: results
77
+ """
78
+ torch._check(
79
+ isinstance(n_iter, (int, torch.Tensor)),
80
+ lambda: f"Unexpected type {type(n_iter)} for n_iter",
81
+ )
82
+ torch._check(callable(body_fn), lambda: f"Unexpected type {type(body_fn)} for body_fn")
83
+ torch._check(
84
+ concatenation_dims is None or isinstance(concatenation_dims, (list, tuple)),
85
+ lambda: f"Unexpected type {type(concatenation_dims)} for concatenation_dims",
86
+ )
87
+ torch._check(
88
+ isinstance(operands, tuple), lambda: f"Unexpected type {type(operands)} for operands"
89
+ )
90
+ res: List[Union[torch.Tensor, Tuple[torch.Tensor, ...]]] = []
91
+ for i in torch.arange(
92
+ n_iter, dtype=torch.int64 if isinstance(n_iter, int) else n_iter.dtype
93
+ ):
94
+ r = body_fn(i, *operands)
95
+ if isinstance(r, tuple):
96
+ assert not res or len(r) == len(res[-1]), (
97
+ f"Unexpected number of results {len(r)} for function {body_fn}, "
98
+ f"expected {len(res[-1])}"
99
+ )
100
+ res.append(r)
101
+ else:
102
+ assert isinstance(r, torch.Tensor), (
103
+ f"Unexpected type {r} for function {body_fn}, "
104
+ f"it must be a tuple or a Tensor."
105
+ )
106
+ assert not res or len(res[-1]) == 1, (
107
+ f"Unexpected number of results {len(r)} for function {body_fn}, "
108
+ f"expected {len(res[-1])}"
109
+ )
110
+ res.append((r,))
111
+
112
+ if not res:
113
+ return torch.empty(tuple(), dtype=torch.float32, device=operands[0].device)
114
+
115
+ n_res = len(res[0])
116
+ return tuple(
117
+ torch.cat(
118
+ [r[i] for r in res],
119
+ dim=(
120
+ 0
121
+ if concatenation_dims is None or i >= len(concatenation_dims)
122
+ else concatenation_dims[i]
123
+ ),
124
+ )
125
+ for i in range(n_res)
126
+ )
127
+
128
+
129
+ # from torch._functorch.utils import exposed_in
130
+ # @exposed_in("torch")
131
+ def _simple_loop_for(
132
+ n_iter: Union[int, torch.Tensor],
133
+ body_fn: Callable,
134
+ operands: Tuple[torch.Tensor, ...] = (),
135
+ concatenation_dims: Optional[Sequence[int]] = None,
136
+ ) -> Tuple[torch.Tensor, ...]:
137
+ def _validate_input(n_iter, body_fn, operands, concatenation_dims):
138
+ assert isinstance(
139
+ n_iter, (int, torch.Tensor, torch.SymInt)
140
+ ), f"Expected pred to be bool or tensor, but got {n_iter}."
141
+ assert (
142
+ not isinstance(n_iter, torch.Tensor) or n_iter.numel() == 1
143
+ ), f"Expected pred to be bool or single-element tensor, but got {n_iter}."
144
+ assert callable(body_fn), "Expect both branches to be callable."
145
+ assert isinstance(operands, (tuple, list)) and pytree.tree_all(
146
+ lambda t: isinstance(t, torch.Tensor), operands
147
+ ), (
148
+ "Expect operands to be a tuple of possibly nested dict/list/tuple that only "
149
+ f"consists of tensor leaves, but got {operands}."
150
+ )
151
+ assert concatenation_dims is None or (
152
+ isinstance(concatenation_dims, (list, tuple))
153
+ and all(isinstance(i, int) for i in concatenation_dims)
154
+ ), (
155
+ f"concatenation_dims should be None or a list of integers but it is "
156
+ f"{concatenation_dims}. Its length should be equal to the number of outputs."
157
+ )
158
+ assert torch._dynamo.is_dynamo_supported(), "simple_loop_for requires dynamo support."
159
+
160
+ if torch.compiler.is_dynamo_compiling():
161
+ return simple_loop_for_op(
162
+ n_iter, body_fn, (n_iter, *operands), concatenation_dims=concatenation_dims
163
+ )
164
+
165
+ if isinstance(n_iter, (bool, int, float)):
166
+ torch._check(
167
+ isinstance(n_iter, int),
168
+ lambda: f"n_iter must be an integer or a tensor not {type(n_iter)}",
169
+ )
170
+ return _simple_loop_for_fn(
171
+ n_iter, body_fn, operands, concatenation_dims=concatenation_dims
172
+ )
173
+
174
+ def _loop_for_op_wrapper(n_iter, body_fn, operands, concatenation_dims):
175
+ return simple_loop_for_op(n_iter, body_fn, operands, concatenation_dims)
176
+
177
+ _validate_input(n_iter, body_fn, operands, concatenation_dims)
178
+
179
+ # This requires torch>=2.10.
180
+ from torch._higher_order_ops.utils import setup_compilation_env
181
+
182
+ with setup_compilation_env() as _backend:
183
+ return _loop_for_op_wrapper(n_iter, body_fn, operands, concatenation_dims)
184
+ # return torch.compile(_loop_for_op_wrapper, backend=backend, fullgraph=True)(
185
+ # n_iter, body_fn, operands, concatenation_dims)
186
+
187
+
188
+ def trace_simple_loop_for(
189
+ proxy_mode, func_overload, n_iter, body_fn, operands, concatenation_dims
190
+ ):
191
+ """See function ``simple_loop_for``."""
192
+ assert isinstance(operands, (list, tuple)) and (
193
+ concatenation_dims is None
194
+ or (
195
+ isinstance(concatenation_dims, (list, tuple))
196
+ and all(isinstance(i, int) for i in concatenation_dims)
197
+ )
198
+ ), (
199
+ f"simple_loop_for operands must be a list or tuple of tensors and SymInts and "
200
+ f"concatenation_dims must be None or a list of integer, "
201
+ f"operands={[type(o) for o in operands]}, "
202
+ f"concatenation_dims={concatenation_dims}"
203
+ )
204
+
205
+ body_graph = reenter_make_fx(body_fn)(n_iter, *operands)
206
+
207
+ body_outs = []
208
+ for node in body_graph.graph.nodes:
209
+ if node.op == "output":
210
+ body_outs.extend(node.args)
211
+
212
+ # flat_body_outs = pytree.arg_tree_leaves(*body_outs)
213
+ _i, body_name = unique_graph_id(proxy_mode, prefix="body_graph")
214
+ proxy_mode.tracer.root.register_module(body_name, body_graph)
215
+ args = (n_iter, body_graph, operands, concatenation_dims)
216
+ proxy_args = pytree.tree_map(proxy_mode.tracer.unwrap_proxy, args)
217
+ out_proxy = proxy_mode.tracer.create_proxy("call_function", func_overload, proxy_args, {})
218
+ out = func_overload(n_iter, body_graph, operands, concatenation_dims)
219
+ return track_tensor_tree(out, out_proxy, constant=None, tracer=proxy_mode.tracer)
220
+
221
+
222
+ @simple_loop_for_op.py_impl(DispatchKey.CompositeExplicitAutograd)
223
+ def loop_for_op_dense(n_iter, body_fn, operands, concatenation_dims=None):
224
+ """Registered eager mode implementation."""
225
+ assert all(isinstance(o, torch.Tensor) for o in operands) and (
226
+ concatenation_dims is None
227
+ or (
228
+ isinstance(concatenation_dims, (list, tuple))
229
+ and all(isinstance(i, int) for i in concatenation_dims)
230
+ )
231
+ ), (
232
+ f"simple_loop_for operands must be a list or tuple of tensors and SymInts and "
233
+ f"concatenation_dims must be None or a list of integer, "
234
+ f"operands={[type(o) for o in operands]}, "
235
+ f"concatenation_dims={concatenation_dims}"
236
+ )
237
+ mode = _get_current_dispatch_mode()
238
+ assert mode is None, "Mode should never be enabled for CPU/CUDA key"
239
+ return _simple_loop_for_fn(
240
+ n_iter, body_fn, operands, concatenation_dims=concatenation_dims
241
+ )
242
+
243
+
244
+ @simple_loop_for_op.py_impl(ProxyTorchDispatchMode)
245
+ def inner(mode, n_iter, body_fn, operands, concatenation_dims=None):
246
+ """Registered tracing implementation."""
247
+ return trace_simple_loop_for(
248
+ mode, simple_loop_for_op, n_iter, body_fn, operands, concatenation_dims
249
+ )
250
+
251
+
252
+ @simple_loop_for_op.py_impl(FakeTensorMode)
253
+ def simple_loop_for_fake_tensor_mode(mode, n_iter, body_fn, operands, concatenation_dims=None):
254
+ """Registered FakeMode implementation."""
255
+ ignore_fresh_unbacked = contextlib.nullcontext()
256
+ if mode.shape_env:
257
+ ignore_fresh_unbacked = mode.shape_env.ignore_fresh_unbacked_symbols()
258
+
259
+ with mode, ignore_fresh_unbacked:
260
+ flat_body_outs, true_body_spec = pytree.tree_flatten(body_fn(n_iter, *operands))
261
+
262
+ return pytree.tree_unflatten(flat_body_outs, true_body_spec)
263
+
264
+
265
+ # Registration for autograd.
266
+ simple_loop_for_op.fallthrough(torch._C.DispatchKey.AutogradCPU)
267
+ simple_loop_for_op.fallthrough(torch._C.DispatchKey.AutogradCUDA)
268
+
269
+
270
+ def simple_loop_for(
271
+ n_iter: Union[int, torch.Tensor],
272
+ body_fn: Callable,
273
+ operands: Tuple[torch.Tensor, ...] = (),
274
+ concatenation_dims: Optional[Union[int, Sequence[int]]] = None,
275
+ ) -> Union[torch.Tensor, Tuple[torch.Tensor, ...]]:
276
+ """
277
+ Implements a simple for loop. The body is defined by a function which takes the
278
+ iteration number stored in a tensor, plus other tensors.
279
+ It returns one or several tensors in a tuple. All of them
280
+ are finally concatenated along the first dimension (or along ``concatenation_dims``).
281
+
282
+ :param n_iter: number of iterations
283
+ :param body_fn: function implementing the loop body
284
+ :param operands: body arguments
285
+ :param concatenation_dims: dimension or dimensions used to concatenate the output sequences
286
+ :return: concatenated outputs, a single tensor if the body returns one tensor, a tuple otherwise
287
+
288
+ An example with one output:
289
+
290
+ .. runpython::
291
+ :showcode:
292
+
293
+ import torch
294
+ from onnx_diagnostic.export.cf_simple_loop_for import simple_loop_for
295
+
296
+
297
+ class Model(torch.nn.Module):
298
+ def forward(self, n_iter, x):
299
+ def body(i, x):
300
+ return (x[: i.item() + 1].unsqueeze(1),)
301
+
302
+ return simple_loop_for(n_iter, body, (x,))
303
+
304
+
305
+ model = Model()
306
+ n_iter = torch.tensor(4, dtype=torch.int64)
307
+ x = torch.arange(10, dtype=torch.float32)
308
+ ep = torch.export.export(
309
+ model, (n_iter, x), dynamic_shapes=({}, ({0: torch.export.Dim.DYNAMIC}))
310
+ )
311
+ print(ep)
312
+
313
+ Another example with two outputs and a final concatenation on different axes.
314
+
315
+ .. runpython::
316
+ :showcode:
317
+
318
+ import torch
319
+ from onnx_diagnostic.export.cf_simple_loop_for import simple_loop_for
320
+
321
+
322
+ class Model(torch.nn.Module):
323
+ def forward(self, n_iter, x):
324
+ def body(i, x):
325
+ return (x[: i.item() + 1].unsqueeze(1), x[i.item() + 1 :].unsqueeze(0))
326
+
327
+ return simple_loop_for(n_iter, body, (x,), (0, 1))
328
+
329
+
330
+ model = Model()
331
+ n_iter = torch.tensor(4, dtype=torch.int64)
332
+ x = torch.arange(10, dtype=torch.float32)
333
+ ep = torch.export.export(
334
+ model, (n_iter, x), dynamic_shapes=({}, ({0: torch.export.Dim.DYNAMIC}))
335
+ )
336
+ print(ep)
337
+ """
338
+ res = _simple_loop_for(
339
+ n_iter,
340
+ body_fn,
341
+ operands,
342
+ concatenation_dims=(
343
+ (concatenation_dims,)
344
+ if isinstance(concatenation_dims, int)
345
+ else concatenation_dims
346
+ ),
347
+ )
348
+ torch._check(
349
+ isinstance(res, tuple),
350
+ lambda: f"Output of the loop should be a tuple not {type(res)}.",
351
+ )
352
+ return res[0] if len(res) == 1 else res
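
For context, a minimal eager-mode sketch (not taken from the package documentation) of how the new helper can be called outside of export; it assumes a torch version recent enough to provide ``setup_compilation_env`` (the module itself notes torch>=2.10), and the body and shapes are illustrative:

import torch
from onnx_diagnostic.export.cf_simple_loop_for import simple_loop_for

x = torch.arange(6, dtype=torch.float32)

def body(i, x):
    # one output per iteration: a growing column made of the first i+1 values
    return (x[: i.item() + 1].unsqueeze(1),)

# iterations 0, 1, 2 produce 1 + 2 + 3 = 6 rows, concatenated on dim 0
out = simple_loop_for(torch.tensor(3, dtype=torch.int64), body, (x,))
print(out.shape)  # expected torch.Size([6, 1])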
@@ -55,13 +55,13 @@ def is_exporting() -> bool:
55
55
  return _TEST_EXPORT or torch.compiler.is_exporting() or torch.compiler.is_compiling()
56
56
 
57
57
 
58
- def _loop_for_onnx_fn(n_iter, body_fn, reduction_dim, args):
58
+ def _loop_for_onnx_fn(n_iter, body_fn, concatenation_dims, args):
59
59
  """
60
60
  Python implementation of the loop.
61
61
 
62
62
 :param n_iter: number of iterations
63
63
  :param body_fn: function implementing the body
64
- :param reduction_dim: dimension used to reduce the list produced by the loop
64
+ :param concatenation_dims: dimensions used to concatenate the lists produced by the loop
65
65
  :param args: arguments to the loop body
66
66
  :return: results
67
67
  """
@@ -95,7 +95,9 @@ def _loop_for_onnx_fn(n_iter, body_fn, reduction_dim, args):
95
95
  torch.cat(
96
96
  [r[i] for r in res],
97
97
  dim=(
98
- 0 if reduction_dim is None or i >= len(reduction_dim) else reduction_dim[i]
98
+ 0
99
+ if concatenation_dims is None or i >= len(concatenation_dims)
100
+ else concatenation_dims[i]
99
101
  ),
100
102
  )
101
103
  for i in range(n_res)
@@ -106,7 +108,7 @@ def _loop_for_onnx_fn(n_iter, body_fn, reduction_dim, args):
106
108
  def make_custom_loop_for_onnx(
107
109
  n_iter: torch.Tensor,
108
110
  body_fn: Callable,
109
- reduction_dim: Optional[Sequence[int]],
111
+ concatenation_dims: Optional[Sequence[int]],
110
112
  args: Sequence[torch.Tensor],
111
113
  body_gm: Optional[torch.fx.GraphModule] = None,
112
114
  body_mutated_inputs: Optional[List[Any]] = None,
@@ -120,7 +122,7 @@ def make_custom_loop_for_onnx(
120
122
 
121
123
  :param n_iter: number of iterations defined by a tensor of no dimension
122
124
  :param body_fn: the loop body defined as a function
123
- :param reduction_dim: dimension used to concatenated the results
125
+ :param concatenation_dims: dimensions used to concatenate the results
124
126
  :param args: list of tensors, input to the body
125
127
 :param body_gm: torch.fx.GraphModule equivalent to *body_fn*
126
128
  :param body_mutated_inputs: inputs to *body_gm*
@@ -133,7 +135,7 @@ def make_custom_loop_for_onnx(
133
135
  assert body_mutated_inputs is not None, "body_mutated_inputs cannot be None"
134
136
  assert body_outputs is not None, "body_outputs cannot be None"
135
137
  srank = "_".join("x".join(map(str, s.shape)) for s in body_outputs)
136
- sred = "x".join(map(str, reduction_dim)) if reduction_dim else ""
138
+ sred = "x".join(map(str, concatenation_dims)) if concatenation_dims else ""
137
139
  full_name = (
138
140
  body_fn.__qualname__.replace("<locals>", "L")
139
141
  .replace("<lambda>", "l")
@@ -169,14 +171,14 @@ def make_custom_loop_for_onnx(
169
171
  custom_def,
170
172
  _make_onx,
171
173
  (
172
- lambda g, sts, outputs, *args, bc=_make_onx, rd=reduction_dim, name=name: (
174
+ lambda g, sts, outputs, *args, bc=_make_onx, rd=concatenation_dims, name=name: (
173
175
  convert_custom_loop_into_onnx(
174
176
  g,
175
177
  sts,
176
178
  outputs,
177
179
  *args,
178
180
  body_callable=bc,
179
- reduction_dim=rd,
181
+ concatenation_dims=rd,
180
182
  name=name,
181
183
  )
182
184
  )
@@ -196,7 +198,7 @@ def convert_custom_loop_into_onnx(
196
198
  outputs: List[str],
197
199
  *args: str,
198
200
  body_callable: Callable[..., onnx.ModelProto],
199
- reduction_dim: Optional[Sequence[int]] = None,
201
+ concatenation_dims: Optional[Sequence[int]] = None,
200
202
  name: str = "loop_for_onnx",
201
203
  ) -> Union[str, List[str]]:
202
204
  """
@@ -207,7 +209,7 @@ def convert_custom_loop_into_onnx(
207
209
  :param outputs: output names
208
210
  :param args: input argument known at export time
209
211
  :param body: GraphProto, the loop body
210
- :param reduction_dim: the dimension to follow when aggregating the
212
+ :param concatenation_dims: the dimension to follow when aggregating the
211
213
  list of tensors after the loop ran
212
214
  :param name: to give the onnx nodes a name
213
215
  :return: output names
@@ -289,7 +291,11 @@ def convert_custom_loop_into_onnx(
289
291
  out,
290
292
  outputs=[o],
291
293
  name=name,
292
- axis=0 if not reduction_dim or i >= len(reduction_dim) else reduction_dim[i],
294
+ axis=(
295
+ 0
296
+ if not concatenation_dims or i >= len(concatenation_dims)
297
+ else concatenation_dims[i]
298
+ ),
293
299
  )
294
300
  for i, (out, o) in enumerate(zip(outloop, outputs))
295
301
  ]
@@ -337,7 +343,7 @@ def loop_for_onnx(
337
343
  n_iter: Union[torch.SymInt, torch.Tensor],
338
344
  body_fn: Callable[..., Tuple[torch.Tensor]],
339
345
  args: Sequence[torch.Tensor],
340
- reduction_dim: Optional[Sequence[int]] = None,
346
+ concatenation_dims: Optional[Sequence[int]] = None,
341
347
  ) -> Tuple[torch.Tensor, ...]:
342
348
  """
343
349
 Higher-order operator used to easily export a loop in ONNX.
@@ -353,7 +359,7 @@ def loop_for_onnx(
353
359
  in a tensor with no dimension, all the others
354
360
  are not changed during the loop
355
361
  :param args: the available tensors at every loop
356
- :param reduction_dim: the loop aggregated the results into list,
362
+ :param concatenation_dims: the loop aggregates the results into lists,
357
363
  one of each output, each of them is concatenated into one
358
364
  tensor along one dimension, by default, it is the first
359
365
  dimension, but it can be defined otherwise
@@ -449,7 +455,7 @@ def loop_for_onnx(
449
455
  )
450
456
  print(ep)
451
457
 
452
- A last example with ``reduction_dim``:
458
+ A last example with ``concatenation_dims``:
453
459
 
454
460
  .. runpython::
455
461
  :showcode:
@@ -465,7 +471,7 @@ def loop_for_onnx(
465
471
  def body(i, x):
466
472
  return x[: i.item() + 1].unsqueeze(1), x[: i.item() + 1].unsqueeze(0) + 1
467
473
 
468
- two = loop_for_onnx(n_iter, body, (x,), reduction_dim=[0, 1])
474
+ two = loop_for_onnx(n_iter, body, (x,), concatenation_dims=[0, 1])
469
475
  return two[0] + two[1].T
470
476
 
471
477
 
@@ -516,7 +522,7 @@ def loop_for_onnx(
516
522
  name, _custom_ops = make_custom_loop_for_onnx(
517
523
  n_iter,
518
524
  body_fn,
519
- reduction_dim,
525
+ concatenation_dims,
520
526
  args,
521
527
  body_gm=body_gm,
522
528
  body_mutated_inputs=body_mutated_inputs,
@@ -525,4 +531,4 @@ def loop_for_onnx(
525
531
  fct = getattr(torch.ops.onnx_higher_ops, name)
526
532
  return fct(n_iter, *args)
527
533
 
528
- return _loop_for_onnx_fn(n_iter, body_fn, reduction_dim, args)
534
+ return _loop_for_onnx_fn(n_iter, body_fn, concatenation_dims, args)
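
To make the renamed argument concrete, here is a short plain-torch sketch (illustrative only) of the concatenation that ``_loop_for_onnx_fn`` performs on the per-iteration results, mirroring the ``dim=`` expression shown above:

import torch

# results collected by the loop body: one tuple of outputs per iteration
res = [
    (torch.ones(1, 1), torch.ones(1, 2)),
    (torch.ones(2, 1), torch.ones(1, 3)),
]
concatenation_dims = [0, 1]  # per-output concatenation axis, defaults to 0

outputs = tuple(
    torch.cat(
        [r[i] for r in res],
        dim=(
            0
            if concatenation_dims is None or i >= len(concatenation_dims)
            else concatenation_dims[i]
        ),
    )
    for i in range(len(res[0]))
)
print([o.shape for o in outputs])  # [torch.Size([3, 1]), torch.Size([1, 5])]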
@@ -700,6 +700,19 @@ def requires_onnx(version: str, msg: str = "") -> Callable:
700
700
  return lambda x: x
701
701
 
702
702
 
703
+ def requires_experimental_experiment(version: str, msg: str = "") -> Callable:
704
+ """Skips a unit test if :epkg:`onnx-array-api` is not recent enough."""
705
+ import packaging.version as pv
706
+ import experimental_experiment
707
+
708
+ if pv.Version(experimental_experiment.__version__) < pv.Version(version):
709
+ msg = (
710
+ f"onnx-array-api version {experimental_experiment.__version__} < {version}: {msg}"
711
+ )
712
+ return unittest.skip(msg)
713
+ return lambda x: x
714
+
715
+
703
716
  def requires_onnx_array_api(version: str, msg: str = "") -> Callable:
704
717
  """Skips a unit test if :epkg:`onnx-array-api` is not recent enough."""
705
718
  import packaging.version as pv
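
A hedged usage sketch of the new decorator (the version string, message and test are placeholders; the imports of ``ExtTestCase`` and ``requires_experimental_experiment`` are elided since they live in the same test helper module as the other ``requires_*`` decorators):

class TestLoopExport(ExtTestCase):  # ExtTestCase appears in the next hunk of this diff
    @requires_experimental_experiment("0.1.0", "needs a recent experimental-experiment")
    def test_loop_export(self):
        self.assertTrue(True)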
@@ -774,6 +787,7 @@ class ExtTestCase(unittest.TestCase):
774
787
  def setUpClass(cls):
775
788
  logger = logging.getLogger("onnxscript.optimizer.constant_folding")
776
789
  logger.setLevel(logging.ERROR)
790
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
777
791
  unittest.TestCase.setUpClass()
778
792
 
779
793
  @classmethod
@@ -818,6 +818,7 @@ def torch_export_patches(
818
818
  rewrite: Optional[List[Callable]] = None,
819
819
  dump_rewriting: Optional[str] = None,
820
820
  patch_details: Optional[PatchDetails] = None,
821
+ profile: Optional[str] = None,
821
822
  ) -> Callable:
822
823
  """
823
824
  Tries to bypass some situations :func:`torch.export.export` does not support.
@@ -850,6 +851,8 @@ def torch_export_patches(
850
851
 :param dump_rewriting: dumps rewriting information in files beginning with that prefix
851
852
  :param patch_details: if specified, this class is used to stored every rewritten done.
852
853
 :param verbose: to show which patches are applied
854
+ :param profile: starts profiling whatever is called inside the context manager and
855
+ writes the profiling report (HTML produced by pyinstrument) into the file with that name
853
856
 
854
857
  The list of available patches.
855
858
 
@@ -1017,10 +1020,23 @@ def torch_export_patches(
1017
1020
  if verbose:
1018
1021
  print("[torch_export_patches] done patching")
1019
1022
 
1023
+ if profile:
1024
+ from pyinstrument import Profiler
1025
+
1026
+ profiler = Profiler()
1027
+ profiler.start()
1028
+ else:
1029
+ profiler = None
1030
+
1020
1031
  try:
1021
1032
  yield fct_callable
1022
1033
  finally:
1023
1034
 
1035
+ if profiler:
1036
+ profiler.stop()
1037
+ with open(profile, "w") as f:
1038
+ f.write(profiler.output_html())
1039
+
1024
1040
  # unpatch
1025
1041
 
1026
1042
  if verbose:
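
A minimal usage sketch of the new ``profile`` argument (the import path, model and file name are assumptions for illustration, and pyinstrument must be installed; the HTML report is written when the context manager exits):

import torch
# assumed import path for the context manager patched in this hunk
from onnx_diagnostic.torch_export_patches import torch_export_patches

model = torch.nn.Linear(4, 4)
x = torch.randn(2, 4)

with torch_export_patches(profile="export_profile.html"):
    ep = torch.export.export(model, (x,))
# export_profile.html now contains the pyinstrument report of everything run in the block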
@@ -256,8 +256,12 @@ if patch_qwen2_5:
256
256
  return attn_output
257
257
 
258
258
  def qwen_version_selector(opset: int, *args: torch.Tensor) -> Tuple[str, torch.dtype]:
259
- first_tensor = next(a for a in args if a is not None)
260
- dtype = first_tensor.dtype
259
+ first_float_tensor = next(
260
+ a
261
+ for a in args
262
+ if a is not None and a.dtype in {torch.float16, torch.float32, torch.bfloat16}
263
+ )
264
+ dtype = first_float_tensor.dtype
261
265
  strategy = patched_Qwen2_5_VLVisionAttention.STRATEGY_FOR_ATTENTION()
262
266
  itype = torch_dtype_to_onnx_dtype(dtype)
263
267
  if strategy is not None:
@@ -269,7 +273,7 @@ if patch_qwen2_5:
269
273
  if dtype == torch.float16 or itype == onnx.TensorProto.FLOAT16:
270
274
  # first_tensor may be a SymbolicTensor (onnx).
271
275
  # is_cuda is not available.
272
- if hasattr(first_tensor, "is_cuda") and first_tensor.is_cuda:
276
+ if hasattr(first_float_tensor, "is_cuda") and first_float_tensor.is_cuda:
273
277
  return "PACKED", itype
274
278
  return "LOOPMHA", itype
275
279
  raise AssertionError(
@@ -733,3 +737,71 @@ if patch_qwen2_5:
733
737
  attn_output = attn_output.reshape(seq_length, -1).contiguous()
734
738
  attn_output = self.proj(attn_output)
735
739
  return attn_output
740
+
741
+ class patched_Qwen2_5_VLModel:
742
+ _PATCHES_ = ["get_placeholder_mask"]
743
+ _PATCHED_CLASS_ = transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.Qwen2_5_VLModel
744
+
745
+ def get_placeholder_mask(
746
+ self,
747
+ input_ids: torch.LongTensor,
748
+ inputs_embeds: torch.FloatTensor,
749
+ image_features: Optional[torch.FloatTensor] = None,
750
+ video_features: Optional[torch.FloatTensor] = None,
751
+ ):
752
+ if input_ids is None:
753
+ special_image_mask = inputs_embeds == self.get_input_embeddings()(
754
+ torch.tensor(
755
+ self.config.image_token_id,
756
+ dtype=torch.long,
757
+ device=inputs_embeds.device,
758
+ )
759
+ )
760
+ special_image_mask = special_image_mask.all(-1)
761
+ special_video_mask = inputs_embeds == self.get_input_embeddings()(
762
+ torch.tensor(
763
+ self.config.video_token_id,
764
+ dtype=torch.long,
765
+ device=inputs_embeds.device,
766
+ )
767
+ )
768
+ special_video_mask = special_video_mask.all(-1)
769
+ else:
770
+ special_image_mask = input_ids == self.config.image_token_id
771
+ special_video_mask = input_ids == self.config.video_token_id
772
+
773
+ special_image_mask = (
774
+ special_image_mask.unsqueeze(-1)
775
+ .expand_as(inputs_embeds)
776
+ .to(inputs_embeds.device)
777
+ )
778
+
779
+ # PATCHED: we should use torch._check
780
+ # but this fails for compilation. It cannot be verified with FakeTensors
781
+ # torch._check(
782
+ # image_features is None
783
+ # or inputs_embeds[special_image_mask].numel() == image_features.numel(),
784
+ # lambda: (
785
+ # f"Image features and image tokens do not match: tokens: "
786
+ # f"{special_image_mask.sum()}, features {image_features.shape[0]}"
787
+ # ),
788
+ # )
789
+
790
+ special_video_mask = (
791
+ special_video_mask.unsqueeze(-1)
792
+ .expand_as(inputs_embeds)
793
+ .to(inputs_embeds.device)
794
+ )
795
+
796
+ # PATCHED: we should use torch._check
797
+ # but this fails for compilation. It cannot be verified with FakeTensors
798
+ # torch._check(
799
+ # video_features is None
800
+ # or inputs_embeds[special_video_mask].numel() == video_features.numel(),
801
+ # lambda: (
802
+ # f"Videos features and video tokens do not match: tokens: "
803
+ # f"{special_video_mask.sum()}, features {video_features.shape[0]}"
804
+ # ),
805
+ # )
806
+
807
+ return special_image_mask, special_video_mask
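
The new class follows the package's patch convention: ``_PATCHES_`` lists the method names to replace and ``_PATCHED_CLASS_`` names the target class. Below is a hypothetical sketch (not the package's actual mechanism) of how such a patch could be applied and reverted:

def apply_patch(patch_cls):
    # swap each listed method onto the target class, keeping the originals around
    target = patch_cls._PATCHED_CLASS_
    originals = {name: getattr(target, name) for name in patch_cls._PATCHES_}
    for name in patch_cls._PATCHES_:
        setattr(target, name, getattr(patch_cls, name))
    return originals

def revert_patch(patch_cls, originals):
    # restore the original methods once the patching context ends
    for name, method in originals.items():
        setattr(patch_cls._PATCHED_CLASS_, name, method)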
@@ -77,6 +77,7 @@ if patch_qwen2_5:
77
77
  patched_Qwen2_5_VisionTransformerPretrainedModel,
78
78
  patched_Qwen2_5_VLVisionAttentionOneIteration,
79
79
  patched_Qwen2_5_VLVisionAttention,
80
+ patched_Qwen2_5_VLModel,
80
81
  PLUGS as PLUGS_Qwen25,
81
82
  )
82
83