onnx-diagnostic 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. onnx_diagnostic/__init__.py +7 -0
  2. onnx_diagnostic/__main__.py +4 -0
  3. onnx_diagnostic/_command_lines_parser.py +1141 -0
  4. onnx_diagnostic/api.py +15 -0
  5. onnx_diagnostic/doc.py +100 -0
  6. onnx_diagnostic/export/__init__.py +2 -0
  7. onnx_diagnostic/export/api.py +124 -0
  8. onnx_diagnostic/export/dynamic_shapes.py +1083 -0
  9. onnx_diagnostic/export/shape_helper.py +296 -0
  10. onnx_diagnostic/export/validate.py +173 -0
  11. onnx_diagnostic/ext_test_case.py +1290 -0
  12. onnx_diagnostic/helpers/__init__.py +1 -0
  13. onnx_diagnostic/helpers/_log_helper.py +463 -0
  14. onnx_diagnostic/helpers/args_helper.py +132 -0
  15. onnx_diagnostic/helpers/bench_run.py +450 -0
  16. onnx_diagnostic/helpers/cache_helper.py +687 -0
  17. onnx_diagnostic/helpers/config_helper.py +170 -0
  18. onnx_diagnostic/helpers/doc_helper.py +163 -0
  19. onnx_diagnostic/helpers/fake_tensor_helper.py +273 -0
  20. onnx_diagnostic/helpers/graph_helper.py +386 -0
  21. onnx_diagnostic/helpers/helper.py +1707 -0
  22. onnx_diagnostic/helpers/log_helper.py +2245 -0
  23. onnx_diagnostic/helpers/memory_peak.py +249 -0
  24. onnx_diagnostic/helpers/mini_onnx_builder.py +600 -0
  25. onnx_diagnostic/helpers/model_builder_helper.py +469 -0
  26. onnx_diagnostic/helpers/onnx_helper.py +1200 -0
  27. onnx_diagnostic/helpers/ort_session.py +736 -0
  28. onnx_diagnostic/helpers/rt_helper.py +476 -0
  29. onnx_diagnostic/helpers/torch_helper.py +987 -0
  30. onnx_diagnostic/reference/__init__.py +4 -0
  31. onnx_diagnostic/reference/evaluator.py +254 -0
  32. onnx_diagnostic/reference/ops/__init__.py +1 -0
  33. onnx_diagnostic/reference/ops/op_add_add_mul_mul.py +68 -0
  34. onnx_diagnostic/reference/ops/op_attention.py +60 -0
  35. onnx_diagnostic/reference/ops/op_average_pool_grad.py +63 -0
  36. onnx_diagnostic/reference/ops/op_bias_softmax.py +16 -0
  37. onnx_diagnostic/reference/ops/op_cast_like.py +46 -0
  38. onnx_diagnostic/reference/ops/op_complex.py +26 -0
  39. onnx_diagnostic/reference/ops/op_concat.py +15 -0
  40. onnx_diagnostic/reference/ops/op_constant_of_shape.py +67 -0
  41. onnx_diagnostic/reference/ops/op_fused_matmul.py +31 -0
  42. onnx_diagnostic/reference/ops/op_gather.py +29 -0
  43. onnx_diagnostic/reference/ops/op_gather_elements.py +45 -0
  44. onnx_diagnostic/reference/ops/op_gather_grad.py +12 -0
  45. onnx_diagnostic/reference/ops/op_memcpy_host.py +11 -0
  46. onnx_diagnostic/reference/ops/op_mul_sigmoid.py +23 -0
  47. onnx_diagnostic/reference/ops/op_negxplus1.py +8 -0
  48. onnx_diagnostic/reference/ops/op_qlinear_average_pool.py +40 -0
  49. onnx_diagnostic/reference/ops/op_qlinear_conv.py +102 -0
  50. onnx_diagnostic/reference/ops/op_quick_gelu.py +23 -0
  51. onnx_diagnostic/reference/ops/op_replace_zero.py +13 -0
  52. onnx_diagnostic/reference/ops/op_rotary.py +19 -0
  53. onnx_diagnostic/reference/ops/op_scan.py +65 -0
  54. onnx_diagnostic/reference/ops/op_scatter_elements.py +107 -0
  55. onnx_diagnostic/reference/ops/op_scatternd_of_shape.py +22 -0
  56. onnx_diagnostic/reference/ops/op_simplified_layer_normalization.py +8 -0
  57. onnx_diagnostic/reference/ops/op_skip_layer_normalization.py +13 -0
  58. onnx_diagnostic/reference/ops/op_slice.py +20 -0
  59. onnx_diagnostic/reference/ops/op_transpose_cast.py +16 -0
  60. onnx_diagnostic/reference/ops/op_tri_matrix.py +17 -0
  61. onnx_diagnostic/reference/ort_evaluator.py +652 -0
  62. onnx_diagnostic/reference/quantized_tensor.py +46 -0
  63. onnx_diagnostic/reference/report_results_comparison.py +95 -0
  64. onnx_diagnostic/reference/torch_evaluator.py +669 -0
  65. onnx_diagnostic/reference/torch_ops/__init__.py +56 -0
  66. onnx_diagnostic/reference/torch_ops/_op_run.py +335 -0
  67. onnx_diagnostic/reference/torch_ops/access_ops.py +94 -0
  68. onnx_diagnostic/reference/torch_ops/binary_ops.py +108 -0
  69. onnx_diagnostic/reference/torch_ops/controlflow_ops.py +121 -0
  70. onnx_diagnostic/reference/torch_ops/generator_ops.py +36 -0
  71. onnx_diagnostic/reference/torch_ops/nn_ops.py +196 -0
  72. onnx_diagnostic/reference/torch_ops/other_ops.py +106 -0
  73. onnx_diagnostic/reference/torch_ops/reduce_ops.py +130 -0
  74. onnx_diagnostic/reference/torch_ops/sequence_ops.py +65 -0
  75. onnx_diagnostic/reference/torch_ops/shape_ops.py +121 -0
  76. onnx_diagnostic/reference/torch_ops/unary_ops.py +93 -0
  77. onnx_diagnostic/tasks/__init__.py +90 -0
  78. onnx_diagnostic/tasks/automatic_speech_recognition.py +188 -0
  79. onnx_diagnostic/tasks/data/__init__.py +13 -0
  80. onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx +0 -0
  81. onnx_diagnostic/tasks/feature_extraction.py +162 -0
  82. onnx_diagnostic/tasks/fill_mask.py +89 -0
  83. onnx_diagnostic/tasks/image_classification.py +144 -0
  84. onnx_diagnostic/tasks/image_text_to_text.py +581 -0
  85. onnx_diagnostic/tasks/image_to_video.py +127 -0
  86. onnx_diagnostic/tasks/mask_generation.py +143 -0
  87. onnx_diagnostic/tasks/mixture_of_expert.py +79 -0
  88. onnx_diagnostic/tasks/object_detection.py +134 -0
  89. onnx_diagnostic/tasks/sentence_similarity.py +89 -0
  90. onnx_diagnostic/tasks/summarization.py +227 -0
  91. onnx_diagnostic/tasks/text2text_generation.py +230 -0
  92. onnx_diagnostic/tasks/text_classification.py +89 -0
  93. onnx_diagnostic/tasks/text_generation.py +352 -0
  94. onnx_diagnostic/tasks/text_to_image.py +95 -0
  95. onnx_diagnostic/tasks/zero_shot_image_classification.py +128 -0
  96. onnx_diagnostic/torch_export_patches/__init__.py +21 -0
  97. onnx_diagnostic/torch_export_patches/eval/__init__.py +725 -0
  98. onnx_diagnostic/torch_export_patches/eval/model_cases.py +898 -0
  99. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1098 -0
  100. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +311 -0
  101. onnx_diagnostic/torch_export_patches/patch_details.py +340 -0
  102. onnx_diagnostic/torch_export_patches/patch_expressions.py +108 -0
  103. onnx_diagnostic/torch_export_patches/patch_inputs.py +211 -0
  104. onnx_diagnostic/torch_export_patches/patch_module.py +1047 -0
  105. onnx_diagnostic/torch_export_patches/patch_module_helper.py +184 -0
  106. onnx_diagnostic/torch_export_patches/patches/__init__.py +0 -0
  107. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +1090 -0
  108. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +2139 -0
  109. onnx_diagnostic/torch_export_patches/serialization/__init__.py +46 -0
  110. onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py +34 -0
  111. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +313 -0
  112. onnx_diagnostic/torch_models/__init__.py +0 -0
  113. onnx_diagnostic/torch_models/code_sample.py +343 -0
  114. onnx_diagnostic/torch_models/hghub/__init__.py +1 -0
  115. onnx_diagnostic/torch_models/hghub/hub_api.py +422 -0
  116. onnx_diagnostic/torch_models/hghub/hub_data.py +234 -0
  117. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +4905 -0
  118. onnx_diagnostic/torch_models/hghub/model_inputs.py +388 -0
  119. onnx_diagnostic/torch_models/hghub/model_specific.py +76 -0
  120. onnx_diagnostic/torch_models/llms.py +2 -0
  121. onnx_diagnostic/torch_models/untrained/__init__.py +0 -0
  122. onnx_diagnostic/torch_models/untrained/llm_phi2.py +113 -0
  123. onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py +76 -0
  124. onnx_diagnostic/torch_models/validate.py +2124 -0
  125. onnx_diagnostic/torch_onnx/__init__.py +0 -0
  126. onnx_diagnostic/torch_onnx/runtime_info.py +289 -0
  127. onnx_diagnostic/torch_onnx/sbs.py +440 -0
  128. onnx_diagnostic-0.8.0.dist-info/METADATA +213 -0
  129. onnx_diagnostic-0.8.0.dist-info/RECORD +132 -0
  130. onnx_diagnostic-0.8.0.dist-info/WHEEL +5 -0
  131. onnx_diagnostic-0.8.0.dist-info/licenses/LICENSE.txt +19 -0
  132. onnx_diagnostic-0.8.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,898 @@
1
+ import numpy as np
2
+ import torch
3
+ from ..patches.patch_torch import patched_vmap
4
+
5
# Short aliases for the dynamic-shape markers used by every model case's
# ``_dynamic`` specification below.
DIM = torch.export.Dim          # named dynamic dimension, e.g. DIM("batch")
DYN = torch.export.Dim.DYNAMIC  # anonymous dynamic dimension
7
+
8
+
9
class AtenRollRelu(torch.nn.Module):
    """Case: ``torch.roll`` with a negative shift on the last axis, then ``relu``."""

    def forward(self, x):
        return torch.relu(torch.roll(x, -1, -1))

    # One sample input; first dimension is declared dynamic ("batch").
    _inputs = ((torch.arange(8 * 3) + 10).reshape((2, -1, 4)).to(torch.float32),)
    _dynamic = {"x": {0: DIM("batch")}}


class AtenRollPos(torch.nn.Module):
    """Case: ``torch.roll`` with a positive shift on the last axis (no activation)."""

    def forward(self, x):
        return torch.roll(x, 1, -1)

    _inputs = ((torch.arange(8 * 3) + 10).reshape((2, -1, 4)).to(torch.float32),)
    _dynamic = {"x": {0: DIM("batch")}}
23
+
24
+
25
class InplaceAdd(torch.nn.Module):
    """Case: in-place augmented assignment ``x += bias`` on an input tensor."""

    def __init__(self):
        super().__init__()
        # Constant (non-parameter) tensor added to the input in place.
        self.bias = torch.ones((1, 4), dtype=torch.float32)

    def forward(self, x):
        x += self.bias
        return x

    # Two input sets with different batch sizes to exercise the dynamic dim.
    _inputs = [(torch.rand(3, 4),), (torch.rand(5, 4),)]
    _dynamic = {"x": {0: DIM("batch")}}


class InplaceAdd2(torch.nn.Module):
    """Case: explicit in-place method ``Tensor.add_`` on an input tensor."""

    def __init__(self):
        super().__init__()
        self.bias = torch.ones((1, 4), dtype=torch.float32)

    def forward(self, x):
        x.add_(self.bias)
        return x

    _inputs = [(torch.rand(3, 4),), (torch.rand(5, 4),)]
    _dynamic = {"x": {0: DIM("batch")}}


class InplaceAdd_Mul(torch.nn.Module):
    """Case: in-place ``add_`` followed by an out-of-place multiplication."""

    def __init__(self):
        super().__init__()
        self.bias = torch.ones((1, 4), dtype=torch.float32)

    def forward(self, x):
        x.add_(self.bias)
        return x * 2

    _inputs = [(torch.rand(3, 4),), (torch.rand(5, 4),)]
    _dynamic = {"x": {0: DIM("batch")}}


class InplaceCloneAdd_(torch.nn.Module):
    """Case: ``clone`` before ``add_`` so the mutation targets a local copy."""

    def __init__(self):
        super().__init__()
        self.bias = torch.ones((1, 4), dtype=torch.float32)

    def forward(self, x):
        x = x.clone()
        x.add_(self.bias)
        return x

    _inputs = [(torch.rand(3, 4),), (torch.rand(5, 4),)]
    _dynamic = {"x": {0: DIM("batch")}}
76
+
77
+
78
class InplaceSetItemSquare(torch.nn.Module):
    """Case: slice assignment ``x[:2, :3] = 1`` mutating the input in place."""

    def forward(self, x):
        x[:2, :3] = 1
        return x

    _inputs = [(torch.rand(5, 5),), (torch.rand(7, 5),)]
    _dynamic = {"x": {0: DIM("batch")}}


class InplaceSetItemSquareAdd(torch.nn.Module):
    """Case: slice assignment followed by one out-of-place use of the mutated tensor."""

    def forward(self, x):
        x[:2, :3] = 1
        return x + 2

    _inputs = [(torch.rand(5, 5),), (torch.rand(7, 5),)]
    _dynamic = {"x": {0: DIM("batch")}}


class InplaceSetItemSquareAdd2(torch.nn.Module):
    """Case: slice assignment followed by two uses of the mutated tensor."""

    def forward(self, x):
        x[:2, :3] = 1
        return x + 2, x + 3

    _inputs = [(torch.rand(5, 5),), (torch.rand(7, 5),)]
    _dynamic = {"x": {0: DIM("batch")}}
103
+
104
+
105
class InplaceSetItemEllipsis_1(torch.nn.Module):
    """Case: ellipsis indexed assignment ``copy[..., index] = update`` (last dim 4)."""

    def __init__(self):
        super().__init__()
        # Constant buffer cloned in forward so the mutation stays local.
        self.params = torch.zeros((1, 8192, 4), dtype=torch.float32)

    def forward(self, index, update):
        copy = self.params.clone()
        copy[..., index] = update
        return copy

    _inputs = (
        (torch.from_numpy(np.array([0, 3, 2, 1])).to(torch.int64)),
        (torch.arange(4 * 8192) + 10).reshape((-1, 4)).to(torch.float32),
    )
    _dynamic = {"index": {0: DIM("batch")}, "update": {0: DIM("batch"), 1: DYN}}


class InplaceSetItemEllipsis_2(torch.nn.Module):
    """Same pattern as ``InplaceSetItemEllipsis_1`` but with last dimension 6.

    NOTE(review): the index values reach 5 while ``update`` still has last
    dimension 4 — presumably a deliberate variant; keep as is.
    """

    def __init__(self):
        super().__init__()
        self.params = torch.zeros((1, 8192, 6), dtype=torch.float32)

    def forward(self, index, update):
        copy = self.params.clone()
        copy[..., index] = update
        return copy

    _inputs = (
        torch.from_numpy(np.array([0, 3, 2, 5])).to(torch.int64),
        (torch.arange(4 * 8192) + 10).reshape((-1, 4)).to(torch.float32),
    )
    _dynamic = {"index": {0: DIM("batch")}, "update": {0: DIM("batch"), 1: DYN}}


class InplaceSetItemMask(torch.nn.Module):
    """Case: boolean-mask assignment ``x[mask] = 2`` mutating the input."""

    def forward(self, x):
        mask = x.to(bool)
        x[mask] = 2
        return x

    _inputs = [(torch.randn((2, 3, 3)),), (torch.randn((3, 3, 3)),)]
    _dynamic = {"x": {0: DIM("batch")}}
147
+
148
+
149
class AtenInterpolate(torch.nn.Module):
    """Case: bilinear ``interpolate`` with a fixed scale factor."""

    def forward(self, x):
        y = torch.nn.functional.interpolate(
            x,
            scale_factor=2.0,
            mode="bilinear",
            recompute_scale_factor=False,
        )
        return y

    _inputs = (torch.randn(2, 2, 3, 4, requires_grad=False),)
    _dynamic = {"x": {0: DIM("batch")}}


class AtenNonZero(torch.nn.Module):
    """Case: ``torch.nonzero`` — output size depends on input values."""

    def forward(self, x):
        y = torch.nonzero(x)
        return y

    _inputs = (torch.randn(3, 4, requires_grad=False),)
    _dynamic = {"x": {0: DIM("batch")}}


class AtenNonZeroTuple(torch.nn.Module):
    """Case: ``torch.nonzero(..., as_tuple=True)`` returning per-axis indices."""

    def forward(self, x):
        y = torch.nonzero(x, as_tuple=True)
        return y[0], y[1]

    _inputs = (torch.randn(3, 4, requires_grad=False),)
    _dynamic = {"x": {0: DIM("batch")}}


class AtenAsStrided(torch.nn.Module):
    """Case: ``torch.as_strided`` with hard-coded sizes and strides."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        y = torch.as_strided(x, (2, 2, 8, 4), (128, 8, 16, 1))
        return y

    _inputs = (torch.randn((2, 2, 8, 8), requires_grad=False),)
    _dynamic = {"x": {0: DIM("batch")}}


class ComplexPolar(torch.nn.Module):
    """Case: ``torch.polar`` producing a complex tensor from abs/angle."""

    def forward(self, x, angle):
        return torch.polar(x, angle)

    _inputs = (torch.rand(4, 4), torch.rand(4, 4))
    _dynamic = {"x": {0: DIM("batch")}, "angle": {0: DIM("batch")}}
199
+
200
+
201
class ControlFlowCond(torch.nn.Module):
    """Case: ``torch.cond`` with two local branch functions, one output."""

    def forward(self, x):
        def true_fn(x):
            return torch.sin(x)

        def false_fn(x):
            return torch.cos(x)

        return torch.cond(x.sum() > 0, true_fn, false_fn, [x])

    _inputs = (torch.rand(5, 3),)
    _dynamic = {"x": {0: DIM("batch")}}


class ControlFlowCond2Outputs(torch.nn.Module):
    """Case: ``torch.cond`` whose branches each return two tensors."""

    def forward(self, x):
        def true_fn(x):
            return torch.sin(x), torch.cos(x)

        def false_fn(x):
            return torch.cos(x), torch.sin(x)

        return torch.cond(x.sum() > 0, true_fn, false_fn, [x])

    _inputs = (torch.rand(5, 3),)
    _dynamic = {"x": {0: DIM("batch")}}


class ControlFlowCond2Inputs(torch.nn.Module):
    """Case: ``torch.cond`` whose branches take two operands."""

    def forward(self, x, y):
        def true_fn(x, y):
            return torch.sin(x), torch.cos(x) + y

        def false_fn(x, y):
            return torch.cos(x), torch.sin(x) + y

        return torch.cond(x.sum() > 0, true_fn, false_fn, [x, y])

    _inputs = torch.rand(5, 3), torch.rand(5, 3)
    _dynamic = {"x": {0: DIM("batch")}, "y": {0: DIM("batch")}}


class ControlFlowNestCond(torch.nn.Module):
    """Case: a ``torch.cond`` nested inside the true branch of another one."""

    def forward(self, x):
        def true_fn2(x):
            def true_fn1(x):
                return torch.sin(x)

            def false_fn1(x):
                return torch.cos(x)

            return torch.cond(x.sum() < 0, true_fn1, false_fn1, [x])

        def false_fn2(x):
            return -x

        return torch.cond(x.sum() > 0, true_fn2, false_fn2, [x])

    _inputs = (torch.rand(5, 3),)
    _dynamic = {"x": {0: DIM("batch")}}


class ControlFlowCondConstant(torch.nn.Module):
    """Case: ``torch.cond`` branches creating constant tensors of different shapes."""

    def forward(self, x):
        def true_fn(x):
            # Shape taken from the input.
            return torch.sin(x) - torch.ones(x.shape, dtype=x.dtype)

        def false_fn(x):
            # Hard-coded shape, relies on broadcasting against x.
            return torch.cos(x) + torch.ones((1, 1024), dtype=x.dtype)

        return torch.cond(x.sum() > 0, true_fn, false_fn, [x])

    _inputs = (torch.rand(1024, 1024),)
    _dynamic = {"x": {0: DIM("batch")}}
275
+
276
+
277
class ControlFlowCondNestedModule(torch.nn.Module):
    """Case: ``torch.cond`` whose true branch calls a submodule that itself
    contains another ``torch.cond`` using its own parameter."""

    class Submodule(torch.nn.Module):
        def __init__(self):
            super().__init__()
            # Nested weight
            self.weight = torch.nn.Parameter(torch.tensor([100.0]))

        def forward(self, x):
            def true_fn(x):
                return x * self.weight

            def false_fn(x):
                return x / self.weight

            y = torch.cond(torch.abs(x).sum() > 100, true_fn, false_fn, [x])
            return y

    def __init__(self):
        super().__init__()
        self.submodule = ControlFlowCondNestedModule.Submodule()
        self.weight = torch.nn.Parameter(torch.tensor([42.0]))

    def forward(self, x):
        def true_fn(x):
            return self.submodule(x)

        def false_fn(x):
            return x - self.weight

        y = torch.cond(x.sum() > 0, true_fn, false_fn, [x])
        return y

    _inputs = (torch.tensor([-1, 2]),)
    _dynamic = {"x": {0: DIM("batch")}}
311
+
312
+
313
class ControlFlowCondNonZero(torch.nn.Module):
    """Case: ``torch.cond`` where one branch uses data-dependent ``nonzero``
    and the other produces empty index tensors via ``torch.where``."""

    def forward(self, input_ids, image_features, vocab_size):
        def then_branch(input_ids, image_features, vocab_size):
            input_shape = input_ids.size()
            input_ids = input_ids.view(-1, input_shape[-1])

            # Data-dependent selection of negative sentinel positions.
            condition = (input_ids < 0) & (input_ids > -int(1e9))
            positions = torch.nonzero(condition, as_tuple=True)
            input_ids = input_ids.clamp_min(0).clamp_max(vocab_size)
            return (input_ids, positions[0], positions[1])

        def else_branch(input_ids, image_features, vocab_size):
            # torch.where on an all-False mask yields empty index tensors,
            # matching the then-branch's output structure.
            r = torch.where(torch.zeros((1, 1), dtype=torch.bool))
            return (input_ids, r[0], r[1])

        a, b, c = torch.cond(
            image_features.numel() > 0,
            then_branch,
            else_branch,
            [input_ids, image_features, vocab_size],
        )
        return a, b, c

    # Second input set has empty image_features to trigger the else branch.
    _inputs = [
        (
            (torch.arange(24) - 8).reshape((2, -1)).to(torch.int64),
            torch.arange(32).reshape((2, -1)).to(torch.float32),
            1025,
        ),
        (
            (torch.arange(24) - 8).reshape((2, -1)).to(torch.int64),
            torch.tensor([[], []], dtype=torch.float32),
            1025,
        ),
    ]
    # Positional form: one spec per forward argument, None for the int.
    _dynamic = (
        {0: DIM("batch")},
        {0: DIM("batch"), 1: DIM("seq_length")},
        None,
    )
353
+
354
+
355
class ControlFlowCondIdentity_153832(torch.nn.Module):
    """`#153832 <https://github.com/pytorch/pytorch/issues/153832>`_

    Reproduction: a ``torch.cond`` branch that returns its input unchanged.
    """

    def forward(self, x, y):
        def branch_cond_then_1(x):
            x = torch.abs(x) + 1
            return x

        def branch_cond_else_1(x):
            return x  # fails but succeeds with x.clone()

        x = torch.cond(x.sum() > 0, branch_cond_then_1, branch_cond_else_1, [x])
        return x + y

    _inputs = [
        (torch.rand((3, 4)), torch.rand((3, 4))),
        (torch.rand((4, 5)), torch.rand((4, 5))),
    ]
    _dynamic = {"x": {0: DYN, 1: DYN}, "y": {0: DYN, 1: DYN}}
374
+
375
+
376
class ControlFlowScan(torch.nn.Module):
    """Case: ``higher_order.scan`` with one carry and one scanned input."""

    @staticmethod
    def add(carry: torch.Tensor, y: torch.Tensor):
        # Returns [next_carry, per-step output]; both are the running sum here.
        next_carry = carry + y
        return [next_carry, next_carry]

    def forward(self, x):
        init = torch.zeros_like(x[0])
        carry, _out = torch.ops.higher_order.scan(
            ControlFlowScan.add, [init], [x], additional_inputs=[]
        )
        return carry

    _inputs = (torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32),)
    _dynamic = {"x": {0: DIM("batch")}}


class ControlFlowScan2Carried(torch.nn.Module):
    """Case: ``higher_order.scan`` with two carried states and two scanned inputs."""

    @staticmethod
    def add(carry1: torch.Tensor, carry2: torch.Tensor, y1: torch.Tensor, y2: torch.Tensor):
        next_carry1 = carry1 + y1
        next_carry2 = carry2 * y2
        return [next_carry1, next_carry2, next_carry1, next_carry2]

    def forward(self, x):
        init1 = torch.zeros_like(x[0])
        init2 = torch.ones_like(x[0])
        carry1, carry2, out1, out2 = torch.ops.higher_order.scan(
            ControlFlowScan2Carried.add,
            [init1, init2],
            [x, x * 2],
            # dim=0, # 01/31/2025, not supported anymore
            additional_inputs=[],
        )
        return carry1, carry2, out1, out2

    _inputs = (
        torch.tensor([[1, 2, 3, -1], [4, 5, 6, -1], [7, 8, 9, -1]], dtype=torch.float32),
    )
    _dynamic = {"x": {0: DIM("batch")}}


class ControlFlowScanCDist(torch.nn.Module):
    """Case: pairwise-distance computation expressed with ``higher_order.scan``."""

    @staticmethod
    def dist(carry: torch.Tensor, x: torch.Tensor):
        sub = carry - x.reshape((1, -1))
        sq = sub * sub
        rd = sq.sum(axis=1) ** 0.5
        # clone --> UnsupportedAliasMutationException:
        # Combine_fn might be aliasing the input!
        return [carry.clone(), rd]

    def forward(self, x):
        _carry, out = torch.ops.higher_order.scan(
            ControlFlowScanCDist.dist,
            [x],
            [x],
            # dim=0, # 01/31/2025, not supported anymore
            additional_inputs=[],
        )
        return out

    _inputs = (
        torch.tensor([[1, 2, 3, -1], [4, 5, 6, -1], [7, 8, 9, -1]], dtype=torch.float32),
    )
    _dynamic = {"x": {0: DIM("batch")}}
442
+
443
+
444
class ControlFlowScanCDist2(torch.nn.Module):
    """Case: ``higher_order.scan`` using ``additional_inputs`` for the full matrix."""

    @staticmethod
    def dist(unused: torch.Tensor, x: torch.Tensor, samex: torch.Tensor):
        sub = samex - x.reshape((1, -1))
        sq = sub * sub
        rd = torch.sqrt(sq.sum(axis=1))
        # clone --> UnsupportedAliasMutationException:
        # Combine_fn might be aliasing the input!
        return [unused.clone(), rd]

    def forward(self, x):
        # Dummy carry; the real data travels through additional_inputs.
        z = torch.tensor([0], dtype=torch.float32)
        y = x.clone()
        out = torch.ops.higher_order.scan(
            ControlFlowScanCDist2.dist,
            [z],
            [x],
            # dim=0, # 01/31/2025, not supported anymore
            additional_inputs=[y],
        )
        return out[1]

    _inputs = (
        torch.tensor([[1, 2, 3, -1], [4, 5, 6, -1], [7, 8, 9, -1]], dtype=torch.float32),
    )
    _dynamic = {"x": {0: DIM("batch")}}


class ControlFlowScanCDistXY(torch.nn.Module):
    """Case: cdist(x, y) via ``scan`` — y is the carry, rows of x are scanned."""

    @staticmethod
    def dist(y: torch.Tensor, scanned_x: torch.Tensor):
        sub = y - scanned_x.reshape((1, -1))
        sq = sub * sub
        rd = torch.sqrt(sq.sum(axis=1))
        # clone --> UnsupportedAliasMutationException:
        # Combine_fn might be aliasing the input!
        return [y.clone(), rd]

    def forward(self, x, y):
        _carry, out = torch.ops.higher_order.scan(
            ControlFlowScanCDistXY.dist,
            [y],
            [x],
            # dim=0, # 01/31/2025, not supported anymore
            additional_inputs=[],
        )
        return out

    _inputs = [
        (torch.randn(3, 4), torch.randn(5, 4)),
        (torch.randn(13, 14), torch.randn(15, 14)),
    ]
    # x and y share the trailing "dim" dimension.
    _dynamic = {
        "x": {0: DIM("x_rows"), 1: DIM("dim")},
        "y": {0: DIM("y_rows"), 1: DIM("dim")},
    }
500
+
501
+
502
class ControlFlowScanInplace_153705(torch.nn.Module):
    """
    `#153705 <https://github.com/pytorch/pytorch/issues/153705>`_

    Reproduction: row-wise assignment into a cloned carry inside ``scan``.
    """

    def forward(self, x, y):
        def loop_body_1(z, iv, x, y):
            z = z.clone()
            i = iv.item()
            z[i, :] = ((x[i, :] - y) ** 2).sum(dim=-1)
            return [z, iv]

        z = torch.empty((x.shape[0], y.shape[0]))
        # NOTE(review): [x, y] is passed positionally as additional_inputs,
        # matching the issue's reproduction — keep as is.
        r = torch.ops.higher_order.scan(
            loop_body_1, [z], [torch.arange(x.shape[0], dtype=torch.int64)], [x, y]
        )
        return r[0]

    _inputs = [
        (torch.rand((3, 4)), torch.rand((5, 4))),
        (torch.rand((4, 5)), torch.rand((6, 5))),
    ]
    _dynamic = {"x": {0: DYN, 1: DYN}, "y": {0: DYN, 1: DYN}}
525
+
526
+
527
class ControlFlowScanDecomposition_151564(torch.nn.Module):
    """
    `#151564 <https://github.com/pytorch/pytorch/issues/151564>`_

    A Python loop (eager path) and its ``scan``-based equivalent (export path);
    ``forward`` picks one depending on ``torch.compiler.is_exporting()``.
    """

    @classmethod
    def dummy_loop(cls, padded: torch.Tensor, pos: torch.Tensor):
        # Eager reference implementation: copy the first pos[i] entries of row i.
        copy = torch.zeros(padded.shape)
        for i in range(pos.shape[0]):
            p = pos[i]
            copy[i, :p] = padded[i, :p]
        return copy

    @classmethod
    def dummy_loop_with_scan(cls, padded: torch.Tensor, pos: torch.Tensor):
        def pad_row(padded, p):
            row = torch.zeros((padded.shape[0],))
            torch._check(p.item() > 0)
            torch._check(p.item() < padded.shape[0])
            # this check is not always true, we add it anyway to make this dimension >= 2
            # and avoid raising an exception about dynamic dimension in {0, 1}
            if torch.compiler.is_exporting():
                torch._check(p.item() > 1)
            row[: p.item()] = padded[: p.item()]
            return (row,)

        return torch.ops.higher_order.scan(
            pad_row,
            [],
            [padded, pos],
            [],
        )

    @classmethod
    def select_when_exporting(cls, f, f_scan):
        # Use the scan variant only while exporting; eager loop otherwise.
        return f_scan if torch.compiler.is_exporting() else f

    def forward(self, images, position):
        return self.select_when_exporting(self.dummy_loop, self.dummy_loop_with_scan)(
            images, position
        )

    _inputs = [(torch.randn((5, 6)), torch.arange(5, dtype=torch.int64) + 1)]
    _dynamic = {"images": {0: DYN, 1: DYN}, "position": {0: DYN}}
571
+
572
+
573
class SignatureInt1(torch.nn.Module):
    """Case: forward signature mixing a tensor with a defaulted ``int`` used in a slice."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, i: int = 2):
        # i appears as a slice bound, so the output keeps a length-1 column.
        return torch.sigmoid(self.linear(x)) - self.buff + x[:, i : i + 1]

    _inputs = [
        ((torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32), 1),
        ((torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32), 2),
    ]
    # Positional form: spec for x, None for the int argument.
    _dynamic = ({0: DIM("batch", min=1, max=1024)}, None)


class SignatureFloat1(torch.nn.Module):
    """Case: forward signature mixing a tensor with a defaulted ``float`` scalar."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, alpha: float = 2.0):
        return torch.sigmoid(self.linear(x)) - self.buff * alpha

    _inputs = [
        ((torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32), 1.5),
        ((torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32), 2.5),
    ]
    _dynamic = ({0: DIM("batch", min=1, max=1024)}, None)


class SignatureInt2(torch.nn.Module):
    """Case: defaulted ``int`` used as a direct index (drops a dimension)."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, i: int = 2):
        return torch.sigmoid(self.linear(x)) - self.buff + x[:, i]

    _inputs = ((torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32), 1)
    _dynamic = {
        "x": {0: DIM("batch")},
        "i": None,  # DIM("ii", min=0, max=3)}
    }
619
+
620
+
621
class SignatureListFixedLength(torch.nn.Module):
    """Case: a ``list`` argument whose length (2) is the same for every call."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, lx: list):
        return (
            torch.sigmoid(self.linear(x)) - self.buff + lx[0] * lx[1].sum(axis=1, keepdim=True)
        )

    _inputs = [
        (
            (torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(4) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(4 * 2) + 10).reshape((-1, 2)).to(torch.float32),
            ],
        ),
        (
            (torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(8) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(8 * 2) + 10).reshape((-1, 2)).to(torch.float32),
            ],
        ),
    ]
    _dynamic = {
        "x": {0: DIM("batch")},
        "lx": [{0: DIM("batch")}, {0: DIM("batch")}],
    }


class SignatureListVariableLength(torch.nn.Module):
    """Case: a ``list`` argument whose length differs between calls (2 then 3)."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, lx: list):
        # cat tolerates any list length, unlike indexing fixed positions.
        t = torch.cat(lx, dim=1).sum(axis=1, keepdim=True)
        return torch.sigmoid(self.linear(x)) - self.buff + t

    _inputs = [
        (
            (torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(4) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(4 * 2) + 10).reshape((-1, 2)).to(torch.float32),
            ],
        ),
        (
            (torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(8) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(8 * 2) + 10).reshape((-1, 2)).to(torch.float32),
                (torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            ],
        ),
    ]
    # NOTE(review): spec lists two entries though the second call passes three
    # tensors — presumably intentional for this case; verify against the runner.
    _dynamic = {
        "x": {0: DIM("batch")},
        "lx": [{0: DIM("batch")}, {0: DIM("batch")}],
    }
685
+
686
+
687
class BuildInLen(torch.nn.Module):
    """Case: built-in ``len`` on a list argument used for branching."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, lx: list):
        t = lx[0] * lx[1].sum(axis=1, keepdim=True)
        # Python-level branch on list length, resolved at trace time.
        if len(lx) > 2:
            t = t + lx[2].sum(axis=1, keepdim=True)
        return torch.sigmoid(self.linear(x)) - self.buff + t

    _inputs = [
        (
            (torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(4) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(4 * 2) + 10).reshape((-1, 2)).to(torch.float32),
            ],
        ),
        (
            (torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(8) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(8 * 2) + 10).reshape((-1, 2)).to(torch.float32),
                (torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            ],
        ),
    ]
    _dynamic = {
        "x": {0: DIM("batch")},
        "lx": [{0: DIM("batch")}, {0: DIM("batch")}],
    }


class BuildInIsInstance(torch.nn.Module):
    """Case: built-in ``isinstance`` on an argument that is a list or a tensor."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, lx: list | torch.Tensor):
        # Python-level type dispatch, resolved at trace time.
        if isinstance(lx, list):
            t = lx[0] * lx[1].sum(axis=1, keepdim=True)
            return torch.sigmoid(self.linear(x)) - self.buff + t
        return torch.sigmoid(self.linear(x)) - self.buff + lx

    _inputs = [
        (
            (torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(4) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(4 * 2) + 10).reshape((-1, 2)).to(torch.float32),
            ],
        ),
        (
            (torch.arange(8 * 3) + 10).reshape((-1, 3)).to(torch.float32),
            [
                (torch.arange(8) + 10).reshape((-1, 1)).to(torch.float32),
                (torch.arange(8 * 2) + 10).reshape((-1, 2)).to(torch.float32),
            ],
        ),
    ]
    _dynamic = {
        "x": {0: DIM("batch")},
        "lx": [{0: DIM("batch")}, {0: DIM("batch")}],
    }
754
+
755
+
756
class SignatureShapeAsIndex(torch.nn.Module):
    """Case: one input's dynamic shape (``y.shape[1]``) used to slice another."""

    def __init__(self, n_dims: int = 3, n_targets: int = 1):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)
        self.buff = torch.nn.parameter.Buffer(torch.tensor([0.5] * n_targets))

    def forward(self, x, y):
        t = torch.sigmoid(self.linear(x)) + x
        return t[:, : y.shape[1]]

    _inputs = (
        (torch.arange(4 * 3) + 10).reshape((-1, 3)).to(torch.float32),
        (torch.arange(4 * 2) + 10).reshape((-1, 2)).to(torch.float32),
    )
    _dynamic = {
        "x": {0: DIM("batch", min=0, max=1024)},
        "y": {
            0: DIM("batch", min=0, max=1024),
            1: DIM("length", min=0, max=2),
        },
    }


class TypeBFloat16(torch.nn.Module):
    """Case: round trip through ``bfloat16`` with an addition in between."""

    def forward(self, x):
        xb = x.to(torch.bfloat16)
        return (xb + xb).to(torch.float32)

    _inputs = (torch.rand(4, 4).to(torch.float32),)
    _dynamic = {"x": {0: DIM("batch")}}
786
+
787
+
788
class CropLastDimensionWithTensorShape(torch.nn.Module):
    """Case: last dimension cropped using another tensor's *shape* (``y.shape[0]``)."""

    def forward(self, x, y):
        return x[..., : y.shape[0]]

    _inputs = [
        (
            torch.rand(3, 4, 4).to(torch.float32),
            torch.rand(
                2,
            ).to(torch.float32),
        ),
        (
            torch.rand(6, 4, 4).to(torch.float32),
            torch.rand(
                3,
            ).to(torch.float32),
        ),
    ]
    _dynamic = {
        "x": {0: DIM("batch")},
        "y": {0: DIM("crop", min=1, max=3)},
    }


class CropLastDimensionWithTensorContent(torch.nn.Module):
    """Case: last dimension cropped using another tensor's *content* (``shape[0]``)."""

    def forward(self, x, shape):
        return x[..., : shape[0]]

    _inputs = [
        (torch.rand(3, 4, 4).to(torch.float32), torch.tensor([2], dtype=torch.int64)),
        (torch.rand(6, 4, 4).to(torch.float32), torch.tensor([3], dtype=torch.int64)),
    ]
    # shape is a static (non-dynamic) input.
    _dynamic = {"x": {0: DIM("batch")}, "shape": {}}
822
+
823
+
824
class SignatureListFixedWithNone(torch.nn.Module):
    """Case: a list argument containing ``None`` entries, guarded by ``is not None``."""

    def forward(self, lx):
        x = lx[0]
        if lx[1] is not None:
            x += lx[1]
        if lx[2] is not None:
            x += lx[2]
        return x

    # First call has a None in slot 2; second fills all three slots.
    _inputs = [
        ([torch.rand((4, 4)), torch.rand((4, 4)), None],),
        ([torch.rand((4, 4)), torch.rand((4, 4)), torch.rand((4, 4))],),
    ]
    _dynamic = {
        "lx": [{0: DIM("batch")}, {0: DIM("batch")}],
    }


class CreateFromShape(torch.nn.Module):
    """Case: new tensor allocated from the input's (dynamic) shape."""

    def forward(self, x):
        y = torch.ones((x.shape[0], x.shape[1] + 1))
        return y

    _inputs = [(torch.rand((4, 4)),), (torch.rand((5, 5)),)]
    _dynamic = {"x": {0: DIM("dx"), 1: DIM("dy")}}


class CreateFromShapeThroughFunction(torch.nn.Module):
    """Case: a dynamic dimension passed through a plain Python helper function."""

    @staticmethod
    def add_one(dim):
        return dim + 1

    def forward(self, x):
        dy1 = CreateFromShapeThroughFunction.add_one(x.shape[1])
        y = torch.ones((x.shape[0], dy1))
        return y

    _inputs = [(torch.rand((4, 4)),)]
    _dynamic = {"x": {0: DIM("dx"), 1: DIM("dy")}}
863
+
864
+
865
class Vmap(torch.nn.Module):
    """Case: ``torch.vmap`` over a simple elementwise lambda."""

    def forward(self, x, y):
        f = lambda x, y: x * y + 1  # noqa: E731
        return torch.vmap(f)(x, y)

    _inputs = [(torch.tensor([1.0, 2.0, 3.0]), torch.tensor([0.1, 0.2, 0.3]))]
    _dynamic = {"x": {0: DYN}, "y": {0: DYN}}


class VmapPython(torch.nn.Module):
    """Same computation as ``Vmap`` but through this package's ``patched_vmap``."""

    def forward(self, x, y):
        f = lambda x, y: x * y + 1  # noqa: E731
        return patched_vmap(f)(x, y)

    _inputs = [(torch.tensor([1.0, 2.0, 3.0]), torch.tensor([0.1, 0.2, 0.3]))]
    _dynamic = {"x": {0: DYN}, "y": {0: DYN}}
881
+
882
+
883
class ExportWithDimension0(torch.nn.Module):
    """Case: export traced with a zero-sized first dimension (0, 3)."""

    def forward(self, x):
        return x @ torch.arange(x.shape[1], dtype=torch.float32).reshape((-1, 1))

    _inputs = [(torch.empty((0, 3), dtype=torch.float32),)]
    _dynamic = {"x": {0: DYN, 1: DYN}}
    # Non-degenerate input used to validate the exported model.
    _valid = [(torch.rand((2, 3), dtype=torch.float32),)]


class ExportWithDimension1(torch.nn.Module):
    """Case: export traced with a size-1 first dimension (1, 3)."""

    def forward(self, x):
        return x @ torch.arange(x.shape[1], dtype=torch.float32).reshape((-1, 1))

    _inputs = [(torch.zeros((1, 3), dtype=torch.float32),)]
    _dynamic = {"x": {0: DYN, 1: DYN}}
    _valid = [(torch.rand((2, 3), dtype=torch.float32),)]