onnx-diagnostic 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +66 -8
- onnx_diagnostic/ext_test_case.py +2 -0
- onnx_diagnostic/helpers/_log_helper.py +461 -0
- onnx_diagnostic/helpers/cache_helper.py +250 -15
- onnx_diagnostic/helpers/helper.py +146 -10
- onnx_diagnostic/helpers/log_helper.py +404 -315
- onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
- onnx_diagnostic/helpers/onnx_helper.py +13 -7
- onnx_diagnostic/helpers/torch_helper.py +33 -11
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/feature_extraction.py +86 -5
- onnx_diagnostic/tasks/image_text_to_text.py +260 -56
- onnx_diagnostic/tasks/mask_generation.py +139 -0
- onnx_diagnostic/tasks/text2text_generation.py +2 -2
- onnx_diagnostic/tasks/text_generation.py +6 -2
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +7 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
- onnx_diagnostic/torch_export_patches/patch_inputs.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +397 -128
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +57 -40
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +5 -0
- onnx_diagnostic/torch_models/validate.py +26 -3
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/RECORD +29 -27
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/top_level.txt +0 -0
--- a/onnx_diagnostic/tasks/image_text_to_text.py
+++ b/onnx_diagnostic/tasks/image_text_to_text.py
@@ -1,6 +1,6 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
-from ..helpers.cache_helper import make_dynamic_cache
+from ..helpers.cache_helper import make_dynamic_cache, make_hybrid_cache
 from ..helpers.config_helper import update_config, check_hasattr, _pick
 
 __TASK__ = "image-text-to-text"
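The notable change above is the new `make_hybrid_cache` import. Judging from the call sites later in this diff, it mirrors `make_dynamic_cache` and builds a `transformers` `HybridCache` from one `(key, value)` tensor pair per layer; a minimal sketch under that assumption (the exact signature is inferred from the diff, not from documentation):

```python
# Sketch only: make_hybrid_cache's signature is inferred from the call sites below.
import torch
from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache

num_hidden_layers, num_key_value_heads, max_sequence_length, head_dim = 2, 4, 380, 256
past_key_values = make_hybrid_cache(
    [
        (
            torch.randn(1, num_key_value_heads, max_sequence_length, head_dim),  # keys
            torch.randn(1, num_key_value_heads, max_sequence_length, head_dim),  # values
        )
        for _ in range(num_hidden_layers)
    ]
)
```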
@@ -11,99 +11,284 @@ def reduce_model_config(config: Any) -> Dict[str, Any]:
     kwargs: Dict[str, Any] = {}
     if hasattr(config, "num_hidden_layers"):
         config.num_hidden_layers = min(config.num_hidden_layers, 2)
-    if hasattr(config, "
-        config.
+    if hasattr(config, "mm_tokens_per_image"):
+        config.mm_tokens_per_image = min(config.mm_tokens_per_image, 2)
+    if hasattr(config, "vision_config"):
+        if hasattr(config.vision_config, "num_hidden_layers"):
+            config.vision_config.num_hidden_layers = min(
+                config.vision_config.num_hidden_layers, 2
+            )
+        if hasattr(config.vision_config, "image_size"):
+            config.vision_config.image_size = min(config.vision_config.image_size, 96)
+        if hasattr(config.vision_config, "intermediate_size"):
+            config.vision_config.intermediate_size = min(
+                config.vision_config.intermediate_size, 1076
+            )
+        if hasattr(config.vision_config, "patch_size"):
+            config.vision_config.patch_size = min(config.vision_config.patch_size, 2)
+        if hasattr(config.vision_config, "hidden_size"):
+            config.vision_config.hidden_size = min(config.vision_config.hidden_size, 16)
+    if hasattr(config, "text_config"):
+        if hasattr(config.text_config, "intermediate_size"):
+            config.text_config.intermediate_size = min(
+                config.text_config.intermediate_size, 320
+            )
+        if hasattr(config.text_config, "hidden_size"):
+            config.text_config.hidden_size = min(config.text_config.hidden_size, 16)
+        if hasattr(config.text_config, "num_hidden_layers"):
+            config.text_config.num_hidden_layers = min(config.text_config.num_hidden_layers, 2)
+        if hasattr(config.text_config, "layer_types"):
+            config.text_config.layer_types = config.text_config.layer_types[
+                : config.text_config.num_hidden_layers
+            ]
+        if hasattr(config.text_config, "num_attention_heads"):
+            config.text_config.num_attention_heads = min(
+                config.text_config.num_attention_heads, 2
+            )
     update_config(config, kwargs)
     return kwargs
 
 
-def
+def _get_inputs_gemma3(
     model: torch.nn.Module,
     config: Optional[Any],
     dummy_max_token_id: int,
     num_key_value_heads: int,
     num_hidden_layers: int,
+    pad_token_id: int,
+    image_token_index: int,
     head_dim: int,
     width: int,
     height: int,
     num_channels: int,
     batch_size: int = 2,
-    sequence_length: int =
-    sequence_length2: int =
+    sequence_length: int = 43,
+    sequence_length2: int = 43,
     n_images: int = 2,
     dynamic_rope: bool = False,
-
+    max_sequence_length: int = 380,
     **kwargs,  # unused
 ):
     """
-
+    ::
 
-
-
-
-
-
-
-
-
-
-
-
-
+        dict(input_ids:T7s1x281,
+        pixel_values:T16s1x3x896x896,
+        attention_mask:dict(full_attention:T9s1x1x281x380,sliding_attention:T9s1x1x281x380),
+        position_ids:T7s1x281,
+        past_key_values:HybridCache(
+            key_cache=#34[T1s1x4x380x256,...],
+            value_cache=#34[T1s1x4x380x256,...]),
+        token_type_ids:T7s1x281,
+        cache_position:T7s281,
+        logits_to_keep:1)
+        dict(input_ids:T7s1x1,
+        pixel_values:None,
+        attention_mask:dict(full_attention:T9s1x1x1x380,sliding_attention:T9s1x1x1x380),
+        position_ids:T7s1x1,
+        past_key_values:HybridCache(
+            key_cache=#34[T1s1x4x380x256,...],
+            value_cache=#34[T1s1x4x380x256,...]),
+        token_type_ids:T7s1x1,
+        cache_position:T7s1,
+        logits_to_keep:1)
     """
     assert (
         "cls_cache" not in kwargs
     ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)
-    images = "images"  # torch.export.Dim("images", min=1, max=4096)
+    # cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)
 
     shapes = {
         "input_ids": {0: batch, 1: seq_length},
+        "token_type_ids": {0: batch, 1: seq_length},
         "attention_mask": {
-            0: batch,
-
-        },
-        "position_ids": {
-            0: batch,
-            1: "cache+seq",  # cache_length + seq_length
+            "full_attention": {0: batch, 2: seq_length},
+            "sliding_attention": {0: batch, 2: seq_length},
         },
+        "position_ids": {0: batch, 1: seq_length},
+        "cache_position": {1: seq_length},
         "past_key_values": [
-            [{0: batch
-            [{0: batch
+            [{0: batch} for _ in range(num_hidden_layers)],
+            [{0: batch} for _ in range(num_hidden_layers)],
         ],
-        "pixel_values": {0: batch
-        "
+        "pixel_values": {0: batch},
+        "use_cache": None,
     }
+
+    input_ids = torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to(
+        torch.int64
+    )
+    input_ids[:, 1] = image_token_index
+    # input_ids[input_ids == image_token_index] = pad_token_id
+    token_type_ids = torch.zeros_like(input_ids)
+    token_type_ids[input_ids == image_token_index] = 1
+
     inputs = dict(
-        input_ids=
-
+        input_ids=input_ids,
+        token_type_ids=token_type_ids,
+        attention_mask=dict(
+            full_attention=torch.randn(batch_size, 1, sequence_length, max_sequence_length),
+            sliding_attention=torch.randn(batch_size, 1, sequence_length, max_sequence_length),
         ),
-
-
-
-        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
-        .to(torch.int64)
-        .expand((batch_size, -1)),
-        past_key_values=make_dynamic_cache(
+        cache_position=torch.arange(0, sequence_length).to(torch.int64),
+        position_ids=torch.arange(0, sequence_length).to(torch.int64).expand((batch_size, -1)),
+        past_key_values=make_hybrid_cache(
             [
                 (
-                    torch.randn(
-
+                    torch.randn(
+                        batch_size, num_key_value_heads, max_sequence_length, head_dim
+                    ),
+                    torch.randn(
+                        batch_size, num_key_value_heads, max_sequence_length, head_dim
+                    ),
                 )
                 for i in range(num_hidden_layers)
             ]
         ),
-        pixel_values=torch.
-            torch.int64
-        ),
+        pixel_values=torch.randn(n_images, num_channels, width, height).clamp(-1, 1),
         image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to(
             torch.int64
         ),
+        use_cache=True,  # Gemma3 does not set this value to true when a cache is provided
     )
-
+    return dict(inputs=inputs, dynamic_shapes=shapes)
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    dummy_max_token_id: int,
+    num_key_value_heads: int,
+    num_hidden_layers: int,
+    pad_token_id: int,
+    image_token_index: int,
+    head_dim: int,
+    width: int,
+    height: int,
+    num_channels: int,
+    batch_size: int = 2,
+    sequence_length: int = 43,
+    sequence_length2: int = 43,
+    n_images: int = 2,
+    dynamic_rope: bool = False,
+    add_second_input: int = 1,
+    **kwargs,  # unused
+):
+    """
+    Generates input for task ``image-text-to-text``.
+
+    :param model: model to get the missing information
+    :param config: configuration used to generate the model
+    :param head_dim: last dimension of the cache
+    :param dummy_max_token_id: dummy max token id
+    :param pad_token_id: pad_token_id
+    :param image_token_index: image_token_index
+    :param batch_size: batch size
+    :param sequence_length: sequence length
+    :param sequence_length2: new sequence length
+    :param n_images: number of images
+    :param width: width of the image
+    :param height: height of the image
+    :param num_channels: number of channels
+    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
+    :return: dictionary
+    """
+    if model.__class__.__name__.startswith("Gemma3"):
+        res = _get_inputs_gemma3(
+            model,
+            config,
+            dummy_max_token_id=dummy_max_token_id,
+            num_key_value_heads=num_key_value_heads,
+            num_hidden_layers=num_hidden_layers,
+            pad_token_id=pad_token_id,
+            image_token_index=image_token_index,
+            head_dim=head_dim,
+            width=width,
+            height=height,
+            num_channels=num_channels,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            sequence_length2=sequence_length2,
+            n_images=n_images,
+            dynamic_rope=dynamic_rope,
+            **kwargs,
+        )
+    else:
+        assert (
+            "cls_cache" not in kwargs
+        ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
+        batch = torch.export.Dim("batch", min=1, max=1024)
+        batch_img = torch.export.Dim("batch_img", min=1, max=1024)
+        seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
+        cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)
+        images = "images"  # torch.export.Dim("images", min=1, max=4096)
+
+        shapes = {
+            "input_ids": {0: batch, 1: seq_length},
+            "token_type_ids": {0: batch, 1: seq_length},
+            "attention_mask": {0: batch, 1: "cache+seq"},
+            "position_ids": {0: batch, 1: "cache+seq"},
+            "past_key_values": [
+                [{0: batch} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            "pixel_values": (
+                {0: batch, 1: images}
+                if model.__class__.__name__ == "IdeficsForVisionText2Text"
+                else {0: batch_img}
+            ),
+            "image_attention_mask": {0: batch, 1: seq_length, 2: images},
+            "use_cache": None,
+        }
+
+        input_ids = torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to(
+            torch.int64
+        )
+        input_ids[0, 0] = image_token_index
+        input_ids[1, 1] = image_token_index
+        # input_ids[input_ids == image_token_index] = pad_token_id
+        token_type_ids = torch.zeros_like(input_ids)
+        token_type_ids[input_ids == image_token_index] = 1
+
+        inputs = dict(
+            input_ids=input_ids,
+            attention_mask=torch.cat(
+                [
+                    torch.ones((batch_size, sequence_length), dtype=torch.int64),
+                    input_ids.ne(pad_token_id).to(torch.int64),
+                ],
+                axis=-1,
+            ),
+            position_ids=torch.arange(0, sequence_length2)
+            .to(torch.int64)
+            .expand((batch_size, -1)),
+            past_key_values=make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size, num_key_value_heads, sequence_length, head_dim
+                        ),
+                        torch.randn(
+                            batch_size, num_key_value_heads, sequence_length, head_dim
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            pixel_values=(
+                torch.randn((batch_size, n_images, num_channels, width, height)).clamp(-1, 1)
+                if model.__class__.__name__ == "IdeficsForVisionText2Text"
+                else torch.randn(n_images, num_channels, width, height).clamp(-1, 1)
+            ),
+            image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to(
+                torch.int64
+            ),
+            token_type_ids=token_type_ids,
+            use_cache=True,  # Gemma3 does not set this value to true when a cache is provided
+        )
+        res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         assert (
             add_second_input > 0
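The core of the rewritten generator is how it plants image tokens and derives `token_type_ids` from them; isolated as a standalone snippet (plain `torch`, sizes picked arbitrarily):

```python
import torch

dummy_max_token_id, image_token_index = 100, 4
batch_size, sequence_length2 = 2, 43

input_ids = torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to(torch.int64)
input_ids[:, 1] = image_token_index  # force one image token into every row
# token_type_ids is 1 exactly where an image token sits, 0 everywhere else
token_type_ids = torch.zeros_like(input_ids)
token_type_ids[input_ids == image_token_index] = 1
assert token_type_ids[:, 1].eq(1).all()
```

The Gemma3 branch also swaps `make_dynamic_cache` for `make_hybrid_cache` and replaces the single 2D attention mask with a `full_attention`/`sliding_attention` pair, matching the `HybridCache` shapes shown in the docstring above.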
@@ -123,6 +308,8 @@ def get_inputs(
             sequence_length2=sequence_length2 + 1,
             n_images=n_images + 1,
             dynamic_rope=dynamic_rope,
+            pad_token_id=pad_token_id,
+            image_token_index=image_token_index,
             add_second_input=0,
             **kwargs,
         )["inputs"]
@@ -145,8 +332,9 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
             ("num_key_value_heads", "num_attention_heads"),
             "intermediate_size",
             "hidden_size",
+            "pad_token_id",
         )
-        check_hasattr(config, "vision_config")
+        check_hasattr(config, "vision_config", "image_token_index")
         text_config = True
     else:
         check_hasattr(
@@ -163,19 +351,25 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         check_hasattr(config.vision_config, "image_size", "num_channels")
     kwargs = dict(
         batch_size=2,
-        sequence_length=
-        sequence_length2=
+        sequence_length=43,
+        sequence_length2=43,
         head_dim=(
             16
             if config is None
             else getattr(
                 config,
                 "head_dim",
-                (
-
-                    config.text_config
-
-
+                (
+                    config.text_config.head_dim
+                    if text_config and hasattr(config.text_config, "head_dim")
+                    else (
+                        (config.text_config.hidden_size if text_config else config.hidden_size)
+                        // (
+                            config.text_config.num_attention_heads
+                            if text_config
+                            else config.num_attention_heads
+                        )
+                    )
                 ),
             )
         ),
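The new default branch is the standard identity `head_dim = hidden_size // num_attention_heads`, applied to `config.text_config` when a text sub-config exists. Flattened for readability (this sketch drops the `text_config.head_dim` shortcut and assumes a text sub-config is present):

```python
# e.g. hidden_size=16 and num_attention_heads=2 -> head_dim=8
head_dim = getattr(
    config,
    "head_dim",
    config.text_config.hidden_size // config.text_config.num_attention_heads,
)
```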
@@ -219,5 +413,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         width=224 if config is None else config.vision_config.image_size,
         height=224 if config is None else config.vision_config.image_size,
         num_channels=3 if config is None else config.vision_config.num_channels,
+        pad_token_id=(
+            0
+            if config is None or not hasattr(config, "text_config")
+            else config.text_config.pad_token_id
+        ),
+        image_token_index=(
+            4
+            if config is None or not hasattr(config, "image_token_index")
+            else config.image_token_index
+        ),
     )
     return kwargs, get_inputs
--- /dev/null
+++ b/onnx_diagnostic/tasks/mask_generation.py
@@ -0,0 +1,139 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.config_helper import update_config, check_hasattr
+
+__TASK__ = "mask-generation"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """Reduces a model size."""
+    kwargs: Dict[str, Any] = {}
+    if hasattr(config, "num_hidden_layers"):
+        config.num_hidden_layers = min(config.num_hidden_layers, 2)
+    if hasattr(config, "vision_config") and hasattr(config.vision_config, "num_hidden_layers"):
+        config.vision_config.num_hidden_layers = min(config.vision_config.num_hidden_layers, 2)
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    batch_size: int,
+    width: int,
+    height: int,
+    num_channels: int,
+    output_channels: int,
+    window_size: int,
+    add_second_input: bool = True,
+    **kwargs,  # unused
+):
+    """
+    Generates input for task ``mask-generation``.
+
+    :param model: model to get the missing information
+    :param config: configuration used to generate the model
+    :param batch_size: batch size
+    :param width: width of the image
+    :param height: height of the image
+    :param num_channels: number of channels in the image
+    :param output_channels: number of output channels
+    :param window_size: size of the window for the vision model
+    :return: dictionary with inputs and dynamic shapes
+
+    """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
+
+    # TODO(anyone): input_masks is weirdly failing all the time with mismatch channels
+    # with Conv or embedding_size. I guess maybe the model is too implicit on the
+    # input_masks shape.
+
+    # TODO(titaiwang): modeling code specifically requires the height and width of inputs
+    # should be the same as the config.vision_config.image_size. Does that make sense?
+
+    shapes = {
+        "pixel_values": {0: "batch"},  # 1: num_channels is static
+        "input_points": {0: "batch", 1: "point_batch_size", 2: "nb_points_per_image"},
+        "input_boxes": {0: "batch", 1: "point_batch_size"},
+        # "input_masks": {0: "batch", 2: "height", 3: "width"},
+    }
+    inputs = dict(
+        pixel_values=torch.randn(
+            (batch_size, num_channels, height, width), dtype=torch.float32
+        ).clamp(-1, 1),
+        input_points=torch.randn(
+            (batch_size, 2, 10, 2), dtype=torch.float32
+        ),  # 10 points per image
+        input_boxes=torch.randn((batch_size, 2, 4), dtype=torch.float32),  # 1 box per image
+        # input_masks=torch.randn(
+        #     (batch_size, 1, height, width), dtype=torch.float32
+        # ),  # mask for the image
+    )
+
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            batch_size=batch_size + 1,
+            width=width,
+            height=height,
+            num_channels=num_channels,
+            output_channels=output_channels,
+            window_size=window_size,
+            add_second_input=False,
+            **kwargs,
+        )["inputs"]
+    return res
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Inputs kwargs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    if config is not None:
+        # generates mask as outputs
+        if hasattr(config, "mask_decoder_config"):
+            check_hasattr(
+                config.mask_decoder_config,
+                "hidden_size",
+                "iou_head_hidden_dim",
+                "iou_head_depth",
+                "num_hidden_layers",
+                "num_multimask_outputs",
+            )
+        if hasattr(config, "prompt_encoder_config"):
+            check_hasattr(
+                config.prompt_encoder_config,
+                "hidden_size",
+                "image_embedding_size",
+                "image_size",
+                "mask_input_channels",
+            )
+        if hasattr(config, "vision_config"):
+            check_hasattr(
+                config.vision_config,
+                "image_size",
+                "hidden_size",
+                "intermediate_size",
+                "num_hidden_layers",
+                "output_channels",
+                "num_channels",
+                "window_size",
+            )
+    kwargs = dict(
+        batch_size=2,
+        width=1024 if config is None else config.vision_config.image_size,
+        height=1024 if config is None else config.vision_config.image_size,
+        num_channels=3 if config is None else config.vision_config.num_channels,
+        output_channels=256 if config is None else config.vision_config.output_channels,
+        window_size=14 if config is None else config.vision_config.window_size,
+    )
+    return kwargs, get_inputs
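The new module follows the contract shared by the other `onnx_diagnostic.tasks` modules: `random_input_kwargs` returns default dimensions plus the `get_inputs` callable. A usage sketch with `config=None`, so the typical SAM-like sizes above apply (`model` is only passed through by this particular generator, hence `None`):

```python
from onnx_diagnostic.tasks.mask_generation import random_input_kwargs

kwargs, fct = random_input_kwargs(None)       # width=height=1024, 3 channels, ...
res = fct(model=None, config=None, **kwargs)
print(res["inputs"]["pixel_values"].shape)    # torch.Size([2, 3, 1024, 1024])
print(res["inputs2"]["pixel_values"].shape)   # second set uses batch_size + 1
```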
--- a/onnx_diagnostic/tasks/text2text_generation.py
+++ b/onnx_diagnostic/tasks/text2text_generation.py
@@ -69,8 +69,8 @@ def get_inputs(
     ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = "cache_length_key"
-    cache_length2 = "cache_length_val"
+    cache_length = "cache_length_key"
+    cache_length2 = "cache_length_val"
 
     shapes = {
         "input_ids": {0: batch, 1: seq_length},
--- a/onnx_diagnostic/tasks/text_generation.py
+++ b/onnx_diagnostic/tasks/text_generation.py
@@ -1,6 +1,5 @@
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 import torch
-import transformers
 from ..helpers.cache_helper import (
     make_dynamic_cache,
     make_mamba_cache,
@@ -95,9 +94,14 @@ def get_inputs(
     cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)
 
     if config is not None and config.__class__.__name__ == "FalconMambaConfig":
+        try:
+            from transformers.models.mamba.modeling_mamba import MambaCache
+        except ImportError:
+            from transformers.cache_utils import MambaCache
+
         assert cls_cache in (
             "MambaCache",
-
+            MambaCache,
         ), f"Unexpected value for cls_cache={cls_cache} and config={config}"
         seq_length_multiple = 8
     sequence_length = (
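The guarded import exists because recent `transformers` releases moved `MambaCache` out of `transformers.cache_utils` and into the Mamba modeling module; the `except ImportError` branch keeps older versions working. The same two-line guard appears again in `onnx_export_serialization.py` below:

```python
try:  # newer transformers: MambaCache lives next to the Mamba model
    from transformers.models.mamba.modeling_mamba import MambaCache
except ImportError:  # older transformers still export it from cache_utils
    from transformers.cache_utils import MambaCache
```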
--- a/onnx_diagnostic/torch_export_patches/onnx_export_errors.py
+++ b/onnx_diagnostic/torch_export_patches/onnx_export_errors.py
@@ -16,6 +16,8 @@ def get_function(name: str) -> Tuple[type, Callable]:
     module_name = ".".join(spl[:-1])
     fname = spl[-1]
     mod = importlib.import_module(module_name)
+    if not hasattr(mod, fname):
+        return None, None
     return mod, getattr(mod, fname)
 
 
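With the guard, `get_function` resolves a dotted name to `(module, attribute)` and now degrades to `(None, None)` when the attribute is missing instead of raising `AttributeError`. Reassembled from the context lines (the `split` preamble is not shown in this hunk, so it is an assumption):

```python
import importlib
from typing import Callable, Tuple

def get_function(name: str) -> Tuple[type, Callable]:
    # assumed preamble: "pkg.mod.attr" -> module "pkg.mod", attribute "attr"
    spl = name.split(".")
    module_name = ".".join(spl[:-1])
    fname = spl[-1]
    mod = importlib.import_module(module_name)
    if not hasattr(mod, fname):
        # attribute absent in this version of the library: nothing to patch
        return None, None
    return mod, getattr(mod, fname)
```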
@@ -33,12 +35,16 @@ def get_patches(mod, verbose: int = 0) -> Tuple[str, List[Any]]:
         doc = v.__doc__.lstrip()
         if doc.startswith("manual patch"):
             continue
-        reg = re.compile("[[]patch:([a-z_A-Z.]+)[]]")
+        reg = re.compile("[\\[]patch:([a-z_A-Z.]+)[\\]]")
         fall = reg.findall(doc)
         assert (
             len(fall) == 1
         ), f"Unable to find patching information for {v} in \n{doc}"
         fmod, f = get_function(fall[0])
+        if fmod is None and f is None:
+            # The function does not exist in this version of transformers.
+            # No patch is needed.
+            continue
         to_patch.append({"module": fmod, "function": f, "patch": v})
 
     name = mod.__name__
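Both spellings of the pattern are equivalent: `[[]` and `[\\[]` are each a one-character class matching a literal `[`; the escaped form is merely easier on linters than the `[]]` idiom. What the regex pulls out of a patch docstring (the docstring here is invented for the demo):

```python
import re

reg = re.compile(r"[\[]patch:([a-z_A-Z.]+)[\]]")
doc = "[patch:transformers.cache_utils.DynamicCache] patched for export."
print(reg.findall(doc))  # ['transformers.cache_utils.DynamicCache']
```

Combined with the `(None, None)` guard above, a `[patch:...]` target that no longer exists in the installed `transformers` is now skipped instead of aborting the patching pass.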
--- a/onnx_diagnostic/torch_export_patches/onnx_export_serialization.py
+++ b/onnx_diagnostic/torch_export_patches/onnx_export_serialization.py
@@ -6,12 +6,17 @@ import torch
 import transformers
 from transformers.cache_utils import (
     DynamicCache,
-    MambaCache,
     EncoderDecoderCache,
+    HybridCache,
     SlidingWindowCache,
     StaticCache,
 )
 
+try:
+    from transformers.models.mamba.modeling_mamba import MambaCache
+except ImportError:
+    from transformers.cache_utils import MambaCache
+
 from ..helpers import string_type
 from .serialization import _lower_name_with_
 
@@ -161,6 +166,9 @@ def serialization_functions(
         flatten_dynamic_cache,
         unflatten_dynamic_cache,
         flatten_with_keys_dynamic_cache,
+        flatten_hybrid_cache,
+        unflatten_hybrid_cache,
+        flatten_with_keys_hybrid_cache,
         flatten_mamba_cache,
         unflatten_mamba_cache,
         flatten_with_keys_mamba_cache,
@@ -187,6 +195,14 @@ def serialization_functions(
             # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
             verbose=verbose,
         ),
+        HybridCache: lambda verbose=verbose: register_class_serialization(
+            HybridCache,
+            flatten_hybrid_cache,
+            unflatten_hybrid_cache,
+            flatten_with_keys_hybrid_cache,
+            # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
+            verbose=verbose,
+        ),
         MambaCache: lambda verbose=verbose: register_class_serialization(
             MambaCache,
             flatten_mamba_cache,
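`register_class_serialization` ultimately teaches `torch.export` to flatten a cache into tensors and rebuild it afterwards. A self-contained sketch of the underlying mechanism on a toy stand-in class, using `torch.utils._pytree` directly (this shows the idea behind the helper, not the library's implementation):

```python
import torch
import torch.utils._pytree as pytree

class ToyCache:
    """Stand-in for HybridCache: two lists of tensors."""
    def __init__(self, key_cache, value_cache):
        self.key_cache = key_cache
        self.value_cache = value_cache

def flatten_toy_cache(cache):
    # children to trace through, plus a context used to rebuild the object
    return [cache.key_cache, cache.value_cache], ["key_cache", "value_cache"]

def unflatten_toy_cache(values, context):
    return ToyCache(*values)

pytree.register_pytree_node(
    ToyCache, flatten_toy_cache, unflatten_toy_cache,
    serialized_type_name="ToyCache",
)

cache = ToyCache([torch.ones(1, 4, 380, 256)], [torch.zeros(1, 4, 380, 256)])
flat, spec = pytree.tree_flatten(cache)      # two leaf tensors + a spec
rebuilt = pytree.tree_unflatten(flat, spec)  # a ToyCache again
```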
--- a/onnx_diagnostic/torch_export_patches/patch_inputs.py
+++ b/onnx_diagnostic/torch_export_patches/patch_inputs.py
@@ -70,6 +70,8 @@ def convert_dynamic_axes_into_dynamic_shapes(
     :param verbose: verbosity
     :return: (args, kwargs, dynamic shapes)
     """
+    from ..helpers.cache_helper import CacheKeyValue
+
     new_kwargs = {}
     if args:
         assert hasattr(model, "forward"), f"Missing method 'forward' for {model!r}"
@@ -121,7 +123,8 @@
             changes[k] = type(updated_kwargs[k])
             continue
         if isinstance(v, transformers.cache_utils.DynamicCache):
-
+            ca = CacheKeyValue(v)
+            updated_kwargs[k] = [ca.key_cache, ca.value_cache]
             changes[k] = type(v)
             continue
         raise NotImplementedError(