onnx-diagnostic 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (25)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +156 -47
  3. onnx_diagnostic/export/dynamic_shapes.py +6 -6
  4. onnx_diagnostic/export/shape_helper.py +124 -6
  5. onnx_diagnostic/ext_test_case.py +5 -1
  6. onnx_diagnostic/helpers/cache_helper.py +68 -42
  7. onnx_diagnostic/helpers/config_helper.py +2 -1
  8. onnx_diagnostic/helpers/fake_tensor_helper.py +153 -0
  9. onnx_diagnostic/helpers/helper.py +3 -0
  10. onnx_diagnostic/helpers/rt_helper.py +3 -3
  11. onnx_diagnostic/tasks/image_text_to_text.py +7 -6
  12. onnx_diagnostic/tasks/text_generation.py +7 -4
  13. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +69 -11
  14. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +31 -13
  15. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +109 -18
  16. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +133 -28
  17. onnx_diagnostic/torch_models/code_sample.py +343 -0
  18. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +38 -0
  19. onnx_diagnostic/torch_models/hghub/model_inputs.py +7 -3
  20. onnx_diagnostic/torch_models/validate.py +73 -29
  21. {onnx_diagnostic-0.7.14.dist-info → onnx_diagnostic-0.7.16.dist-info}/METADATA +6 -6
  22. {onnx_diagnostic-0.7.14.dist-info → onnx_diagnostic-0.7.16.dist-info}/RECORD +25 -23
  23. {onnx_diagnostic-0.7.14.dist-info → onnx_diagnostic-0.7.16.dist-info}/WHEEL +0 -0
  24. {onnx_diagnostic-0.7.14.dist-info → onnx_diagnostic-0.7.16.dist-info}/licenses/LICENSE.txt +0 -0
  25. {onnx_diagnostic-0.7.14.dist-info → onnx_diagnostic-0.7.16.dist-info}/top_level.txt +0 -0
@@ -108,7 +108,7 @@ def flatten_unflatten_for_dynamic_shapes(
 
 def is_cache_dynamic_registered(fast: bool = False) -> bool:
     """
-    Tells class :class:`transformers.cache_utils.DynamicCache` can be
+    Tells if class :class:`transformers.cache_utils.DynamicCache` can be
     serialized and deserialized. Only then, :func:`torch.export.export`
     can export a model.
 
@@ -168,7 +168,33 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
             ]
         )
         print(string_type(past_key_values, with_shape=True))
+
+    The function is fully able to handle ``FakeTensor`` with dynamic dimensions if
+    ``transformers>=4.56``. Before that version, only FakeTensor with static dimensions
+    are supported.
     """
+    if (
+        key_value_pairs
+        and isinstance(key_value_pairs[0][0], torch._subclasses.fake_tensor.FakeTensor)
+        and pv.Version(transformers.__version__) >= pv.Version("4.56")
+    ):
+        cache = transformers.cache_utils.DynamicCache()
+        cache.layers.extend(
+            [transformers.cache_utils.DynamicLayer() for _ in key_value_pairs]
+        )
+        for i, layer in enumerate(cache.layers):
+            k, v = key_value_pairs[i][0], key_value_pairs[i][1]
+            layer.dtype = k.dtype
+            layer.device = k.device
+            layer.keys = k
+            layer.values = v
+            layer.is_initialized = True
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
+        )
+        return finalize_cache(cache)
+
     cache = transformers.cache_utils.DynamicCache(key_value_pairs)
     if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
         # The cache constructor contains the two following lines
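
For reference, a minimal sketch of what this new branch enables, assuming transformers>=4.56; the tensor shapes are illustrative and the FakeTensorMode setup mirrors the one used in fake_tensor_helper.py below:

    import torch
    from torch.fx.experimental.symbolic_shapes import ShapeEnv
    from torch._subclasses.fake_tensor import FakeTensorMode
    from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache

    # build FakeTensors with symbolic (dynamic) dimensions
    fake_mode = FakeTensorMode(shape_env=ShapeEnv())
    k = fake_mode.from_tensor(torch.rand((2, 32, 30, 96)), static_shapes=False)
    v = fake_mode.from_tensor(torch.rand((2, 32, 30, 96)), static_shapes=False)

    # with transformers>=4.56, make_dynamic_cache fills DynamicCache.layers directly
    cache = make_dynamic_cache([(k, v)])
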
@@ -494,51 +520,51 @@ def make_hybrid_cache(
 
     .. code-block:: python
 
-        self.max_cache_len = (
-            max_cache_len if max_cache_len is not None else config.max_position_embeddings)
+        self.max_cache_len = (
+            max_cache_len if max_cache_len is not None else config.max_position_embeddings)
 
-        # Sliding layers can't be larger than the overall max cache len
-        self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
-        self.max_batch_size = max_batch_size
+        # Sliding layers can't be larger than the overall max cache len
+        self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
+        self.max_batch_size = max_batch_size
 
-        self.head_dim = (
-            config.head_dim if hasattr(config, "head_dim")
-            else config.hidden_size // config.num_attention_heads
-        )
+        self.head_dim = (
+            config.head_dim if hasattr(config, "head_dim")
+            else config.hidden_size // config.num_attention_heads
+        )
 
-        self._dtype = dtype
-        self.num_key_value_heads = (
-            config.num_attention_heads
-            if getattr(config, "num_key_value_heads", None) is None
-            else config.num_key_value_heads
-        )
+        self._dtype = dtype
+        self.num_key_value_heads = (
+            config.num_attention_heads
+            if getattr(config, "num_key_value_heads", None) is None
+            else config.num_key_value_heads
+        )
 
-        # If the attribute does not exist in the config, fallback to a simple StaticCache
-        if hasattr(config, "layer_types"):
-            self.is_sliding = [
-                layer_type != "full_attention" for layer_type in config.layer_types]
-        else:
-            self.is_sliding = [False] * config.num_hidden_layers
-
-        self.key_cache: list[torch.Tensor] = []
-        self.value_cache: list[torch.Tensor] = []
-        global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
-                              self.max_cache_len, self.head_dim)
-        sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
-                               self.sliding_window_len, self.head_dim)
-        self.sliding_window = min(config.sliding_window, max_cache_len)
-        device = torch.device(device) if device is not None else None
-        for i in range(config.num_hidden_layers):
-            layer_device = layer_device_map[i] if layer_device_map is not None else device
-            cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
-            new_layer_key_cache = torch.zeros(
-                cache_shape, dtype=self._dtype, device=layer_device)
-            new_layer_value_cache = torch.zeros(
-                cache_shape, dtype=self._dtype, device=layer_device)
-            torch._dynamo.mark_static_address(new_layer_key_cache)
-            torch._dynamo.mark_static_address(new_layer_value_cache)
-            self.key_cache.append(new_layer_key_cache)
-            self.value_cache.append(new_layer_value_cache)
+        # If the attribute does not exist in the config, fallback to a simple StaticCache
+        if hasattr(config, "layer_types"):
+            self.is_sliding = [
+                layer_type != "full_attention" for layer_type in config.layer_types]
+        else:
+            self.is_sliding = [False] * config.num_hidden_layers
+
+        self.key_cache: list[torch.Tensor] = []
+        self.value_cache: list[torch.Tensor] = []
+        global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                              self.max_cache_len, self.head_dim)
+        sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                               self.sliding_window_len, self.head_dim)
+        self.sliding_window = min(config.sliding_window, max_cache_len)
+        device = torch.device(device) if device is not None else None
+        for i in range(config.num_hidden_layers):
+            layer_device = layer_device_map[i] if layer_device_map is not None else device
+            cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
+            new_layer_key_cache = torch.zeros(
+                cache_shape, dtype=self._dtype, device=layer_device)
+            new_layer_value_cache = torch.zeros(
+                cache_shape, dtype=self._dtype, device=layer_device)
+            torch._dynamo.mark_static_address(new_layer_key_cache)
+            torch._dynamo.mark_static_address(new_layer_value_cache)
+            self.key_cache.append(new_layer_key_cache)
+            self.value_cache.append(new_layer_value_cache)
 
     """
     layer_types = None
     if key_value_pairs:
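
A hedged usage sketch of make_hybrid_cache, assuming it accepts (key, value) pairs the same way make_dynamic_cache does (the full signature is not shown in this hunk); shapes are illustrative:

    import torch
    from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache

    cache = make_hybrid_cache(
        [(torch.rand((2, 4, 8, 16)), torch.rand((2, 4, 8, 16))) for _ in range(2)]
    )
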
@@ -95,7 +95,8 @@ def config_class_from_architecture(arch: str, exc: bool = False) -> Optional[typ
     mod_name = cls.__module__
     mod = importlib.import_module(mod_name)
     source = inspect.getsource(mod)
-    reg = re.compile("config: ([A-Za-z0-9]+)")
+    # [^O] avoids capturing Optional[Something]
+    reg = re.compile("config: ([^O][A-Za-z0-9]+)")
     fall = reg.findall(source)
     if len(fall) == 0:
         assert not exc, (
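
A quick illustration of the tightened pattern (not part of the diff); note it also skips any config class whose name starts with the letter O:

    import re

    source = "config: GemmaConfig\nconfig: Optional[GemmaConfig]"
    old = re.compile("config: ([A-Za-z0-9]+)")
    new = re.compile("config: ([^O][A-Za-z0-9]+)")
    print(old.findall(source))  # ['GemmaConfig', 'Optional']
    print(new.findall(source))  # ['GemmaConfig']
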
@@ -0,0 +1,153 @@
+from typing import Any, Dict, Optional, Tuple
+
+
+_UNIQUE = set()
+
+
+def _unique():
+    i = 129 + 1
+    while i in _UNIQUE:
+        i += 1
+    _UNIQUE.add(i)
+    return i
+
+
+def fake_reshape(
+    true_tensor: "torch.Tensor",  # noqa: F821
+    sh: Dict[int, Any],  # noqa: F821
+    fake_tensor: Optional["FakeTensor"] = None,  # noqa: F821
+    fake_mode: Optional["FakeTensorMode"] = None,  # noqa: F821
+) -> "FakeTensor":  # noqa: F821
+    """
+    Changes the shape of a true tensor to make it dynamic.
+
+    :param true_tensor: true tensor
+    :param sh: dynamic shape
+    :param fake_tensor: fake tensor, if None, make a fake one
+    :param fake_mode: fake tensor mode
+    :return: fake tensor
+    """
+    import torch
+
+    # deal with 0/1
+    for i in sh:
+        if true_tensor.shape[i] <= 1:
+            expanded_shape = list(true_tensor.shape)
+            expanded_shape[i] = _unique()
+            true_tensor = torch.empty(
+                tuple(expanded_shape), dtype=true_tensor.dtype, device=true_tensor.device
+            )
+
+    # deal with equivalent dimension
+    new_shape = list(true_tensor.shape)
+    mapping = {}
+    for i, s in sh.items():
+        d = true_tensor.shape[i]
+        if d not in mapping:
+            mapping[d] = s
+        elif mapping[d] != s:
+            d = _unique()
+            mapping[d] = s
+            new_shape[i] = d
+            true_tensor = torch.empty(
+                tuple(new_shape), dtype=true_tensor.dtype, device=true_tensor.device
+            )
+
+    # now switch to FakeTensor
+    if fake_mode is None:
+        from torch.fx.experimental.symbolic_shapes import ShapeEnv
+        from torch._subclasses.fake_tensor import FakeTensorMode
+
+        shape_env = ShapeEnv()
+        fake_mode = FakeTensorMode(shape_env=shape_env)
+    if fake_tensor is None:
+        fake_tensor = fake_mode.from_tensor(true_tensor, static_shapes=False)
+    assert fake_mode is not None, "fake_mode must be provided"
+
+    new_shape = list(true_tensor.shape)
+    for i in sh:
+        new_shape[i] = fake_tensor.shape[i]
+
+    reduced_tensor = fake_mode.from_tensor(true_tensor, static_shapes=True).sum(
+        axis=tuple(sorted(sh)), keepdim=True
+    )
+    return reduced_tensor.expand(*new_shape)
+
+
+def make_fake(
+    x: Any, fake_mode: Optional["FakeTensorMode"] = None  # noqa: F821
+) -> Tuple[Optional["FakeTensor"], Optional["FakeTensorMode"]]:  # noqa: F821
+    """
+    Replaces all tensors by fake tensors.
+    This modification happens inplace for caches.
+    This function is only implemented for cache with
+    ``transformers>=4.55``.
+
+    .. runpython::
+        :showcode:
+
+        import pprint
+        import torch
+        from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
+        from onnx_diagnostic.helpers.fake_tensor_helper import make_fake
+
+        inputs, _ = make_fake(
+            dict(
+                input_ids=torch.randint(30360, size=(2, 3), dtype=torch.int64),
+                attention_mask=torch.randint(1, size=(2, 33), dtype=torch.int64),
+                position_ids=torch.randint(32, size=(2, 3), dtype=torch.int64),
+                past_key_values=make_dynamic_cache(
+                    [
+                        (
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                        ),
+                        (
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                            torch.rand((2, 32, 30, 96), dtype=torch.float16),
+                        ),
+                    ]
+                ),
+            )
+        )
+        pprint.pprint(inputs)
+    """
+    if x is None:
+        return None, None
+    if fake_mode is None:
+        from torch.fx.experimental.symbolic_shapes import ShapeEnv
+        from torch._subclasses.fake_tensor import FakeTensorMode
+
+        shape_env = ShapeEnv()
+        fake_mode = FakeTensorMode(shape_env=shape_env)
+
+    if isinstance(x, (list, tuple)):
+        return x.__class__([make_fake(i, fake_mode=fake_mode)[0] for i in x]), fake_mode
+    if isinstance(x, dict):
+        return {k: make_fake(v, fake_mode=fake_mode)[0] for k, v in x.items()}, fake_mode
+
+    if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
+        assert hasattr(x, "layers"), (
+            f"Une more recent version of transformers (>=4.55), "
+            f"'layers' not found in class {type(x)}"
+        )
+        for layer in x.layers:
+            assert hasattr(layer, "keys") and hasattr(layer, "values"), (
+                f"Une more recent version of transformers (>=4.55), 'layers' "
+                f"not found in class {type(layer)} ({dir(layer)})"
+            )
+            layer.keys = make_fake(layer.keys, fake_mode=fake_mode)[0]
+            layer.values = make_fake(layer.values, fake_mode=fake_mode)[0]
+        return x, fake_mode
+    if x.__class__.__name__ == "EncoderDecoderCache":
+        make_fake(x.self_attention_cache, fake_mode=fake_mode)
+        make_fake(x.cross_attention_cache, fake_mode=fake_mode)
+        return x, fake_mode
+    if hasattr(x, "shape"):
+        t = fake_mode.from_tensor(x, static_shapes=False)
+        return t, fake_mode
+    from . import string_type
+
+    raise TypeError(
+        f"Unexpected type {type(x)} for x, content is {string_type(x, with_shape=True)}"
+    )
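
The docstring above already demonstrates make_fake. A hedged sketch of calling fake_reshape as well; the mapping of axis index to dynamic-dimension name passed as sh is an assumption based on the signature, and the shapes are illustrative:

    import torch
    from onnx_diagnostic.helpers.fake_tensor_helper import fake_reshape

    t = torch.rand((2, 32, 30, 96), dtype=torch.float16)
    # turn axes 0 and 2 into dynamic dimensions on the returned FakeTensor
    fake = fake_reshape(t, {0: "batch", 2: "cache_length"})
    print(fake.shape)
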
@@ -463,6 +463,7 @@ def string_type(
         if verbose:
             print(f"[string_type] F2:{type(obj)}")
         return f"{prefix}F{i}s{'x'.join(map(str, obj.shape))}"
+
     if isinstance(obj, torch.Tensor):
         from .torch_helper import torch_dtype_to_onnx_dtype
 
@@ -783,6 +784,8 @@ def string_type(
             obj, ultralytics.engine.results.Results
         ), f"Unexpected type={type(obj)}"
         return f"ultralytics.{obj.__class__.__name__}(...)"
+    if obj.__class__.__name__ == "FakeTensorMode":
+        return f"{obj}"
 
     if verbose:
         print(f"[string_type] END:{type(obj)}")
@@ -3,8 +3,6 @@ import numpy as np
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .torch_helper import to_numpy
-from .cache_helper import is_cache_dynamic_registered
 
 
 def name_type_to_onnx_dtype(name: str) -> int:
@@ -49,7 +47,7 @@ def make_feeds(
     assert (
         not check_flatten
         or not all(isinstance(obj, torch.Tensor) for obj in flat)
-        or not is_cache_dynamic_registered(fast=True)
+        # or not is_cache_dynamic_registered(fast=True)
         or len(flat) == len(torch.utils._pytree.tree_flatten(inputs)[0])
     ), (
         f"Unexpected number of flattened objects, "
@@ -57,6 +55,8 @@ def make_feeds(
         f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True)}"
     )
     if use_numpy:
+        from .torch_helper import to_numpy
+
         flat = [to_numpy(t) if isinstance(t, torch.Tensor) else t for t in flat]
     names = (
         [i.name for i in proto.graph.input]
@@ -186,12 +186,13 @@ def _get_inputs_gemma3(
         f"total_sequence_length={total_sequence_length} != 860 "
         f"for model {model.__class__.__name__}"
     )
-    assert (
-        head_dim == 256
-    ), f"head_dim={head_dim} != 256 for model {model.__class__.__name__}"
+    assert head_dim in (
+        256,
+        32,
+    ), f"head_dim={head_dim} not in (32, 256) for model {model.__class__.__name__}"
     assert n_images == 1, f"n_images={n_images} != 1 for model {model.__class__.__name__}"
-    assert num_key_value_heads == 4, (
-        f"num_key_value_heads={num_key_value_heads} != 256 "
+    assert num_key_value_heads in (1, 4), (
+        f"num_key_value_heads={num_key_value_heads} not in (1, 4) "
         f"for this model {model.__class__.__name__}"
     )
 
@@ -270,7 +271,7 @@ def get_inputs_default(
         "input_ids": {0: batch, 1: seq_length},
         "token_type_ids": {0: batch, 1: seq_length},
         "attention_mask": {0: batch, 1: "cache+seq"},
-        "position_ids": {0: batch, 1: "cache+seq"},
+        "position_ids": {0: batch, 1: seq_length},
         "past_key_values": [
             [{0: batch} for _ in range(num_hidden_layers)],
             [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
@@ -19,6 +19,9 @@ __TASK__ = "text-generation"
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
     # FalconMambaConfig: use_mambapy
+    if hasattr(config, "text_config"):
+        # The model is probably of mixture of models used only for text.
+        config = config.text_config
     check_hasattr(
         config,
         ("head_dim", ("hidden_size", "num_attention_heads"), "use_mambapy"),
@@ -217,10 +220,7 @@ def get_inputs(
             0: batch,
             1: "cache+seq",  # cache_length + seq_length
         },
-        "position_ids": {
-            0: batch,
-            1: "cache+seq",  # cache_length + seq_length
-        },
+        "position_ids": {0: batch, 1: seq_length},
         "past_key_values": [
             [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
             [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
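
Spelled out, the dynamic-shapes mapping for text-generation now looks like this (string dimension names and two layers used for illustration); position_ids now shares the sequence axis with input_ids instead of the cache+seq axis:

    batch, seq_length, cache_length = "batch", "seq_length", "cache_length"
    dynamic_shapes = {
        "input_ids": {0: batch, 1: seq_length},
        "attention_mask": {0: batch, 1: "cache+seq"},
        "position_ids": {0: batch, 1: seq_length},
        "past_key_values": [
            [{0: batch, 2: cache_length} for _ in range(2)],
            [{0: batch, 2: cache_length} for _ in range(2)],
        ],
    }
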
@@ -308,6 +308,9 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
 
     If the configuration is None, the function selects typical dimensions.
     """
+    if hasattr(config, "text_config"):
+        # The model is probably of mixture of models used only for text.
+        config = config.text_config
     if config is not None:
         check_hasattr(
             config,
@@ -2,7 +2,7 @@ import functools
 import importlib
 import contextlib
 import re
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from .onnx_export_serialization import (
     register_cache_serialization,
     unregister_cache_serialization,
@@ -160,7 +160,7 @@ def register_additional_serialization_functions(
 @contextlib.contextmanager
 def torch_export_patches(
     patch_sympy: bool = True,
-    patch_torch: bool = True,
+    patch_torch: Union[bool, int] = True,
     patch_transformers: bool = False,
     patch_diffusers: bool = False,
     catch_constraints: bool = True,
@@ -349,6 +349,7 @@ def torch_export_patches(
             _catch_produce_guards_and_solve_constraints,
             patch__check_input_constraints_for_graph,
             patched__broadcast_in_dim_meta,
+            patched__broadcast_in_dim_meta_level_2,
             patched__maybe_broadcast,
             patched_ShapeEnv,
         )
@@ -390,8 +391,13 @@ def torch_export_patches(
         # torch._prims._broadcast_in_dim_meta
         f_broadcast_in_dim = torch._prims.broadcast_in_dim
         f__broadcast_in_dim_meta = torch._prims._broadcast_in_dim_meta
-        torch._prims._broadcast_in_dim_meta = patched__broadcast_in_dim_meta
-        torch._prims.broadcast_in_dim = patched__broadcast_in_dim_meta
+        _patched_dim_f = (
+            patched__broadcast_in_dim_meta_level_2
+            if patch_torch == 2
+            else patched__broadcast_in_dim_meta
+        )
+        torch._prims._broadcast_in_dim_meta = _patched_dim_f
+        torch._prims.broadcast_in_dim = _patched_dim_f
 
         # torch._refs._maybe_broadcast
         f__maybe_broadcast = torch._refs._maybe_broadcast
@@ -422,7 +428,7 @@ def torch_export_patches(
             )
         )
 
-    if stop_if_static:
+    if patch_torch and stop_if_static:
         ShapeEnv._log_guard_remember = ShapeEnv._log_guard
 
         if verbose:
@@ -453,6 +459,16 @@ def torch_export_patches(
         except ImportError:
             masking_utils = None
 
+        try:
+            import transformers.integrations.sdpa_attention as sdpa_attention
+        except ImportError:
+            sdpa_attention = None
+
+        try:
+            import transformers.modeling_utils as modeling_utils
+        except ImportError:
+            modeling_utils = None
+
         if verbose:
             import transformers
 
@@ -464,7 +480,7 @@ def torch_export_patches(
             patch_transformers_list, verbose=verbose
         )
 
-        if (
+        if (  # vmap
             masking_utils
             and patch_transformers_list.patch_masking_utils
             and hasattr(masking_utils, "_vmap_for_bhqkv")
@@ -499,7 +515,7 @@ def torch_export_patches(
         else:
             f_transformers_sdpa_mask = None
 
-        if (
+        if (  # eager_mask
             masking_utils
             and patch_transformers_list.patch_masking_utils
             and hasattr(masking_utils, "eager_mask")
@@ -526,7 +542,7 @@ def torch_export_patches(
                 patch_transformers_list.patched_eager_mask
             )
 
-        if (
+        if (  # sdpa_mask
             masking_utils
             and patch_transformers_list.patch_masking_utils
             and hasattr(masking_utils, "sdpa_mask")
@@ -547,6 +563,29 @@ def torch_export_patches(
                 patch_transformers_list.patched_sdpa_mask_recent_torch
             )
 
+        if (  # sdpa_attention_forward
+            sdpa_attention is not None
+            and modeling_utils is not None
+            and hasattr(sdpa_attention, "sdpa_attention_forward")
+            and hasattr(sdpa_attention, "use_gqa_in_sdpa")
+            and hasattr(modeling_utils, "AttentionInterface")
+        ):
+            if verbose:
+                print(
+                    "[torch_export_patches] patches "
+                    "transformers.integrations.sdpa_attention.sdpa_attention_forward"
+                )
+            f_sdpa_attention_forward = sdpa_attention.sdpa_attention_forward
+            sdpa_attention.sdpa_attention_forward = (
+                patch_transformers_list.patched_sdpa_attention_forward
+            )
+            modeling_utils.sdpa_attention_forward = (
+                patch_transformers_list.patched_sdpa_attention_forward
+            )
+            modeling_utils.AttentionInterface._global_mapping["sdpa"] = (
+                patch_transformers_list.patched_sdpa_attention_forward
+            )
+
     if custom_patches:
         if verbose:
             print("[torch_export_patches] applies custom patches")
@@ -656,7 +695,7 @@ def torch_export_patches(
             patch_transformers_list, revert_patches_info, verbose=verbose
         )
 
-        if (
+        if (  # vmap
             masking_utils
             and patch_transformers_list.patch_masking_utils
             and hasattr(masking_utils, "_vmap_for_bhqkv")
@@ -687,7 +726,7 @@ def torch_export_patches(
                     "transformers.masking_utils.sdpa_mask"
                 )
 
-        if (
+        if (  # eager_mask
             masking_utils
             and patch_transformers_list.patch_masking_utils
             and hasattr(masking_utils, "eager_mask")
@@ -714,7 +753,7 @@ def torch_export_patches(
                     "in ALL_MASK_ATTENTION_FUNCTIONS"
                 )
 
-        if (
+        if (  # sdpa_mask
             masking_utils
             and patch_transformers_list.patch_masking_utils
             and hasattr(masking_utils, "sdpa_mask")
@@ -734,6 +773,25 @@ def torch_export_patches(
                     "in ALL_MASK_ATTENTION_FUNCTIONS"
                 )
 
+        if (  # sdpa_attention_forward
+            sdpa_attention is not None
+            and modeling_utils is not None
+            and hasattr(sdpa_attention, "sdpa_attention_forward")
+            and hasattr(sdpa_attention, "use_gqa_in_sdpa")
+            and hasattr(modeling_utils, "AttentionInterface")
+        ):
+            sdpa_attention.sdpa_attention_forward = f_sdpa_attention_forward
+            modeling_utils.sdpa_attention_forward = f_sdpa_attention_forward
+            modeling_utils.AttentionInterface._global_mapping["sdpa"] = (
+                f_sdpa_attention_forward
+            )
+            if verbose:
+                print(
+                    "[torch_export_patches] restored "
+                    "transformers.integrations.sdpa_attention."
+                    "sdpa_attention_forward"
+                )
+
         ########
         # caches
         ########
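
A hedged usage sketch of the context manager with the options touched by this diff (the import path is the package's public one; the tiny module stands in for a real model): patch_torch=2 selects patched__broadcast_in_dim_meta_level_2, while any other truthy value keeps the previous patch.

    import torch
    from onnx_diagnostic.torch_export_patches import torch_export_patches

    class Tiny(torch.nn.Module):
        def forward(self, x):
            return x * 2

    with torch_export_patches(patch_torch=2, patch_transformers=True, verbose=1):
        ep = torch.export.export(Tiny(), (torch.rand(2, 3),))
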
@@ -12,17 +12,26 @@ from transformers.cache_utils import (
     StaticCache,
 )
 
-try:
-    from transformers.models.mamba.modeling_mamba import MambaCache
-except ImportError:
-    from transformers.cache_utils import MambaCache
-
 from ..helpers import string_type
 from .serialization import _lower_name_with_
 
 PATCH_OF_PATCHES: Set[Any] = set()
 
 
+def get_mamba_cache_cls() -> type:
+    try:
+        from transformers.models.mamba.modeling_mamba import MambaCache
+
+        return MambaCache
+    except ImportError:
+        try:
+            from transformers.cache_utils import MambaCache
+
+            return MambaCache
+        except ImportError:
+            return None
+
+
 def register_class_serialization(
     cls,
     f_flatten: Callable,
@@ -203,13 +212,6 @@ def serialization_functions(
             # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
             verbose=verbose,
         ),
-        MambaCache: lambda verbose=verbose: register_class_serialization(
-            MambaCache,
-            flatten_mamba_cache,
-            unflatten_mamba_cache,
-            flatten_with_keys_mamba_cache,
-            verbose=verbose,
-        ),
         EncoderDecoderCache: lambda verbose=verbose: register_class_serialization(
             EncoderDecoderCache,
             flatten_encoder_decoder_cache,
@@ -232,6 +234,17 @@ def serialization_functions(
             verbose=verbose,
         ),
     }
+    MambaCache = get_mamba_cache_cls()
+    if MambaCache:
+        transformers_classes[MambaCache] = (
+            lambda verbose=verbose: register_class_serialization(
+                MambaCache,
+                flatten_mamba_cache,
+                unflatten_mamba_cache,
+                flatten_with_keys_mamba_cache,
+                verbose=verbose,
+            )
+        )
     classes.update(transformers_classes)
 
     if patch_diffusers:
@@ -287,7 +300,12 @@ def unregister_class_serialization(cls: type, verbose: int = 0):
 
 def unregister_cache_serialization(undo: Dict[str, bool], verbose: int = 0):
     """Undo all registrations."""
-    cls_ensemble = {MambaCache, DynamicCache, EncoderDecoderCache} | set(undo)
+    MambaCache = get_mamba_cache_cls()
+    cls_ensemble = (
+        {DynamicCache, EncoderDecoderCache}
+        | set(undo)
+        | ({MambaCache} if MambaCache else set())
+    )
     for cls in cls_ensemble:
         if undo.get(cls.__name__, False):
             unregister_class_serialization(cls, verbose)
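
An illustrative check of the new optional lookup; with this change, MambaCache serialization is simply skipped when neither import location provides the class instead of failing at import time:

    from onnx_diagnostic.torch_export_patches.onnx_export_serialization import (
        get_mamba_cache_cls,
    )

    MambaCache = get_mamba_cache_cls()
    if MambaCache is None:
        print("MambaCache not available; its serialization is not registered")
    else:
        print(f"MambaCache found: {MambaCache}")
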