PyPI - onnx-diagnostic - Versions diffs - 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl - Mend

onnx-diagnostic 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

onnx_diagnostic/torch_export_patches/onnx_export_serialization.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import pprint
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Set
 import packaging.version as pv
 import optree
 import torch
@@ -11,10 +11,9 @@ from transformers.cache_utils import (
     SlidingWindowCache,
     StaticCache,
 )
-from transformers.modeling_outputs import BaseModelOutput
-from ..helpers import string_type
-from ..helpers.cache_helper import make_static_cache
+from ..helpers import string_type
+from .serialization import _lower_name_with_
 PATCH_OF_PATCHES: Set[Any] = set()
@@ -29,7 +28,8 @@ def register_class_serialization(
 ) -> bool:
     """
     Registers a class.
-    It can be undone with :func:`unregister`.
+    It can be undone with
+    :func:`onnx_diagnostic.torch_export_patches.onnx_export_serialization.unregister_class_serialization`.
     :param cls: class to register
     :param f_flatten: see ``torch.utils._pytree.register_pytree_node``
@@ -40,10 +40,12 @@ def register_class_serialization(
     :return: registered or not
     """
     if cls is not None and cls in torch.utils._pytree.SUPPORTED_NODES:
+        if verbose and cls is not None:
+            print(f"[register_class_serialization] already registered {cls.__name__}")
         return False
     if verbose:
-        print(f"[register_cache_serialization] register {cls}")
+        print(f"[register_class_serialization] ---------- register {cls.__name__}")
     torch.utils._pytree.register_pytree_node(
         cls,
         f_flatten,
@@ -54,8 +56,8 @@ def register_class_serialization(
     if pv.Version(torch.__version__) < pv.Version("2.7"):
         if verbose:
             print(
-                f"[register_cache_serialization] "
-                f"register {cls} for torch=={torch.__version__}"
+                f"[register_class_serialization] "
+                f"---------- register {cls.__name__} for torch=={torch.__version__}"
             )
         torch.fx._pytree.register_pytree_flatten_spec(cls, lambda x, _: f_flatten(x)[0])
@@ -72,11 +74,35 @@ def register_class_serialization(
     return True
-def register_cache_serialization(verbose: int = 0) -> Dict[str, bool]:
+def register_cache_serialization(
+    patch_transformers: bool = False, patch_diffusers: bool = True, verbose: int = 0
+) -> Dict[str, bool]:
     """
-    Registers many classes with :func:`register_class_serialization`.
+    Registers many classes with
+    :func:`onnx_diagnostic.torch_export_patches.onnx_export_serialization.register_class_serialization`.
     Returns information needed to undo the registration.
+    :param patch_transformers: add serialization function for
+        :epkg:`transformers` package
+    :param patch_diffusers: add serialization function for
+        :epkg:`diffusers` package
+    :param verbose: verbosity level
+    :return: information to unpatch
     """
+    wrong: Dict[type, Optional[str]] = {}
+    if patch_transformers:
+        from .serialization.transformers_impl import WRONG_REGISTRATIONS
+        wrong |= WRONG_REGISTRATIONS
+    if patch_diffusers:
+        from .serialization.diffusers_impl import WRONG_REGISTRATIONS
+        wrong |= WRONG_REGISTRATIONS
+    registration_functions = serialization_functions(
+        patch_transformers=patch_transformers, patch_diffusers=patch_diffusers, verbose=verbose
+    )
     # DynamicCache serialization is different in transformers and does not
     # play well with torch.export.export.
     # see test test_export_dynamic_cache_cat with NOBYPASS=1
@@ -85,109 +111,137 @@ def register_cache_serialization(verbose: int = 0) -> Dict[str, bool]:
     # torch.fx._pytree.register_pytree_flatten_spec(
     #           DynamicCache, _flatten_dynamic_cache_for_fx)
     # so we remove it anyway
-    if (
-        DynamicCache in torch.utils._pytree.SUPPORTED_NODES
-        and DynamicCache not in PATCH_OF_PATCHES
-        # and pv.Version(torch.__version__) < pv.Version("2.7")
-        and pv.Version(transformers.__version__) >= pv.Version("4.50")
-    ):
-        if verbose:
-            print(
-                f"[_fix_registration] DynamicCache is unregistered and "
-                f"registered first for transformers=={transformers.__version__}"
-            )
-        unregister(DynamicCache, verbose=verbose)
-        register_class_serialization(
-            DynamicCache,
-            flatten_dynamic_cache,
-            unflatten_dynamic_cache,
-            flatten_with_keys_dynamic_cache,
-            # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
-            verbose=verbose,
-        )
-        if verbose:
-            print("[_fix_registration] DynamicCache done.")
-        # To avoid doing it multiple times.
-        PATCH_OF_PATCHES.add(DynamicCache)
     # BaseModelOutput serialization is incomplete.
     # It does not include dynamic shapes mapping.
-    if (
-        BaseModelOutput in torch.utils._pytree.SUPPORTED_NODES
-        and BaseModelOutput not in PATCH_OF_PATCHES
-    ):
-        if verbose:
-            print(
-                f"[_fix_registration] BaseModelOutput is unregistered and "
-                f"registered first for transformers=={transformers.__version__}"
+    for cls, version in wrong.items():
+        if (
+            cls in torch.utils._pytree.SUPPORTED_NODES
+            and cls not in PATCH_OF_PATCHES
+            # and pv.Version(torch.__version__) < pv.Version("2.7")
+            and (
+                version is None or pv.Version(transformers.__version__) >= pv.Version(version)
             )
-        unregister(BaseModelOutput, verbose=verbose)
-        register_class_serialization(
-            BaseModelOutput,
-            flatten_base_model_output,
-            unflatten_base_model_output,
-            flatten_with_keys_base_model_output,
-            verbose=verbose,
-        )
-        if verbose:
-            print("[_fix_registration] BaseModelOutput done.")
-        # To avoid doing it multiple times.
-        PATCH_OF_PATCHES.add(BaseModelOutput)
-    return serialization_functions(verbose=verbose)
+        ):
+            assert cls in registration_functions, (
+                f"{cls} has no registration functions mapped to it, "
+                f"available options are {list(registration_functions)}"
+            )
+            if verbose:
+                print(
+                    f"[_fix_registration] {cls.__name__} is unregistered and "
+                    f"registered first"
+                )
+            unregister_class_serialization(cls, verbose=verbose)
+            registration_functions[cls](verbose=verbose)  # type: ignore[arg-type, call-arg]
+            if verbose:
+                print(f"[_fix_registration] {cls.__name__} done.")
+            # To avoid doing it multiple times.
+            PATCH_OF_PATCHES.add(cls)
+    # classes with no registration at all.
+    done = {}
+    for k, v in registration_functions.items():
+        done[k] = v(verbose=verbose)  # type: ignore[arg-type, call-arg]
+    return done
+def serialization_functions(
+    patch_transformers: bool = False, patch_diffusers: bool = False, verbose: int = 0
+) -> Dict[type, Callable[[int], bool]]:
+    """Returns the list of serialization functions."""
+    supported_classes: Set[type] = set()
+    classes: Dict[type, Callable[[int], bool]] = {}
+    all_functions: Dict[type, Optional[str]] = {}
-def serialization_functions(verbose: int = 0) -> Dict[str, Union[Callable, int]]:
-    """Returns the list of serialization functions."""
-    return dict(
-        DynamicCache=register_class_serialization(
-            DynamicCache,
+    if patch_transformers:
+        from .serialization.transformers_impl import (
+            __dict__ as dtr,
+            SUPPORTED_DATACLASSES,
             flatten_dynamic_cache,
             unflatten_dynamic_cache,
             flatten_with_keys_dynamic_cache,
-            # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
-            verbose=verbose,
-        ),
-        MambaCache=register_class_serialization(
-            MambaCache,
             flatten_mamba_cache,
             unflatten_mamba_cache,
             flatten_with_keys_mamba_cache,
-            verbose=verbose,
-        ),
-        EncoderDecoderCache=register_class_serialization(
-            EncoderDecoderCache,
             flatten_encoder_decoder_cache,
             unflatten_encoder_decoder_cache,
             flatten_with_keys_encoder_decoder_cache,
-            verbose=verbose,
-        ),
-        BaseModelOutput=register_class_serialization(
-            BaseModelOutput,
-            flatten_base_model_output,
-            unflatten_base_model_output,
-            flatten_with_keys_base_model_output,
-            verbose=verbose,
-        ),
-        SlidingWindowCache=register_class_serialization(
-            SlidingWindowCache,
             flatten_sliding_window_cache,
             unflatten_sliding_window_cache,
             flatten_with_keys_sliding_window_cache,
-            verbose=verbose,
-        ),
-        StaticCache=register_class_serialization(
-            StaticCache,
             flatten_static_cache,
             unflatten_static_cache,
             flatten_with_keys_static_cache,
-            verbose=verbose,
-        ),
-    )
+        )
+        all_functions.update(dtr)
+        supported_classes |= SUPPORTED_DATACLASSES
+        transformers_classes = {
+            DynamicCache: lambda verbose=verbose: register_class_serialization(
+                DynamicCache,
+                flatten_dynamic_cache,
+                unflatten_dynamic_cache,
+                flatten_with_keys_dynamic_cache,
+                # f_check=make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
+                verbose=verbose,
+            ),
+            MambaCache: lambda verbose=verbose: register_class_serialization(
+                MambaCache,
+                flatten_mamba_cache,
+                unflatten_mamba_cache,
+                flatten_with_keys_mamba_cache,
+                verbose=verbose,
+            ),
+            EncoderDecoderCache: lambda verbose=verbose: register_class_serialization(
+                EncoderDecoderCache,
+                flatten_encoder_decoder_cache,
+                unflatten_encoder_decoder_cache,
+                flatten_with_keys_encoder_decoder_cache,
+                verbose=verbose,
+            ),
+            SlidingWindowCache: lambda verbose=verbose: register_class_serialization(
+                SlidingWindowCache,
+                flatten_sliding_window_cache,
+                unflatten_sliding_window_cache,
+                flatten_with_keys_sliding_window_cache,
+                verbose=verbose,
+            ),
+            StaticCache: lambda verbose=verbose: register_class_serialization(
+                StaticCache,
+                flatten_static_cache,
+                unflatten_static_cache,
+                flatten_with_keys_static_cache,
+                verbose=verbose,
+            ),
+        }
+        classes.update(transformers_classes)
+    if patch_diffusers:
+        from .serialization.diffusers_impl import SUPPORTED_DATACLASSES, __dict__ as dfu
+        all_functions.update(dfu)
+        supported_classes |= SUPPORTED_DATACLASSES
+    for cls in supported_classes:
+        lname = _lower_name_with_(cls.__name__)
+        assert (
+            f"flatten_{lname}" in all_functions
+        ), f"Unable to find function 'flatten_{lname}' in {list(all_functions)}"
+        classes[cls] = (
+            lambda verbose=verbose, _ln=lname, cls=cls, _al=all_functions: register_class_serialization(  # noqa: E501
+                cls,
+                _al[f"flatten_{_ln}"],
+                _al[f"unflatten_{_ln}"],
+                _al[f"flatten_with_keys_{_ln}"],
+                verbose=verbose,
+            )
+        )
+    return classes
-def unregister(cls: type, verbose: int = 0):
+def unregister_class_serialization(cls: type, verbose: int = 0):
     """Undo the registration."""
     # torch.utils._pytree._deregister_pytree_flatten_spec(cls)
     if cls in torch.fx._pytree.SUPPORTED_NODES:
@@ -217,264 +271,7 @@ def unregister(cls: type, verbose: int = 0):
 def unregister_cache_serialization(undo: Dict[str, bool], verbose: int = 0):
     """Undo all registrations."""
-    for cls in [MambaCache, DynamicCache, EncoderDecoderCache, BaseModelOutput]:
+    cls_ensemble = {MambaCache, DynamicCache, EncoderDecoderCache} | set(undo)
+    for cls in cls_ensemble:
         if undo.get(cls.__name__, False):
-            unregister(cls, verbose)
-############
-# MambaCache
-############
-def flatten_mamba_cache(
-    mamba_cache: MambaCache,
-) -> Tuple[List[Any], torch.utils._pytree.Context]:
-    """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
-    flat = [
-        ("conv_states", mamba_cache.conv_states),
-        ("ssm_states", mamba_cache.ssm_states),
-    ]
-    return [f[1] for f in flat], [f[0] for f in flat]
-def unflatten_mamba_cache(
-    values: List[Any], context: torch.utils._pytree.Context, output_type=None
-) -> MambaCache:
-    """Restores a :class:`transformers.cache_utils.MambaCache` from python objects."""
-    conv_states, ssm_states = values
-    class _config:
-        def __init__(self):
-            if isinstance(conv_states, list):
-                self.intermediate_size = conv_states[0].shape[1]
-                self.state_size = ssm_states[0].shape[2]
-                self.conv_kernel = conv_states[0].shape[2]
-                self.num_hidden_layers = len(conv_states)
-            else:
-                self.intermediate_size = conv_states.shape[2]
-                self.state_size = ssm_states.shape[3]
-                self.conv_kernel = conv_states.shape[3]
-                self.num_hidden_layers = conv_states.shape[0]
-    cache = MambaCache(
-        _config(),
-        max_batch_size=1,
-        dtype=values[-1][0].dtype,
-        device="cpu" if values[-1][0].get_device() < 0 else "cuda",
-    )
-    values = dict(zip(context, values))
-    for k, v in values.items():
-        setattr(cache, k, v)
-    return cache
-def flatten_with_keys_mamba_cache(cache: MambaCache) -> Tuple[
-    List[Tuple[torch.utils._pytree.KeyEntry, Any]],
-    torch.utils._pytree.Context,
-]:
-    """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
-    values, context = flatten_mamba_cache(cache)
-    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
-##############
-# DynamicCache
-##############
-def flatten_dynamic_cache(
-    dynamic_cache: DynamicCache,
-) -> Tuple[List[Any], torch.utils._pytree.Context]:
-    """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-    if hasattr(transformers.cache_utils, "_flatten_dynamic_cache"):
-        return transformers.cache_utils._flatten_dynamic_cache(dynamic_cache)
-    flat = [("key_cache", dynamic_cache.key_cache), ("value_cache", dynamic_cache.value_cache)]
-    return [f[1] for f in flat], [f[0] for f in flat]
-def flatten_with_keys_dynamic_cache(
-    dynamic_cache: DynamicCache,
-) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
-    """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-    if hasattr(transformers.cache_utils, "_flatten_with_keys_dynamic_cache"):
-        return transformers.cache_utils._flatten_with_keys_dynamic_cache(dynamic_cache)
-    values, context = flatten_dynamic_cache(dynamic_cache)
-    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
-def unflatten_dynamic_cache(
-    values: List[Any], context: torch.utils._pytree.Context, output_type=None
-) -> DynamicCache:
-    """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
-    if hasattr(transformers.cache_utils, "_unflatten_dynamic_cache"):
-        assert output_type is None, f"output_type={output_type} not supported"
-        return transformers.cache_utils._unflatten_dynamic_cache(values, context)
-    cache = transformers.cache_utils.DynamicCache()
-    values = dict(zip(context, values))
-    for k, v in values.items():
-        setattr(cache, k, v)
-    return cache
-#############
-# StaticCache
-#############
-def flatten_static_cache(
-    cache: StaticCache,
-) -> Tuple[List[Any], torch.utils._pytree.Context]:
-    """Serializes a :class:`transformers.cache_utils.StaticCache` with python objects."""
-    flat = [("key_cache", cache.key_cache), ("value_cache", cache.value_cache)]
-    return [f[1] for f in flat], [f[0] for f in flat]
-def flatten_with_keys_static_cache(
-    cache: StaticCache,
-) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
-    """Serializes a :class:`transformers.cache_utils.StaticCache` with python objects."""
-    values, context = flatten_static_cache(cache)
-    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
-def unflatten_static_cache(
-    values: List[Any], context: torch.utils._pytree.Context, output_type=None
-) -> StaticCache:
-    """Restores a :class:`transformers.cache_utils.StaticCache` from python objects."""
-    return make_static_cache(list(zip(values[0], values[1])))
-####################
-# SlidingWindowCache
-####################
-def flatten_sliding_window_cache(
-    cache: SlidingWindowCache,
-) -> Tuple[List[Any], torch.utils._pytree.Context]:
-    """
-    Serializes a :class:`transformers.cache_utils.SlidingWindowCache`
-    with python objects.
-    """
-    flat = [("key_cache", cache.key_cache), ("value_cache", cache.value_cache)]
-    return [f[1] for f in flat], [f[0] for f in flat]
-def flatten_with_keys_sliding_window_cache(
-    cache: SlidingWindowCache,
-) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
-    """
-    Serializes a :class:`transformers.cache_utils.SlidingWindowCache`
-    with python objects.
-    """
-    values, context = flatten_sliding_window_cache(cache)
-    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
-def unflatten_sliding_window_cache(
-    values: List[Any], context: torch.utils._pytree.Context, output_type=None
-) -> SlidingWindowCache:
-    """Restores a :class:`transformers.cache_utils.SlidingWindowCache` from python objects."""
-    key_cache, value_cache = values
-    class _config:
-        def __init__(self):
-            self.head_dim = key_cache[0].shape[-1]
-            self.num_attention_heads = key_cache[0].shape[1]
-            self.num_hidden_layers = len(key_cache)
-            self.sliding_window = key_cache[0].shape[2]
-    cache = SlidingWindowCache(
-        _config(),
-        max_batch_size=key_cache[0].shape[0],
-        max_cache_len=key_cache[0].shape[2],  # sligding window
-        device=key_cache[0].device,
-        dtype=key_cache[0].dtype,
-    )
-    values = dict(zip(context, values))
-    for k, v in values.items():
-        setattr(cache, k, v)
-    return cache
-#####################
-# EncoderDecoderCache
-#####################
-def flatten_encoder_decoder_cache(
-    ec_cache: EncoderDecoderCache,
-) -> Tuple[List[Any], torch.utils._pytree.Context]:
-    """
-    Serializes a :class:`transformers.cache_utils.EncoderDecoderCache`
-    with python objects.
-    """
-    dictionary = {
-        "self_attention_cache": ec_cache.self_attention_cache,
-        "cross_attention_cache": ec_cache.cross_attention_cache,
-    }
-    return torch.utils._pytree._dict_flatten(dictionary)
-def flatten_with_keys_encoder_decoder_cache(ec_cache: EncoderDecoderCache) -> Tuple[
-    List[Tuple[torch.utils._pytree.KeyEntry, Any]],
-    torch.utils._pytree.Context,
-]:
-    """
-    Serializes a :class:`transformers.cache_utils.EncoderDecoderCache`
-    with python objects.
-    """
-    dictionary = {
-        "self_attention_cache": ec_cache.self_attention_cache,
-        "cross_attention_cache": ec_cache.cross_attention_cache,
-    }
-    return torch.utils._pytree._dict_flatten_with_keys(dictionary)
-def unflatten_encoder_decoder_cache(
-    values: List[Any], context: torch.utils._pytree.Context, output_type=None
-) -> EncoderDecoderCache:
-    """Restores a :class:`transformers.cache_utils.EncoderDecoderCache` from python objects."""
-    dictionary = torch.utils._pytree._dict_unflatten(values, context)
-    return EncoderDecoderCache(**dictionary)
-#################
-# BaseModelOutput
-#################
-def flatten_base_model_output(
-    bo: BaseModelOutput,
-) -> Tuple[List[Any], torch.utils._pytree.Context]:
-    """
-    Serializes a :class:`transformers.modeling_outputs.BaseModelOutput`
-    with python objects.
-    """
-    return list(bo.values()), list(bo.keys())
-def flatten_with_keys_base_model_output(
-    bo: BaseModelOutput,
-) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
-    """
-    Serializes a :class:`transformers.modeling_outputs.BaseModelOutput`
-    with python objects.
-    """
-    values, context = flatten_base_model_output(bo)
-    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
-def unflatten_base_model_output(
-    values: List[Any],
-    context: torch.utils._pytree.Context,
-    output_type=None,
-) -> BaseModelOutput:
-    """
-    Restores a :class:`transformers.modeling_outputs.BaseModelOutput`
-    from python objects.
-    """
-    return BaseModelOutput(**dict(zip(context, values)))
+            unregister_class_serialization(cls, verbose)

onnx-diagnostic 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

onnx-diagnostic 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl