onnx-diagnostic 0.8.10__py3-none-any.whl → 0.8.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +136 -140
  3. onnx_diagnostic/ci_models/export_phi4_mm.py +2 -4
  4. onnx_diagnostic/export/api.py +2 -4
  5. onnx_diagnostic/export/validate.py +2 -0
  6. onnx_diagnostic/ext_test_case.py +32 -15
  7. onnx_diagnostic/helpers/args_helper.py +1 -0
  8. onnx_diagnostic/helpers/bench_run.py +0 -1
  9. onnx_diagnostic/helpers/cache_helper.py +6 -6
  10. onnx_diagnostic/helpers/doc_helper.py +7 -4
  11. onnx_diagnostic/helpers/graph_helper.py +6 -6
  12. onnx_diagnostic/helpers/log_helper.py +37 -14
  13. onnx_diagnostic/helpers/memory_peak.py +5 -1
  14. onnx_diagnostic/helpers/mini_onnx_builder.py +9 -14
  15. onnx_diagnostic/helpers/model_builder_helper.py +1 -1
  16. onnx_diagnostic/helpers/onnx_helper.py +283 -110
  17. onnx_diagnostic/helpers/ort_session.py +0 -1
  18. onnx_diagnostic/helpers/torch_helper.py +8 -9
  19. onnx_diagnostic/investigate/__init__.py +0 -0
  20. onnx_diagnostic/investigate/input_observer.py +329 -0
  21. onnx_diagnostic/reference/evaluator.py +0 -1
  22. onnx_diagnostic/reference/ort_evaluator.py +0 -1
  23. onnx_diagnostic/reference/report_results_comparison.py +9 -3
  24. onnx_diagnostic/reference/torch_evaluator.py +5 -1
  25. onnx_diagnostic/reference/torch_ops/_op_run.py +3 -5
  26. onnx_diagnostic/reference/torch_ops/sequence_ops.py +1 -1
  27. onnx_diagnostic/tasks/feature_extraction.py +0 -1
  28. onnx_diagnostic/torch_export_patches/__init__.py +0 -1
  29. onnx_diagnostic/torch_export_patches/patch_module.py +1 -1
  30. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +2 -2
  31. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +44 -23
  32. onnx_diagnostic/torch_models/code_sample.py +5 -10
  33. onnx_diagnostic/torch_models/hghub/hub_data.py +2 -4
  34. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +6 -12
  35. onnx_diagnostic/torch_models/validate.py +1 -1
  36. onnx_diagnostic/torch_onnx/compare.py +0 -1
  37. onnx_diagnostic/torch_onnx/runtime_info.py +1 -1
  38. onnx_diagnostic/torch_onnx/sbs.py +1 -1
  39. onnx_diagnostic/torch_onnx/sbs_dataclasses.py +2 -4
  40. onnx_diagnostic/typing.py +15 -0
  41. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.8.11.dist-info}/METADATA +1 -1
  42. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.8.11.dist-info}/RECORD +45 -43
  43. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.8.11.dist-info}/WHEEL +1 -1
  44. onnx_diagnostic/api.py +0 -15
  45. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.8.11.dist-info}/licenses/LICENSE.txt +0 -0
  46. {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.8.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,329 @@
+ import contextlib
+ import inspect
+ from typing import Any, Callable, Sequence
+ import torch
+
+
+ def flatten_unflatten_for_dynamic_shapes(
+     obj: Any,
+     use_dict: bool = True,
+     change_function: Callable[[torch.Tensor], Any] | None = None,
+ ) -> Any:
+     """
+     Returns the object in a different structure, similar to what
+     the definition of the dynamic shapes should use.
+
+     Args:
+         obj:
+             object from a custom class
+         use_dict:
+             closer to the original result, but
+             :func:`torch.export.export` only considers the values;
+             the context gives the dictionary keys but it is not expressed
+             in the dynamic shapes, and these specifications seem to differ
+             between strict and non-strict mode. It also preserves tuples.
+         change_function:
+             a function to modify the tensors in the structure itself,
+             e.g., to replace them with their shapes
+
+     Returns:
+         the serialized object
+     """
+     if isinstance(obj, torch.Tensor):
+         return change_function(obj) if change_function else obj
+     flat, spec = torch.utils._pytree.tree_flatten(obj)
+     start = 0
+     end = 0
+     subtrees = []
+     for subspec in (spec.children() if hasattr(spec, "children") else spec.children_specs):
+         end += subspec.num_leaves
+         value = subspec.unflatten(flat[start:end])
+         value = flatten_unflatten_for_dynamic_shapes(
+             value, use_dict=use_dict, change_function=change_function
+         )
+         subtrees.append(value)
+         start = end
+     if use_dict:
+         if spec.type is dict:
+             # This is a dictionary.
+             return dict(zip(spec.context, subtrees))
+         if spec.type is tuple:
+             return tuple(subtrees)
+         if spec.type is list:
+             return list(subtrees)
+         if spec.type is None and not subtrees:
+             return None
+         if spec.context:
+             # This is a custom class with attributes.
+             # It is returned as a list.
+             return list(subtrees)
+         raise ValueError(
+             f"Unable to interpret spec type {spec.type} "
+             f"(type is {type(spec.type)}, context is {spec.context}), "
+             f"spec={spec}, subtrees={subtrees}"
+         )
+     # This is a list.
+     return subtrees
+
+
+ def infer_dynamic_dimensions(shape_list: Sequence[tuple[int, ...]]) -> list[int]:
+     """
+     Returns the list of dynamic dimensions given a list of shapes
+     corresponding to the same tensor.
+
+     Args:
+         shape_list:
+             list of shapes; they must all have the same rank
+
+     Returns:
+         list of dynamic dimensions
+     """
+     unique_ranks = {len(shape) for shape in shape_list}
+     torch._check(
+         len(unique_ranks) == 1, lambda: "all shapes in shape_list must have the same rank"
+     )
+     rank = unique_ranks.pop()
+     dynamic = []
+     for i in range(rank):
+         dims = [shape[i] for shape in shape_list]
+         if len(set(dims)) > 1:
+             dynamic.append(i)
+     return dynamic
+
+
+ class InputObserverInfo:
+     def __init__(self, signature: inspect.Signature):
+         # pyrefly: ignore
+         self.inputs_specs: list[torch.utils._pytree.PyTreeSpec] = []
+         self.flat_inputs: list[list[torch.Tensor | None]] = []
+         # pyrefly: ignore
+         self.outputs_specs: list[torch.utils._pytree.PyTreeSpec] = []
+         self.flat_outputs: list[torch.Tensor | list[torch.Tensor]] = []
+         self.signature = signature
+
+         self._max_args: tuple[Any, torch.Tensor] | None = None
+         self._max_kwargs: dict[str, torch.Tensor] | None = None
+
+     def __len__(self) -> int:
+         return len(self.flat_inputs)
+
+     def add_inputs(self, args: tuple[Any, ...], kwargs: dict[str, Any]):
+         kwargs = {
+             k: v
+             for k, v in kwargs.items()
+             if v is not None and not isinstance(v, (int, float, bool))
+         }
+         flat_args, spec = torch.utils._pytree.tree_flatten((args, kwargs))
+         self.inputs_specs.append(spec)
+         cloned = [
+             (None if not isinstance(t, torch.Tensor) else t.clone().detach())
+             for t in flat_args
+         ]
+         self.flat_inputs.append(cloned)
+
+         cloned_args, cloned_kwargs = torch.utils._pytree.tree_unflatten(cloned, spec)
+         if self._max_args is None or len(cloned_args) > len(self._max_args):
+             self._max_args = cloned_args
+         if self._max_kwargs is None or len(cloned_kwargs) > len(self._max_kwargs):
+             self._max_kwargs = cloned_kwargs
+
+     def add_outputs(self, res: torch.Tensor | tuple[torch.Tensor, ...]):
+         flat_res, spec = torch.utils._pytree.tree_flatten(res)
+         self.outputs_specs.append(spec)
+         self.flat_outputs.append([t.clone().detach() for t in flat_res])
+
+     def build_inputs_completed_with_none_values(self) -> list[list[torch.Tensor]]:
+         # Let's compute the sizes of each argument independently.
+         if not self.flat_inputs or self._max_args is None or self._max_kwargs is None:
+             raise RuntimeError("No inputs were captured.")
+         arg_sizes = [len(torch.utils._pytree.tree_flatten(a)[0]) for a in self._max_args]
+         kwarg_sizes = {
+             k: len(torch.utils._pytree.tree_flatten(v)[0]) for k, v in self._max_kwargs.items()
+         }
+
+         # Let's reprocess everything.
+         captured_inputs: dict[int | str, int] = {}
+         new_flat_inputs = []
+         for args_kwargs, spec in zip(self.flat_inputs, self.inputs_specs):
+             args, kwargs = torch.utils._pytree.tree_unflatten(args_kwargs, spec)
+             if len(set(kwargs) | set(self._max_kwargs)) > len(self._max_kwargs):
+                 raise RuntimeError(
+                     "At least one call to the observed model "
+                     "must contain all the named arguments."
+                 )
+             flat = []
+             for i in range(len(self._max_args)):
+                 if i < len(args):
+                     ts = torch.utils._pytree.tree_flatten(args[i])[0]
+                     if i in captured_inputs and captured_inputs[i] != len(ts):
+                         raise RuntimeError(
+                             f"Positional argument {i} has {len(ts)} tensors "
+                             f"but previously got {captured_inputs[i]} tensors. "
+                             f"Inference is impossible in that case."
+                         )
+                     captured_inputs[i] = len(ts)
+                     flat.extend(ts)
+                 else:
+                     flat.extend([None for _ in range(arg_sizes[i])])
+             for k in self._max_kwargs:
+                 if k in kwargs:
+                     ts = torch.utils._pytree.tree_flatten(kwargs[k])[0]
+                     if k in captured_inputs and captured_inputs[k] != len(ts):
+                         raise RuntimeError(
+                             f"Named argument {k!r} has {len(ts)} tensors "
+                             f"but previously got {captured_inputs[k]} tensors. "
+                             f"Inference is impossible in that case."
+                         )
+                     captured_inputs[k] = len(ts)
+                     flat.extend(ts)
+                 else:
+                     flat.extend([None for _ in range(kwarg_sizes[k])])
+             new_flat_inputs.append(flat)
+         return new_flat_inputs
+
+     def infer_dynamic_shapes(self) -> tuple[dict[int, Any], ...] | dict[str, dict[int, Any]]:
+         flat_inputs = self.build_inputs_completed_with_none_values()
+         # This is already checked by build_inputs_completed_with_none_values
+         # but this is not always well captured by tools checking types.
+         assert self._max_args is not None and self._max_kwargs is not None
+         if len({len(flat) for flat in flat_inputs}) != 1:
+             raise NotImplementedError(
+                 "infer_dynamic_shapes is not implemented "
+                 "when the number of input tensors is not the same."
+             )
+         shape_lists = [
+             [(None if t is None else t.shape) for t in tensors] for tensors in flat_inputs
+         ]
+         n_tensors = len(shape_lists[0])
+         dynamic_shapes = [
+             infer_dynamic_dimensions(
+                 [s for s in [shapes[index] for shapes in shape_lists] if s is not None]
+             )
+             for index in range(n_tensors)
+         ]
+         cst = torch.export.Dim.DYNAMIC
+         flat_dynamic_shapes = [dict.fromkeys(dims, cst) for dims in dynamic_shapes]
+         if len(flat_dynamic_shapes) == len(self._max_args) + len(self._max_kwargs):
+             # It means the forward method is called with tensors only.
+             if not self._max_kwargs:
+                 # only positional arguments
+                 return tuple(flat_dynamic_shapes)
+             if not self._max_args:
+                 # only named arguments
+                 return dict(zip(list(self._max_kwargs), flat_dynamic_shapes))
+             # positional arguments need to be moved to the named arguments
+             n_args = len(self._max_args)
+             pos_names = list(self.signature.parameters)[:n_args]
+             return {
+                 **dict(zip(pos_names, flat_dynamic_shapes[:n_args])),
+                 **dict(zip(list(self._max_kwargs), flat_dynamic_shapes[n_args:])),
+             }
+
+         # Nested types: this is the tricky part because the shapes cannot be unflattened;
+         # custom classes must appear in their flattened form.
+         # This does not work in all cases, only when every available argument is flattened
+         # into the same number of tensors every time. The function does not check
+         # whether that assumption holds.
+         flat_inputs, _max_spec = torch.utils._pytree.tree_flatten(
+             (self._max_args, self._max_kwargs)
+         )
+         torch._check(
+             len(flat_inputs) == len(flat_dynamic_shapes),
+             (
+                 f"Length mismatch len(flat_inputs)={len(flat_inputs)}, "
+                 f"len(flat_dynamic_shapes)={len(flat_dynamic_shapes)}"
+             ),
+         )
+         mapping = {id(t): shape for t, shape in zip(flat_inputs, flat_dynamic_shapes)}
+         ds_args, ds_kwargs = flatten_unflatten_for_dynamic_shapes(
+             (self._max_args, self._max_kwargs), change_function=lambda t: mapping[id(t)]
+         )
+         if not ds_kwargs:
+             return tuple(ds_args)
+         if not ds_args:
+             return tuple(ds_kwargs)
+         pos_names = list(self.signature.parameters)[: len(ds_args)]
+         return {**dict(zip(pos_names, ds_args)), **ds_kwargs}
+
+     def infer_arguments(
+         self, index: int | None = None
+     ) -> tuple[torch.Tensor, ...] | dict[str, torch.Tensor]:
+         # This is already checked by build_inputs_completed_with_none_values
+         # but this is not always well captured by tools checking types.
+         assert self._max_args is not None and self._max_kwargs is not None
+         candidate = None
+         if index is None:
+             for i, (args_kwargs, spec) in enumerate(zip(self.flat_inputs, self.inputs_specs)):
+                 args, kwargs = torch.utils._pytree.tree_unflatten(args_kwargs, spec)
+                 if len(args) == len(self._max_args) and len(kwargs) == len(self._max_kwargs):
+                     index = i
+                     candidate = args, kwargs
+                     break
+         if index is not None:
+             # found one available set.
+             args, kwargs = candidate or torch.utils._pytree.tree_unflatten(
+                 self.flat_inputs[index], self.inputs_specs[index]
+             )
+             if not kwargs:
+                 return args
+             if not args:
+                 return kwargs
+             # We need to move args to kwargs.
+             pos_names = list(self.signature.parameters)[: len(args)]
+             return {**dict(zip(pos_names, args)), **kwargs}
+
+         raise NotImplementedError(
+             "We could not find a good set of inputs/outputs. "
+             "We need to replace None values with empty tensors."
+         )
+
+
+ class InputObserver:
+     def __init__(self, store_n_calls: int = 3):
+         self.store_n_calls = store_n_calls
+         self.info: InputObserverInfo | None = None
+
+     def _forward_captured(self, *args, _captured_forward=None, **kwargs):
+         assert _captured_forward is not None, "_captured_forward cannot be None"
+         assert self.info is not None, "info cannot be None"
+         n_stored = len(self.info)
+         if n_stored < self.store_n_calls:
+             self.info.add_inputs(args, kwargs)
+         res = _captured_forward(*args, **kwargs)
+         if n_stored < self.store_n_calls:
+             self.info.add_outputs(res)
+         return res
+
+     @contextlib.contextmanager
+     def __call__(self, model: torch.nn.Module):
+         if self.info is not None:
+             raise RuntimeError(
+                 "This class was already used to capture a model. Please create a new one."
+             )
+         self.info = InputObserverInfo(signature=inspect.signature(model.forward))
+         forward_method = model.forward
+         model.forward = (
+             lambda *args, _captured_forward=forward_method, **kwargs: self._forward_captured(
+                 *args, _captured_forward=_captured_forward, **kwargs
+             )
+         )
+         try:
+             yield self
+         finally:
+             model.forward = forward_method
+
+     def _check_captured(self):
+         if self.info is None:
+             raise RuntimeError("No inputs were captured.")
+
+     def infer_dynamic_shapes(self) -> tuple[dict[int, Any], ...] | dict[str, dict[int, Any]]:
+         self._check_captured()
+         assert self.info is not None  # missed by type checking
+         return self.info.infer_dynamic_shapes()
+
+     def infer_arguments(
+         self, index: int | None = None
+     ) -> tuple[torch.Tensor, ...] | dict[str, torch.Tensor]:
+         self._check_captured()
+         assert self.info is not None  # missed by type checking
+         return self.info.infer_arguments(index=index)
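
The new onnx_diagnostic/investigate/input_observer.py is the main addition in 0.8.11: it temporarily wraps a model's forward, records a few calls, and infers dynamic shapes and a set of example arguments for torch.export.export. A minimal usage sketch (TinyModel and the shapes below are hypothetical; the InputObserver API is the one added above):

    import torch
    from onnx_diagnostic.investigate.input_observer import InputObserver

    class TinyModel(torch.nn.Module):
        def forward(self, x, mask=None):
            return x * 2 if mask is None else x * 2 + mask

    model = TinyModel()
    observer = InputObserver(store_n_calls=3)
    with observer(model):
        # Call the model with varying batch sizes so the observer can
        # tell dynamic dimensions apart from static ones.
        model(torch.randn(2, 8), mask=torch.randn(2, 8))
        model(torch.randn(3, 8), mask=torch.randn(3, 8))

    # Dimension 0 changed between calls, so it should come back as dynamic,
    # e.g. {"x": {0: Dim.DYNAMIC}, "mask": {0: Dim.DYNAMIC}}.
    ds = observer.infer_dynamic_shapes()
    kwargs = observer.infer_arguments()  # one captured set of inputs
    ep = torch.export.export(model, (), kwargs=kwargs, dynamic_shapes=ds)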
@@ -42,7 +42,6 @@ from .ops.op_slice import Slice_1, Slice_10
  from .ops.op_transpose_cast import Transpose2DCastFP16, Transpose2DCastFP32
  from .ops.op_tri_matrix import TriMatrix

-
  logger = getLogger("onnx-diagnostic-eval")


@@ -34,7 +34,6 @@ from ..helpers.torch_helper import to_tensor
  from .report_results_comparison import ReportResultComparison
  from .evaluator import ExtendedReferenceEvaluator

-
  PROTO = (FunctionProto, ModelProto, GraphProto, NodeProto)
  Proto = Union[FunctionProto, ModelProto, GraphProto, NodeProto]

@@ -1,5 +1,4 @@
- from typing import Any, Dict, List, Tuple, Union
-
+ from typing import Any, Dict, List, Set, Tuple, Union

  ReportKeyNameType = Union[str, Tuple[str, int, str]]
  ReportKeyValueType = Tuple[int, Tuple[int, ...]]
@@ -14,6 +13,7 @@ class ReportResultComparison:
      :param tensors: tensor
      """

+     # pyrefly: ignore[unknown-name]
      def __init__(self, tensors: Dict[ReportKeyNameType, "torch.Tensor"]):  # noqa: F821
          from ..helpers.onnx_helper import dtype_to_tensor_dtype
          from ..helpers import max_diff, string_type
@@ -25,7 +25,9 @@ class ReportResultComparison:
          self.max_diff = max_diff
          self.tensors = tensors
          self._build_mapping()
+         self.unique_run_names: Set[str] = set()

+     # pyrefly: ignore[unknown-name]
      def key(self, tensor: "torch.Tensor") -> ReportKeyValueType:  # noqa: F821
          "Returns a key for a tensor, (onnx dtype, shape)."
          return self.dtype_to_tensor_dtype(tensor.dtype), tuple(map(int, tensor.shape))
@@ -59,12 +61,15 @@ class ReportResultComparison:
          for k, v in self.value.items():
              (i_run, run_name), ref_name = k
              d = dict(run_index=i_run, run_name=run_name, ref_name=ref_name)
+             # pyrefly: ignore[no-matching-overload]
              d.update(v)
              rows.append(d)
          return rows

      def report(
-         self, outputs: Dict[str, "torch.Tensor"]  # noqa: F821
+         self,
+         # pyrefly: ignore[unknown-name]
+         outputs: Dict[str, "torch.Tensor"],  # noqa: F821
      ) -> List[Tuple[Tuple[int, str], ReportKeyNameType, Dict[str, Union[float, str]]]]:
          """
          For every tensor in outputs, compares it to every tensor held by
@@ -79,6 +84,7 @@ class ReportResultComparison:
              key = self.key(tensor)
              if key not in self.mapping:
                  continue
+             # pyrefly: ignore[unknown-name]
              cache: Dict["torch.device", "torch.Tensor"] = {}  # noqa: F821, UP037
              for held_key in self.mapping[key]:
                  t2 = self.tensors[held_key]
@@ -63,7 +63,7 @@ class TorchOnnxEvaluator:
      * `functions`: local functions

      The class is not multithreaded. `runtime_info` gets updated
-     by the the class. The list of available kernels is returned by function
+     by the class. The list of available kernels is returned by function
      :func:`onnx_diagnostic.reference.torch_evaluator.get_kernels`.
      Example:

@@ -494,8 +494,10 @@ class TorchOnnxEvaluator:
              r = self.runtime_info[k]
              r.set_value(
                  torch_ops.OpRunTensor(
+                     # pyrefly: ignore[missing-attribute]
                      v.to(self.CUDA) if not r.is_shape and self.on_cuda else v,
                      is_constant=False,
+                     # pyrefly: ignore[missing-attribute]
                      may_cpu=len(v.shape) == 1 and v.numel() < 8 and v.dtype == torch.int64,
                  )
              )
@@ -524,6 +526,7 @@ class TorchOnnxEvaluator:
                  f"for kernel {type(kernel)}."
              )
              for name, t in zip(kernel.output, res):
+                 # pyrefly: ignore[bad-argument-type]
                  self.runtime_info[name].set_value(t)
              if self.verbose:
                  for name in kernel.output:
@@ -644,6 +647,7 @@ class TorchOnnxEvaluator:
                  f"for kernel {type(kernel)}."
              )
              for name, t in zip(kernel.output, res):
+                 # pyrefly: ignore[bad-argument-type]
                  self.runtime_info[name].set_value(t)
          else:
              assert isinstance(
@@ -1,7 +1,7 @@
  from typing import Any, Dict, List, Optional, Union, Tuple
  import onnx
  import torch
- from ...api import TensorLike
+ from ...typing import TensorLike
  from ...helpers import string_type
  from ...helpers.torch_helper import to_tensor

@@ -149,7 +149,7 @@ class OpRunSequence(OpRunValue):
      ) -> "OpRunSequence":
          "Inserts a value at a given position."
          assert isinstance(tensor, OpRunTensor), f"Unexpected type {type(tensor)} for tensor"
-         new_seq = OpRunSequence()
+         new_seq = OpRunSequence()  # type: ignore[abstract]
          seq = self.sequence.copy()
          new_seq.sequence = seq
          if position is None:
@@ -314,9 +314,7 @@ class OpRunKernel:


  class OpRunFunction(OpRunKernel):
-     """
-     Defines a kernel based on a local functions.
-     """
+     """Defines a kernel based on a local function."""

      def __init__(
          self,
@@ -46,7 +46,7 @@ class SequenceEmpty_11(OpRunOpSequence):
      )

      def run(self) -> OpRunSequence:
-         return OpRunSequence(dtype=self.dtype)
+         return OpRunSequence(dtype=self.dtype)  # type: ignore[abstract]


  class SequenceInsert_11(OpRunOpSequence):
@@ -3,7 +3,6 @@ import torch
  from ..helpers.config_helper import update_config, check_hasattr
  from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache

-
  __TASK__ = "feature-extraction"


@@ -4,7 +4,6 @@ from .onnx_export_errors import (
  )
  from .patch_module import torch_export_rewrite

-
  # bypass_export_some_errors is the first name given to the patches.
  bypass_export_some_errors = torch_export_patches  # type: ignore

@@ -986,7 +986,7 @@ def torch_export_rewrite(
      name = me.__qualname__
      spl = name.split(".")
      if len(spl) == 1:
-         # This a function
+         # This is a function
          module = me.__module__
          if module in me.__globals__:
              mod = me.__globals__[module]
@@ -7,10 +7,10 @@ import transformers

  def patched__compute_dynamic_ntk_parameters(
      config: Optional[transformers.PretrainedConfig] = None,
-     device: Optional["torch.device"] = None,
+     device: Optional[torch.device] = None,
      seq_len: Optional[int] = None,
      **rope_kwargs,
- ) -> Tuple["torch.Tensor", float]:
+ ) -> Tuple[torch.Tensor, float]:
      """
      manual patch:
      ``[patch:transformers.modeling_rope_utils._compute_dynamic_ntk_parameters]``
@@ -1,6 +1,7 @@
  import itertools
  from typing import Any, Callable, List, Set, Tuple
  import torch
+ import transformers.cache_utils
  from transformers.cache_utils import Cache, DynamicCache, EncoderDecoderCache, StaticCache

  try:
@@ -22,22 +23,43 @@ from transformers.modeling_outputs import BaseModelOutput
  from ...helpers.cache_helper import make_dynamic_cache, make_static_cache, CacheKeyValue
  from . import make_serialization_function_for_dataclass

-
  SUPPORTED_DATACLASSES: Set[type] = set()
  WRONG_REGISTRATIONS = {
      DynamicCache: "4.50",
      BaseModelOutput: None,
  }
+ SHORTEN_LAYER_NAMES = {
+     "DynamicLayer": "D",
+     "DynamicSlidingWindowLayer": "W",
+     "StaticLayer": "S",
+     "StaticSlidingWindowLayer": "X",
+     "D": "DynamicLayer",
+     "W": "DynamicSlidingWindowLayer",
+     "S": "StaticLayer",
+     "X": "StaticSlidingWindowLayer",
+ }


  def _flatten_key_value_cache(cache: Cache) -> Tuple[List[Any], torch.utils._pytree.Context]:
      ca = CacheKeyValue(cache)
      flat = list(itertools.chain.from_iterable(zip(ca.key_cache, ca.value_cache)))
-     keys = list(
-         itertools.chain.from_iterable(
-             (f"key_{i}", f"value_{i}") for i in range(len(ca.key_cache))
+     unique = set(ca.cls_layers) if ca.cls_layers else None
+     if (
+         cache.__class__.__name__ != "DynamicCache"
+         or unique is None
+         or (len(unique) == 1 and unique.pop().__name__ == "DynamicLayer")
+     ):
+         keys = list(
+             itertools.chain.from_iterable(
+                 (f"key_{i}", f"value_{i}") for i in range(len(ca.key_cache))
+             )
          )
-     )
+         return flat, keys
+
+     keys = []
+     for i in range(len(ca.key_cache)):
+         letter = SHORTEN_LAYER_NAMES[ca.cls_layers[i].__name__]
+         keys.extend([f"key_{letter}{i}", f"value_{letter}{i}"])
      return flat, keys


@@ -55,7 +77,20 @@ def _unflatten_cache(
      output_type=None,
  ) -> DynamicCache:
      """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
-     res = make_cache(list(zip(values[::2], values[1::2])))
+     expected = list(
+         itertools.chain.from_iterable(
+             (f"key_{i}", f"value_{i}") for i in range(len(values) // 2)
+         )
+     )
+     if expected == context:
+         res = make_cache(list(zip(values[::2], values[1::2])))
+     else:
+         cls_layer_names = [SHORTEN_LAYER_NAMES[name.split("_")[1][0]] for name in context][::2]
+         cls_layers = [
+             getattr(transformers.cache_utils, cls_name) for cls_name in cls_layer_names
+         ]
+         res = make_cache(list(zip(values[::2], values[1::2])), cls_layers=cls_layers)
+
      assert output_type is None or isinstance(
          res, output_type
      ), f"Type mismatch between {output_type} (expected) and {type(res)}"
@@ -71,14 +106,6 @@ def flatten_dynamic_cache(
      dynamic_cache: DynamicCache,
  ) -> Tuple[List[Any], torch.utils._pytree.Context]:
      """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-     assert (
-         not hasattr(dynamic_cache, "layers")
-         or not dynamic_cache.layers
-         or all(lay.__class__.__name__ == "DynamicLayer" for lay in dynamic_cache.layers)
-     ), (
-         f"The serialization does not work yet on other layers "
-         f"than DynamicLayer, but layers={[lay.__class__ for lay in dynamic_cache.layers]}"
-     )
      return _flatten_key_value_cache(dynamic_cache)


@@ -86,14 +113,6 @@ def flatten_with_keys_dynamic_cache(
      dynamic_cache: DynamicCache,
  ) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
      """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-     assert (
-         not hasattr(dynamic_cache, "layers")
-         or not dynamic_cache.layers
-         or all(lay.__class__.__name__ == "DynamicLayer" for lay in dynamic_cache.layers)
-     ), (
-         f"The serialization does not work yet on other layers "
-         f"than DynamicLayer, but layers={[lay.__class__ for lay in dynamic_cache.layers]}"
-     )
      return _flatten_with_keys_cache(dynamic_cache)


@@ -161,7 +180,9 @@
  ) -> StaticCache:
      """Restores a :class:`transformers.cache_utils.StaticCache` from python objects."""
      return _unflatten_cache(
-         lambda *args: make_static_cache(*args, max_cache_len=values[0].shape[2]),
+         lambda *args, **kwargs: make_static_cache(
+             *args, max_cache_len=values[0].shape[2], **kwargs
+         ),
          values,
          context,
          output_type=output_type,
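
The serialization changes above let a DynamicCache mixing layer types round-trip through the pytree context: _flatten_key_value_cache encodes each layer class as a single letter in the context keys, and _unflatten_cache decodes it back. A standalone sketch of the naming scheme (plain Python, no transformers required; the layer list is hypothetical):

    # Encoding, as in _flatten_key_value_cache: one letter per layer class.
    SHORTEN = {
        "DynamicLayer": "D", "DynamicSlidingWindowLayer": "W",
        "D": "DynamicLayer", "W": "DynamicSlidingWindowLayer",
    }
    layer_classes = ["DynamicLayer", "DynamicSlidingWindowLayer"]
    keys = []
    for i, cls_name in enumerate(layer_classes):
        letter = SHORTEN[cls_name]
        keys.extend([f"key_{letter}{i}", f"value_{letter}{i}"])
    print(keys)  # ['key_D0', 'value_D0', 'key_W1', 'value_W1']

    # Decoding, as in _unflatten_cache: "key_W1".split("_")[1][0] -> "W".
    decoded = [SHORTEN[name.split("_")[1][0]] for name in keys][::2]
    print(decoded)  # ['DynamicLayer', 'DynamicSlidingWindowLayer']

A cache whose layers are all DynamicLayer keeps the plain key_0/value_0 names, so previously serialized contexts remain readable.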
@@ -8,11 +8,9 @@ from .hghub.model_inputs import _preprocess_model_id
  from .hghub import get_untrained_model_with_inputs
  from .validate import filter_inputs, make_patch_kwargs

-
  CODE_SAMPLES = {
      "imports": "from typing import Any\nimport torch",
-     "get_model_with_inputs": textwrap.dedent(
-         """
+     "get_model_with_inputs": textwrap.dedent("""
          def get_model_with_inputs(
              model_id:str,
              subfolder: str | None = None,
@@ -57,8 +55,7 @@ CODE_SAMPLES = {
              if device:
                  data["model"] = data["model"].to(device)
              return data["model"]
-         """
-     ),
+         """),
  }


@@ -198,7 +195,7 @@ def code_sample(
          this is not always possible
      :param use_pretrained: use the trained model, not the untrained one
      :param optimization: optimization to apply to the exported model,
-         depend on the the exporter
+         depends on the exporter
      :param quiet: if quiet, catches exception if any issue
      :param patch: applies patches (``patch_transformers=True, path_diffusers=True``)
          if True before exporting
@@ -326,11 +323,9 @@ def code_sample(
          imports,
          cache_import,
          CODE_SAMPLES["get_model_with_inputs"],
-         textwrap.dedent(
-             f"""
+         textwrap.dedent(f"""
              model = get_model_with_inputs({model_args})
-             """
-         ),
+             """),
          f"inputs = {input_code}",
          exporter_code,
      ]