onnx-diagnostic 0.8.0-py3-none-any.whl → 0.8.2-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +78 -22
- onnx_diagnostic/export/api.py +35 -5
- onnx_diagnostic/export/control_flow.py +511 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/ext_test_case.py +33 -9
- onnx_diagnostic/helpers/cache_helper.py +217 -203
- onnx_diagnostic/helpers/helper.py +6 -2
- onnx_diagnostic/helpers/log_helper.py +39 -5
- onnx_diagnostic/helpers/memory_peak.py +2 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +55 -3
- onnx_diagnostic/helpers/onnx_helper.py +13 -16
- onnx_diagnostic/helpers/rt_helper.py +579 -15
- onnx_diagnostic/helpers/torch_helper.py +5 -0
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/tasks/text2text_generation.py +1 -0
- onnx_diagnostic/tasks/text_generation.py +84 -54
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +563 -61
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- onnx_diagnostic/torch_models/validate.py +620 -213
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +30 -28
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.0.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0
onnx_diagnostic/ext_test_case.py
CHANGED
@@ -1188,6 +1188,7 @@ class ExtTestCase(unittest.TestCase):
         copy_inputs: bool = True,
         expected: Optional[Any] = None,
         use_ort: bool = False,
+        ort_optimized_graph: bool = False,
         **kwargs,
     ):
         """
@@ -1206,6 +1207,7 @@ class ExtTestCase(unittest.TestCase):
         :param expected: expected values
         :param copy_inputs: to copy the inputs
         :param use_ort: use :class:`onnxruntime.InferenceSession`
+        :param ort_optimized_graph: dumps the optimized onnxruntime graph
         :param kwargs: arguments sent to
             :class:`onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch`
         """
@@ -1214,30 +1216,52 @@ class ExtTestCase(unittest.TestCase):
         from .helpers.ort_session import InferenceSessionForTorch

         kws = dict(with_shape=True, with_min_max=verbose > 1)
-
-        vname = test_name or "assert_onnx_disc"
+        vname = test_name or "assert_onnx_disc"
         if test_name:
+            import onnx
+
             name = f"{test_name}.onnx"
-            (3 removed lines not captured in this extract)
+            if verbose:
+                print(f"[{vname}] save the onnx model into {name!r}")
+            if isinstance(proto, str):
+                name = proto
+                proto = onnx.load(name)
+            else:
+                assert isinstance(
+                    proto, onnx.ModelProto
+                ), f"Unexpected type {type(proto)} for proto"
+                name = self.dump_onnx(name, proto)
+            if verbose:
+                print(f"[{vname}] file size {os.stat(name).st_size // 2**10:1.3f} kb")
         if verbose:
             print(f"[{vname}] make feeds {string_type(inputs, **kws)}")
         if use_ort:
+            assert isinstance(
+                proto, onnx.ModelProto
+            ), f"Unexpected type {type(proto)} for proto"
             feeds = make_feeds(proto, inputs, use_numpy=True, copy=True)
-            if verbose:
-                print(f"[{vname}] feeds {string_type(feeds, **kws)}")
             import onnxruntime

+            if verbose:
+                print(f"[{vname}] create onnxruntime.InferenceSession")
+            options = onnxruntime.SessionOptions()
+            if ort_optimized_graph:
+                options.optimized_model_filepath = f"{name}.optort.onnx"
             sess = onnxruntime.InferenceSession(
-                proto.SerializeToString(),
+                proto.SerializeToString(),
+                options,
+                providers=kwargs.get("providers", ["CPUExecutionProvider"]),
             )
+            if verbose:
+                print(f"[{vname}] run ort feeds {string_type(feeds, **kws)}")
             got = sess.run(None, feeds)
         else:
             feeds = make_feeds(proto, inputs, copy=True)
             if verbose:
-                print(f"[{vname}]
+                print(f"[{vname}] create InferenceSessionForTorch")
             sess = InferenceSessionForTorch(proto, **kwargs)
+            if verbose:
+                print(f"[{vname}] run orttorch feeds {string_type(feeds, **kws)}")
             got = sess.run(None, feeds)
         if verbose:
             print(f"[{vname}] compute expected values")
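The new `ort_optimized_graph` flag rides on onnxruntime's standard `SessionOptions.optimized_model_filepath` setting. Below is a minimal standalone sketch of that pattern outside the test helper; the file names and provider list are placeholders, not values used by the package:

```python
import onnxruntime

# Ask onnxruntime to write out the graph it actually executes after its own
# graph optimizations; useful when investigating discrepancies.
options = onnxruntime.SessionOptions()
options.optimized_model_filepath = "model.optort.onnx"  # placeholder output path

sess = onnxruntime.InferenceSession(
    "model.onnx",  # placeholder input model
    options,
    providers=["CPUExecutionProvider"],
)
# The optimized model is serialized as a side effect of creating the session;
# sess.run(...) then executes that optimized graph.
```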
onnx_diagnostic/helpers/cache_helper.py
CHANGED

@@ -391,17 +391,22 @@ def make_static_cache(
     return finalize_cache(cache)


-(11 removed lines not captured in this extract)
+if hasattr(transformers.cache_utils, "EncoderDecoderCache"):
+
+    def make_encoder_decoder_cache(
+        self_attention_cache: transformers.cache_utils.DynamicCache,
+        cross_attention_cache: transformers.cache_utils.DynamicCache,
+    ) -> transformers.cache_utils.EncoderDecoderCache:
+        """Creates an EncoderDecoderCache."""
+        return transformers.cache_utils.EncoderDecoderCache(
+            # self_attention_cache=self_attention_cache,
+            # cross_attention_cache=cross_attention_cache
+            self_attention_cache,
+            cross_attention_cache,
+        )
+
+else:
+    make_encoder_decoder_cache = None  # type: ignore[assignment]


 def make_mamba_cache(
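Because `make_encoder_decoder_cache` is now only defined when the installed transformers still exposes `EncoderDecoderCache` (and is `None` otherwise), callers are expected to guard for that. A hedged usage sketch; it assumes `make_dynamic_cache` from the same module is available to build the two inner caches:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,  # assumed helper from the same module
    make_encoder_decoder_cache,
)

kv = [(torch.randn(2, 4, 3, 8), torch.randn(2, 4, 3, 8)) for _ in range(2)]

if make_encoder_decoder_cache is not None:  # None when EncoderDecoderCache is gone
    cache = make_encoder_decoder_cache(
        make_dynamic_cache(kv),  # self-attention cache
        make_dynamic_cache(kv),  # cross-attention cache
    )
    print(type(cache).__name__)
```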
@@ -454,220 +459,229 @@ def make_mamba_cache(
     return finalize_cache(cache)


-
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-) -> transformers.cache_utils.SlidingWindowCache:
-    "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
-    key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+if hasattr(transformers.cache_utils, "SlidingWindowCache"):

-(5 removed lines not captured in this extract)
-            self.sliding_window = key_value_pairs[0][0].shape[2]
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    cache = transformers.cache_utils.SlidingWindowCache(
-        config=_config(),
-        max_batch_size=key_value_pairs[0][0].shape[0],
-        max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
-        device=key_value_pairs[0][0].device,
-        dtype=key_value_pairs[0][0].dtype,
-    )
-    ca = CacheKeyValue(cache)
-    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
-        # transformers>= 4.55.2, layers are empty
-        cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
-        for i, (key, value) in enumerate(key_value_pairs):
-            cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
-        return cache
+    def make_sliding_window_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+    ) -> transformers.cache_utils.SlidingWindowCache:
+        "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)

-(4 removed lines not captured in this extract)
+        class _config:
+            def __init__(self):
+                self.head_dim = key_value_pairs[0][0].shape[-1]
+                self.num_attention_heads = key_value_pairs[0][0].shape[1]
+                self.num_hidden_layers = len(key_value_pairs)
+                self.sliding_window = key_value_pairs[0][0].shape[2]
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        cache = transformers.cache_utils.SlidingWindowCache(
+            config=_config(),
+            max_batch_size=key_value_pairs[0][0].shape[0],
+            max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
+            device=key_value_pairs[0][0].device,
+            dtype=key_value_pairs[0][0].dtype,
         )
-    ca
-(3 removed lines not captured in this extract)
+        ca = CacheKeyValue(cache)
+        if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+            # transformers>= 4.55.2, layers are empty
+            cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
+            for i, (key, value) in enumerate(key_value_pairs):
+                cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
+            return cache
+
+        for i in range(len(key_value_pairs)):
+            assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
+                f"Shape mismatch, expected {cache.key_cache[i].shape}, "
+                f"got {key_value_pairs[i][0].shape}"
+            )
+            ca.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
+            assert ca.value_cache[i].shape == key_value_pairs[i][1].shape, (
+                f"Shape mismatch, expected {cache.value_cache[i].shape}, "
+                f"got {key_value_pairs[i][1].shape}"
+            )
+            ca.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
         )
-
-    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
-        # The cache constructor contains the two following lines
-        # (in cache_utils.py) which append empty layers when the cache is
-        # initialized. We need to remove them.
-        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
-        # self.append_new_layers(self.num_hidden_layers - 1)
-        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
-    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
-        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
-        f"{len(key_value_pairs)} expected."
-    )
-    return finalize_cache(cache)
+        return finalize_cache(cache)

+else:
+    make_sliding_window_cache = None  # type: ignore[assignment]

-
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-    max_cache_len: Optional[int] = None,
-    max_batch_size: Optional[int] = None,
-    sliding_window: Optional[int] = None,
-) -> transformers.cache_utils.HybridCache:
-    """
-    Creates an instance of :class:`transformers.cache_utils.HybridCache`.
-    This version is valid for ``transformers < 4.50``.
+if hasattr(transformers.cache_utils, "HybridCache"):

-(2 removed lines not captured in this extract)
+    def make_hybrid_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+        max_cache_len: Optional[int] = None,
+        max_batch_size: Optional[int] = None,
+        sliding_window: Optional[int] = None,
+    ) -> transformers.cache_utils.HybridCache:
+        """
+        Creates an instance of :class:`transformers.cache_utils.HybridCache`.
+        This version is valid for ``transformers < 4.50``.

-
+        :param key_value_pairs: list of pairs of (key, values)
+        :return: :class:`transformers.cache_utils.HybridCache`

-
-        :showcode:
+        Example:

-(2 removed lines not captured in this extract)
-        from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache
+        .. runpython::
+            :showcode:

-(2 removed lines not captured in this extract)
+            import torch
+            from onnx_diagnostic.helpers import string_type
+            from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache

-(10 removed lines not captured in this extract)
+            n_layers = 2
+            bsize, nheads, slen, dim = 2, 4, 3, 7
+
+            past_key_values = make_hybrid_cache(
+                [
+                    (
+                        torch.randn(bsize, nheads, slen, dim),
+                        torch.randn(bsize, nheads, slen, dim),
+                    )
+                    for i in range(n_layers)
+                ]
+            )
+            print(string_type(past_key_values, with_shape=True))

-
+        This part defines how the shapes are working in one HybridCache.

-
+        .. code-block:: python

-(2 removed lines not captured in this extract)
+            self.max_cache_len = (
+                max_cache_len if max_cache_len is not None else config.max_position_embeddings)

-(3 removed lines not captured in this extract)
+            # Sliding layers can't be larger than the overall max cache len
+            self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
+            self.max_batch_size = max_batch_size

-(4 removed lines not captured in this extract)
+            self.head_dim = (
+                config.head_dim if hasattr(config, "head_dim")
+                else config.hidden_size // config.num_attention_heads
+            )

-(6 removed lines not captured in this extract)
+            self._dtype = dtype
+            self.num_key_value_heads = (
+                config.num_attention_heads
+                if getattr(config, "num_key_value_heads", None) is None
+                else config.num_key_value_heads
+            )

-(38 removed lines not captured in this extract)
+            # If the attribute does not exist in the config, fallback to a simple StaticCache
+            if hasattr(config, "layer_types"):
+                self.is_sliding = [
+                    layer_type != "full_attention" for layer_type in config.layer_types]
+            else:
+                self.is_sliding = [False] * config.num_hidden_layers
+
+            self.key_cache: list[torch.Tensor] = []
+            self.value_cache: list[torch.Tensor] = []
+            global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                                  self.max_cache_len, self.head_dim)
+            sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                                   self.sliding_window_len, self.head_dim)
+            self.sliding_window = min(config.sliding_window, max_cache_len)
+            device = torch.device(device) if device is not None else None
+            for i in range(config.num_hidden_layers):
+                layer_device = layer_device_map[i] if layer_device_map is not None else device
+                cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
+                new_layer_key_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                new_layer_value_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                torch._dynamo.mark_static_address(new_layer_key_cache)
+                torch._dynamo.mark_static_address(new_layer_value_cache)
+                self.key_cache.append(new_layer_key_cache)
+                self.value_cache.append(new_layer_value_cache)
+        """
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+        layer_types = None
+        if key_value_pairs:
+            assert (
+                not max_batch_size and not max_cache_len
+            ), "key_value_pairs is not empty, do not specify max_cache_len and max_batch_size"
+            max_batch_size = key_value_pairs[0][0].shape[0]
+            sets_of_dim = set(kv[0].shape[2] for kv in key_value_pairs)
+            if len(sets_of_dim) == 1:
+                max_cache_len = sets_of_dim.pop()
+                sliding_window = max_cache_len
+            else:
+                assert (
+                    len(sets_of_dim) == 2
+                ), f"Not implemented for more than 2 dimensions {sets_of_dim}"
+                max_cache_len = max(sets_of_dim)
+                sliding_window = min(sets_of_dim)
+                layer_types = [
+                    "full_attention" if i == max_cache_len else "sliding_attention"
+                    for i in [kv[0].shape[2] for kv in key_value_pairs]
+                ]
         else:
             assert (
-
-            ),
-(23 removed lines not captured in this extract)
-        num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    if layer_types:
-        _config.layer_types = layer_types  # type: ignore[attr-defined]
-
-    cache = transformers.cache_utils.HybridCache(
-        config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
-    )
-    for i, (key, value) in enumerate(key_value_pairs):
-        cache.update(
-            key,
-            value,
-            i,
-            cache_kwargs={
-                "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
-                    key.device
-                )
-            },
+                max_batch_size and max_cache_len
+            ), "key_value_pairs is empty, max_batch_size and max_cache_len are required"
+        if sliding_window is None:
+            sliding_window = max_cache_len
+        _max_cache_len = max_cache_len
+        _sliding_window = sliding_window
+
+        class _config:
+            max_cache_len = _max_cache_len
+            batch_size = max_batch_size
+            num_heads = key_value_pairs[0][0].shape[1] if key_value_pairs else None
+            head_dim = key_value_pairs[0][0].shape[-1] if key_value_pairs else None
+            num_attention_heads = key_value_pairs[0][1].shape[1] if key_value_pairs else None
+            num_hidden_layers = len(key_value_pairs)
+            sliding_window = _sliding_window
+            num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        if layer_types:
+            _config.layer_types = layer_types  # type: ignore[attr-defined]
+
+        cache = transformers.cache_utils.HybridCache(
+            config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
         )
-(12 removed lines not captured in this extract)
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(
+                key,
+                value,
+                i,
+                cache_kwargs={
+                    "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
+                        key.device
+                    )
+                },
+            )
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
+        )
+        return finalize_cache(cache)
+
+else:
+    make_hybrid_cache = None  # type: ignore[assignment]


 def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_utils.Cache:
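With this rewrite, `make_hybrid_cache` infers `max_batch_size`, `max_cache_len`, `sliding_window`, and per-layer `layer_types` from the key/value shapes: one distinct sequence length means all layers are treated alike, while two distinct lengths mark the longer layers as `full_attention` and the shorter ones as `sliding_attention`. A sketch exercising the two-length branch, assuming a transformers version that still provides `HybridCache`:

```python
import torch
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache

bsize, nheads, dim = 2, 4, 7
full_len, sliding_len = 8, 4  # two distinct lengths -> mixed layer_types

past_key_values = make_hybrid_cache(
    [
        # longer layer: inferred as "full_attention"
        (torch.randn(bsize, nheads, full_len, dim), torch.randn(bsize, nheads, full_len, dim)),
        # shorter layer: inferred as "sliding_attention"
        (torch.randn(bsize, nheads, sliding_len, dim), torch.randn(bsize, nheads, sliding_len, dim)),
    ]
)
print(string_type(past_key_values, with_shape=True))
```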
onnx_diagnostic/helpers/helper.py
CHANGED

@@ -787,6 +787,8 @@ def string_type(
         return f"ultralytics.{obj.__class__.__name__}(...)"
     if obj.__class__.__name__ == "FakeTensorMode":
         return f"{obj}"
+    if obj.__class__.__name__ == "FakeTensorContext":
+        return "FakeTensorContext(...)"

     if verbose:
         print(f"[string_type] END:{type(obj)}")
@@ -1016,6 +1018,8 @@ def max_diff(

     You may use :func:`string_diff` to display the discrepancies in one string.
     """
+    if verbose >= 10:
+        print(f"[max_diff] {type(expected)} ? {type(got)}")
     if expected is None and got is None:
         return dict(abs=0, rel=0, sum=0, n=0, dnan=0)

@@ -1061,8 +1065,8 @@ def max_diff(
     if expected.__class__.__name__ == "CausalLMOutputWithPast":
         if verbose >= 6:
             print(
-                f"[max_diff] CausalLMOutputWithPast: {string_type(expected)} "
-                f"? {string_type(got)}"
+                f"[max_diff] CausalLMOutputWithPast: {string_type(expected, with_shape=True)} "
+                f"? {string_type(got, with_shape=True)}"
             )
         if got.__class__.__name__ == "CausalLMOutputWithPast":
             return max_diff(