onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/export/api.py +35 -5
- onnx_diagnostic/export/control_flow.py +511 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/ext_test_case.py +33 -9
- onnx_diagnostic/helpers/cache_helper.py +217 -203
- onnx_diagnostic/helpers/helper.py +2 -0
- onnx_diagnostic/helpers/log_helper.py +26 -4
- onnx_diagnostic/helpers/mini_onnx_builder.py +54 -2
- onnx_diagnostic/helpers/onnx_helper.py +12 -15
- onnx_diagnostic/helpers/rt_helper.py +547 -0
- onnx_diagnostic/helpers/torch_helper.py +5 -0
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +561 -59
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +24 -22
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0
@@ -391,17 +391,22 @@ def make_static_cache(
     return finalize_cache(cache)


-
-
-
-
-
-
-
-
-
-
-
+if hasattr(transformers.cache_utils, "EncoderDecoderCache"):
+
+    def make_encoder_decoder_cache(
+        self_attention_cache: transformers.cache_utils.DynamicCache,
+        cross_attention_cache: transformers.cache_utils.DynamicCache,
+    ) -> transformers.cache_utils.EncoderDecoderCache:
+        """Creates an EncoderDecoderCache."""
+        return transformers.cache_utils.EncoderDecoderCache(
+            # self_attention_cache=self_attention_cache,
+            # cross_attention_cache=cross_attention_cache
+            self_attention_cache,
+            cross_attention_cache,
+        )
+
+else:
+    make_encoder_decoder_cache = None  # type: ignore[assignment]


 def make_mamba_cache(
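An illustrative sketch (not taken from the release) of calling the now-guarded helper. It assumes `make_dynamic_cache` from the same `cache_helper` module to build the two `DynamicCache` inputs; the guard sets `make_encoder_decoder_cache` to `None` when the installed `transformers` lacks `EncoderDecoderCache`.

```python
import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,
    make_encoder_decoder_cache,
)

bsize, nheads, slen, dim = 2, 4, 3, 7
# one DynamicCache for self-attention, one for cross-attention
self_cache = make_dynamic_cache(
    [(torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))]
)
cross_cache = make_dynamic_cache(
    [(torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))]
)
if make_encoder_decoder_cache is not None:  # None when EncoderDecoderCache is unavailable
    cache = make_encoder_decoder_cache(self_cache, cross_cache)
    print(type(cache).__name__)
```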
@@ -454,220 +459,229 @@ def make_mamba_cache(
     return finalize_cache(cache)


-
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-) -> transformers.cache_utils.SlidingWindowCache:
-    "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
-    key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+if hasattr(transformers.cache_utils, "SlidingWindowCache"):

-
-
-
-
-
-            self.sliding_window = key_value_pairs[0][0].shape[2]
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    cache = transformers.cache_utils.SlidingWindowCache(
-        config=_config(),
-        max_batch_size=key_value_pairs[0][0].shape[0],
-        max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
-        device=key_value_pairs[0][0].device,
-        dtype=key_value_pairs[0][0].dtype,
-    )
-    ca = CacheKeyValue(cache)
-    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
-        # transformers>= 4.55.2, layers are empty
-        cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
-        for i, (key, value) in enumerate(key_value_pairs):
-            cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
-        return cache
+    def make_sliding_window_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+    ) -> transformers.cache_utils.SlidingWindowCache:
+        "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)

-
-
-
-
+        class _config:
+            def __init__(self):
+                self.head_dim = key_value_pairs[0][0].shape[-1]
+                self.num_attention_heads = key_value_pairs[0][0].shape[1]
+                self.num_hidden_layers = len(key_value_pairs)
+                self.sliding_window = key_value_pairs[0][0].shape[2]
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        cache = transformers.cache_utils.SlidingWindowCache(
+            config=_config(),
+            max_batch_size=key_value_pairs[0][0].shape[0],
+            max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
+            device=key_value_pairs[0][0].device,
+            dtype=key_value_pairs[0][0].dtype,
         )
-        ca
-
-
-
+        ca = CacheKeyValue(cache)
+        if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+            # transformers>= 4.55.2, layers are empty
+            cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
+            for i, (key, value) in enumerate(key_value_pairs):
+                cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
+            return cache
+
+        for i in range(len(key_value_pairs)):
+            assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
+                f"Shape mismatch, expected {cache.key_cache[i].shape}, "
+                f"got {key_value_pairs[i][0].shape}"
+            )
+            ca.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
+            assert ca.value_cache[i].shape == key_value_pairs[i][1].shape, (
+                f"Shape mismatch, expected {cache.value_cache[i].shape}, "
+                f"got {key_value_pairs[i][1].shape}"
+            )
+            ca.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
         )
-
-    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
-        # The cache constructor contains the two following lines
-        # (in cache_utils.py) which append empty layers when the cache is
-        # initialized. We need to remove them.
-        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
-        # self.append_new_layers(self.num_hidden_layers - 1)
-        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
-    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
-        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
-        f"{len(key_value_pairs)} expected."
-    )
-    return finalize_cache(cache)
+        return finalize_cache(cache)

+else:
+    make_sliding_window_cache = None  # type: ignore[assignment]

-
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-    max_cache_len: Optional[int] = None,
-    max_batch_size: Optional[int] = None,
-    sliding_window: Optional[int] = None,
-) -> transformers.cache_utils.HybridCache:
-    """
-    Creates an instance of :class:`transformers.cache_utils.HybridCache`.
-    This version is valid for ``transformers < 4.50``.
+if hasattr(transformers.cache_utils, "HybridCache"):

-
-
+    def make_hybrid_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+        max_cache_len: Optional[int] = None,
+        max_batch_size: Optional[int] = None,
+        sliding_window: Optional[int] = None,
+    ) -> transformers.cache_utils.HybridCache:
+        """
+        Creates an instance of :class:`transformers.cache_utils.HybridCache`.
+        This version is valid for ``transformers < 4.50``.

-
+        :param key_value_pairs: list of pairs of (key, values)
+        :return: :class:`transformers.cache_utils.HybridCache`

-
-        :showcode:
+        Example:

-
-
-        from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache
+        .. runpython::
+            :showcode:

-
-
+            import torch
+            from onnx_diagnostic.helpers import string_type
+            from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache

-
-
-
-
-
-
-
-
-
-
+            n_layers = 2
+            bsize, nheads, slen, dim = 2, 4, 3, 7
+
+            past_key_values = make_hybrid_cache(
+                [
+                    (
+                        torch.randn(bsize, nheads, slen, dim),
+                        torch.randn(bsize, nheads, slen, dim),
+                    )
+                    for i in range(n_layers)
+                ]
+            )
+            print(string_type(past_key_values, with_shape=True))

-
+        This part defines how the shapes are working in one HybridCache.

-
+        .. code-block:: python

-
-
+            self.max_cache_len = (
+                max_cache_len if max_cache_len is not None else config.max_position_embeddings)

-
-
-
+            # Sliding layers can't be larger than the overall max cache len
+            self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
+            self.max_batch_size = max_batch_size

-
-
-
-
+            self.head_dim = (
+                config.head_dim if hasattr(config, "head_dim")
+                else config.hidden_size // config.num_attention_heads
+            )

-
-
-
-
-
-
+            self._dtype = dtype
+            self.num_key_value_heads = (
+                config.num_attention_heads
+                if getattr(config, "num_key_value_heads", None) is None
+                else config.num_key_value_heads
+            )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # If the attribute does not exist in the config, fallback to a simple StaticCache
+            if hasattr(config, "layer_types"):
+                self.is_sliding = [
+                    layer_type != "full_attention" for layer_type in config.layer_types]
+            else:
+                self.is_sliding = [False] * config.num_hidden_layers
+
+            self.key_cache: list[torch.Tensor] = []
+            self.value_cache: list[torch.Tensor] = []
+            global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                self.max_cache_len, self.head_dim)
+            sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                self.sliding_window_len, self.head_dim)
+            self.sliding_window = min(config.sliding_window, max_cache_len)
+            device = torch.device(device) if device is not None else None
+            for i in range(config.num_hidden_layers):
+                layer_device = layer_device_map[i] if layer_device_map is not None else device
+                cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
+                new_layer_key_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                new_layer_value_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                torch._dynamo.mark_static_address(new_layer_key_cache)
+                torch._dynamo.mark_static_address(new_layer_value_cache)
+                self.key_cache.append(new_layer_key_cache)
+                self.value_cache.append(new_layer_value_cache)
+        """
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+        layer_types = None
+        if key_value_pairs:
+            assert (
+                not max_batch_size and not max_cache_len
+            ), "key_value_pairs is not empty, do not specify max_cache_len and max_batch_size"
+            max_batch_size = key_value_pairs[0][0].shape[0]
+            sets_of_dim = set(kv[0].shape[2] for kv in key_value_pairs)
+            if len(sets_of_dim) == 1:
+                max_cache_len = sets_of_dim.pop()
+                sliding_window = max_cache_len
+            else:
+                assert (
+                    len(sets_of_dim) == 2
+                ), f"Not implemented for more than 2 dimensions {sets_of_dim}"
+                max_cache_len = max(sets_of_dim)
+                sliding_window = min(sets_of_dim)
+                layer_types = [
+                    "full_attention" if i == max_cache_len else "sliding_attention"
+                    for i in [kv[0].shape[2] for kv in key_value_pairs]
+                ]
         else:
             assert (
-
-        ),
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    if layer_types:
-        _config.layer_types = layer_types  # type: ignore[attr-defined]
-
-    cache = transformers.cache_utils.HybridCache(
-        config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
-    )
-    for i, (key, value) in enumerate(key_value_pairs):
-        cache.update(
-            key,
-            value,
-            i,
-            cache_kwargs={
-                "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
-                    key.device
-                )
-            },
+                max_batch_size and max_cache_len
+            ), "key_value_pairs is empty, max_batch_size and max_cache_len are required"
+        if sliding_window is None:
+            sliding_window = max_cache_len
+        _max_cache_len = max_cache_len
+        _sliding_window = sliding_window
+
+        class _config:
+            max_cache_len = _max_cache_len
+            batch_size = max_batch_size
+            num_heads = key_value_pairs[0][0].shape[1] if key_value_pairs else None
+            head_dim = key_value_pairs[0][0].shape[-1] if key_value_pairs else None
+            num_attention_heads = key_value_pairs[0][1].shape[1] if key_value_pairs else None
+            num_hidden_layers = len(key_value_pairs)
+            sliding_window = _sliding_window
+            num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        if layer_types:
+            _config.layer_types = layer_types  # type: ignore[attr-defined]
+
+        cache = transformers.cache_utils.HybridCache(
+            config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
         )
-
-
-
-
-
-
-
-
-
-
-
-
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(
+                key,
+                value,
+                i,
+                cache_kwargs={
+                    "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
+                        key.device
+                    )
+                },
+            )
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
+        )
+        return finalize_cache(cache)
+
+else:
+    make_hybrid_cache = None  # type: ignore[assignment]


 def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_utils.Cache:
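The new docstring above already embeds a `make_hybrid_cache` example; an analogous sketch (illustrative, not part of the diff) exercises the guarded `make_sliding_window_cache` the same way. Like the other factories in this module, it is set to `None` when the installed `transformers` does not expose `SlidingWindowCache`.

```python
import torch
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.cache_helper import make_sliding_window_cache

bsize, nheads, slen, dim = 2, 4, 3, 7
if make_sliding_window_cache is not None:  # None on old transformers versions
    past_key_values = make_sliding_window_cache(
        [
            (torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))
            for _ in range(2)  # two layers
        ]
    )
    print(string_type(past_key_values, with_shape=True))
```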
@@ -787,6 +787,8 @@ def string_type(
         return f"ultralytics.{obj.__class__.__name__}(...)"
     if obj.__class__.__name__ == "FakeTensorMode":
         return f"{obj}"
+    if obj.__class__.__name__ == "FakeTensorContext":
+        return "FakeTensorContext(...)"

     if verbose:
         print(f"[string_type] END:{type(obj)}")
@@ -901,13 +901,19 @@ class CubeLogs:
             else g.groupby([*key_index, *key_columns], dropna=False).sum()
         )
         not_unique = r[r["count"] > 1]
+        if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
+            filename = os.environ.get("DUPLICATE")
+            subset = data.set_index([*key_index, *key_columns]).merge(
+                not_unique.head(), left_index=True, right_index=True
+            )
+            subset.to_excel(filename)
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
             f"unable to run the pivot with index={sorted(key_index)}, "
             f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
             f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
-            f"not unique={set(data.columns) - unique}"
-            f"\n--\n{not_unique.head(10)}"
+            f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
+            f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
         )

         # pivot
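A hypothetical way to use the new escape hatch: point the `DUPLICATE` environment variable at a writable path before building the failing view; the file name below is only an example.

```python
import os

# When the pivot keys are not unique, the duplicated rows (merged with
# not_unique.head()) are written to this file before the assertion fires.
os.environ["DUPLICATE"] = "duplicated_rows.xlsx"
```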
@@ -1000,8 +1006,12 @@ class CubeLogs:
         keys = set(self.keys_time) - {columns_to_fix}
         select = data[self.keys_time]
         select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
         assert select_agg[columns_to_fix].max() <= 1, (
-            f"Column {columns_to_fix!r} has two distinct values at least for one date
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
             f"{select_agg[select_agg[columns_to_fix] > 1]}"
         )

@@ -1038,6 +1048,16 @@ class CubeLogs:
             f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
             f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
         )
+        select = res[self.keys_time]
+        select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
+        # assert select_agg[columns_to_fix].max() <= 1, (
+        #     f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+        #     f"max={select_agg[columns_to_fix].max()}\n"
+        #     f"{select_agg[select_agg[columns_to_fix] > 1]}"
+        # )
         return res

     def _dropna(
@@ -1977,7 +1997,9 @@ class CubeLogsPerformance(CubeLogs):
         * **cmd:** command lines
         * **raw-short:** raw data without all the unused columns
         """
-
+        # This does not work.
+        # used to be ["model_speedup_input_set", "model_test_with"]
+        fix_aggregation_change = []  # type: ignore[var-annotated]
         fs = ["suite", "model_suite", "task", "model_name", "model_task"]
         index_cols = self._filter_column(fs, self.keys_time)
         assert index_cols, (
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
     :return: ModelProto

     The function raises an error if not supported.
+    An example:
+
+    .. code-block:: python
+
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_onnx_model_from_input_tensors,
+        )
+        import onnx
+
+        proto = create_onnx_model_from_input_tensors(
+            dict(
+                query_states=query_states,
+                key_states=key_states,
+                value_states=value_states,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
+                scaling=self.scaling,
+                attn_output=attn_output,
+            )
+        )
+        onnx.save(proto, "attention_inputs.onnx")
     """
     if switch_low_high is None:
         switch_low_high = sys.byteorder != "big"
@@ -461,7 +482,17 @@ def _unflatten(
         if spl[-1] == "array":
             return pos + 1, outputs[pos]
         if spl[-1] == "tensor":
-
+            try:
+                return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            except TypeError:
+                # it should be more robust
+                import ml_dtypes
+
+                if outputs[pos].dtype == ml_dtypes.bfloat16:
+                    return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
+                        torch.bfloat16
+                    )
+                raise
         raise AssertionError(f"Unexpected name {name!r} in {names}")

     res: List[Any] = []
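A standalone sketch of the fallback added above: numpy arrays with the `ml_dtypes.bfloat16` dtype cannot be handed to `torch.from_numpy` directly, so the values go through float64 first and are cast back to `torch.bfloat16`.

```python
import ml_dtypes
import numpy as np
import torch

arr = np.array([1.5, -2.25], dtype=ml_dtypes.bfloat16)
try:
    t = torch.from_numpy(arr)  # bfloat16 ndarrays are rejected here
except TypeError:
    # same workaround as in _unflatten: go through float64, then cast back
    t = torch.from_numpy(arr.astype(float)).to(torch.bfloat16)
print(t.dtype)  # torch.bfloat16
```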
@@ -532,6 +563,12 @@ def _unflatten(
             return d
         return ty(res)

+    if end and len(res) == 1:
+        if res[0] is None:
+            return next_pos, ty()
+        if isinstance(res[0], tuple) and len(res[0]) == 2 and res[0] == ("dict.", None):
+            return next_pos, ty()
+        return next_pos, _make(ty, res)
     return next_pos, (
         ty() if len(res) == 1 and res[0] in (("dict.", None), None) else _make(ty, res)
     )
@@ -557,6 +594,19 @@ def create_input_tensors_from_onnx_model(
     :return: restored data

     See example :ref:`l-plot-intermediate-results` for an example.
+
+    .. code-block:: python
+
+        import os
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_input_tensors_from_onnx_model,
+        )
+        from onnx_diagnostic.helpers import string_type
+
+        restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
+        for k, v in restored.items():
+            print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
+
     """
     if engine == "ExtendedReferenceEvaluator":
         from ..reference import ExtendedReferenceEvaluator
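Putting the two docstring snippets from this release together, a round trip might look like the sketch below. The file name and tensor names are illustrative, and only plain torch tensors are used to keep the assumptions minimal.

```python
import onnx
import torch
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.mini_onnx_builder import (
    create_input_tensors_from_onnx_model,
    create_onnx_model_from_input_tensors,
)

# serialize a dictionary of tensors into a small ONNX model ...
data = dict(query=torch.randn(2, 4, 3, 7), key=torch.randn(2, 4, 3, 7))
proto = create_onnx_model_from_input_tensors(data)
onnx.save(proto, "saved_inputs.onnx")

# ... and restore it later, e.g. from another process
restored = create_input_tensors_from_onnx_model("saved_inputs.onnx")
for k, v in restored.items():
    print(f"{k}: {string_type(v, with_shape=True)}")
```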
@@ -595,6 +645,8 @@ def create_input_tensors_from_onnx_model(
         return float(output[0])
     if name == "tensor":
         return torch.from_numpy(output).to(device)
-
+    assert name.startswith(
+        ("list_", "list.", "dict.", "tuple_", "tuple.")
+    ), f"Unexpected name {name!r} in {names}"

     return _unflatten(sep, names, got, device=device)[1]
@@ -671,21 +671,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int: # noqa: F821
     try:
         return oh.np_dtype_to_tensor_dtype(dt)
     except ValueError:
-
-
-
-
-        if
-
-
-
-
-
-
-
-            return TensorProto.FLOAT8E5M2
-        if dt == ml_dtypes.float8_e5m2fnuz:
-            return TensorProto.FLOAT8E5M2FNUZ
+        import ml_dtypes
+
+        if dt == ml_dtypes.bfloat16:
+            return TensorProto.BFLOAT16
+        if dt == ml_dtypes.float8_e4m3fn:
+            return TensorProto.FLOAT8E4M3FN
+        if dt == ml_dtypes.float8_e4m3fnuz:
+            return TensorProto.FLOAT8E4M3FNUZ
+        if dt == ml_dtypes.float8_e5m2:
+            return TensorProto.FLOAT8E5M2
+        if dt == ml_dtypes.float8_e5m2fnuz:
+            return TensorProto.FLOAT8E5M2FNUZ
     if dt == np.float32:
         return TensorProto.FLOAT
     if dt == np.float16: