onnx-diagnostic 0.7.6__py3-none-any.whl → 0.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +56 -3
- onnx_diagnostic/export/dynamic_shapes.py +24 -10
- onnx_diagnostic/export/shape_helper.py +6 -2
- onnx_diagnostic/helpers/cache_helper.py +83 -7
- onnx_diagnostic/helpers/config_helper.py +57 -0
- onnx_diagnostic/helpers/helper.py +6 -1
- onnx_diagnostic/reference/ops/op_cast_like.py +15 -11
- onnx_diagnostic/reference/torch_ops/__init__.py +1 -0
- onnx_diagnostic/reference/torch_ops/unary_ops.py +7 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +6 -2
- onnx_diagnostic/tasks/feature_extraction.py +7 -3
- onnx_diagnostic/tasks/fill_mask.py +6 -2
- onnx_diagnostic/tasks/image_classification.py +6 -2
- onnx_diagnostic/tasks/image_text_to_text.py +48 -12
- onnx_diagnostic/tasks/mask_generation.py +6 -2
- onnx_diagnostic/tasks/mixture_of_expert.py +2 -2
- onnx_diagnostic/tasks/object_detection.py +6 -2
- onnx_diagnostic/tasks/sentence_similarity.py +6 -2
- onnx_diagnostic/tasks/summarization.py +7 -2
- onnx_diagnostic/tasks/text2text_generation.py +7 -2
- onnx_diagnostic/tasks/text_classification.py +6 -2
- onnx_diagnostic/tasks/text_generation.py +8 -14
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +3 -3
- onnx_diagnostic/torch_export_patches/patch_inputs.py +1 -1
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -4
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +227 -1
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +3 -1
- onnx_diagnostic/torch_models/hghub/hub_data.py +5 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +70 -1
- onnx_diagnostic/torch_models/hghub/model_inputs.py +13 -1
- onnx_diagnostic/torch_models/validate.py +17 -0
- {onnx_diagnostic-0.7.6.dist-info → onnx_diagnostic-0.7.8.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.7.6.dist-info → onnx_diagnostic-0.7.8.dist-info}/RECORD +37 -37
- {onnx_diagnostic-0.7.6.dist-info → onnx_diagnostic-0.7.8.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.6.dist-info → onnx_diagnostic-0.7.8.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.6.dist-info → onnx_diagnostic-0.7.8.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED

onnx_diagnostic/_command_lines_parser.py
CHANGED
@@ -306,7 +306,7 @@ class _ParseDict(argparse.Action):
             value = split_items[1]

             if value in ("True", "true", "False", "false"):
-                d[key] =
+                d[key] = value in ("True", "true")
                 continue
             try:
                 d[key] = int(value)
@@ -323,6 +323,54 @@ class _ParseDict(argparse.Action):
         setattr(namespace, self.dest, d)


+class _BoolOrParseDictPatch(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+
+        if not values:
+            return
+        if len(values) == 1 and values[0] in (
+            "True",
+            "False",
+            "true",
+            "false",
+            "0",
+            "1",
+            0,
+            1,
+        ):
+            setattr(namespace, self.dest, values[0] in ("True", "true", 1, "1"))
+            return
+        d = getattr(namespace, self.dest) or {}
+        if not isinstance(d, dict):
+            d = {
+                "patch_sympy": d,
+                "patch_torch": d,
+                "patch_transformers": d,
+                "patch_diffusers": d,
+            }
+        for item in values:
+            split_items = item.split("=", 1)
+            key = split_items[0].strip()  # we remove blanks around keys, as is logical
+            value = split_items[1]
+
+            if value in ("True", "true", "False", "false"):
+                d[key] = value in ("True", "true")
+                continue
+            try:
+                d[key] = int(value)
+                continue
+            except (TypeError, ValueError):
+                pass
+            try:
+                d[key] = float(value)
+                continue
+            except (TypeError, ValueError):
+                pass
+            d[key] = _parse_json(value)
+
+        setattr(namespace, self.dest, d)
+
+
 def get_parser_validate() -> ArgumentParser:
     parser = ArgumentParser(
         prog="validate",
@@ -383,8 +431,13 @@ def get_parser_validate() -> ArgumentParser:
     parser.add_argument(
         "--patch",
         default=True,
-        action=
-
+        action=_BoolOrParseDictPatch,
+        nargs="*",
+        help="Applies patches before exporting, it can be a boolean "
+        "to enable to disable the patches or be more finetuned. It is possible to "
+        "disable patch for torch by adding "
+        '--patch "patch_sympy=False" --patch "patch_torch=False", '
+        "default is True.",
     )
     parser.add_argument(
         "--rewrite",
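
The new --patch option can therefore be passed either as a plain boolean (--patch False) or as a list of key=value pairs merged into a dictionary. A minimal standalone sketch of that dual behaviour, simplified from the action above (the real action also expands a previous boolean into per-library keys such as patch_torch, and _parse_json here is a stand-in for the parser's own JSON fallback):

import argparse
import json


def _parse_json(value):
    # stand-in fallback: return parsed JSON when possible, else the raw string
    try:
        return json.loads(value)
    except ValueError:
        return value


class BoolOrDictAction(argparse.Action):
    """Accepts "--patch False" as a boolean or "--patch a=1 b=False" as a dict."""

    def __call__(self, parser, namespace, values, option_string=None):
        if len(values) == 1 and values[0] in ("True", "true", "False", "false", "0", "1"):
            setattr(namespace, self.dest, values[0] in ("True", "true", "1"))
            return
        d = {}
        for item in values:
            key, value = item.split("=", 1)
            if value in ("True", "true", "False", "false"):
                d[key.strip()] = value in ("True", "true")
            else:
                d[key.strip()] = _parse_json(value)
        setattr(namespace, self.dest, d)


parser = argparse.ArgumentParser()
parser.add_argument("--patch", default=True, nargs="*", action=BoolOrDictAction)
print(parser.parse_args([]).patch)                                # True (default)
print(parser.parse_args(["--patch", "False"]).patch)              # False
print(parser.parse_args(["--patch", "patch_torch=False"]).patch)  # {'patch_torch': False}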

onnx_diagnostic/export/dynamic_shapes.py
CHANGED
@@ -887,19 +887,30 @@ class ModelInputs:

         # In case DynamicCache is not registered.
         if obj.__class__.__name__ == "DynamicCache":
-
-
-
-
-
-
-
-
+            if hasattr(obj, "layers"):
+                kc = set(len(o.layers) for o in objs)
+                assert (
+                    len(kc) == 1
+                ), f"All attribute 'key_cache' should have the same length but found {kc}"
+                vc = kc.copy()
+            else:
+                kc = set(len(o.key_cache) for o in objs)
+                assert (
+                    len(kc) == 1
+                ), f"All attribute 'key_cache' should have the same length but found {kc}"
+                vc = set(len(o.value_cache) for o in objs)
+                assert (
+                    len(vc) == 1
+                ), f"All attribute 'value_cache' should have the same length but found {vc}"
+
             key_cache = []
             for i in range(kc.pop()):
                 key_cache.append(
                     self.guess_dynamic_dimensions(
-                        *[
+                        *[
+                            o.layers[i].keys if hasattr(o, "layers") else o.key_cache[i]
+                            for o in objs
+                        ],
                         auto=auto if isinstance(auto, bool) else f"{auto}_{i}kdc",
                     )
                 )
@@ -907,7 +918,10 @@ class ModelInputs:
             for i in range(vc.pop()):
                 value_cache.append(
                     self.guess_dynamic_dimensions(
-                        *[
+                        *[
+                            o.layers[i].values if hasattr(o, "layers") else o.value_cache[i]
+                            for o in objs
+                        ],
                         auto=auto if isinstance(auto, bool) else f"{auto}_{i}vdc",
                     )
                 )
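
Both hunks branch on whether the DynamicCache object exposes a "layers" list (newer transformers releases) or flat key_cache/value_cache lists (older ones). A minimal sketch of that compatibility pattern with stand-in classes rather than the real transformers cache:

class FakeLayer:
    def __init__(self, keys, values):
        self.keys, self.values = keys, values


class FakeCache:
    """Stand-in for DynamicCache; only the attribute names used above matter."""

    def __init__(self, pairs, new_api=True):
        if new_api:
            self.layers = [FakeLayer(k, v) for k, v in pairs]
        else:
            self.key_cache = [k for k, _ in pairs]
            self.value_cache = [v for _, v in pairs]


def key_tensors(cache):
    # mirrors: o.layers[i].keys if hasattr(o, "layers") else o.key_cache[i]
    if hasattr(cache, "layers"):
        return [layer.keys for layer in cache.layers]
    return list(cache.key_cache)


old_api = FakeCache([("k0", "v0")], new_api=False)
new_api = FakeCache([("k0", "v0")], new_api=True)
assert key_tensors(old_api) == key_tensors(new_api) == ["k0"]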

onnx_diagnostic/export/shape_helper.py
CHANGED
@@ -9,6 +9,8 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
     All dimensions are considered as dynamic.
     ``dim_prefix`` can be a string (the function uses it as a prefix),
     or ``torch.export.Dim.AUTO`` or ``torch.export.Dim.DYNAMIC``.
+    Depending on the version of transformers, serializations function
+    of DynamicCache class is automatically serialized or not (>= 4.51, < 4.55).

     .. runpython::
         :showcode:
@@ -17,6 +19,7 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
         import torch
         from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
         from onnx_diagnostic.export.shape_helper import all_dynamic_shape_from_inputs
+        from onnx_diagnostic.torch_export_patches import torch_export_patches

         bsize, nheads, slen, dim = 2, 1, 30, 96
         inputs = dict(
@@ -25,10 +28,11 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
             position_ids=torch.arange(3, dtype=torch.int64),
             past_key_values=make_dynamic_cache(
                 [(torch.randn(bsize, nheads, slen, dim),
-
+                  torch.randn(bsize, nheads, slen, dim))]
             ),
         )
-
+        with torch_export_patches(patch_transformers=True):
+            ds = all_dynamic_shape_from_inputs(inputs)
         pprint.pprint(ds)

     For this function to work, patches must be enabled if :epkg:`transformers`
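
Assembled from the updated docstring, the example now wraps the call in torch_export_patches so the DynamicCache serialization is registered whatever the installed transformers version; only the inputs visible in the hunk are kept here:

import pprint
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.export.shape_helper import all_dynamic_shape_from_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches

bsize, nheads, slen, dim = 2, 1, 30, 96
inputs = dict(
    position_ids=torch.arange(3, dtype=torch.int64),
    past_key_values=make_dynamic_cache(
        [(torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))]
    ),
)

# The patches register the cache serialization when transformers does not do it itself.
with torch_export_patches(patch_transformers=True):
    ds = all_dynamic_shape_from_inputs(inputs)
pprint.pprint(ds)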

onnx_diagnostic/helpers/cache_helper.py
CHANGED
@@ -41,9 +41,14 @@ class CacheKeyValue:
                 f"or value_cache={string_type(self.value_cache)}, "
                 f"cache.layers={string_type(cache.layers)}"
             )
-        elif cache is not None:
+        elif cache is not None and hasattr(cache, "key_cache"):
             self.key_cache = cache.key_cache
             self.value_cache = cache.value_cache
+        elif cache is None:
+            self.key_cache = None
+            self.value_cache = None
+        else:
+            raise NotImplementedError(f"type(cache)={type(cache)}")

     def make_dynamic_cache(self):
         """Do the reverse operation."""
@@ -91,13 +96,16 @@ def flatten_unflatten_for_dynamic_shapes(
         return tuple(subtrees)
     if spec.type is list:
         return list(subtrees)
+    if spec.type is None and not subtrees:
+        return None
     if spec.context:
         # This is a custom class with attributes.
         # It is returned as a list.
         return list(subtrees)
     raise ValueError(
         f"Unable to interpret spec type {spec.type} "
-        f"(type is {type(spec.type)}, context is {spec.context})
+        f"(type is {type(spec.type)}, context is {spec.context}), "
+        f"spec={spec}, subtrees={subtrees}"
     )
     # This is a list.
     return subtrees
@@ -126,6 +134,8 @@ def is_cache_dynamic_registered(fast: bool = False) -> bool:
     )
     values, spec = torch.utils._pytree.tree_flatten(cache)
     cache2 = torch.utils._pytree.tree_unflatten(values, spec)
+    if hasattr(cache2, "layers") and hasattr(cache, "layers"):
+        return len(cache2.layers) == len(cache.layers)
     return len(cache2.key_cache) == len(cache.value_cache)


@@ -176,7 +186,7 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
             f"Unexpected number of layers in the cache ({len(cache.layers)}), "
             f"{len(key_value_pairs)} expected."
         )
-        return cache
+        return finalize_cache(cache)

 else:

@@ -260,6 +270,9 @@ def make_static_cache(
             self.num_attention_heads = key_value_pairs[0][0].shape[1]
             self.num_hidden_layers = len(key_value_pairs)

+        def get_text_config(self):
+            return self
+
     assert max_cache_len is not None, (
         f"max_cache_len={max_cache_len} cannot be setup "
         f"automatically yet from shape {key_value_pairs[0][0].shape}"
@@ -280,6 +293,33 @@ def make_static_cache(
         max_cache_len=max_cache_len,
     )
     ca = CacheKeyValue(cache)
+    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+        # transformers>= 4.55.2, layers are empty
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(key, value, i)
+        return cache
+
+    torch._check(
+        not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers),
+        lambda: (
+            f"Length mismatch len(key_value_pairs)={len(key_value_pairs)}, "
+            f"len(cache.layers)={len(cache.layers)}"
+        ),
+    )
+    torch._check(
+        len(key_value_pairs) == len(ca.key_cache),
+        lambda: (
+            f"Length mismatch len(key_value_pairs)={len(key_value_pairs)}, "
+            f"len(ca.key_cache)={len(ca.key_cache)}"
+        ),
+    )
+    torch._check(
+        len(key_value_pairs) == len(ca.value_cache),
+        lambda: (
+            f"Length mismatch len(key_value_pairs)={len(key_value_pairs)}, "
+            f"len(ca.value_cache)={len(ca.value_cache)}"
+        ),
+    )
     for i in range(len(key_value_pairs)):
         assert (
             key_value_pairs[i][0].shape == key_value_pairs[i][1].shape
@@ -298,7 +338,7 @@ def make_static_cache(
         f"Unexpected number of layers in the cache ({len(cache.layers)}), "
         f"{len(key_value_pairs)} expected."
     )
-    return cache
+    return finalize_cache(cache)


 def make_encoder_decoder_cache(
@@ -307,7 +347,10 @@ def make_encoder_decoder_cache(
 ) -> transformers.cache_utils.EncoderDecoderCache:
     """Creates an EncoderDecoderCache."""
     return transformers.cache_utils.EncoderDecoderCache(
-        self_attention_cache=self_attention_cache,
+        # self_attention_cache=self_attention_cache,
+        # cross_attention_cache=cross_attention_cache
+        self_attention_cache,
+        cross_attention_cache,
     )


@@ -323,6 +366,9 @@ def make_mamba_cache(key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]]) -
             self.num_hidden_layers = len(key_value_pairs)
             self.dtype = dtype

+        def get_text_config(self):
+            return self
+
     cache = MambaCache(
         _config(),
         max_batch_size=key_value_pairs[0][0].shape[0],
@@ -348,7 +394,7 @@ def make_mamba_cache(key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]]) -
             f"got {key_value_pairs[i][1].shape}"
         )
         cache.ssm_states[i][:, :, :] = key_value_pairs[i][1]
-    return cache
+    return finalize_cache(cache)


 def make_sliding_window_cache(
@@ -363,6 +409,9 @@ def make_sliding_window_cache(
             self.num_hidden_layers = len(key_value_pairs)
             self.sliding_window = key_value_pairs[0][0].shape[2]

+        def get_text_config(self):
+            return self
+
     cache = transformers.cache_utils.SlidingWindowCache(
         config=_config(),
         max_batch_size=key_value_pairs[0][0].shape[0],
@@ -371,6 +420,13 @@ def make_sliding_window_cache(
         dtype=key_value_pairs[0][0].dtype,
     )
     ca = CacheKeyValue(cache)
+    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+        # transformers>= 4.55.2, layers are empty
+        cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
+        return cache
+
     for i in range(len(key_value_pairs)):
         assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
             f"Shape mismatch, expected {cache.key_cache[i].shape}, "
@@ -393,7 +449,7 @@ def make_sliding_window_cache(
         f"Unexpected number of layers in the cache ({len(cache.layers)}), "
         f"{len(key_value_pairs)} expected."
     )
-    return cache
+    return finalize_cache(cache)


 def make_hybrid_cache(
@@ -521,6 +577,9 @@ def make_hybrid_cache(
         sliding_window = _sliding_window
         num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3

+        def get_text_config(self):
+            return self
+
     if layer_types:
         _config.layer_types = layer_types  # type: ignore[attr-defined]

@@ -549,4 +608,21 @@ def make_hybrid_cache(
         f"Unexpected number of layers in the cache ({len(cache.layers)}), "
         f"{len(key_value_pairs)} expected."
     )
+    return finalize_cache(cache)
+
+
+def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_utils.Cache:
+    """
+    Ensures the created cache is consistent.
+    Returns the cache modified inplace.
+    """
+    if (
+        hasattr(cache, "layer_class_to_replicate")
+        and hasattr(cache, "layers")
+        and cache.layers
+        and not cache.layer_class_to_replicate
+    ):
+        # This is used to expand the cache when it does not contains enough layers.
+        # This is needed since transformers>4.55.3
+        cache.layer_class_to_replicate = cache.layers[0].__class__
     return cache
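
Every make_*_cache helper now funnels its result through the new finalize_cache, which only touches caches exposing both "layers" and "layer_class_to_replicate" (transformers > 4.55.3). A small sketch of the pattern with stand-in classes instead of the real transformers cache:

class FakeLayer:
    """Stand-in for a transformers cache layer."""


class FakeCache:
    """Stand-in for a transformers Cache with the post-4.55.3 attributes."""

    def __init__(self):
        self.layers = [FakeLayer()]
        self.layer_class_to_replicate = None


def finalize_cache(cache):
    # same logic as the helper added above: remember the layer class used to grow the cache
    if (
        hasattr(cache, "layer_class_to_replicate")
        and hasattr(cache, "layers")
        and cache.layers
        and not cache.layer_class_to_replicate
    ):
        cache.layer_class_to_replicate = cache.layers[0].__class__
    return cache


cache = finalize_cache(FakeCache())
print(cache.layer_class_to_replicate)  # <class '__main__.FakeLayer'>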

onnx_diagnostic/helpers/config_helper.py
CHANGED
@@ -1,6 +1,7 @@
 import functools
 import importlib
 import inspect
+import os
 import re
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 import transformers
@@ -110,3 +111,59 @@ def config_class_from_architecture(arch: str, exc: bool = False) -> Optional[typ
     )
     cls_name = unique.pop()
     return getattr(transformers, cls_name)
+
+
+def default_num_hidden_layers():
+    """
+    Returns the default number of layers.
+    It is lower when the unit tests are running
+    when ``UNITTEST_GOING=1``.
+    """
+    import torch
+
+    if torch.cuda.is_available():
+        capa = torch.cuda.get_device_capability(0)
+        if capa[0] < 9:
+            return 2
+    return 2 if os.environ.get("UNITTEST_GOING", "0") == "1" else 4
+
+
+def build_diff_config(config0, config1):
+    """
+    Returns all the modified values between two configuration
+    """
+    import torch
+
+    diff = {}
+    for k in config0:
+        assert isinstance(k, str), f"k={k!r}, wrong type in {config0}"
+        if k not in config1:
+            v0 = getattr(config0, k) if hasattr(config0, k) else config0[k]
+            diff[k] = f"-{v0}"
+    for k in config1:
+        assert isinstance(k, str), f"k={k!r}, wrong type in {config1}"
+        if k not in config0:
+            v1 = getattr(config1, k) if hasattr(config1, k) else config1[k]
+            diff[k] = f"+{v1}"
+    for k in config0:
+        if k not in config1:
+            continue
+        v0 = getattr(config0, k) if hasattr(config0, k) else config0[k]
+        v1 = getattr(config1, k) if hasattr(config1, k) else config1[k]
+        if (
+            v0 is None
+            or v1 is None
+            or isinstance(v1, (float, int, bool, str, list, tuple, torch.dtype))
+            or (
+                isinstance(v0, dict)
+                and isinstance(v1, dict)
+                and all(isinstance(k, int) for k in v1)
+            )
+        ):
+            if v1 != v0:
+                diff[k] = f"{v0} -> {v1}"
+        else:
+            d = build_diff_config(v0, v1)
+            if d:
+                diff[k] = d
+    return diff
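
Both new helpers are importable from onnx_diagnostic.helpers.config_helper. A hedged usage sketch with plain dictionaries (the function also walks configuration objects; the "-value", "+value" and "old -> new" formats follow the code above, the exact key order may differ):

from onnx_diagnostic.helpers.config_helper import (
    build_diff_config,
    default_num_hidden_layers,
)

config0 = {"num_hidden_layers": 32, "hidden_size": 4096, "tie_word_embeddings": True}
config1 = {"num_hidden_layers": 2, "hidden_size": 4096, "head_dim": 128}

# removed keys show up as "-value", added keys as "+value", changed keys as "old -> new"
print(build_diff_config(config0, config1))
# {'tie_word_embeddings': '-True', 'head_dim': '+128', 'num_hidden_layers': '32 -> 2'}

# 2 when UNITTEST_GOING=1 or on an older GPU, otherwise 4
print(default_num_hidden_layers())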

onnx_diagnostic/helpers/helper.py
CHANGED
@@ -36,11 +36,12 @@ def size_type(dtype: Any) -> int:
         TensorProto.FLOAT8E4M3FNUZ,
         TensorProto.FLOAT8E5M2,
         TensorProto.FLOAT8E5M2FNUZ,
+        getattr(TensorProto, "FLOAT8E8M0", None),
     }:
         return 1
     if dtype in {TensorProto.COMPLEX128}:
         return 16
-    from .
+    from .onnx_helper import onnx_dtype_name

     raise AssertionError(
         f"Unable to return the element size for type {onnx_dtype_name(dtype)}"
@@ -1478,8 +1479,12 @@ def max_diff(
     # backup function in case pytorch does not know how to serialize.
     if expected.__class__.__name__ == "DynamicCache":
         if got.__class__.__name__ == "DynamicCache":
+            from .cache_helper import CacheKeyValue
+
             if verbose >= 6:
                 print(f"[max_diff] DynamicCache: {string_type(expected)} ? {string_type(got)}")
+            expected = CacheKeyValue(expected)
+            got = CacheKeyValue(got)
             return max_diff(
                 [expected.key_cache, expected.value_cache],
                 [got.key_cache, got.value_cache],
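
The getattr(TensorProto, "FLOAT8E8M0", None) entry keeps the set valid on onnx releases that do not define the new 8-bit type yet: the None member is harmless. A generic sketch of that version-tolerance pattern:

from onnx import TensorProto

# enum members that may or may not exist in the installed onnx release
ONE_BYTE_TYPES = {
    TensorProto.FLOAT8E4M3FN,
    TensorProto.FLOAT8E5M2,
    getattr(TensorProto, "FLOAT8E8M0", None),  # None on older onnx, inert in the set
}


def is_one_byte(dtype: int) -> bool:
    return dtype in ONE_BYTE_TYPES


print(is_one_byte(TensorProto.FLOAT8E4M3FN))  # True
print(is_one_byte(TensorProto.FLOAT))         # False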

onnx_diagnostic/reference/ops/op_cast_like.py
CHANGED
@@ -11,22 +11,26 @@ try:
         float8e5m2fnuz,
     )
 except ImportError:
+    bfloat16 = None
 from onnx.reference.ops.op_cast import cast_to
 from ...helpers.onnx_helper import np_dtype_to_tensor_dtype


 def _cast_like(x, y, saturate):
-    if
-
-
-
-
-
-
-
-
-
-
+    if bfloat16 is not None:
+        if y.dtype == bfloat16 and y.dtype.descr[0][0] == "bfloat16":
+            # np.uint16 == np.uint16 is True as well as np.uint16 == bfloat16
+            to = TensorProto.BFLOAT16
+        elif y.dtype == float8e4m3fn and y.dtype.descr[0][0] == "e4m3fn":
+            to = TensorProto.FLOAT8E4M3FN
+        elif y.dtype == float8e4m3fnuz and y.dtype.descr[0][0] == "e4m3fnuz":
+            to = TensorProto.FLOAT8E4M3FNUZ
+        elif y.dtype == float8e5m2 and y.dtype.descr[0][0] == "e5m2":
+            to = TensorProto.FLOAT8E5M2
+        elif y.dtype == float8e5m2fnuz and y.dtype.descr[0][0] == "e5m2fnuz":
+            to = TensorProto.FLOAT8E5M2FNUZ
+        else:
+            to = np_dtype_to_tensor_dtype(y.dtype)  # type: ignore
     else:
         to = np_dtype_to_tensor_dtype(y.dtype)  # type: ignore
     return (cast_to(x, to, saturate),)
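
The extra dtype.descr[0][0] checks are needed because the onnx custom element types are stored as uint16/uint8 with a named field, so dtype equality alone is ambiguous (as the inline comment notes). A small demonstration, assuming an onnx build that ships onnx.reference.custom_element_types:

import numpy as np
from onnx.reference.custom_element_types import bfloat16

raw = np.zeros(2, dtype=np.uint16)
bf = np.zeros(2, dtype=bfloat16)

# equality alone cannot tell the two apart: bfloat16 is backed by uint16 storage
print(raw.dtype == bfloat16)   # True on such builds, hence the ambiguity
print(bf.dtype == bfloat16)    # True

# the field name in dtype.descr is what the new branch checks
print(raw.dtype.descr[0][0])   # '' for plain uint16
print(bf.dtype.descr[0][0])    # 'bfloat16'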

onnx_diagnostic/reference/torch_ops/unary_ops.py
CHANGED
@@ -37,6 +37,13 @@ class Identity_1(OpRunKernel):
         return OpRunTensor(x.tensor)


+class IsNaN_9(OpRunKernel):
+    """IsNaN"""
+
+    def run(self, x: OpRunTensor) -> OpRunTensor:
+        return OpRunTensor(x.tensor.isnan())
+
+
 class Log_1(OpRunKernel):
     """Log"""

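
The new kernel simply forwards to torch.Tensor.isnan, which already implements the ONNX IsNaN semantics element-wise:

import torch

x = torch.tensor([1.0, float("nan"), float("inf")])
print(x.isnan())  # tensor([False,  True, False])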

onnx_diagnostic/tasks/automatic_speech_recognition.py
CHANGED
@@ -2,7 +2,11 @@ from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 import transformers
 from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
-from ..helpers.config_helper import
+from ..helpers.config_helper import (
+    update_config,
+    check_hasattr,
+    default_num_hidden_layers as nhl,
+)

 __TASK__ = "automatic-speech-recognition"

@@ -15,7 +19,7 @@ def reduce_model_config(config: Any) -> Dict[str, Any]:
     if hasattr(config, "decoder_layers"):
         config.decoder_layers = min(config.decoder_layers, 2)
     if hasattr(config, "num_hidden_layers"):
-        config.num_hidden_layers = min(config.num_hidden_layers,
+        config.num_hidden_layers = min(config.num_hidden_layers, nhl())
     update_config(config, kwargs)
     return kwargs

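
Every task module now caps num_hidden_layers with default_num_hidden_layers() instead of a hard-coded constant. A hedged sketch of the shared pattern, using a SimpleNamespace as a stand-in for a real transformers config:

from types import SimpleNamespace

from onnx_diagnostic.helpers.config_helper import default_num_hidden_layers as nhl

config = SimpleNamespace(num_hidden_layers=32, decoder_layers=12)

# same capping logic as reduce_model_config above
if hasattr(config, "decoder_layers"):
    config.decoder_layers = min(config.decoder_layers, 2)
if hasattr(config, "num_hidden_layers"):
    config.num_hidden_layers = min(config.num_hidden_layers, nhl())

print(config.num_hidden_layers)  # 2 under UNITTEST_GOING=1 or an older GPU, otherwise 4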

onnx_diagnostic/tasks/feature_extraction.py
CHANGED
@@ -1,15 +1,20 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
-from ..helpers.config_helper import
+from ..helpers.config_helper import (
+    update_config,
+    check_hasattr,
+    default_num_hidden_layers as nhl,
+)
 from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache

+
 __TASK__ = "feature-extraction"


 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
     check_hasattr(config, "num_hidden_layers")
-    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers,
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, nhl()))
     update_config(config, kwargs)
     return kwargs

@@ -160,5 +165,4 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         if hasattr(config, att):
             kwargs[att] = getattr(config, att)
     kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
-    print(kwargs)
     return kwargs, get_inputs

onnx_diagnostic/tasks/fill_mask.py
CHANGED
@@ -1,6 +1,10 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
-from ..helpers.config_helper import
+from ..helpers.config_helper import (
+    update_config,
+    check_hasattr,
+    default_num_hidden_layers as nhl,
+)

 __TASK__ = "fill-mask"

@@ -9,7 +13,7 @@ def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
     check_hasattr(config, "num_attention_heads", "num_hidden_layers")
     kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers,
+        num_hidden_layers=min(config.num_hidden_layers, nhl()),
         num_attention_heads=min(config.num_attention_heads, 4),
     )
     update_config(config, kwargs)

onnx_diagnostic/tasks/image_classification.py
CHANGED
@@ -1,6 +1,10 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
-from ..helpers.config_helper import
+from ..helpers.config_helper import (
+    update_config,
+    check_hasattr,
+    default_num_hidden_layers as nhl,
+)

 __TASK__ = "image-classification"

@@ -17,7 +21,7 @@ def reduce_model_config(config: Any) -> Dict[str, Any]:
     check_hasattr(config, ("num_hidden_layers", "hidden_sizes"))
     kwargs = dict(
         num_hidden_layers=(
-            min(config.num_hidden_layers,
+            min(config.num_hidden_layers, nhl())
             if hasattr(config, "num_hidden_layers")
             else len(config.hidden_sizes)
         )