onnx-diagnostic 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +56 -3
- onnx_diagnostic/export/dynamic_shapes.py +24 -10
- onnx_diagnostic/export/shape_helper.py +6 -2
- onnx_diagnostic/ext_test_case.py +2 -0
- onnx_diagnostic/helpers/_log_helper.py +6 -6
- onnx_diagnostic/helpers/cache_helper.py +326 -18
- onnx_diagnostic/helpers/config_helper.py +10 -0
- onnx_diagnostic/helpers/helper.py +152 -11
- onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
- onnx_diagnostic/helpers/onnx_helper.py +13 -7
- onnx_diagnostic/helpers/torch_helper.py +33 -11
- onnx_diagnostic/reference/ops/op_cast_like.py +15 -11
- onnx_diagnostic/reference/torch_ops/__init__.py +1 -0
- onnx_diagnostic/reference/torch_ops/unary_ops.py +7 -0
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +6 -2
- onnx_diagnostic/tasks/feature_extraction.py +7 -3
- onnx_diagnostic/tasks/fill_mask.py +6 -2
- onnx_diagnostic/tasks/image_classification.py +6 -2
- onnx_diagnostic/tasks/image_text_to_text.py +289 -62
- onnx_diagnostic/tasks/mask_generation.py +143 -0
- onnx_diagnostic/tasks/mixture_of_expert.py +2 -2
- onnx_diagnostic/tasks/object_detection.py +6 -2
- onnx_diagnostic/tasks/sentence_similarity.py +6 -2
- onnx_diagnostic/tasks/summarization.py +7 -2
- onnx_diagnostic/tasks/text2text_generation.py +7 -2
- onnx_diagnostic/tasks/text_classification.py +6 -2
- onnx_diagnostic/tasks/text_generation.py +14 -16
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +3 -3
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
- onnx_diagnostic/torch_export_patches/patch_inputs.py +5 -2
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -4
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +428 -129
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +60 -41
- onnx_diagnostic/torch_models/hghub/hub_data.py +5 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
- onnx_diagnostic/torch_models/validate.py +1 -0
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/RECORD +43 -42
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED

onnx_diagnostic/_command_lines_parser.py
CHANGED
@@ -306,7 +306,7 @@ class _ParseDict(argparse.Action):
             value = split_items[1]
 
             if value in ("True", "true", "False", "false"):
-                d[key] =
+                d[key] = value in ("True", "true")
                 continue
             try:
                 d[key] = int(value)
@@ -323,6 +323,54 @@ class _ParseDict(argparse.Action):
         setattr(namespace, self.dest, d)
 
 
+class _BoolOrParseDictPatch(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+
+        if not values:
+            return
+        if len(values) == 1 and values[0] in (
+            "True",
+            "False",
+            "true",
+            "false",
+            "0",
+            "1",
+            0,
+            1,
+        ):
+            setattr(namespace, self.dest, values[0] in ("True", "true", 1, "1"))
+            return
+        d = getattr(namespace, self.dest) or {}
+        if not isinstance(d, dict):
+            d = {
+                "patch_sympy": d,
+                "patch_torch": d,
+                "patch_transformers": d,
+                "patch_diffusers": d,
+            }
+        for item in values:
+            split_items = item.split("=", 1)
+            key = split_items[0].strip()  # we remove blanks around keys, as is logical
+            value = split_items[1]
+
+            if value in ("True", "true", "False", "false"):
+                d[key] = value in ("True", "true")
+                continue
+            try:
+                d[key] = int(value)
+                continue
+            except (TypeError, ValueError):
+                pass
+            try:
+                d[key] = float(value)
+                continue
+            except (TypeError, ValueError):
+                pass
+            d[key] = _parse_json(value)
+
+        setattr(namespace, self.dest, d)
+
+
 def get_parser_validate() -> ArgumentParser:
     parser = ArgumentParser(
         prog="validate",
@@ -383,8 +431,13 @@ def get_parser_validate() -> ArgumentParser:
     parser.add_argument(
         "--patch",
         default=True,
-        action=
-
+        action=_BoolOrParseDictPatch,
+        nargs="*",
+        help="Applies patches before exporting, it can be a boolean "
+        "to enable to disable the patches or be more finetuned. It is possible to "
+        "disable patch for torch by adding "
+        '--patch "patch_sympy=False" --patch "patch_torch=False", '
+        "default is True.",
     )
     parser.add_argument(
         "--rewrite",
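The new _BoolOrParseDictPatch action lets --patch accept either a plain boolean or a list of key=value pairs. A minimal standalone sketch of that behaviour follows (illustrative only, not the package's code; json.loads stands in for the private _parse_json helper):

    import argparse
    import json

    class BoolOrDictAction(argparse.Action):
        # Simplified stand-in for _BoolOrParseDictPatch.
        def __call__(self, parser, namespace, values, option_string=None):
            if len(values) == 1 and values[0] in ("True", "true", "False", "false", "0", "1"):
                # A single boolean-like token toggles every patch at once.
                setattr(namespace, self.dest, values[0] in ("True", "true", "1"))
                return
            d = getattr(namespace, self.dest) or {}
            if not isinstance(d, dict):
                d = {}  # the real action expands a previous boolean into per-library keys
            for item in values:
                key, value = item.split("=", 1)
                if value in ("True", "true", "False", "false"):
                    d[key.strip()] = value in ("True", "true")
                else:
                    d[key.strip()] = json.loads(value)  # stand-in for _parse_json
            setattr(namespace, self.dest, d)

    parser = argparse.ArgumentParser(prog="validate")
    parser.add_argument("--patch", default=True, nargs="*", action=BoolOrDictAction)

    print(parser.parse_args(["--patch", "False"]).patch)              # False
    print(parser.parse_args(["--patch", "patch_torch=False"]).patch)  # {'patch_torch': False}
    print(parser.parse_args(
        ["--patch", "patch_sympy=False", "--patch", "patch_torch=False"]).patch)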
onnx_diagnostic/export/dynamic_shapes.py
CHANGED

@@ -887,19 +887,30 @@ class ModelInputs:
 
         # In case DynamicCache is not registered.
         if obj.__class__.__name__ == "DynamicCache":
-
-
-
-
-
-
-
-
+            if hasattr(obj, "layers"):
+                kc = set(len(o.layers) for o in objs)
+                assert (
+                    len(kc) == 1
+                ), f"All attribute 'key_cache' should have the same length but found {kc}"
+                vc = kc.copy()
+            else:
+                kc = set(len(o.key_cache) for o in objs)
+                assert (
+                    len(kc) == 1
+                ), f"All attribute 'key_cache' should have the same length but found {kc}"
+                vc = set(len(o.value_cache) for o in objs)
+                assert (
+                    len(vc) == 1
+                ), f"All attribute 'value_cache' should have the same length but found {vc}"
+
             key_cache = []
             for i in range(kc.pop()):
                 key_cache.append(
                     self.guess_dynamic_dimensions(
-                        *[
+                        *[
+                            o.layers[i].keys if hasattr(o, "layers") else o.key_cache[i]
+                            for o in objs
+                        ],
                         auto=auto if isinstance(auto, bool) else f"{auto}_{i}kdc",
                     )
                 )
@@ -907,7 +918,10 @@ class ModelInputs:
             for i in range(vc.pop()):
                 value_cache.append(
                     self.guess_dynamic_dimensions(
-                        *[
+                        *[
+                            o.layers[i].values if hasattr(o, "layers") else o.value_cache[i]
+                            for o in objs
+                        ],
                         auto=auto if isinstance(auto, bool) else f"{auto}_{i}vdc",
                     )
                 )
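The layer-wise guesses above depend on which attribute the installed transformers release exposes: newer versions store per-layer objects with .keys/.values under cache.layers, older ones keep flat key_cache/value_cache lists. A short hedged probe (which attribute exists depends on the transformers version installed):

    import torch
    from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache

    cache = make_dynamic_cache([(torch.randn(2, 4, 3, 8), torch.randn(2, 4, 3, 8))])

    # Pick whichever layout the installed transformers release provides.
    if hasattr(cache, "layers"):
        print(cache.layers[0].keys.shape, cache.layers[0].values.shape)
    else:
        print(cache.key_cache[0].shape, cache.value_cache[0].shape)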
onnx_diagnostic/export/shape_helper.py
CHANGED

@@ -9,6 +9,8 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
     All dimensions are considered as dynamic.
     ``dim_prefix`` can be a string (the function uses it as a prefix),
     or ``torch.export.Dim.AUTO`` or ``torch.export.Dim.DYNAMIC``.
+    Depending on the version of transformers, serializations function
+    of DynamicCache class is automatically serialized or not (>= 4.51, < 4.55).
 
     .. runpython::
         :showcode:
@@ -17,6 +19,7 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
         import torch
         from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
         from onnx_diagnostic.export.shape_helper import all_dynamic_shape_from_inputs
+        from onnx_diagnostic.torch_export_patches import torch_export_patches
 
         bsize, nheads, slen, dim = 2, 1, 30, 96
         inputs = dict(
@@ -25,10 +28,11 @@ def all_dynamic_shape_from_inputs(inputs: Any, dim_prefix: Any = "d") -> Any:
             position_ids=torch.arange(3, dtype=torch.int64),
             past_key_values=make_dynamic_cache(
                 [(torch.randn(bsize, nheads, slen, dim),
-
+                  torch.randn(bsize, nheads, slen, dim))]
             ),
         )
-
+        with torch_export_patches(patch_transformers=True):
+            ds = all_dynamic_shape_from_inputs(inputs)
         pprint.pprint(ds)
 
     For this function to work, patches must be enabled if :epkg:`transformers`
onnx_diagnostic/ext_test_case.py
CHANGED
@@ -1058,6 +1058,8 @@ class ExtTestCase(unittest.TestCase):
         elif hasattr(expected, "shape"):
             self.assertEqual(type(expected), type(value), msg=msg)
             self.assertEqualArray(expected, value, msg=msg, atol=atol, rtol=rtol)
+        elif expected is None:
+            assert value is None, f"Expected is None but value is of type {type(value)}"
         else:
             raise AssertionError(
                 f"Comparison not implemented for types {type(expected)} and {type(value)}"
onnx_diagnostic/helpers/_log_helper.py
CHANGED

@@ -33,13 +33,13 @@ def mann_kendall(series: Sequence[float], threshold: float = 0.5):
     .. math::
 
         sign(x) = \\left\\{ \\begin{array}{l} -1 if x < 0 \\\\ 0 if x = 0 \\\\ +1 otherwise
-        \\right.
+        \\end{array} \\right.
 
     And:
 
     .. math::
 
-        Var(S)= \\frac{n(n-1)(2n+5
+        Var(S)= \\frac{n(n-1)(2n+5) - \\sum_t t(t-1)(2t+5)}{18}
     """
     aseries = np.asarray(series)
     stat = 0
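The completed formula is the usual Mann-Kendall variance with a tie correction; with no ties the sum vanishes and Var(S) = n(n-1)(2n+5)/18. A quick hedged arithmetic check (not part of the package):

    def var_s(n, ties=()):
        # Mann-Kendall variance of S; ties lists the size of each group of tied values.
        return (n * (n - 1) * (2 * n + 5) - sum(t * (t - 1) * (2 * t + 5) for t in ties)) / 18

    print(var_s(10))             # 125.0 when there are no ties
    print(var_s(10, ties=[2]))   # 124.0, slightly smaller with one tied pair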
@@ -251,7 +251,7 @@ def open_dataframe(
 ) -> pandas.DataFrame:
     """
     Opens a filename defined by function
-    :func:`onnx_diagnostic.helpers.
+    :func:`onnx_diagnostic.helpers._log_helper.enumerate_csv_files`.
 
     :param data: a dataframe, a filename, a tuple indicating the file is coming
         from a zip file
@@ -260,17 +260,17 @@ def open_dataframe(
     if isinstance(data, pandas.DataFrame):
         return data
     if isinstance(data, str):
-        df = pandas.read_csv(data)
+        df = pandas.read_csv(data, low_memory=False)
         df["RAWFILENAME"] = data
         return df
     if isinstance(data, tuple):
         if not data[-1]:
-            df = pandas.read_csv(data[2])
+            df = pandas.read_csv(data[2], low_memory=False)
            df["RAWFILENAME"] = data[2]
            return df
        zf = zipfile.ZipFile(data[-1])
        with zf.open(data[2]) as f:
-            df = pandas.read_csv(f)
+            df = pandas.read_csv(f, low_memory=False)
             df["RAWFILENAME"] = f"{data[-1]}/{data[2]}"
         zf.close()
         return df
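low_memory=False makes pandas read each CSV in a single pass, so column dtypes are inferred from the whole file rather than chunk by chunk, which avoids DtypeWarning on large benchmark logs with mixed columns. A hedged sketch of the behaviour the change targets (the file name is illustrative):

    import pandas

    # One-pass read: consistent dtypes, no mixed-type chunk inference.
    df = pandas.read_csv("benchmark_results.csv", low_memory=False)  # hypothetical file
    print(df.dtypes)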
onnx_diagnostic/helpers/cache_helper.py
CHANGED

@@ -4,6 +4,56 @@ import torch
 import transformers
 import transformers.cache_utils
 
+try:
+    from transformers.models.mamba.modeling_mamba import MambaCache
+except ImportError:
+    from transformers.cache_utils import MambaCache
+
+
+class CacheKeyValue:
+    """
+    Starting transformers>=4.54, the cache API has deprecated
+    ``cache.key_cache`` and ``cache.value_cache``.
+    This class wraps a cache independently from transformers version and enables
+    attributes ``key_cache`` and ``value_cache``.
+
+    .. code-block:: python
+
+        capi = CacheKeyValue(cache)
+        capi.key_cache
+        capi.value_cache
+    """
+
+    def __init__(self, cache=None):
+        if hasattr(cache, "layers"):
+            layers = [
+                layer
+                for layer in cache.layers
+                if layer is not None and layer.keys is not None and layer.values is not None
+            ]
+            self.key_cache = [layer.keys for layer in layers]
+            self.value_cache = [layer.values for layer in layers]
+            if None in self.key_cache or None in self.value_cache:
+                from .helper import string_type
+
+                raise AssertionError(
+                    f"issue with key_cache={string_type(self.key_cache)}, "
+                    f"or value_cache={string_type(self.value_cache)}, "
+                    f"cache.layers={string_type(cache.layers)}"
+                )
+        elif cache is not None and hasattr(cache, "key_cache"):
+            self.key_cache = cache.key_cache
+            self.value_cache = cache.value_cache
+        elif cache is None:
+            self.key_cache = None
+            self.value_cache = None
+        else:
+            raise NotImplementedError(f"type(cache)={type(cache)}")
+
+    def make_dynamic_cache(self):
+        """Do the reverse operation."""
+        return make_dynamic_cache(list(zip(self.key_cache, self.value_cache)))
+
 
 def flatten_unflatten_for_dynamic_shapes(
     obj: Any,
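A hedged usage sketch of the new wrapper: CacheKeyValue gives uniform key_cache/value_cache access whether the installed transformers stores tensors in cache.layers or in flat lists, and its make_dynamic_cache method reverses the wrapping:

    import torch
    from onnx_diagnostic.helpers.cache_helper import CacheKeyValue, make_dynamic_cache

    bsize, nheads, slen, dim = 2, 4, 3, 7
    cache = make_dynamic_cache(
        [(torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))]
    )

    capi = CacheKeyValue(cache)
    print(len(capi.key_cache), capi.key_cache[0].shape)  # 1 torch.Size([2, 4, 3, 7])
    rebuilt = capi.make_dynamic_cache()                  # reverse operation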
@@ -81,6 +131,8 @@ def is_cache_dynamic_registered(fast: bool = False) -> bool:
     )
     values, spec = torch.utils._pytree.tree_flatten(cache)
     cache2 = torch.utils._pytree.tree_unflatten(values, spec)
+    if hasattr(cache2, "layers") and hasattr(cache, "layers"):
+        return len(cache2.layers) == len(cache.layers)
     return len(cache2.key_cache) == len(cache.value_cache)
 
 
@@ -119,7 +171,19 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
         )
         print(string_type(past_key_values, with_shape=True))
         """
-
+        cache = transformers.cache_utils.DynamicCache(key_value_pairs)
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
+        )
+        return finalize_cache(cache)
 
 else:
 
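The trimming above exists because recent DynamicCache constructors pre-allocate empty layers; make_dynamic_cache removes them so the cache holds exactly the pairs it was given. A hedged check of that invariant:

    import torch
    from onnx_diagnostic.helpers.cache_helper import CacheKeyValue, make_dynamic_cache

    pairs = [(torch.randn(2, 4, 3, 7), torch.randn(2, 4, 3, 7)) for _ in range(3)]
    cache = make_dynamic_cache(pairs)

    # Whatever the transformers version, the cache should expose exactly len(pairs) layers.
    assert len(CacheKeyValue(cache).key_cache) == len(pairs)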
@@ -203,6 +267,9 @@ def make_static_cache(
             self.num_attention_heads = key_value_pairs[0][0].shape[1]
             self.num_hidden_layers = len(key_value_pairs)
 
+        def get_text_config(self):
+            return self
+
     assert max_cache_len is not None, (
         f"max_cache_len={max_cache_len} cannot be setup "
         f"automatically yet from shape {key_value_pairs[0][0].shape}"
@@ -216,20 +283,59 @@ def make_static_cache(
         ),
     )
     cache = transformers.cache_utils.StaticCache(
-        _config(),
+        config=_config(),
         max_batch_size=key_value_pairs[0][0].shape[0],
         device=key_value_pairs[0][0].device,
         dtype=key_value_pairs[0][0].dtype,
         max_cache_len=max_cache_len,
     )
+    ca = CacheKeyValue(cache)
+    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+        # transformers>= 4.55.2, layers are empty
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(key, value, i)
+        return cache
+
+    torch._check(
+        not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers),
+        lambda: (
+            f"Length mismatch len(key_value_pairs)={len(key_value_pairs)}, "
+            f"len(cache.layers)={len(cache.layers)}"
+        ),
+    )
+    torch._check(
+        len(key_value_pairs) == len(ca.key_cache),
+        lambda: (
+            f"Length mismatch len(key_value_pairs)={len(key_value_pairs)}, "
+            f"len(ca.key_cache)={len(ca.key_cache)}"
+        ),
+    )
+    torch._check(
+        len(key_value_pairs) == len(ca.value_cache),
+        lambda: (
+            f"Length mismatch len(key_value_pairs)={len(key_value_pairs)}, "
+            f"len(ca.value_cache)={len(ca.value_cache)}"
+        ),
+    )
     for i in range(len(key_value_pairs)):
         assert (
             key_value_pairs[i][0].shape == key_value_pairs[i][1].shape
         ), f"Shape mismatch {key_value_pairs[i][0].shape} != {key_value_pairs[i][1].shape}"
         d = key_value_pairs[i][1].shape[2]
-
-
-
+        ca.key_cache[i][:, :, :d, :] = key_value_pairs[i][0]
+        ca.value_cache[i][:, :, :d, :] = key_value_pairs[i][1]
+    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+        # The cache constructor contains the two following lines
+        # (in cache_utils.py) which append empty layers when the cache is
+        # initialized. We need to remove them.
+        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+        # self.append_new_layers(self.num_hidden_layers - 1)
+        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+        f"{len(key_value_pairs)} expected."
+    )
+    return finalize_cache(cache)
 
 
 def make_encoder_decoder_cache(
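A hedged usage sketch for make_static_cache based on the signature visible in the hunk (key/value pairs plus an explicit max_cache_len); string_type is the helper already used by the examples in this file:

    import torch
    from onnx_diagnostic.helpers import string_type
    from onnx_diagnostic.helpers.cache_helper import make_static_cache

    bsize, nheads, slen, dim = 2, 4, 3, 7
    pairs = [
        (torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))
        for _ in range(2)
    ]

    # max_cache_len must cover the sequence length of the provided pairs.
    cache = make_static_cache(pairs, max_cache_len=slen)
    print(string_type(cache, with_shape=True))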
@@ -238,14 +344,15 @@ def make_encoder_decoder_cache(
 ) -> transformers.cache_utils.EncoderDecoderCache:
     """Creates an EncoderDecoderCache."""
     return transformers.cache_utils.EncoderDecoderCache(
-        self_attention_cache=self_attention_cache,
+        # self_attention_cache=self_attention_cache,
+        # cross_attention_cache=cross_attention_cache
+        self_attention_cache,
+        cross_attention_cache,
     )
 
 
-def make_mamba_cache(
-
-) -> transformers.cache_utils.MambaCache:
-    "Creates a :class:`transformers.cache_utils.MambaCache`."
+def make_mamba_cache(key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]]) -> MambaCache:
+    "Creates a ``MambaCache``."
     dtype = key_value_pairs[0][0].dtype
 
     class _config:
@@ -256,7 +363,10 @@ def make_mamba_cache(
             self.num_hidden_layers = len(key_value_pairs)
             self.dtype = dtype
 
-
+        def get_text_config(self):
+            return self
+
+    cache = MambaCache(
         _config(),
         max_batch_size=key_value_pairs[0][0].shape[0],
         device=key_value_pairs[0][0].device,
@@ -281,12 +391,12 @@ def make_mamba_cache(
             f"got {key_value_pairs[i][1].shape}"
         )
         cache.ssm_states[i][:, :, :] = key_value_pairs[i][1]
-    return cache
+    return finalize_cache(cache)
 
 
 def make_sliding_window_cache(
     key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
-) -> transformers.cache_utils.
+) -> transformers.cache_utils.SlidingWindowCache:
     "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
 
     class _config:
@@ -296,22 +406,220 @@ def make_sliding_window_cache(
             self.num_hidden_layers = len(key_value_pairs)
             self.sliding_window = key_value_pairs[0][0].shape[2]
 
+        def get_text_config(self):
+            return self
+
     cache = transformers.cache_utils.SlidingWindowCache(
-        _config(),
+        config=_config(),
         max_batch_size=key_value_pairs[0][0].shape[0],
         max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
         device=key_value_pairs[0][0].device,
         dtype=key_value_pairs[0][0].dtype,
     )
+    ca = CacheKeyValue(cache)
+    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+        # transformers>= 4.55.2, layers are empty
+        cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
+        return cache
+
     for i in range(len(key_value_pairs)):
-        assert
+        assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
             f"Shape mismatch, expected {cache.key_cache[i].shape}, "
             f"got {key_value_pairs[i][0].shape}"
         )
-
-        assert
+        ca.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
+        assert ca.value_cache[i].shape == key_value_pairs[i][1].shape, (
             f"Shape mismatch, expected {cache.value_cache[i].shape}, "
             f"got {key_value_pairs[i][1].shape}"
         )
-
+        ca.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
+    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+        # The cache constructor contains the two following lines
+        # (in cache_utils.py) which append empty layers when the cache is
+        # initialized. We need to remove them.
+        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+        # self.append_new_layers(self.num_hidden_layers - 1)
+        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+        f"{len(key_value_pairs)} expected."
+    )
+    return finalize_cache(cache)
+
+
+def make_hybrid_cache(
+    key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
+    max_cache_len: Optional[int] = None,
+    max_batch_size: Optional[int] = None,
+    sliding_window: Optional[int] = None,
+) -> transformers.cache_utils.HybridCache:
+    """
+    Creates an instance of :class:`transformers.cache_utils.HybridCache`.
+    This version is valid for ``transformers < 4.50``.
+
+    :param key_value_pairs: list of pairs of (key, values)
+    :return: :class:`transformers.cache_utils.HybridCache`
+
+    Example:
+
+    .. runpython::
+        :showcode:
+
+        import torch
+        from onnx_diagnostic.helpers import string_type
+        from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache
+
+        n_layers = 2
+        bsize, nheads, slen, dim = 2, 4, 3, 7
+
+        past_key_values = make_hybrid_cache(
+            [
+                (
+                    torch.randn(bsize, nheads, slen, dim),
+                    torch.randn(bsize, nheads, slen, dim),
+                )
+                for i in range(n_layers)
+            ]
+        )
+        print(string_type(past_key_values, with_shape=True))
+
+    This part defines how the shapes are working in one HybridCache.
+
+    .. code-block:: python
+
+        self.max_cache_len = (
+            max_cache_len if max_cache_len is not None else config.max_position_embeddings)
+
+        # Sliding layers can't be larger than the overall max cache len
+        self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
+        self.max_batch_size = max_batch_size
+
+        self.head_dim = (
+            config.head_dim if hasattr(config, "head_dim")
+            else config.hidden_size // config.num_attention_heads
+        )
+
+        self._dtype = dtype
+        self.num_key_value_heads = (
+            config.num_attention_heads
+            if getattr(config, "num_key_value_heads", None) is None
+            else config.num_key_value_heads
+        )
+
+        # If the attribute does not exist in the config, fallback to a simple StaticCache
+        if hasattr(config, "layer_types"):
+            self.is_sliding = [
+                layer_type != "full_attention" for layer_type in config.layer_types]
+        else:
+            self.is_sliding = [False] * config.num_hidden_layers
+
+        self.key_cache: list[torch.Tensor] = []
+        self.value_cache: list[torch.Tensor] = []
+        global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                              self.max_cache_len, self.head_dim)
+        sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                               self.sliding_window_len, self.head_dim)
+        self.sliding_window = min(config.sliding_window, max_cache_len)
+        device = torch.device(device) if device is not None else None
+        for i in range(config.num_hidden_layers):
+            layer_device = layer_device_map[i] if layer_device_map is not None else device
+            cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
+            new_layer_key_cache = torch.zeros(
+                cache_shape, dtype=self._dtype, device=layer_device)
+            new_layer_value_cache = torch.zeros(
+                cache_shape, dtype=self._dtype, device=layer_device)
+            torch._dynamo.mark_static_address(new_layer_key_cache)
+            torch._dynamo.mark_static_address(new_layer_value_cache)
+            self.key_cache.append(new_layer_key_cache)
+            self.value_cache.append(new_layer_value_cache)
+    """
+    layer_types = None
+    if key_value_pairs:
+        assert (
+            not max_batch_size and not max_cache_len
+        ), "key_value_pairs is not empty, do not specify max_cache_len and max_batch_size"
+        max_batch_size = key_value_pairs[0][0].shape[0]
+        sets_of_dim = set(kv[0].shape[2] for kv in key_value_pairs)
+        if len(sets_of_dim) == 1:
+            max_cache_len = sets_of_dim.pop()
+            sliding_window = max_cache_len
+        else:
+            assert (
+                len(sets_of_dim) == 2
+            ), f"Not implemented for more than 2 dimensions {sets_of_dim}"
+            max_cache_len = max(sets_of_dim)
+            sliding_window = min(sets_of_dim)
+            layer_types = [
+                "full_attention" if i == max_cache_len else "sliding_attention"
+                for i in [kv[0].shape[2] for kv in key_value_pairs]
+            ]
+    else:
+        assert (
+            max_batch_size and max_cache_len
+        ), "key_value_pairs is empty, max_batch_size and max_cache_len are required"
+        if sliding_window is None:
+            sliding_window = max_cache_len
+    _max_cache_len = max_cache_len
+    _sliding_window = sliding_window
+
+    class _config:
+        max_cache_len = _max_cache_len
+        batch_size = max_batch_size
+        num_heads = key_value_pairs[0][0].shape[1] if key_value_pairs else None
+        head_dim = key_value_pairs[0][0].shape[-1] if key_value_pairs else None
+        num_attention_heads = key_value_pairs[0][1].shape[1] if key_value_pairs else None
+        num_hidden_layers = len(key_value_pairs)
+        sliding_window = _sliding_window
+        num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
+
+        def get_text_config(self):
+            return self
+
+    if layer_types:
+        _config.layer_types = layer_types  # type: ignore[attr-defined]
+
+    cache = transformers.cache_utils.HybridCache(
+        config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
+    )
+    for i, (key, value) in enumerate(key_value_pairs):
+        cache.update(
+            key,
+            value,
+            i,
+            cache_kwargs={
+                "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
+                    key.device
+                )
+            },
+        )
+    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+        # The cache constructor contains the two following lines
+        # (in cache_utils.py) which append empty layers when the cache is
+        # initialized. We need to remove them.
+        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+        # self.append_new_layers(self.num_hidden_layers - 1)
+        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+        f"{len(key_value_pairs)} expected."
+    )
+    return finalize_cache(cache)
+
+
+def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_utils.Cache:
+    """
+    Ensures the created cache is consistent.
+    Returns the cache modified inplace.
+    """
+    if (
+        hasattr(cache, "layer_class_to_replicate")
+        and hasattr(cache, "layers")
+        and cache.layers
+        and not cache.layer_class_to_replicate
+    ):
+        # This is used to expand the cache when it does not contains enough layers.
+        # This is needed since transformers>4.55.3
+        cache.layer_class_to_replicate = cache.layers[0].__class__
     return cache
onnx_diagnostic/helpers/config_helper.py
CHANGED

@@ -1,6 +1,7 @@
 import functools
 import importlib
 import inspect
+import os
 import re
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 import transformers
@@ -110,3 +111,12 @@ def config_class_from_architecture(arch: str, exc: bool = False) -> Optional[type]:
     )
     cls_name = unique.pop()
     return getattr(transformers, cls_name)
+
+
+def default_num_hidden_layers():
+    """
+    Returns the default number of layers.
+    It is lower when the unit tests are running
+    when ``UNITTEST_GOING=1``.
+    """
+    return 2 if os.environ.get("UNITTEST_GOING", "0") == "1" else 4