onnx-diagnostic 0.8.11__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/ci_models/data/Blanca_Lake_Hudak.jpg +0 -0
  3. onnx_diagnostic/ci_models/data/Ice_worm_glacier.jpg +0 -0
  4. onnx_diagnostic/ci_models/data/__init__.py +0 -0
  5. onnx_diagnostic/ci_models/export_phi4_mm.py +8 -3
  6. onnx_diagnostic/export/api.py +11 -0
  7. onnx_diagnostic/export/dynamic_shapes.py +1 -1
  8. onnx_diagnostic/helpers/cache_helper.py +96 -30
  9. onnx_diagnostic/helpers/helper.py +39 -0
  10. onnx_diagnostic/helpers/onnx_helper.py +1 -1
  11. onnx_diagnostic/helpers/ort_session.py +5 -1
  12. onnx_diagnostic/helpers/rt_helper.py +53 -9
  13. onnx_diagnostic/helpers/torch_helper.py +7 -2
  14. onnx_diagnostic/investigate/input_observer.py +793 -152
  15. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +32 -14
  16. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +107 -6
  17. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +13 -3
  18. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +1 -0
  19. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +28 -2
  20. {onnx_diagnostic-0.8.11.dist-info → onnx_diagnostic-0.9.0.dist-info}/METADATA +2 -2
  21. {onnx_diagnostic-0.8.11.dist-info → onnx_diagnostic-0.9.0.dist-info}/RECORD +24 -21
  22. {onnx_diagnostic-0.8.11.dist-info → onnx_diagnostic-0.9.0.dist-info}/WHEEL +1 -1
  23. {onnx_diagnostic-0.8.11.dist-info → onnx_diagnostic-0.9.0.dist-info}/licenses/LICENSE.txt +0 -0
  24. {onnx_diagnostic-0.8.11.dist-info → onnx_diagnostic-0.9.0.dist-info}/top_level.txt +0 -0
@@ -3,5 +3,5 @@ Patches, Investigates onnx models.
3
3
  Functions, classes to dig into a model when this one is right, slow, wrong...
4
4
  """
5
5
 
6
- __version__ = "0.8.11"
6
+ __version__ = "0.9.0"
7
7
  __author__ = "Xavier Dupré"
File without changes
@@ -668,12 +668,17 @@ def get_inputs_for_part(
668
668
  f"{user_prompt}<|image_1|>\n<|image_2|>\n<|image_3|>\n<|image_4|>\n"
669
669
  f"What is shown in these four images?{prompt_suffix}{assistant_prompt}"
670
670
  )
671
- url = "https://img.freepik.com/free-photo/painting-mountain-lake-with-mountain-background_188544-9126.jpg?w=2000"
672
- image_2 = Image.open(requests.get(url, stream=True).raw)
671
+ image_2_path = os.path.join(
672
+ os.path.dirname(__file__), "data", "Blanca_Lake_Hudak.jpg"
673
+ )
674
+ image_2 = Image.open(image_2_path)
673
675
  url = (
674
676
  "https://th.bing.com/th/id/OIP.gCvQ1vmPVJmrq1nnzM3ZHQHaEo?rs=1&pid=ImgDetMain"
675
677
  )
676
- image_3 = Image.open(requests.get(url, stream=True).raw)
678
+ image_3_path = os.path.join(
679
+ os.path.dirname(__file__), "data", "Ice_worm_glacier.jpg"
680
+ )
681
+ image_3 = Image.open(image_3_path)
677
682
 
678
683
  images = [image_1, image_2, image_3, image_4]
679
684
  inputs = processor(prompt, images=images, return_tensors="pt").to(device)
@@ -428,6 +428,16 @@ class WrapperToExportMethodToOnnx(torch.nn.Module):
428
428
  new_kwargs[k] = v
429
429
  return new_kwargs
430
430
 
431
+ def is_empty_cache(self, cache):
432
+ if cache.__class__.__name__ == "DynamicCache" and hasattr(cache, "layers"):
433
+ if len(cache.layers) == 1 and cache.layers[0].keys is None:
434
+ return True
435
+ if len(cache.layers) == 0:
436
+ return True
437
+ if cache is None:
438
+ return True
439
+ return False
440
+
431
441
  def forward(self, *args, **kwargs):
432
442
  if not self._export_done:
433
443
  inp_args = args
@@ -443,6 +453,7 @@ class WrapperToExportMethodToOnnx(torch.nn.Module):
443
453
  if v is not None
444
454
  and (not self.skip_kwargs_names or k not in self.skip_kwargs_names)
445
455
  and not isinstance(v, (bool, int, float))
456
+ and not self.is_empty_cache(v)
446
457
  }
447
458
  )
448
459
  inp_args, inp_kwargs = torch_deepcopy((inp_args, inp_kwargs))
@@ -834,7 +834,7 @@ class ModelInputs:
834
834
  """Guesses the dynamic shapes for one argument."""
835
835
  if len(objs) == 0:
836
836
  return None
837
- set_types = set(type(o) for o in objs)
837
+ set_types = set(type(o) for o in objs if o is not None)
838
838
  assert (
839
839
  len(set_types) == 1
840
840
  ), f"Unexpected variety of input type {set_types}{msg() if msg else ''})"
@@ -4,6 +4,19 @@ import torch
4
4
  import transformers
5
5
  import transformers.cache_utils
6
6
 
7
+ KWARGS_LAYER = {}
8
+ if hasattr(transformers.cache_utils, "DynamicSlidingWindowLayer"):
9
+ KWARGS_LAYER.update(
10
+ {
11
+ transformers.cache_utils.DynamicSlidingWindowLayer: lambda tensor: {
12
+ "sliding_window": tensor.shape[2]
13
+ },
14
+ transformers.cache_utils.StaticSlidingWindowLayer: lambda tensor: {
15
+ "sliding_window": tensor.shape[2]
16
+ },
17
+ }
18
+ )
19
+
7
20
 
8
21
  class CacheKeyValue:
9
22
  """
@@ -185,6 +198,7 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
185
198
  def make_dynamic_cache(
186
199
  key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
187
200
  cls_layers: Optional[Union[str, List[type]]] = None,
201
+ cls_kwargs: Optional[Union[Dict[str, int], List[Dict[str, int]]]] = None,
188
202
  ) -> transformers.cache_utils.DynamicCache:
189
203
  """
190
204
  Creates an instance of :class:`transformers.cache_utils.DynamicCache`.
@@ -194,6 +208,8 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
194
208
  :param cls_layers: to select the appropriate class to use on each layer,
195
209
  if specified, sliding_window is ignored, it can be a string
196
210
  if all layers are expected to follow the same class
211
+ :param cls_kwargs: arguments used to build a specific layer,
212
+ such as ``sliding_window`` for ``DynamicSlidingWindowLayer``
197
213
  :return: :class:`transformers.cache_utils.DynamicCache`
198
214
 
199
215
  Example:
@@ -224,49 +240,70 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
224
240
  are supported.
225
241
  """
226
242
  key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
227
- cls_kwargs = {}
228
243
  if isinstance(cls_layers, str):
229
244
  assert hasattr(
230
245
  transformers.cache_utils, cls_layers
231
- ), f"Unable to find class {cls_layers!r} in transformers.cache_utils"
232
- cls_layer = getattr(transformers.cache_utils, cls_layers)
233
- if cls_layers == "DynamicSlidingWindowLayer":
234
- cls_kwargs["sliding_window"] = key_value_pairs[0][0].shape[2]
235
- assert isinstance(
236
- cls_kwargs["sliding_window"], int
237
- ), f"sliding_window must be an integer but shape={key_value_pairs[0][0].shape}"
238
- elif cls_layers is not None:
239
- unique = set(cls_layers)
240
- assert len(unique) == 1, f"Not implemented when cls_layers={cls_layers}"
241
- cls_layer = unique.pop()
242
- if (
243
- hasattr(transformers.cache_utils, "DynamicSlidingWindowLayer")
244
- and cls_layer == transformers.cache_utils.DynamicSlidingWindowLayer
245
- ):
246
- from .helper import string_type
247
-
248
- assert key_value_pairs and key_value_pairs[0], (
249
- f"not implemented for key_value_pairs="
250
- f"{string_type(key_value_pairs, with_shape=True)}"
251
- )
252
- cls_kwargs["sliding_window"] = key_value_pairs[0][0].shape[2]
253
- assert isinstance(
254
- cls_kwargs["sliding_window"], int
255
- ), f"sliding_window must be an integer but shape={key_value_pairs[0][0].shape}"
246
+ ), f"Missing layer class {cls_layers!r}"
247
+ cls_layers = getattr(transformers.cache_utils, cls_layers)
248
+ if cls_layers and not isinstance(cls_layers, list):
249
+ cls_layers = [cls_layers for _ in key_value_pairs] # type: ignore[misc]
250
+ if cls_layers is not None and isinstance(cls_layers, list):
251
+ assert len(cls_layers) == len(key_value_pairs), (
252
+ f"Length mismatch {len(key_value_pairs)} expected but "
253
+ f"{len(cls_layers)} layer types are given."
254
+ )
255
+ if cls_kwargs is None:
256
+ cls_kwargs = [{} for _kv in key_value_pairs] # type: ignore[assignment]
257
+ assert len(cls_layers) == len(cls_kwargs), (
258
+ f"Length mismatch {len(cls_kwargs)} expected but "
259
+ f"{len(cls_layers)} layer types are given, "
260
+ f"cls_layers={cls_layers}, cls_kwargs={cls_kwargs}"
261
+ )
262
+ cls_layer = None
263
+ assert (
264
+ key_value_pairs and key_value_pairs[0]
265
+ ), f"not implemented for type(key_value_pairs[0])={type(key_value_pairs[0])}"
266
+ for kv, clsy, kws in zip(key_value_pairs, cls_layers, cls_kwargs):
267
+ default_values = KWARGS_LAYER.get(clsy, lambda tensor: {})(kv[0])
268
+ for k, v in default_values.items():
269
+ if k not in kws:
270
+ kws[k] = v # type: ignore[index]
256
271
  else:
272
+ assert cls_kwargs is None, "cls_layers must be a list if cls_kwargs is specified"
273
+ assert (
274
+ cls_layers is None
275
+ ), f"cls_layers must be list or a string but it is {cls_layers}"
276
+ cls_kwargs = {}
257
277
  cls_layer = (
258
278
  transformers.cache_utils.DynamicLayer
259
279
  if hasattr(transformers.cache_utils, "DynamicLayer")
260
280
  else None
261
281
  )
262
282
 
283
+ if cls_layer is not None:
284
+ assert isinstance(cls_kwargs, dict), (
285
+ f"one layer = one set of arguments, cls_layer={cls_layer}, "
286
+ f"cls_kwargs={cls_kwargs}"
287
+ )
288
+ cls_layers = [cls_layer for _ in key_value_pairs]
289
+ cls_kwargs = (
290
+ cls_kwargs # type: ignore[assignment]
291
+ if isinstance(cls_kwargs, list)
292
+ else [cls_kwargs for _ in key_value_pairs]
293
+ )
294
+ elif cls_layers is not None:
295
+ assert isinstance(cls_layers, list), f"Unexpected type cls_layers={cls_layers}"
296
+ assert isinstance(cls_kwargs, list), f"Unexpected type cls_kwargs={cls_kwargs}"
297
+
263
298
  if (
264
299
  key_value_pairs
265
300
  and isinstance(key_value_pairs[0][0], torch._subclasses.fake_tensor.FakeTensor)
266
301
  and pv.Version(transformers.__version__) >= pv.Version("4.56")
267
302
  ):
268
303
  cache = transformers.cache_utils.DynamicCache()
269
- cache.layers.extend([cls_layer(**cls_kwargs) for _ in key_value_pairs])
304
+ cache.layers.extend(
305
+ [cls_layer(**kws) for cls_layer, kws in zip(cls_layers, cls_kwargs)] # type: ignore[operator, arg-type]
306
+ )
270
307
  for i, layer in enumerate(cache.layers):
271
308
  k, v = key_value_pairs[i][0], key_value_pairs[i][1]
272
309
  layer.dtype = k.dtype
@@ -281,8 +318,25 @@ if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):
281
318
  return finalize_cache(cache)
282
319
 
283
320
  cache = transformers.cache_utils.DynamicCache()
284
- if hasattr(cache, "layers") and cls_layer != transformers.cache_utils.DynamicLayer:
285
- cache.layers.extend([cls_layer(**cls_kwargs) for _ in key_value_pairs])
321
+ if hasattr(cache, "layers") and (
322
+ cls_layer is None or cls_layer != transformers.cache_utils.DynamicLayer
323
+ ):
324
+ assert isinstance(cls_layers, list) and isinstance(cls_kwargs, list), (
325
+ f"Wrong type {type(cls_layers)} for cls_layers or "
326
+ f"{type(cls_kwargs)} for cls_kwargs"
327
+ )
328
+ assert len(cls_kwargs) == len(cls_layers) and len(cls_kwargs) == len(
329
+ key_value_pairs
330
+ ), (
331
+ f"Length mismatch between len(cls_kwargs)={len(cls_kwargs)}, "
332
+ f"len(cls_layers)={len(cls_layers)}, "
333
+ f"len(key_value_pairs)={len(key_value_pairs)}, "
334
+ f"cls_kwargs={cls_kwargs}, cls_layers={cls_layers}"
335
+ )
336
+ del cache.layers[:]
337
+ cache.layers.extend(
338
+ [cls_layer(**kws) for cls_layer, kws in zip(cls_layers, cls_kwargs)] # type: ignore[operator, arg-type]
339
+ )
286
340
  for i, layer in enumerate(cache.layers):
287
341
  layer.keys, layer.values = key_value_pairs[i][0], key_value_pairs[i][1]
288
342
  layer.is_initialized = True
@@ -306,6 +360,7 @@ else:
306
360
  def make_dynamic_cache(
307
361
  key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
308
362
  cls_layers: Optional[Union[str, List[type]]] = None,
363
+ cls_kwargs: Optional[Union[Dict[str, int], List[Dict[str, int]]]] = None,
309
364
  ) -> transformers.cache_utils.DynamicCache:
310
365
  """
311
366
  Creates an instance of :class:`transformers.cache_utils.DynamicCache`.
@@ -337,7 +392,9 @@ else:
337
392
  )
338
393
  print(string_type(past_key_values, with_shape=True))
339
394
  """
340
- assert not cls_layers, "cls_layers cannot be used for transformers<5."
395
+ assert (
396
+ not cls_layers and not cls_kwargs
397
+ ), "cls_layers, cls_kwargs cannot be used for transformers<5."
341
398
  key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
342
399
  cache = transformers.cache_utils.DynamicCache(len(key_value_pairs)) # type: ignore
343
400
  for i, (key, value) in enumerate(key_value_pairs):
@@ -775,4 +832,13 @@ def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_
775
832
  # This is used to expand the cache when it does not contain enough layers.
776
833
  # This is needed since transformers>4.55.3
777
834
  cache.layer_class_to_replicate = cache.layers[0].__class__
835
+ assert (
836
+ not hasattr(cache, "layers")
837
+ or len(cache.layers) != 1
838
+ or cache.layers[0].keys is not None
839
+ ), (
840
+ f"Size mismatch between {len(cache.layers)=}, "
841
+ f"first key={cache.layers[0].keys}, " # type: ignore[attr-defined]
842
+ f"first value={cache.layers[0].values}" # type: ignore[attr-defined]
843
+ )
778
844
  return cache
@@ -574,6 +574,32 @@ def string_type(
574
574
  print(f"[string_type] CACHE1:{type(obj)}")
575
575
  return f"MambaCache(conv_states={c}, ssm_states={d})"
576
576
 
577
+ if (
578
+ obj.__class__.__name__ in {"DynamicCache"}
579
+ and hasattr(obj, "layers")
580
+ and any(lay.__class__.__name__ != "DynamicLayer" for lay in obj.layers)
581
+ ):
582
+ slay = []
583
+ for lay in obj.layers:
584
+ skeys = string_type(
585
+ lay.keys,
586
+ with_shape=with_shape,
587
+ with_min_max=with_min_max,
588
+ with_device=with_device,
589
+ limit=limit,
590
+ verbose=verbose,
591
+ )
592
+ svalues = string_type(
593
+ lay.keys,
594
+ with_shape=with_shape,
595
+ with_min_max=with_min_max,
596
+ with_device=with_device,
597
+ limit=limit,
598
+ verbose=verbose,
599
+ )
600
+ slay.append(f"{lay.__class__.__name__}({skeys}, {svalues})")
601
+ return f"{obj.__class__.__name__}({', '.join(slay)})"
602
+
577
603
  if obj.__class__.__name__ in {
578
604
  "DynamicCache",
579
605
  "SlidingWindowCache",
@@ -829,6 +855,19 @@ def string_type(
829
855
  return f"{obj}"
830
856
  if obj.__class__.__name__ == "FakeTensorContext":
831
857
  return "FakeTensorContext(...)"
858
+ if obj.__class__.__name__ == "Chat":
859
+ import transformers.utils.chat_template_utils as ctu
860
+
861
+ assert isinstance(obj, ctu.Chat), f"unexpected type {type(obj)}"
862
+ msg = string_type(
863
+ obj.messages,
864
+ with_shape=with_shape,
865
+ with_min_max=with_min_max,
866
+ with_device=with_device,
867
+ limit=limit,
868
+ verbose=verbose,
869
+ )
870
+ return f"Chat({msg})"
832
871
 
833
872
  if verbose:
834
873
  print(f"[string_type] END:{type(obj)}")
@@ -1742,7 +1742,7 @@ def _find_used_names(node_list, node_indices):
1742
1742
  possible_outputs |= {o for o in node_list[i_node].output if o}
1743
1743
  # find all requires input from the other nodes
1744
1744
  set_indices = set(node_indices)
1745
- not_known: Set[str] = set()
1745
+ not_known = set()
1746
1746
  ranges = list(range(len(node_list)))
1747
1747
  for i_node in ranges[::-1]:
1748
1748
  if i_node in set_indices:
@@ -6,7 +6,7 @@ import torch
6
6
  from torch._C import _from_dlpack
7
7
  import onnxruntime
8
8
  from onnxruntime.capi import _pybind_state as ORTC
9
- from .helper import size_type
9
+ from .helper import size_type, string_type
10
10
  from .onnx_helper import (
11
11
  onnx_dtype_to_np_dtype,
12
12
  np_dtype_to_tensor_dtype,
@@ -511,6 +511,10 @@ class InferenceSessionForTorch(_InferenceSession):
511
511
  device = -1
512
512
  for k, v in feeds.items():
513
513
  assert k != "", f"Input cannot be empty but feeds names={list(feeds)}"
514
+ assert hasattr(v, "device"), (
515
+ f"Unexpected class {type(v)} for input {k!r}, "
516
+ f"feeds={string_type(feeds, with_shape=True)}"
517
+ )
514
518
  device = max(device, v.get_device())
515
519
  assert hasattr(v, "__dlpack__"), f"class {type(v)} should be serialized"
516
520
  if not v.is_contiguous():
@@ -115,7 +115,7 @@ def make_feeds(
115
115
  def _get_dim(i: int, s: Union[str, int], batch: int = 1) -> int:
116
116
  if isinstance(s, int):
117
117
  return s
118
- if s == "batch":
118
+ if s == "batch" or i == 0:
119
119
  return batch
120
120
  # Everything else is cache length or sequence length.
121
121
  return 0
@@ -153,9 +153,13 @@ def make_empty_cache(
153
153
  [i.type for i in sess.get_inputs()[2:]],
154
154
  )
155
155
  """
156
+ assert batch > 0, f"batch size = {batch} must be positive"
156
157
  feeds = {}
157
158
  for name, shape, dtype in zip(onnx_input_names, onnx_input_shapes, onnx_input_types):
158
159
  new_shape = tuple(_get_dim(i, s, batch=batch) for i, s in enumerate(shape))
160
+ assert (
161
+ new_shape and new_shape[0] > 0
162
+ ), f"new_shape={new_shape} cannot have a null batch size, name={name!r}, shape={shape}"
159
163
  feeds[name] = torch.empty(new_shape, dtype=rt_type_to_torch_dtype(dtype))
160
164
  return feeds
161
165
 
@@ -272,6 +276,7 @@ def generate_and_validate(
272
276
  def onnx_generate(
273
277
  model_or_path: Union[onnx.ModelProto, str, InferenceSessionForTorch],
274
278
  input_ids: torch.Tensor,
279
+ attention_mask: Optional[torch.Tensor] = None,
275
280
  eos_token_id: int = 2,
276
281
  max_new_tokens=100,
277
282
  return_session: bool = False,
@@ -330,7 +335,9 @@ def onnx_generate(
330
335
  )
331
336
 
332
337
  print("-- generate with onnx")
333
- onnx_outputs = onnx_generate(model_name, input_ids[:1], 2, max_new_tokens=10)
338
+ onnx_outputs = onnx_generate(
339
+ model_name, input_ids[:1], eos_token_id=2, max_new_tokens=10
340
+ )
334
341
  print("-- onnx output", onnx_outputs)
335
342
 
336
343
  # The example continues with other functions doing the same.
@@ -364,6 +371,7 @@ def onnx_generate(
364
371
  input_names = session.input_names
365
372
  input_types = session.input_types
366
373
  has_position_ids = "position_ids" in session.input_names
374
+ has_cache_position = "cache_position" in session.input_names
367
375
 
368
376
  assert (
369
377
  len(input_names) > 2
@@ -377,21 +385,46 @@ def onnx_generate(
377
385
  not has_position_ids or input_names[2] == "position_ids"
378
386
  ), f"position_ids must be the third input but input_names={input_names}"
379
387
 
388
+ cache_names, cache_shapes, cache_types = [], [], []
389
+ for name, shape, dt in zip(input_names, input_shapes, input_types):
390
+ if name.startswith("past_key_values"):
391
+ cache_names.append(name)
392
+ cache_shapes.append(shape)
393
+ cache_types.append(dt)
394
+
380
395
  # First call: prefill
396
+ empty_cache = make_empty_cache(input_ids.shape[0], cache_names, cache_shapes, cache_types)
381
397
  feeds = dict(
382
398
  input_ids=input_ids,
383
- attention_mask=torch.ones(
384
- input_ids.shape, dtype=input_ids.dtype, device=input_ids.device
385
- ),
386
- **make_empty_cache(
387
- input_ids.shape[0], input_names[2:], input_shapes[2:], input_types[2:]
399
+ attention_mask=(
400
+ attention_mask
401
+ if attention_mask is not None
402
+ else torch.ones(input_ids.shape, dtype=input_ids.dtype, device=input_ids.device)
388
403
  ),
404
+ **empty_cache,
389
405
  )
406
+
390
407
  if has_position_ids:
391
- feeds["position_ids"] = torch.unsqueeze(
408
+ assert (
409
+ input_ids.shape[1] > 0
410
+ ), f"unexpected value for input_ids shape={input_ids.shape}"
411
+ position_ids = torch.unsqueeze(
392
412
  torch.arange(input_ids.shape[1], dtype=torch.int64, device=input_ids.device), 0
393
413
  )
414
+ feeds["position_ids"] = position_ids
415
+
416
+ if has_cache_position:
417
+ assert empty_cache, "no cache means no cache_position"
418
+ first_tensor = next(iter(empty_cache.values()))
419
+ cache_position = torch.arange(
420
+ first_tensor.shape[2],
421
+ input_ids.shape[1] + first_tensor.shape[2],
422
+ dtype=torch.int64,
423
+ device=input_ids.device,
424
+ )
425
+ feeds["cache_position"] = cache_position
394
426
 
427
+ # prefill step
395
428
  outputs = session.run(None, feeds)
396
429
 
397
430
  # Next calls: decode
@@ -424,7 +457,18 @@ def onnx_generate(
424
457
  ),
425
458
  0,
426
459
  )
427
- feeds.update(dict(zip(input_names[3 if has_position_ids else 2 :], outputs[1:])))
460
+ if has_cache_position:
461
+ feeds["cache_position"] = torch.arange(
462
+ input_ids.shape[1],
463
+ input_ids.shape[1] + 1,
464
+ dtype=torch.int64,
465
+ device=input_ids.device,
466
+ )
467
+
468
+ feeds.update(
469
+ dict(zip([n for n in input_names if n.startswith("past_key_values")], outputs[1:]))
470
+ )
471
+ # generate/decoding step
428
472
  outputs = session.run(None, feeds)
429
473
 
430
474
  if return_session:
@@ -851,9 +851,14 @@ def torch_deepcopy(value: Any) -> Any:
851
851
  from .cache_helper import CacheKeyValue
852
852
 
853
853
  ca = CacheKeyValue(value)
854
- return make_dynamic_cache(
855
- torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))), cls_layers=ca.cls_layers
854
+ pairs = list(zip(ca.key_cache, ca.value_cache))
855
+ assert not hasattr(value, "layers") or len(value.layers) == len(pairs), (
856
+ f"Size mismatch between {len(value.layers)=} and {len(pairs)=}. "
857
+ f"value={string_type(value, with_shape=True)}, "
858
+ f"first key={value.layers[0].keys}, "
859
+ f"first value={value.layers[0].values}"
856
860
  )
861
+ return make_dynamic_cache(torch_deepcopy(pairs), cls_layers=ca.cls_layers)
857
862
  if value.__class__.__name__ == "StaticCache":
858
863
  from .cache_helper import CacheKeyValue
859
864