onnx-diagnostic 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry, and is provided for informational purposes only. Note that some deleted lines appear truncated in this rendering.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +66 -8
- onnx_diagnostic/ext_test_case.py +2 -0
- onnx_diagnostic/helpers/_log_helper.py +461 -0
- onnx_diagnostic/helpers/cache_helper.py +250 -15
- onnx_diagnostic/helpers/helper.py +146 -10
- onnx_diagnostic/helpers/log_helper.py +404 -315
- onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
- onnx_diagnostic/helpers/onnx_helper.py +13 -7
- onnx_diagnostic/helpers/torch_helper.py +33 -11
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/feature_extraction.py +86 -5
- onnx_diagnostic/tasks/image_text_to_text.py +260 -56
- onnx_diagnostic/tasks/mask_generation.py +139 -0
- onnx_diagnostic/tasks/text2text_generation.py +2 -2
- onnx_diagnostic/tasks/text_generation.py +6 -2
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +7 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
- onnx_diagnostic/torch_export_patches/patch_inputs.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +397 -128
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +57 -40
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +5 -0
- onnx_diagnostic/torch_models/validate.py +26 -3
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/RECORD +29 -27
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/top_level.txt +0 -0
onnx_diagnostic/helpers/mini_onnx_builder.py (+7 -2)

```diff
@@ -367,10 +367,12 @@ def _flatten_iterator(obj: Any, sep: str) -> Iterator:
     elif obj.__class__.__name__ == "DynamicCache":
         # transformers
         import transformers
+        from .cache_helper import CacheKeyValue
 
         assert isinstance(
             obj, transformers.cache_utils.DynamicCache
         ), f"Unexpected type {type(obj)}"
+        obj = CacheKeyValue(obj)
         atts = ["key_cache", "value_cache"]
         for i, att in enumerate(atts):
             if i == len(atts) - 1:
@@ -494,9 +496,12 @@ def _unflatten(
 
 def _make(ty: type, res: Any) -> Any:
     if ty.__name__ == "DynamicCache":
-
+        from .cache_helper import CacheKeyValue
+
+        cc = CacheKeyValue()
         for k, v in res:
-            setattr(
+            setattr(cc, k, v)
+        r = cc.make_dynamic_cache()
         return r
     if ty is dict:
         d = {}
```
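Both hunks stop touching `key_cache`/`value_cache` on the `DynamicCache` object directly and go through the new `CacheKeyValue` wrapper instead. Recent `transformers` releases reorganized `DynamicCache` internals (per-layer objects rather than two flat tensor lists), so an adapter keeps the flatten/unflatten code version-agnostic. Below is a minimal sketch of what such a wrapper can look like; the real class lives in `onnx_diagnostic.helpers.cache_helper`, and the `layers`/`keys`/`values` attribute names are assumptions about `transformers` internals, not taken from this diff.

```python
from typing import List, Optional
import torch


class CacheKeyValueSketch:
    """Sketch only: normalizes access to a DynamicCache whose internal
    layout depends on the installed transformers version."""

    def __init__(self, cache: Optional[object] = None):
        self.key_cache: List[torch.Tensor] = []
        self.value_cache: List[torch.Tensor] = []
        if cache is None:
            return
        if hasattr(cache, "layers"):  # assumed: newer transformers, per-layer objects
            self.key_cache = [layer.keys for layer in cache.layers]
            self.value_cache = [layer.values for layer in cache.layers]
        else:  # older transformers: flat lists on the cache itself
            self.key_cache = list(cache.key_cache)
            self.value_cache = list(cache.value_cache)

    def make_dynamic_cache(self):
        """Rebuild a genuine DynamicCache from the collected tensors."""
        import transformers

        cache = transformers.cache_utils.DynamicCache()
        for i, (k, v) in enumerate(zip(self.key_cache, self.value_cache)):
            cache.update(k, v, i)
        return cache
```

The `_make` hunk uses the same wrapper in the opposite direction: attributes are collected on an empty `CacheKeyValue` and `make_dynamic_cache()` turns them back into a real `DynamicCache` at the end.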
onnx_diagnostic/helpers/onnx_helper.py (+13 -7)

```diff
@@ -540,13 +540,19 @@ def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> TensorProto:
         ), f"Unable to convert type {type(tensor)} into TensorProto."
         return proto_from_tensor(tensor, name=name)
 
-
-
-
-
-
-
-
+    try:
+        from onnx.reference.ops.op_cast import (
+            bfloat16,
+            float8e4m3fn,
+            float8e4m3fnuz,
+            float8e5m2,
+            float8e5m2fnuz,
+        )
+    except ImportError:
+        bfloat16 = None
+
+    if bfloat16 is None:
+        return onh.from_array(tensor, name)
 
     dt = tensor.dtype
     if dt == float8e4m3fn and dt.descr[0][0] == "e4m3fn":
```
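The rewritten block degrades gracefully when `onnx.reference` does not expose the extended dtypes: if the import fails, `bfloat16` is set to `None` and the function falls back to onnx's stock `onh.from_array`. Here is the same guarded-import pattern in isolation, as a sketch (`to_tensor_proto` is a hypothetical stand-in for the real function):

```python
import numpy as np
import onnx.numpy_helper as onh

try:
    # optional: only present when onnx ships the extended dtypes
    from onnx.reference.ops.op_cast import bfloat16
except ImportError:
    bfloat16 = None


def to_tensor_proto(array: np.ndarray, name: str):
    """Hypothetical helper mirroring the fallback in from_array_extended."""
    if bfloat16 is None:
        # extended dtypes unavailable: defer to onnx's stock converter
        return onh.from_array(array, name)
    ...  # handle bfloat16/float8 variants explicitly, as the real function does
```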
onnx_diagnostic/helpers/torch_helper.py (+33 -11)

```diff
@@ -14,9 +14,11 @@ from .helper import string_type, size_type
 from .cache_helper import (
     make_dynamic_cache,
     make_encoder_decoder_cache,
+    make_hybrid_cache,
     make_sliding_window_cache,
     make_mamba_cache,
     make_static_cache,
+    CacheKeyValue,
 )
 from .mini_onnx_builder import create_onnx_model_from_input_tensors
 from .onnx_helper import (
@@ -720,20 +722,22 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
     if type(value) is dict:
         return {k: to_any(t, to_value) for k, t in value.items()}
     if value.__class__.__name__ == "DynamicCache":
+        cc = CacheKeyValue(value)
         return make_dynamic_cache(
             list(
                 zip(
-                    [t.to(to_value) for t in
-                    [t.to(to_value) for t in
+                    [t.to(to_value) if t is not None else t for t in cc.key_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.value_cache],
                 )
             )
         )
     if value.__class__.__name__ == "StaticCache":
+        cc = CacheKeyValue(value)
         return make_static_cache(
             list(
                 zip(
-                    [t.to(to_value) for t in
-                    [t.to(to_value) for t in
+                    [t.to(to_value) if t is not None else t for t in cc.key_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.value_cache],
                 )
             ),
             max_cache_len=value.max_cache_len,
```
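`to_any` now rebuilds `DynamicCache` and `StaticCache` through `CacheKeyValue` and tolerates `None` entries, which newer `transformers` versions can store for layers that have not been filled yet. A usage sketch, with module paths taken from this diff and arbitrary shapes:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import to_any

# a two-layer cache of (batch, heads, seq, head_dim) tensors
cache = make_dynamic_cache(
    [(torch.randn(2, 4, 3, 8), torch.randn(2, 4, 3, 8)) for _ in range(2)]
)
cache_fp16 = to_any(cache, torch.float16)  # every key/value tensor cast to fp16
```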
Still in onnx_diagnostic/helpers/torch_helper.py:

```diff
@@ -781,17 +785,29 @@ def torch_deepcopy(value: Any) -> Any:
     if hasattr(value, "clone"):
         return value.clone()
     if value.__class__.__name__ == "DynamicCache":
-
-
-        )
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
+        return make_dynamic_cache(torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))))
     if value.__class__.__name__ == "StaticCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
         return make_static_cache(
-            torch_deepcopy(list(zip(
+            torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))),
             max_cache_len=value.max_cache_len,
         )
+    if value.__class__.__name__ == "HybridCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
+        return make_hybrid_cache(torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))))
     if value.__class__.__name__ == "SlidingWindowCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
         return make_sliding_window_cache(
-            torch_deepcopy(list(zip(
+            torch_deepcopy(list(zip(ca.key_cache, ca.value_cache)))
         )
     if value.__class__.__name__ == "EncoderDecoderCache":
         return make_encoder_decoder_cache(
```
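`torch_deepcopy` gains `HybridCache` support and routes every cache flavor through `CacheKeyValue`, so copies are real tensor clones. A quick independence check under the same assumptions:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, CacheKeyValue
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy

cache = make_dynamic_cache([(torch.zeros(1, 2, 3, 4), torch.zeros(1, 2, 3, 4))])
copy = torch_deepcopy(cache)
# the copy owns its own storage: mutating it leaves the original untouched
CacheKeyValue(copy).key_cache[0].add_(1)
assert CacheKeyValue(cache).key_cache[0].sum().item() == 0
```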
Still in onnx_diagnostic/helpers/torch_helper.py:

```diff
@@ -825,8 +841,14 @@ def torch_tensor_size(value: Any) -> Any:
         return value.copy()
     if hasattr(value, "clone"):
         return value.numel() * size_type(value.dtype)
-    if value.__class__.__name__ in {
-
+    if value.__class__.__name__ in {
+        "DynamicCache",
+        "SlidingWindowCache",
+        "HybridCache",
+        "StaticCache",
+    }:
+        cc = CacheKeyValue(value)
+        return torch_tensor_size(cc.key_cache) + torch_tensor_size(cc.value_cache)
     if value.__class__.__name__ == "EncoderDecoderCache":
         return torch_tensor_size(value.self_attention_cache) + torch_tensor_size(
             value.cross_attention_cache
```
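`torch_tensor_size` now recognizes all four cache classes and sums the sizes of their key and value tensors. A worked check, assuming the returned unit is bytes (the `value.numel() * size_type(value.dtype)` branch suggests element count times bytes per element):

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import torch_tensor_size

cache = make_dynamic_cache(
    [(torch.randn(2, 4, 3, 8), torch.randn(2, 4, 3, 8)) for _ in range(2)]
)
# 2 layers * 2 tensors * (2*4*3*8 = 192) elements * 4 bytes (float32) = 3072
assert torch_tensor_size(cache) == 3072
```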
onnx_diagnostic/tasks/__init__.py (+2 -0)

```diff
@@ -14,6 +14,7 @@ from . import (
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
+    mask_generation,
 )
 
 __TASKS__ = [
@@ -31,6 +32,7 @@ __TASKS__ = [
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
+    mask_generation,
 ]
 
 
```
onnx_diagnostic/tasks/feature_extraction.py (+86 -5)

```diff
@@ -1,17 +1,15 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 from ..helpers.config_helper import update_config, check_hasattr
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 
 __TASK__ = "feature-extraction"
 
 
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "
-    kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers, 2),
-        num_attention_heads=min(config.num_attention_heads, 4),
-    )
+    check_hasattr(config, "num_hidden_layers")
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, 2))
     update_config(config, kwargs)
     return kwargs
 
@@ -22,6 +20,12 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
+    sequence_length2: int = 3,
+    decoder_attention_heads: Optional[int] = None,
+    encoder_attention_heads: Optional[int] = None,
+    encoder_ffn_dim: Optional[int] = None,
+    decoder_ffn_dim: Optional[int] = None,
+    num_hidden_layers: Optional[int] = None,
     add_second_input: int = 1,
     **kwargs,  # unused
 ):
@@ -50,6 +54,66 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
+    if (
+        encoder_attention_heads
+        and decoder_attention_heads
+        and encoder_ffn_dim
+        and decoder_ffn_dim
+        and num_hidden_layers
+    ):
+        inputs["past_key_values"] = make_encoder_decoder_cache(
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+        )
+        cache_length = "cache_length_key"
+        cache_length2 = "cache_length_val"
+        shapes["past_key_values"] = [  # type: ignore[assignment]
+            [
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            [
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+            ],
+        ]
+
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         assert (
@@ -61,6 +125,12 @@ def get_inputs(
         batch_size=batch_size + 1,
         sequence_length=sequence_length + add_second_input,
         dummy_max_token_id=dummy_max_token_id,
+        sequence_length2=sequence_length2,
+        decoder_attention_heads=decoder_attention_heads,
+        encoder_attention_heads=encoder_attention_heads,
+        encoder_ffn_dim=encoder_ffn_dim,
+        decoder_ffn_dim=decoder_ffn_dim,
+        num_hidden_layers=num_hidden_layers,
         add_second_input=0,
         **kwargs,
     )["inputs"]
@@ -80,4 +150,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         sequence_length=30,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
+    for att in [
+        "decoder_attention_heads",
+        "encoder_attention_heads",
+        "encoder_ffn_dim",
+        "decoder_ffn_dim",
+        "num_hidden_layers",
+    ]:
+        if hasattr(config, att):
+            kwargs[att] = getattr(config, att)
+    kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
+    print(kwargs)
     return kwargs, get_inputs
```
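Taken together, these hunks let encoder-decoder feature-extraction models receive a populated `past_key_values` (an `EncoderDecoderCache` built from two `DynamicCache`s) plus a matching nested dynamic-shapes spec; `random_input_kwargs` harvests the geometry from the config and then pins both ffn dims to 64 (the trailing `print(kwargs)` ships in 0.7.6 as-is and looks like a debug leftover). A hedged sketch of exercising the new branch directly; the parameters before `batch_size` are not visible in this diff, so the two leading `None` arguments are guesses:

```python
from onnx_diagnostic.tasks.feature_extraction import get_inputs

data = get_inputs(
    None,  # model: assumed leading parameter, not shown in the hunks
    None,  # config: assumed leading parameter, not shown in the hunks
    batch_size=2,
    sequence_length=30,
    dummy_max_token_id=31999,
    sequence_length2=3,
    encoder_attention_heads=4,
    decoder_attention_heads=4,
    encoder_ffn_dim=64,
    decoder_ffn_dim=64,
    num_hidden_layers=2,
    add_second_input=0,  # skip the second-input variant
)
inputs = data["inputs"]          # has "past_key_values" since all five geometry args are set
shapes = data["dynamic_shapes"]  # matching nested spec for the cache tensors
```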