onnx-diagnostic 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry, and is provided for informational purposes only. Note that some deleted lines appear truncated in this rendering.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +66 -8
- onnx_diagnostic/ext_test_case.py +2 -0
- onnx_diagnostic/helpers/_log_helper.py +461 -0
- onnx_diagnostic/helpers/cache_helper.py +250 -15
- onnx_diagnostic/helpers/helper.py +146 -10
- onnx_diagnostic/helpers/log_helper.py +404 -315
- onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
- onnx_diagnostic/helpers/onnx_helper.py +13 -7
- onnx_diagnostic/helpers/torch_helper.py +33 -11
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/feature_extraction.py +86 -5
- onnx_diagnostic/tasks/image_text_to_text.py +260 -56
- onnx_diagnostic/tasks/mask_generation.py +139 -0
- onnx_diagnostic/tasks/text2text_generation.py +2 -2
- onnx_diagnostic/tasks/text_generation.py +6 -2
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +7 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
- onnx_diagnostic/torch_export_patches/patch_inputs.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +397 -128
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +57 -40
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +5 -0
- onnx_diagnostic/torch_models/validate.py +26 -3
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/RECORD +29 -27
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/top_level.txt +0 -0
onnx_diagnostic/helpers/mini_onnx_builder.py (+7 -2)

```diff
@@ -367,10 +367,12 @@ def _flatten_iterator(obj: Any, sep: str) -> Iterator:
     elif obj.__class__.__name__ == "DynamicCache":
         # transformers
         import transformers
+        from .cache_helper import CacheKeyValue
 
         assert isinstance(
             obj, transformers.cache_utils.DynamicCache
         ), f"Unexpected type {type(obj)}"
+        obj = CacheKeyValue(obj)
         atts = ["key_cache", "value_cache"]
         for i, att in enumerate(atts):
             if i == len(atts) - 1:
@@ -494,9 +496,12 @@ def _unflatten(
 
 def _make(ty: type, res: Any) -> Any:
     if ty.__name__ == "DynamicCache":
-
+        from .cache_helper import CacheKeyValue
+
+        cc = CacheKeyValue()
         for k, v in res:
-            setattr(
+            setattr(cc, k, v)
+        r = cc.make_dynamic_cache()
         return r
     if ty is dict:
         d = {}
```
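Both hunks stop touching `key_cache`/`value_cache` on the `DynamicCache` object directly and go through the new `CacheKeyValue` wrapper instead. Recent `transformers` releases reorganized `DynamicCache` internals (per-layer objects rather than two flat tensor lists), so an adapter keeps the flatten/unflatten code version-agnostic. Below is a minimal sketch of what such a wrapper can look like; the real class lives in `onnx_diagnostic.helpers.cache_helper`, and the `layers`/`keys`/`values` attribute names are assumptions about `transformers` internals, not taken from this diff.

```python
from typing import List, Optional
import torch


class CacheKeyValueSketch:
    """Sketch only: normalizes access to a DynamicCache whose internal
    layout depends on the installed transformers version."""

    def __init__(self, cache: Optional[object] = None):
        self.key_cache: List[torch.Tensor] = []
        self.value_cache: List[torch.Tensor] = []
        if cache is None:
            return
        if hasattr(cache, "layers"):  # assumed: newer transformers, per-layer objects
            self.key_cache = [layer.keys for layer in cache.layers]
            self.value_cache = [layer.values for layer in cache.layers]
        else:  # older transformers: flat lists on the cache itself
            self.key_cache = list(cache.key_cache)
            self.value_cache = list(cache.value_cache)

    def make_dynamic_cache(self):
        """Rebuild a genuine DynamicCache from the collected tensors."""
        import transformers

        cache = transformers.cache_utils.DynamicCache()
        for i, (k, v) in enumerate(zip(self.key_cache, self.value_cache)):
            cache.update(k, v, i)
        return cache
```

The `_make` hunk uses the same wrapper in the opposite direction: attributes are collected on an empty `CacheKeyValue` and `make_dynamic_cache()` turns them back into a real `DynamicCache` at the end.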
onnx_diagnostic/helpers/onnx_helper.py (+13 -7)

```diff
@@ -540,13 +540,19 @@ def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> TensorProto:
         ), f"Unable to convert type {type(tensor)} into TensorProto."
         return proto_from_tensor(tensor, name=name)
 
-
-
-
-
-
-
-
+    try:
+        from onnx.reference.ops.op_cast import (
+            bfloat16,
+            float8e4m3fn,
+            float8e4m3fnuz,
+            float8e5m2,
+            float8e5m2fnuz,
+        )
+    except ImportError:
+        bfloat16 = None
+
+    if bfloat16 is None:
+        return onh.from_array(tensor, name)
 
     dt = tensor.dtype
     if dt == float8e4m3fn and dt.descr[0][0] == "e4m3fn":
```
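The rewritten block degrades gracefully when `onnx.reference` does not expose the extended dtypes: if the import fails, `bfloat16` is set to `None` and the function falls back to onnx's stock `onh.from_array`. Here is the same guarded-import pattern in isolation, as a sketch (`to_tensor_proto` is a hypothetical stand-in for the real function):

```python
import numpy as np
import onnx.numpy_helper as onh

try:
    # optional: only present when onnx ships the extended dtypes
    from onnx.reference.ops.op_cast import bfloat16
except ImportError:
    bfloat16 = None


def to_tensor_proto(array: np.ndarray, name: str):
    """Hypothetical helper mirroring the fallback in from_array_extended."""
    if bfloat16 is None:
        # extended dtypes unavailable: defer to onnx's stock converter
        return onh.from_array(array, name)
    ...  # handle bfloat16/float8 variants explicitly, as the real function does
```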
onnx_diagnostic/helpers/torch_helper.py (+33 -11)

```diff
@@ -14,9 +14,11 @@ from .helper import string_type, size_type
 from .cache_helper import (
     make_dynamic_cache,
     make_encoder_decoder_cache,
+    make_hybrid_cache,
     make_sliding_window_cache,
     make_mamba_cache,
     make_static_cache,
+    CacheKeyValue,
 )
 from .mini_onnx_builder import create_onnx_model_from_input_tensors
 from .onnx_helper import (
@@ -720,20 +722,22 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
     if type(value) is dict:
         return {k: to_any(t, to_value) for k, t in value.items()}
     if value.__class__.__name__ == "DynamicCache":
+        cc = CacheKeyValue(value)
         return make_dynamic_cache(
             list(
                 zip(
-                    [t.to(to_value) for t in
-                    [t.to(to_value) for t in
+                    [t.to(to_value) if t is not None else t for t in cc.key_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.value_cache],
                 )
             )
         )
     if value.__class__.__name__ == "StaticCache":
+        cc = CacheKeyValue(value)
         return make_static_cache(
             list(
                 zip(
-                    [t.to(to_value) for t in
-                    [t.to(to_value) for t in
+                    [t.to(to_value) if t is not None else t for t in cc.key_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.value_cache],
                 )
             ),
             max_cache_len=value.max_cache_len,
```
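`to_any` now rebuilds `DynamicCache` and `StaticCache` through `CacheKeyValue` and tolerates `None` entries, which newer `transformers` versions can store for layers that have not been filled yet. A usage sketch, with module paths taken from this diff and arbitrary shapes:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import to_any

# a two-layer cache of (batch, heads, seq, head_dim) tensors
cache = make_dynamic_cache(
    [(torch.randn(2, 4, 3, 8), torch.randn(2, 4, 3, 8)) for _ in range(2)]
)
cache_fp16 = to_any(cache, torch.float16)  # every key/value tensor cast to fp16
```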
Still in onnx_diagnostic/helpers/torch_helper.py:

```diff
@@ -781,17 +785,29 @@ def torch_deepcopy(value: Any) -> Any:
     if hasattr(value, "clone"):
         return value.clone()
     if value.__class__.__name__ == "DynamicCache":
-
-
-        )
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
+        return make_dynamic_cache(torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))))
     if value.__class__.__name__ == "StaticCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
         return make_static_cache(
-            torch_deepcopy(list(zip(
+            torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))),
             max_cache_len=value.max_cache_len,
         )
+    if value.__class__.__name__ == "HybridCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
+        return make_hybrid_cache(torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))))
     if value.__class__.__name__ == "SlidingWindowCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
         return make_sliding_window_cache(
-            torch_deepcopy(list(zip(
+            torch_deepcopy(list(zip(ca.key_cache, ca.value_cache)))
         )
     if value.__class__.__name__ == "EncoderDecoderCache":
         return make_encoder_decoder_cache(
```
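`torch_deepcopy` gains `HybridCache` support and routes every cache flavor through `CacheKeyValue`, so copies are real tensor clones. A quick independence check under the same assumptions:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, CacheKeyValue
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy

cache = make_dynamic_cache([(torch.zeros(1, 2, 3, 4), torch.zeros(1, 2, 3, 4))])
copy = torch_deepcopy(cache)
# the copy owns its own storage: mutating it leaves the original untouched
CacheKeyValue(copy).key_cache[0].add_(1)
assert CacheKeyValue(cache).key_cache[0].sum().item() == 0
```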
Still in onnx_diagnostic/helpers/torch_helper.py:

```diff
@@ -825,8 +841,14 @@ def torch_tensor_size(value: Any) -> Any:
         return value.copy()
     if hasattr(value, "clone"):
         return value.numel() * size_type(value.dtype)
-    if value.__class__.__name__ in {
-
+    if value.__class__.__name__ in {
+        "DynamicCache",
+        "SlidingWindowCache",
+        "HybridCache",
+        "StaticCache",
+    }:
+        cc = CacheKeyValue(value)
+        return torch_tensor_size(cc.key_cache) + torch_tensor_size(cc.value_cache)
     if value.__class__.__name__ == "EncoderDecoderCache":
         return torch_tensor_size(value.self_attention_cache) + torch_tensor_size(
             value.cross_attention_cache
```
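`torch_tensor_size` now recognizes all four cache classes and sums the sizes of their key and value tensors. A worked check, assuming the returned unit is bytes (the `value.numel() * size_type(value.dtype)` branch suggests element count times bytes per element):

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import torch_tensor_size

cache = make_dynamic_cache(
    [(torch.randn(2, 4, 3, 8), torch.randn(2, 4, 3, 8)) for _ in range(2)]
)
# 2 layers * 2 tensors * (2*4*3*8 = 192) elements * 4 bytes (float32) = 3072
assert torch_tensor_size(cache) == 3072
```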
onnx_diagnostic/tasks/__init__.py (+2 -0)

```diff
@@ -14,6 +14,7 @@ from . import (
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
+    mask_generation,
 )
 
 __TASKS__ = [
@@ -31,6 +32,7 @@ __TASKS__ = [
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
+    mask_generation,
 ]
 
 
```
onnx_diagnostic/tasks/feature_extraction.py (+86 -5)

```diff
@@ -1,17 +1,15 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 from ..helpers.config_helper import update_config, check_hasattr
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 
 __TASK__ = "feature-extraction"
 
 
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "
-    kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers, 2),
-        num_attention_heads=min(config.num_attention_heads, 4),
-    )
+    check_hasattr(config, "num_hidden_layers")
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, 2))
     update_config(config, kwargs)
     return kwargs
 
@@ -22,6 +20,12 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
+    sequence_length2: int = 3,
+    decoder_attention_heads: Optional[int] = None,
+    encoder_attention_heads: Optional[int] = None,
+    encoder_ffn_dim: Optional[int] = None,
+    decoder_ffn_dim: Optional[int] = None,
+    num_hidden_layers: Optional[int] = None,
     add_second_input: int = 1,
     **kwargs,  # unused
 ):
@@ -50,6 +54,66 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
+    if (
+        encoder_attention_heads
+        and decoder_attention_heads
+        and encoder_ffn_dim
+        and decoder_ffn_dim
+        and num_hidden_layers
+    ):
+        inputs["past_key_values"] = make_encoder_decoder_cache(
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+        )
+        cache_length = "cache_length_key"
+        cache_length2 = "cache_length_val"
+        shapes["past_key_values"] = [  # type: ignore[assignment]
+            [
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            [
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+            ],
+        ]
+
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         assert (
@@ -61,6 +125,12 @@ def get_inputs(
         batch_size=batch_size + 1,
         sequence_length=sequence_length + add_second_input,
         dummy_max_token_id=dummy_max_token_id,
+        sequence_length2=sequence_length2,
+        decoder_attention_heads=decoder_attention_heads,
+        encoder_attention_heads=encoder_attention_heads,
+        encoder_ffn_dim=encoder_ffn_dim,
+        decoder_ffn_dim=decoder_ffn_dim,
+        num_hidden_layers=num_hidden_layers,
         add_second_input=0,
         **kwargs,
     )["inputs"]
@@ -80,4 +150,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         sequence_length=30,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
+    for att in [
+        "decoder_attention_heads",
+        "encoder_attention_heads",
+        "encoder_ffn_dim",
+        "decoder_ffn_dim",
+        "num_hidden_layers",
+    ]:
+        if hasattr(config, att):
+            kwargs[att] = getattr(config, att)
+    kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
+    print(kwargs)
     return kwargs, get_inputs
```
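Taken together, these hunks let encoder-decoder feature-extraction models receive a populated `past_key_values` (an `EncoderDecoderCache` built from two `DynamicCache`s) plus a matching nested dynamic-shapes spec; `random_input_kwargs` harvests the geometry from the config and then pins both ffn dims to 64 (the trailing `print(kwargs)` ships in 0.7.6 as-is and looks like a debug leftover). A hedged sketch of exercising the new branch directly; the parameters before `batch_size` are not visible in this diff, so the two leading `None` arguments are guesses:

```python
from onnx_diagnostic.tasks.feature_extraction import get_inputs

data = get_inputs(
    None,  # model: assumed leading parameter, not shown in the hunks
    None,  # config: assumed leading parameter, not shown in the hunks
    batch_size=2,
    sequence_length=30,
    dummy_max_token_id=31999,
    sequence_length2=3,
    encoder_attention_heads=4,
    decoder_attention_heads=4,
    encoder_ffn_dim=64,
    decoder_ffn_dim=64,
    num_hidden_layers=2,
    add_second_input=0,  # skip the second-input variant
)
inputs = data["inputs"]          # has "past_key_values" since all five geometry args are set
shapes = data["dynamic_shapes"]  # matching nested spec for the cache tensors
```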