onnx-diagnostic 0.7.4__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +66 -8
  3. onnx_diagnostic/ext_test_case.py +2 -0
  4. onnx_diagnostic/helpers/_log_helper.py +461 -0
  5. onnx_diagnostic/helpers/cache_helper.py +250 -15
  6. onnx_diagnostic/helpers/helper.py +146 -10
  7. onnx_diagnostic/helpers/log_helper.py +404 -315
  8. onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
  9. onnx_diagnostic/helpers/onnx_helper.py +13 -7
  10. onnx_diagnostic/helpers/torch_helper.py +33 -11
  11. onnx_diagnostic/tasks/__init__.py +2 -0
  12. onnx_diagnostic/tasks/feature_extraction.py +86 -5
  13. onnx_diagnostic/tasks/image_text_to_text.py +260 -56
  14. onnx_diagnostic/tasks/mask_generation.py +139 -0
  15. onnx_diagnostic/tasks/text2text_generation.py +2 -2
  16. onnx_diagnostic/tasks/text_generation.py +6 -2
  17. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +7 -1
  18. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
  19. onnx_diagnostic/torch_export_patches/patch_inputs.py +4 -1
  20. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +397 -128
  21. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +57 -40
  22. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
  23. onnx_diagnostic/torch_models/hghub/model_inputs.py +5 -0
  24. onnx_diagnostic/torch_models/validate.py +26 -3
  25. {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/METADATA +1 -1
  26. {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/RECORD +29 -27
  27. {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/WHEEL +0 -0
  28. {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/licenses/LICENSE.txt +0 -0
  29. {onnx_diagnostic-0.7.4.dist-info → onnx_diagnostic-0.7.6.dist-info}/top_level.txt +0 -0
onnx_diagnostic/helpers/mini_onnx_builder.py
@@ -367,10 +367,12 @@ def _flatten_iterator(obj: Any, sep: str) -> Iterator:
     elif obj.__class__.__name__ == "DynamicCache":
         # transformers
         import transformers
+        from .cache_helper import CacheKeyValue
 
         assert isinstance(
             obj, transformers.cache_utils.DynamicCache
         ), f"Unexpected type {type(obj)}"
+        obj = CacheKeyValue(obj)
         atts = ["key_cache", "value_cache"]
         for i, att in enumerate(atts):
             if i == len(atts) - 1:
@@ -494,9 +496,12 @@ def _unflatten(
 
     def _make(ty: type, res: Any) -> Any:
         if ty.__name__ == "DynamicCache":
-            r = ty()
+            from .cache_helper import CacheKeyValue
+
+            cc = CacheKeyValue()
             for k, v in res:
-                setattr(r, k, v)
+                setattr(cc, k, v)
+            r = cc.make_dynamic_cache()
             return r
         if ty is dict:
             d = {}
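
Both hunks above stop touching key_cache/value_cache directly on DynamicCache and route every access through CacheKeyValue from cache_helper, rebuilding the cache with make_dynamic_cache() instead of setting attributes on the cache itself. The adapter's definition is not part of this diff; below is a minimal hypothetical sketch of the idea, assuming, for illustration only, that recent transformers releases keep per-layer tensors on cache.layers[i].keys/.values while older ones keep flat key_cache/value_cache lists:

class CacheKeyValueSketch:
    """Hypothetical stand-in for onnx_diagnostic.helpers.cache_helper.CacheKeyValue."""

    def __init__(self, cache=None):
        if cache is None:
            # Empty view: attributes get filled by setattr, as in _make above.
            self.key_cache, self.value_cache = [], []
        elif hasattr(cache, "layers"):  # assumed layout of newer transformers
            self.key_cache = [layer.keys for layer in cache.layers]
            self.value_cache = [layer.values for layer in cache.layers]
        else:  # legacy layout with flat lists
            self.key_cache = list(cache.key_cache)
            self.value_cache = list(cache.value_cache)

    def make_dynamic_cache(self):
        # Rebuild a DynamicCache from the collected (key, value) pairs.
        from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache

        return make_dynamic_cache(list(zip(self.key_cache, self.value_cache)))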
onnx_diagnostic/helpers/onnx_helper.py
@@ -540,13 +540,19 @@ def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> Te
         ), f"Unable to convert type {type(tensor)} into TensorProto."
         return proto_from_tensor(tensor, name=name)
 
-    from onnx.reference.ops.op_cast import (
-        bfloat16,
-        float8e4m3fn,
-        float8e4m3fnuz,
-        float8e5m2,
-        float8e5m2fnuz,
-    )
+    try:
+        from onnx.reference.ops.op_cast import (
+            bfloat16,
+            float8e4m3fn,
+            float8e4m3fnuz,
+            float8e5m2,
+            float8e5m2fnuz,
+        )
+    except ImportError:
+        bfloat16 = None
+
+    if bfloat16 is None:
+        return onh.from_array(tensor, name)
 
     dt = tensor.dtype
     if dt == float8e4m3fn and dt.descr[0][0] == "e4m3fn":
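
The rewrite above turns a hard dependency on onnx's extended cast types into an optional one: when the import fails, bfloat16 is None and the function falls back to onh.from_array (presumably onnx.numpy_helper), so only the float8/bfloat16 paths are lost. Standard dtypes behave the same either way; a small usage sketch, assuming from_array_extended keeps the public signature shown in the hunk header:

import numpy as np
from onnx_diagnostic.helpers.onnx_helper import from_array_extended

# A plain float32 array never needs the extended dtypes, so it serializes
# identically whether or not onnx.reference.ops.op_cast is importable.
proto = from_array_extended(np.array([0.5, -1.25], dtype=np.float32), name="x")
print(proto.data_type)  # 1, i.e. TensorProto.FLOAT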
onnx_diagnostic/helpers/torch_helper.py
@@ -14,9 +14,11 @@ from .helper import string_type, size_type
 from .cache_helper import (
     make_dynamic_cache,
     make_encoder_decoder_cache,
+    make_hybrid_cache,
     make_sliding_window_cache,
     make_mamba_cache,
     make_static_cache,
+    CacheKeyValue,
 )
 from .mini_onnx_builder import create_onnx_model_from_input_tensors
 from .onnx_helper import (
@@ -720,20 +722,22 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
     if type(value) is dict:
         return {k: to_any(t, to_value) for k, t in value.items()}
     if value.__class__.__name__ == "DynamicCache":
+        cc = CacheKeyValue(value)
         return make_dynamic_cache(
             list(
                 zip(
-                    [t.to(to_value) for t in value.key_cache],
-                    [t.to(to_value) for t in value.value_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.key_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.value_cache],
                 )
             )
         )
     if value.__class__.__name__ == "StaticCache":
+        cc = CacheKeyValue(value)
         return make_static_cache(
             list(
                 zip(
-                    [t.to(to_value) for t in value.key_cache],
-                    [t.to(to_value) for t in value.value_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.key_cache],
+                    [t.to(to_value) if t is not None else t for t in cc.value_cache],
                 )
             ),
             max_cache_len=value.max_cache_len,
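
Besides reading through CacheKeyValue, to_any now leaves None entries in the key/value lists untouched instead of calling .to() on them. A hedged usage sketch, assuming to_any is exported by the same torch_helper module:

import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import to_any

# Build a one-layer cache, then rebuild it with every tensor cast to float16.
cache = make_dynamic_cache([(torch.randn(1, 1, 2, 2), torch.randn(1, 1, 2, 2))])
half = to_any(cache, torch.float16)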
@@ -781,17 +785,29 @@ def torch_deepcopy(value: Any) -> Any:
     if hasattr(value, "clone"):
         return value.clone()
     if value.__class__.__name__ == "DynamicCache":
-        return make_dynamic_cache(
-            torch_deepcopy(list(zip(value.key_cache, value.value_cache)))
-        )
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
+        return make_dynamic_cache(torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))))
     if value.__class__.__name__ == "StaticCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
         return make_static_cache(
-            torch_deepcopy(list(zip(value.key_cache, value.value_cache))),
+            torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))),
             max_cache_len=value.max_cache_len,
         )
+    if value.__class__.__name__ == "HybridCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
+        return make_hybrid_cache(torch_deepcopy(list(zip(ca.key_cache, ca.value_cache))))
     if value.__class__.__name__ == "SlidingWindowCache":
+        from .cache_helper import CacheKeyValue
+
+        ca = CacheKeyValue(value)
         return make_sliding_window_cache(
-            torch_deepcopy(list(zip(value.key_cache, value.value_cache)))
+            torch_deepcopy(list(zip(ca.key_cache, ca.value_cache)))
         )
     if value.__class__.__name__ == "EncoderDecoderCache":
         return make_encoder_decoder_cache(
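
torch_deepcopy applies the same recipe to every cache flavor, including the new HybridCache branch: read the tensors through CacheKeyValue, deep-copy the (key, value) pairs, rebuild with the matching make_*_cache constructor. A round-trip sketch with arbitrary shapes:

import torch
from onnx_diagnostic.helpers.cache_helper import CacheKeyValue, make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy

cache = make_dynamic_cache([(torch.randn(1, 2, 4, 8), torch.randn(1, 2, 4, 8))])
copy = torch_deepcopy(cache)

# The copy owns its storage: zeroing its keys must not touch the original.
CacheKeyValue(copy).key_cache[0].zero_()
assert CacheKeyValue(cache).key_cache[0].abs().sum() > 0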
@@ -825,8 +841,14 @@ def torch_tensor_size(value: Any) -> Any:
         return value.copy()
     if hasattr(value, "clone"):
         return value.numel() * size_type(value.dtype)
-    if value.__class__.__name__ in {"DynamicCache", "SlidingWindowCache"}:
-        return torch_tensor_size(value.key_cache) + torch_tensor_size(value.value_cache)
+    if value.__class__.__name__ in {
+        "DynamicCache",
+        "SlidingWindowCache",
+        "HybridCache",
+        "StaticCache",
+    }:
+        cc = CacheKeyValue(value)
+        return torch_tensor_size(cc.key_cache) + torch_tensor_size(cc.value_cache)
     if value.__class__.__name__ == "EncoderDecoderCache":
         return torch_tensor_size(value.self_attention_cache) + torch_tensor_size(
             value.cross_attention_cache
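
torch_tensor_size, extended here to HybridCache and StaticCache, sums numel() * size_type(dtype) over every tensor reachable through the cache. Assuming float32 counts as 4 bytes, a one-layer cache of two (1, 2, 4, 8) tensors should report 2 * 64 * 4 = 512 bytes:

import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
from onnx_diagnostic.helpers.torch_helper import torch_tensor_size

cache = make_dynamic_cache([(torch.zeros(1, 2, 4, 8), torch.zeros(1, 2, 4, 8))])
print(torch_tensor_size(cache))  # expected: 512, under the assumption above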
onnx_diagnostic/tasks/__init__.py
@@ -14,6 +14,7 @@ from . import (
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
+    mask_generation,
 )
 
 __TASKS__ = [
@@ -31,6 +32,7 @@ __TASKS__ = [
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
+    mask_generation,
 ]
 
 
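These two hunks register the new mask_generation module in the task list. If every task module exposes a __TASK__ string the way feature_extraction does (an assumption; only feature_extraction's is visible in this diff), the registry can be inspected directly:

from onnx_diagnostic import tasks

# Enumerate the registered task names; the list should now include mask generation.
print([m.__TASK__ for m in tasks.__TASKS__])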
onnx_diagnostic/tasks/feature_extraction.py
@@ -1,17 +1,15 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 from ..helpers.config_helper import update_config, check_hasattr
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 
 __TASK__ = "feature-extraction"
 
 
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "num_attention_heads", "num_hidden_layers")
-    kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers, 2),
-        num_attention_heads=min(config.num_attention_heads, 4),
-    )
+    check_hasattr(config, "num_hidden_layers")
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, 2))
     update_config(config, kwargs)
     return kwargs
 
@@ -22,6 +20,12 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
+    sequence_length2: int = 3,
+    decoder_attention_heads: Optional[int] = None,
+    encoder_attention_heads: Optional[int] = None,
+    encoder_ffn_dim: Optional[int] = None,
+    decoder_ffn_dim: Optional[int] = None,
+    num_hidden_layers: Optional[int] = None,
     add_second_input: int = 1,
     **kwargs,  # unused
 ):
@@ -50,6 +54,66 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
+    if (
+        encoder_attention_heads
+        and decoder_attention_heads
+        and encoder_ffn_dim
+        and decoder_ffn_dim
+        and num_hidden_layers
+    ):
+        inputs["past_key_values"] = make_encoder_decoder_cache(
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+        )
+        cache_length = "cache_length_key"
+        cache_length2 = "cache_length_val"
+        shapes["past_key_values"] = [  # type: ignore[assignment]
+            [
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            [
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+            ],
+        ]
+
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         assert (
@@ -61,6 +125,12 @@ def get_inputs(
         batch_size=batch_size + 1,
         sequence_length=sequence_length + add_second_input,
         dummy_max_token_id=dummy_max_token_id,
+        sequence_length2=sequence_length2,
+        decoder_attention_heads=decoder_attention_heads,
+        encoder_attention_heads=encoder_attention_heads,
+        encoder_ffn_dim=encoder_ffn_dim,
+        decoder_ffn_dim=decoder_ffn_dim,
+        num_hidden_layers=num_hidden_layers,
         add_second_input=0,
         **kwargs,
     )["inputs"]
@@ -80,4 +150,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         sequence_length=30,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
+    for att in [
+        "decoder_attention_heads",
+        "encoder_attention_heads",
+        "encoder_ffn_dim",
+        "decoder_ffn_dim",
+        "num_hidden_layers",
+    ]:
+        if hasattr(config, att):
+            kwargs[att] = getattr(config, att)
+    kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
+    print(kwargs)
     return kwargs, get_inputs
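
random_input_kwargs now copies encoder/decoder attention heads, ffn dims, and the layer count from the config when they exist, then forces both ffn dims down to 64 so the dummy past_key_values stay small; note the diff also leaves a print(kwargs) call in place. A hedged sketch of calling it without a config, in which case only the forced defaults apply:

from onnx_diagnostic.tasks.feature_extraction import random_input_kwargs

# With config=None, hasattr(None, att) is always False, so nothing is copied;
# the returned kwargs still carry the ffn dims forced to 64.
kwargs, fct = random_input_kwargs(None)
print(sorted(kwargs))  # fct is get_inputs, ready to take these keyword arguments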