onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/export/api.py +35 -5
- onnx_diagnostic/export/control_flow.py +511 -0
- onnx_diagnostic/export/control_flow_research.py +135 -0
- onnx_diagnostic/ext_test_case.py +33 -9
- onnx_diagnostic/helpers/cache_helper.py +217 -203
- onnx_diagnostic/helpers/helper.py +2 -0
- onnx_diagnostic/helpers/log_helper.py +26 -4
- onnx_diagnostic/helpers/mini_onnx_builder.py +54 -2
- onnx_diagnostic/helpers/onnx_helper.py +12 -15
- onnx_diagnostic/helpers/rt_helper.py +547 -0
- onnx_diagnostic/helpers/torch_helper.py +5 -0
- onnx_diagnostic/tasks/image_text_to_text.py +5 -1
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +28 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1 -1
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +11 -7
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +561 -59
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +53 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +15 -2
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/RECORD +24 -22
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.1.dist-info → onnx_diagnostic-0.8.2.dist-info}/top_level.txt +0 -0
@@ -391,17 +391,22 @@ def make_static_cache(
     return finalize_cache(cache)


-
-
-
-
-
-
-
-
-
-
-
+if hasattr(transformers.cache_utils, "EncoderDecoderCache"):
+
+    def make_encoder_decoder_cache(
+        self_attention_cache: transformers.cache_utils.DynamicCache,
+        cross_attention_cache: transformers.cache_utils.DynamicCache,
+    ) -> transformers.cache_utils.EncoderDecoderCache:
+        """Creates an EncoderDecoderCache."""
+        return transformers.cache_utils.EncoderDecoderCache(
+            # self_attention_cache=self_attention_cache,
+            # cross_attention_cache=cross_attention_cache
+            self_attention_cache,
+            cross_attention_cache,
+        )
+
+else:
+    make_encoder_decoder_cache = None  # type: ignore[assignment]


 def make_mamba_cache(
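An illustrative sketch (not taken from the release) of calling the now-guarded helper. It assumes `make_dynamic_cache` from the same `cache_helper` module to build the two `DynamicCache` inputs; the guard sets `make_encoder_decoder_cache` to `None` when the installed `transformers` lacks `EncoderDecoderCache`.

```python
import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,
    make_encoder_decoder_cache,
)

bsize, nheads, slen, dim = 2, 4, 3, 7
# one DynamicCache for self-attention, one for cross-attention
self_cache = make_dynamic_cache(
    [(torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))]
)
cross_cache = make_dynamic_cache(
    [(torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))]
)
if make_encoder_decoder_cache is not None:  # None when EncoderDecoderCache is unavailable
    cache = make_encoder_decoder_cache(self_cache, cross_cache)
    print(type(cache).__name__)
```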
@@ -454,220 +459,229 @@ def make_mamba_cache(
     return finalize_cache(cache)


-
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-) -> transformers.cache_utils.SlidingWindowCache:
-    "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
-    key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+if hasattr(transformers.cache_utils, "SlidingWindowCache"):

-
-
-
-
-
-            self.sliding_window = key_value_pairs[0][0].shape[2]
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    cache = transformers.cache_utils.SlidingWindowCache(
-        config=_config(),
-        max_batch_size=key_value_pairs[0][0].shape[0],
-        max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
-        device=key_value_pairs[0][0].device,
-        dtype=key_value_pairs[0][0].dtype,
-    )
-    ca = CacheKeyValue(cache)
-    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
-        # transformers>= 4.55.2, layers are empty
-        cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
-        for i, (key, value) in enumerate(key_value_pairs):
-            cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
-        return cache
+    def make_sliding_window_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+    ) -> transformers.cache_utils.SlidingWindowCache:
+        "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)

-
-
-
-
+        class _config:
+            def __init__(self):
+                self.head_dim = key_value_pairs[0][0].shape[-1]
+                self.num_attention_heads = key_value_pairs[0][0].shape[1]
+                self.num_hidden_layers = len(key_value_pairs)
+                self.sliding_window = key_value_pairs[0][0].shape[2]
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        cache = transformers.cache_utils.SlidingWindowCache(
+            config=_config(),
+            max_batch_size=key_value_pairs[0][0].shape[0],
+            max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
+            device=key_value_pairs[0][0].device,
+            dtype=key_value_pairs[0][0].dtype,
         )
-        ca
-
-
-
+        ca = CacheKeyValue(cache)
+        if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+            # transformers>= 4.55.2, layers are empty
+            cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
+            for i, (key, value) in enumerate(key_value_pairs):
+                cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
+            return cache
+
+        for i in range(len(key_value_pairs)):
+            assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
+                f"Shape mismatch, expected {cache.key_cache[i].shape}, "
+                f"got {key_value_pairs[i][0].shape}"
+            )
+            ca.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
+            assert ca.value_cache[i].shape == key_value_pairs[i][1].shape, (
+                f"Shape mismatch, expected {cache.value_cache[i].shape}, "
+                f"got {key_value_pairs[i][1].shape}"
+            )
+            ca.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
         )
-
-    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
-        # The cache constructor contains the two following lines
-        # (in cache_utils.py) which append empty layers when the cache is
-        # initialized. We need to remove them.
-        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
-        # self.append_new_layers(self.num_hidden_layers - 1)
-        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
-    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
-        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
-        f"{len(key_value_pairs)} expected."
-    )
-    return finalize_cache(cache)
+        return finalize_cache(cache)

+else:
+    make_sliding_window_cache = None  # type: ignore[assignment]

-
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-    max_cache_len: Optional[int] = None,
-    max_batch_size: Optional[int] = None,
-    sliding_window: Optional[int] = None,
-) -> transformers.cache_utils.HybridCache:
-    """
-    Creates an instance of :class:`transformers.cache_utils.HybridCache`.
-    This version is valid for ``transformers < 4.50``.
+if hasattr(transformers.cache_utils, "HybridCache"):

-
-
+    def make_hybrid_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+        max_cache_len: Optional[int] = None,
+        max_batch_size: Optional[int] = None,
+        sliding_window: Optional[int] = None,
+    ) -> transformers.cache_utils.HybridCache:
+        """
+        Creates an instance of :class:`transformers.cache_utils.HybridCache`.
+        This version is valid for ``transformers < 4.50``.

-
+        :param key_value_pairs: list of pairs of (key, values)
+        :return: :class:`transformers.cache_utils.HybridCache`

-
-        :showcode:
+        Example:

-
-
-        from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache
+        .. runpython::
+            :showcode:

-
-
+            import torch
+            from onnx_diagnostic.helpers import string_type
+            from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache

-
-
-
-
-
-
-
-
-
-
+            n_layers = 2
+            bsize, nheads, slen, dim = 2, 4, 3, 7
+
+            past_key_values = make_hybrid_cache(
+                [
+                    (
+                        torch.randn(bsize, nheads, slen, dim),
+                        torch.randn(bsize, nheads, slen, dim),
+                    )
+                    for i in range(n_layers)
+                ]
+            )
+            print(string_type(past_key_values, with_shape=True))

-
+        This part defines how the shapes are working in one HybridCache.

-
+        .. code-block:: python

-
-
+            self.max_cache_len = (
+                max_cache_len if max_cache_len is not None else config.max_position_embeddings)

-
-
-
+            # Sliding layers can't be larger than the overall max cache len
+            self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
+            self.max_batch_size = max_batch_size

-
-
-
-
+            self.head_dim = (
+                config.head_dim if hasattr(config, "head_dim")
+                else config.hidden_size // config.num_attention_heads
+            )

-
-
-
-
-
-
+            self._dtype = dtype
+            self.num_key_value_heads = (
+                config.num_attention_heads
+                if getattr(config, "num_key_value_heads", None) is None
+                else config.num_key_value_heads
+            )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # If the attribute does not exist in the config, fallback to a simple StaticCache
+            if hasattr(config, "layer_types"):
+                self.is_sliding = [
+                    layer_type != "full_attention" for layer_type in config.layer_types]
+            else:
+                self.is_sliding = [False] * config.num_hidden_layers
+
+            self.key_cache: list[torch.Tensor] = []
+            self.value_cache: list[torch.Tensor] = []
+            global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                self.max_cache_len, self.head_dim)
+            sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                self.sliding_window_len, self.head_dim)
+            self.sliding_window = min(config.sliding_window, max_cache_len)
+            device = torch.device(device) if device is not None else None
+            for i in range(config.num_hidden_layers):
+                layer_device = layer_device_map[i] if layer_device_map is not None else device
+                cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
+                new_layer_key_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                new_layer_value_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                torch._dynamo.mark_static_address(new_layer_key_cache)
+                torch._dynamo.mark_static_address(new_layer_value_cache)
+                self.key_cache.append(new_layer_key_cache)
+                self.value_cache.append(new_layer_value_cache)
+        """
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+        layer_types = None
+        if key_value_pairs:
+            assert (
+                not max_batch_size and not max_cache_len
+            ), "key_value_pairs is not empty, do not specify max_cache_len and max_batch_size"
+            max_batch_size = key_value_pairs[0][0].shape[0]
+            sets_of_dim = set(kv[0].shape[2] for kv in key_value_pairs)
+            if len(sets_of_dim) == 1:
+                max_cache_len = sets_of_dim.pop()
+                sliding_window = max_cache_len
+            else:
+                assert (
+                    len(sets_of_dim) == 2
+                ), f"Not implemented for more than 2 dimensions {sets_of_dim}"
+                max_cache_len = max(sets_of_dim)
+                sliding_window = min(sets_of_dim)
+                layer_types = [
+                    "full_attention" if i == max_cache_len else "sliding_attention"
+                    for i in [kv[0].shape[2] for kv in key_value_pairs]
+                ]
         else:
             assert (
-
-        ),
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    if layer_types:
-        _config.layer_types = layer_types  # type: ignore[attr-defined]
-
-    cache = transformers.cache_utils.HybridCache(
-        config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
-    )
-    for i, (key, value) in enumerate(key_value_pairs):
-        cache.update(
-            key,
-            value,
-            i,
-            cache_kwargs={
-                "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
-                    key.device
-                )
-            },
+                max_batch_size and max_cache_len
+            ), "key_value_pairs is empty, max_batch_size and max_cache_len are required"
+        if sliding_window is None:
+            sliding_window = max_cache_len
+        _max_cache_len = max_cache_len
+        _sliding_window = sliding_window
+
+        class _config:
+            max_cache_len = _max_cache_len
+            batch_size = max_batch_size
+            num_heads = key_value_pairs[0][0].shape[1] if key_value_pairs else None
+            head_dim = key_value_pairs[0][0].shape[-1] if key_value_pairs else None
+            num_attention_heads = key_value_pairs[0][1].shape[1] if key_value_pairs else None
+            num_hidden_layers = len(key_value_pairs)
+            sliding_window = _sliding_window
+            num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        if layer_types:
+            _config.layer_types = layer_types  # type: ignore[attr-defined]
+
+        cache = transformers.cache_utils.HybridCache(
+            config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
         )
-
-
-
-
-
-
-
-
-
-
-
-
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(
+                key,
+                value,
+                i,
+                cache_kwargs={
+                    "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
+                        key.device
+                    )
+                },
+            )
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
+        )
+        return finalize_cache(cache)
+
+else:
+    make_hybrid_cache = None  # type: ignore[assignment]


 def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_utils.Cache:
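The new docstring above already embeds a `make_hybrid_cache` example; an analogous sketch (illustrative, not part of the diff) exercises the guarded `make_sliding_window_cache` the same way. Like the other factories in this module, it is set to `None` when the installed `transformers` does not expose `SlidingWindowCache`.

```python
import torch
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.cache_helper import make_sliding_window_cache

bsize, nheads, slen, dim = 2, 4, 3, 7
if make_sliding_window_cache is not None:  # None on old transformers versions
    past_key_values = make_sliding_window_cache(
        [
            (torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))
            for _ in range(2)  # two layers
        ]
    )
    print(string_type(past_key_values, with_shape=True))
```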
@@ -787,6 +787,8 @@ def string_type(
         return f"ultralytics.{obj.__class__.__name__}(...)"
     if obj.__class__.__name__ == "FakeTensorMode":
         return f"{obj}"
+    if obj.__class__.__name__ == "FakeTensorContext":
+        return "FakeTensorContext(...)"

     if verbose:
         print(f"[string_type] END:{type(obj)}")
@@ -901,13 +901,19 @@ class CubeLogs:
             else g.groupby([*key_index, *key_columns], dropna=False).sum()
         )
         not_unique = r[r["count"] > 1]
+        if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
+            filename = os.environ.get("DUPLICATE")
+            subset = data.set_index([*key_index, *key_columns]).merge(
+                not_unique.head(), left_index=True, right_index=True
+            )
+            subset.to_excel(filename)
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
             f"unable to run the pivot with index={sorted(key_index)}, "
             f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
             f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
-            f"not unique={set(data.columns) - unique}"
-            f"\n--\n{not_unique.head(10)}"
+            f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
+            f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
         )

         # pivot
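A hypothetical way to use the new escape hatch: point the `DUPLICATE` environment variable at a writable path before building the failing view; the file name below is only an example.

```python
import os

# When the pivot keys are not unique, the duplicated rows (merged with
# not_unique.head()) are written to this file before the assertion fires.
os.environ["DUPLICATE"] = "duplicated_rows.xlsx"
```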
@@ -1000,8 +1006,12 @@ class CubeLogs:
         keys = set(self.keys_time) - {columns_to_fix}
         select = data[self.keys_time]
         select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
         assert select_agg[columns_to_fix].max() <= 1, (
-            f"Column {columns_to_fix!r} has two distinct values at least for one date
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
             f"{select_agg[select_agg[columns_to_fix] > 1]}"
         )

@@ -1038,6 +1048,16 @@ class CubeLogs:
             f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
             f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
         )
+        select = res[self.keys_time]
+        select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
+        # assert select_agg[columns_to_fix].max() <= 1, (
+        #     f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+        #     f"max={select_agg[columns_to_fix].max()}\n"
+        #     f"{select_agg[select_agg[columns_to_fix] > 1]}"
+        # )
         return res

     def _dropna(
@@ -1977,7 +1997,9 @@ class CubeLogsPerformance(CubeLogs):
         * **cmd:** command lines
         * **raw-short:** raw data without all the unused columns
         """
-
+        # This does not work.
+        # used to be ["model_speedup_input_set", "model_test_with"]
+        fix_aggregation_change = []  # type: ignore[var-annotated]
         fs = ["suite", "model_suite", "task", "model_name", "model_task"]
         index_cols = self._filter_column(fs, self.keys_time)
         assert index_cols, (
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
     :return: ModelProto

     The function raises an error if not supported.
+    An example:
+
+    .. code-block:: python
+
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_onnx_model_from_input_tensors,
+        )
+        import onnx
+
+        proto = create_onnx_model_from_input_tensors(
+            dict(
+                query_states=query_states,
+                key_states=key_states,
+                value_states=value_states,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
+                scaling=self.scaling,
+                attn_output=attn_output,
+            )
+        )
+        onnx.save(proto, "attention_inputs.onnx")
     """
     if switch_low_high is None:
         switch_low_high = sys.byteorder != "big"
@@ -461,7 +482,17 @@ def _unflatten(
         if spl[-1] == "array":
             return pos + 1, outputs[pos]
         if spl[-1] == "tensor":
-
+            try:
+                return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            except TypeError:
+                # it should be more robust
+                import ml_dtypes
+
+                if outputs[pos].dtype == ml_dtypes.bfloat16:
+                    return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
+                        torch.bfloat16
+                    )
+                raise
         raise AssertionError(f"Unexpected name {name!r} in {names}")

     res: List[Any] = []
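A standalone sketch of the fallback added above: numpy arrays with the `ml_dtypes.bfloat16` dtype cannot be handed to `torch.from_numpy` directly, so the values go through float64 first and are cast back to `torch.bfloat16`.

```python
import ml_dtypes
import numpy as np
import torch

arr = np.array([1.5, -2.25], dtype=ml_dtypes.bfloat16)
try:
    t = torch.from_numpy(arr)  # bfloat16 ndarrays are rejected here
except TypeError:
    # same workaround as in _unflatten: go through float64, then cast back
    t = torch.from_numpy(arr.astype(float)).to(torch.bfloat16)
print(t.dtype)  # torch.bfloat16
```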
@@ -532,6 +563,12 @@ def _unflatten(
             return d
         return ty(res)

+    if end and len(res) == 1:
+        if res[0] is None:
+            return next_pos, ty()
+        if isinstance(res[0], tuple) and len(res[0]) == 2 and res[0] == ("dict.", None):
+            return next_pos, ty()
+        return next_pos, _make(ty, res)
     return next_pos, (
         ty() if len(res) == 1 and res[0] in (("dict.", None), None) else _make(ty, res)
     )
@@ -557,6 +594,19 @@ def create_input_tensors_from_onnx_model(
     :return: restored data

     See example :ref:`l-plot-intermediate-results` for an example.
+
+    .. code-block:: python
+
+        import os
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_input_tensors_from_onnx_model,
+        )
+        from onnx_diagnostic.helpers import string_type
+
+        restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
+        for k, v in restored.items():
+            print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
+
     """
     if engine == "ExtendedReferenceEvaluator":
         from ..reference import ExtendedReferenceEvaluator
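Putting the two docstring snippets from this release together, a round trip might look like the sketch below. The file name and tensor names are illustrative, and only plain torch tensors are used to keep the assumptions minimal.

```python
import onnx
import torch
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.mini_onnx_builder import (
    create_input_tensors_from_onnx_model,
    create_onnx_model_from_input_tensors,
)

# serialize a dictionary of tensors into a small ONNX model ...
data = dict(query=torch.randn(2, 4, 3, 7), key=torch.randn(2, 4, 3, 7))
proto = create_onnx_model_from_input_tensors(data)
onnx.save(proto, "saved_inputs.onnx")

# ... and restore it later, e.g. from another process
restored = create_input_tensors_from_onnx_model("saved_inputs.onnx")
for k, v in restored.items():
    print(f"{k}: {string_type(v, with_shape=True)}")
```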
@@ -595,6 +645,8 @@ def create_input_tensors_from_onnx_model(
         return float(output[0])
     if name == "tensor":
         return torch.from_numpy(output).to(device)
-
+    assert name.startswith(
+        ("list_", "list.", "dict.", "tuple_", "tuple.")
+    ), f"Unexpected name {name!r} in {names}"

     return _unflatten(sep, names, got, device=device)[1]
@@ -671,21 +671,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int: # noqa: F821
     try:
         return oh.np_dtype_to_tensor_dtype(dt)
     except ValueError:
-
-
-
-
-        if
-
-
-
-
-
-
-
-            return TensorProto.FLOAT8E5M2
-        if dt == ml_dtypes.float8_e5m2fnuz:
-            return TensorProto.FLOAT8E5M2FNUZ
+        import ml_dtypes
+
+        if dt == ml_dtypes.bfloat16:
+            return TensorProto.BFLOAT16
+        if dt == ml_dtypes.float8_e4m3fn:
+            return TensorProto.FLOAT8E4M3FN
+        if dt == ml_dtypes.float8_e4m3fnuz:
+            return TensorProto.FLOAT8E4M3FNUZ
+        if dt == ml_dtypes.float8_e5m2:
+            return TensorProto.FLOAT8E5M2
+        if dt == ml_dtypes.float8_e5m2fnuz:
+            return TensorProto.FLOAT8E5M2FNUZ
     if dt == np.float32:
         return TensorProto.FLOAT
     if dt == np.float16: