onnx-diagnostic 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -391,17 +391,22 @@ def make_static_cache(
     return finalize_cache(cache)
 
 
-def make_encoder_decoder_cache(
-    self_attention_cache: transformers.cache_utils.DynamicCache,
-    cross_attention_cache: transformers.cache_utils.DynamicCache,
-) -> transformers.cache_utils.EncoderDecoderCache:
-    """Creates an EncoderDecoderCache."""
-    return transformers.cache_utils.EncoderDecoderCache(
-        # self_attention_cache=self_attention_cache,
-        # cross_attention_cache=cross_attention_cache
-        self_attention_cache,
-        cross_attention_cache,
-    )
+if hasattr(transformers.cache_utils, "EncoderDecoderCache"):
+
+    def make_encoder_decoder_cache(
+        self_attention_cache: transformers.cache_utils.DynamicCache,
+        cross_attention_cache: transformers.cache_utils.DynamicCache,
+    ) -> transformers.cache_utils.EncoderDecoderCache:
+        """Creates an EncoderDecoderCache."""
+        return transformers.cache_utils.EncoderDecoderCache(
+            # self_attention_cache=self_attention_cache,
+            # cross_attention_cache=cross_attention_cache
+            self_attention_cache,
+            cross_attention_cache,
+        )
+
+else:
+    make_encoder_decoder_cache = None  # type: ignore[assignment]
 
 
 def make_mamba_cache(
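A minimal usage sketch of the new guard (hypothetical: it assumes the make_dynamic_cache helper from the same module and a transformers build that still exposes EncoderDecoderCache):

import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,  # assumed helper from the same module
    make_encoder_decoder_cache,
)

# make_encoder_decoder_cache is now None when transformers.cache_utils has no
# EncoderDecoderCache, so callers should test for None before building the cache.
if make_encoder_decoder_cache is not None:
    kv = [(torch.randn(2, 4, 3, 7), torch.randn(2, 4, 3, 7))]
    cache = make_encoder_decoder_cache(make_dynamic_cache(kv), make_dynamic_cache(kv))
    print(type(cache).__name__)  # EncoderDecoderCache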
@@ -454,220 +459,229 @@ def make_mamba_cache(
     return finalize_cache(cache)
 
 
-def make_sliding_window_cache(
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-) -> transformers.cache_utils.SlidingWindowCache:
-    "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
-    key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+if hasattr(transformers.cache_utils, "SlidingWindowCache"):
 
-    class _config:
-        def __init__(self):
-            self.head_dim = key_value_pairs[0][0].shape[-1]
-            self.num_attention_heads = key_value_pairs[0][0].shape[1]
-            self.num_hidden_layers = len(key_value_pairs)
-            self.sliding_window = key_value_pairs[0][0].shape[2]
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    cache = transformers.cache_utils.SlidingWindowCache(
-        config=_config(),
-        max_batch_size=key_value_pairs[0][0].shape[0],
-        max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
-        device=key_value_pairs[0][0].device,
-        dtype=key_value_pairs[0][0].dtype,
-    )
-    ca = CacheKeyValue(cache)
-    if hasattr(cache, "layers") and len(ca.key_cache) == 0:
-        # transformers>= 4.55.2, layers are empty
-        cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
-        for i, (key, value) in enumerate(key_value_pairs):
-            cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
-        return cache
+    def make_sliding_window_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+    ) -> transformers.cache_utils.SlidingWindowCache:
+        "Creates a :class:`transformers.cache_utils.SlidingWindowCache`."
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
 
-    for i in range(len(key_value_pairs)):
-        assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
-            f"Shape mismatch, expected {cache.key_cache[i].shape}, "
-            f"got {key_value_pairs[i][0].shape}"
+        class _config:
+            def __init__(self):
+                self.head_dim = key_value_pairs[0][0].shape[-1]
+                self.num_attention_heads = key_value_pairs[0][0].shape[1]
+                self.num_hidden_layers = len(key_value_pairs)
+                self.sliding_window = key_value_pairs[0][0].shape[2]
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        cache = transformers.cache_utils.SlidingWindowCache(
+            config=_config(),
+            max_batch_size=key_value_pairs[0][0].shape[0],
+            max_cache_len=key_value_pairs[0][0].shape[2],  # same as sliding_window
+            device=key_value_pairs[0][0].device,
+            dtype=key_value_pairs[0][0].dtype,
         )
-        ca.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
-        assert ca.value_cache[i].shape == key_value_pairs[i][1].shape, (
-            f"Shape mismatch, expected {cache.value_cache[i].shape}, "
-            f"got {key_value_pairs[i][1].shape}"
+        ca = CacheKeyValue(cache)
+        if hasattr(cache, "layers") and len(ca.key_cache) == 0:
+            # transformers>= 4.55.2, layers are empty
+            cache_position = torch.arange(key_value_pairs[0][0].shape[2], dtype=torch.int64)
+            for i, (key, value) in enumerate(key_value_pairs):
+                cache.update(key, value, i, cache_kwargs={"cache_position": cache_position})
+            return cache
+
+        for i in range(len(key_value_pairs)):
+            assert ca.key_cache[i].shape == key_value_pairs[i][0].shape, (
+                f"Shape mismatch, expected {cache.key_cache[i].shape}, "
+                f"got {key_value_pairs[i][0].shape}"
+            )
+            ca.key_cache[i][:, :, :, :] = key_value_pairs[i][0]
+            assert ca.value_cache[i].shape == key_value_pairs[i][1].shape, (
+                f"Shape mismatch, expected {cache.value_cache[i].shape}, "
+                f"got {key_value_pairs[i][1].shape}"
+            )
+            ca.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
         )
-        ca.value_cache[i][:, :, :, :] = key_value_pairs[i][1]
-    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
-        # The cache constructor contains the two following lines
-        # (in cache_utils.py) which append empty layers when the cache is
-        # initialized. We need to remove them.
-        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
-        # self.append_new_layers(self.num_hidden_layers - 1)
-        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
-    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
-        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
-        f"{len(key_value_pairs)} expected."
-    )
-    return finalize_cache(cache)
+        return finalize_cache(cache)
 
+else:
+    make_sliding_window_cache = None  # type: ignore[assignment]
 
-def make_hybrid_cache(
-    key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
-    max_cache_len: Optional[int] = None,
-    max_batch_size: Optional[int] = None,
-    sliding_window: Optional[int] = None,
-) -> transformers.cache_utils.HybridCache:
-    """
-    Creates an instance of :class:`transformers.cache_utils.HybridCache`.
-    This version is valid for ``transformers < 4.50``.
+if hasattr(transformers.cache_utils, "HybridCache"):
 
-    :param key_value_pairs: list of pairs of (key, values)
-    :return: :class:`transformers.cache_utils.HybridCache`
+    def make_hybrid_cache(
+        key_value_pairs: Union[List[torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]],
+        max_cache_len: Optional[int] = None,
+        max_batch_size: Optional[int] = None,
+        sliding_window: Optional[int] = None,
+    ) -> transformers.cache_utils.HybridCache:
+        """
+        Creates an instance of :class:`transformers.cache_utils.HybridCache`.
+        This version is valid for ``transformers < 4.50``.
 
-    Example:
+        :param key_value_pairs: list of pairs of (key, values)
+        :return: :class:`transformers.cache_utils.HybridCache`
 
-    .. runpython::
-        :showcode:
+        Example:
 
-        import torch
-        from onnx_diagnostic.helpers import string_type
-        from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache
+        .. runpython::
+            :showcode:
 
-        n_layers = 2
-        bsize, nheads, slen, dim = 2, 4, 3, 7
+            import torch
+            from onnx_diagnostic.helpers import string_type
+            from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache
 
-        past_key_values = make_hybrid_cache(
-            [
-                (
-                    torch.randn(bsize, nheads, slen, dim),
-                    torch.randn(bsize, nheads, slen, dim),
-                )
-                for i in range(n_layers)
-            ]
-        )
-        print(string_type(past_key_values, with_shape=True))
+            n_layers = 2
+            bsize, nheads, slen, dim = 2, 4, 3, 7
+
+            past_key_values = make_hybrid_cache(
+                [
+                    (
+                        torch.randn(bsize, nheads, slen, dim),
+                        torch.randn(bsize, nheads, slen, dim),
+                    )
+                    for i in range(n_layers)
+                ]
+            )
+            print(string_type(past_key_values, with_shape=True))
 
-    This part defines how the shapes are working in one HybridCache.
+        This part defines how the shapes are working in one HybridCache.
 
-    .. code-block:: python
+        .. code-block:: python
 
-        self.max_cache_len = (
-            max_cache_len if max_cache_len is not None else config.max_position_embeddings)
+            self.max_cache_len = (
+                max_cache_len if max_cache_len is not None else config.max_position_embeddings)
 
-        # Sliding layers can't be larger than the overall max cache len
-        self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
-        self.max_batch_size = max_batch_size
+            # Sliding layers can't be larger than the overall max cache len
+            self.sliding_window_len = min(config.sliding_window, self.max_cache_len)
+            self.max_batch_size = max_batch_size
 
-        self.head_dim = (
-            config.head_dim if hasattr(config, "head_dim")
-            else config.hidden_size // config.num_attention_heads
-        )
+            self.head_dim = (
+                config.head_dim if hasattr(config, "head_dim")
+                else config.hidden_size // config.num_attention_heads
+            )
 
-        self._dtype = dtype
-        self.num_key_value_heads = (
-            config.num_attention_heads
-            if getattr(config, "num_key_value_heads", None) is None
-            else config.num_key_value_heads
-        )
+            self._dtype = dtype
+            self.num_key_value_heads = (
+                config.num_attention_heads
+                if getattr(config, "num_key_value_heads", None) is None
+                else config.num_key_value_heads
+            )
 
-        # If the attribute does not exist in the config, fallback to a simple StaticCache
-        if hasattr(config, "layer_types"):
-            self.is_sliding = [
-                layer_type != "full_attention" for layer_type in config.layer_types]
-        else:
-            self.is_sliding = [False] * config.num_hidden_layers
-
-        self.key_cache: list[torch.Tensor] = []
-        self.value_cache: list[torch.Tensor] = []
-        global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
-            self.max_cache_len, self.head_dim)
-        sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
-            self.sliding_window_len, self.head_dim)
-        self.sliding_window = min(config.sliding_window, max_cache_len)
-        device = torch.device(device) if device is not None else None
-        for i in range(config.num_hidden_layers):
-            layer_device = layer_device_map[i] if layer_device_map is not None else device
-            cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
-            new_layer_key_cache = torch.zeros(
-                cache_shape, dtype=self._dtype, device=layer_device)
-            new_layer_value_cache = torch.zeros(
-                cache_shape, dtype=self._dtype, device=layer_device)
-            torch._dynamo.mark_static_address(new_layer_key_cache)
-            torch._dynamo.mark_static_address(new_layer_value_cache)
-            self.key_cache.append(new_layer_key_cache)
-            self.value_cache.append(new_layer_value_cache)
-    """
-    key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
-    layer_types = None
-    if key_value_pairs:
-        assert (
-            not max_batch_size and not max_cache_len
-        ), "key_value_pairs is not empty, do not specify max_cache_len and max_batch_size"
-        max_batch_size = key_value_pairs[0][0].shape[0]
-        sets_of_dim = set(kv[0].shape[2] for kv in key_value_pairs)
-        if len(sets_of_dim) == 1:
-            max_cache_len = sets_of_dim.pop()
-            sliding_window = max_cache_len
+            # If the attribute does not exist in the config, fallback to a simple StaticCache
+            if hasattr(config, "layer_types"):
+                self.is_sliding = [
+                    layer_type != "full_attention" for layer_type in config.layer_types]
+            else:
+                self.is_sliding = [False] * config.num_hidden_layers
+
+            self.key_cache: list[torch.Tensor] = []
+            self.value_cache: list[torch.Tensor] = []
+            global_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                self.max_cache_len, self.head_dim)
+            sliding_cache_shape = (self.max_batch_size, self.num_key_value_heads,
+                self.sliding_window_len, self.head_dim)
+            self.sliding_window = min(config.sliding_window, max_cache_len)
+            device = torch.device(device) if device is not None else None
+            for i in range(config.num_hidden_layers):
+                layer_device = layer_device_map[i] if layer_device_map is not None else device
+                cache_shape = sliding_cache_shape if self.is_sliding[i] else global_cache_shape
+                new_layer_key_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                new_layer_value_cache = torch.zeros(
+                    cache_shape, dtype=self._dtype, device=layer_device)
+                torch._dynamo.mark_static_address(new_layer_key_cache)
+                torch._dynamo.mark_static_address(new_layer_value_cache)
+                self.key_cache.append(new_layer_key_cache)
+                self.value_cache.append(new_layer_value_cache)
+        """
+        key_value_pairs = _preprocess_key_value_pairs(key_value_pairs)
+        layer_types = None
+        if key_value_pairs:
+            assert (
+                not max_batch_size and not max_cache_len
+            ), "key_value_pairs is not empty, do not specify max_cache_len and max_batch_size"
+            max_batch_size = key_value_pairs[0][0].shape[0]
+            sets_of_dim = set(kv[0].shape[2] for kv in key_value_pairs)
+            if len(sets_of_dim) == 1:
+                max_cache_len = sets_of_dim.pop()
+                sliding_window = max_cache_len
+            else:
+                assert (
+                    len(sets_of_dim) == 2
+                ), f"Not implemented for more than 2 dimensions {sets_of_dim}"
+                max_cache_len = max(sets_of_dim)
+                sliding_window = min(sets_of_dim)
+                layer_types = [
+                    "full_attention" if i == max_cache_len else "sliding_attention"
+                    for i in [kv[0].shape[2] for kv in key_value_pairs]
+                ]
         else:
             assert (
-                len(sets_of_dim) == 2
-            ), f"Not implemented for more than 2 dimensions {sets_of_dim}"
-            max_cache_len = max(sets_of_dim)
-            sliding_window = min(sets_of_dim)
-            layer_types = [
-                "full_attention" if i == max_cache_len else "sliding_attention"
-                for i in [kv[0].shape[2] for kv in key_value_pairs]
-            ]
-    else:
-        assert (
-            max_batch_size and max_cache_len
-        ), "key_value_pairs is empty, max_batch_size and max_cache_len are required"
-    if sliding_window is None:
-        sliding_window = max_cache_len
-    _max_cache_len = max_cache_len
-    _sliding_window = sliding_window
-
-    class _config:
-        max_cache_len = _max_cache_len
-        batch_size = max_batch_size
-        num_heads = key_value_pairs[0][0].shape[1] if key_value_pairs else None
-        head_dim = key_value_pairs[0][0].shape[-1] if key_value_pairs else None
-        num_attention_heads = key_value_pairs[0][1].shape[1] if key_value_pairs else None
-        num_hidden_layers = len(key_value_pairs)
-        sliding_window = _sliding_window
-        num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
-
-        def get_text_config(self, *args, **kwargs):
-            return self
-
-    if layer_types:
-        _config.layer_types = layer_types  # type: ignore[attr-defined]
-
-    cache = transformers.cache_utils.HybridCache(
-        config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
-    )
-    for i, (key, value) in enumerate(key_value_pairs):
-        cache.update(
-            key,
-            value,
-            i,
-            cache_kwargs={
-                "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
-                    key.device
-                )
-            },
+                max_batch_size and max_cache_len
+            ), "key_value_pairs is empty, max_batch_size and max_cache_len are required"
+        if sliding_window is None:
+            sliding_window = max_cache_len
+        _max_cache_len = max_cache_len
+        _sliding_window = sliding_window
+
+        class _config:
+            max_cache_len = _max_cache_len
+            batch_size = max_batch_size
+            num_heads = key_value_pairs[0][0].shape[1] if key_value_pairs else None
+            head_dim = key_value_pairs[0][0].shape[-1] if key_value_pairs else None
+            num_attention_heads = key_value_pairs[0][1].shape[1] if key_value_pairs else None
+            num_hidden_layers = len(key_value_pairs)
+            sliding_window = _sliding_window
+            num_key_value_heads = key_value_pairs[0][1].shape[1]  # transformers 4.48.3
+
+            def get_text_config(self, *args, **kwargs):
+                return self
+
+        if layer_types:
+            _config.layer_types = layer_types  # type: ignore[attr-defined]
+
+        cache = transformers.cache_utils.HybridCache(
+            config=_config(), max_cache_len=max_cache_len, max_batch_size=max_batch_size
         )
-    if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
-        # The cache constructor contains the two following lines
-        # (in cache_utils.py) which append empty layers when the cache is
-        # initialized. We need to remove them.
-        # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
-        # self.append_new_layers(self.num_hidden_layers - 1)
-        cache.layers[:] = cache.layers[-len(key_value_pairs) :]
-    assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
-        f"Unexpected number of layers in the cache ({len(cache.layers)}), "
-        f"{len(key_value_pairs)} expected."
-    )
-    return finalize_cache(cache)
+        for i, (key, value) in enumerate(key_value_pairs):
+            cache.update(
+                key,
+                value,
+                i,
+                cache_kwargs={
+                    "cache_position": torch.arange(0, key.shape[2], dtype=torch.int64).to(
+                        key.device
+                    )
+                },
+            )
+        if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
+            # The cache constructor contains the two following lines
+            # (in cache_utils.py) which append empty layers when the cache is
+            # initialized. We need to remove them.
+            # self.num_hidden_layers = getattr(config, "num_hidden_layers", 1)
+            # self.append_new_layers(self.num_hidden_layers - 1)
+            cache.layers[:] = cache.layers[-len(key_value_pairs) :]
+        assert not hasattr(cache, "layers") or len(key_value_pairs) == len(cache.layers), (
+            f"Unexpected number of layers in the cache ({len(cache.layers)}), "
+            f"{len(key_value_pairs)} expected."
+        )
+        return finalize_cache(cache)
+
+else:
+    make_hybrid_cache = None  # type: ignore[assignment]
 
 
 def finalize_cache(cache: transformers.cache_utils.Cache) -> transformers.cache_utils.Cache:
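The same None-fallback pattern now applies to make_sliding_window_cache and make_hybrid_cache; a short hedged sketch of how a caller could guard against a transformers build that lacks the underlying cache class:

import torch
from onnx_diagnostic.helpers.cache_helper import make_hybrid_cache, make_sliding_window_cache

pairs = [(torch.randn(2, 4, 3, 7), torch.randn(2, 4, 3, 7)) for _ in range(2)]

# Both names are bound to None when transformers.cache_utils does not define
# SlidingWindowCache or HybridCache respectively.
if make_sliding_window_cache is not None:
    sliding = make_sliding_window_cache(pairs)
if make_hybrid_cache is not None:
    hybrid = make_hybrid_cache(pairs)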
@@ -787,6 +787,8 @@ def string_type(
         return f"ultralytics.{obj.__class__.__name__}(...)"
     if obj.__class__.__name__ == "FakeTensorMode":
         return f"{obj}"
+    if obj.__class__.__name__ == "FakeTensorContext":
+        return "FakeTensorContext(...)"
 
     if verbose:
         print(f"[string_type] END:{type(obj)}")
@@ -901,13 +901,19 @@ class CubeLogs:
             else g.groupby([*key_index, *key_columns], dropna=False).sum()
         )
         not_unique = r[r["count"] > 1]
+        if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
+            filename = os.environ.get("DUPLICATE")
+            subset = data.set_index([*key_index, *key_columns]).merge(
+                not_unique.head(), left_index=True, right_index=True
+            )
+            subset.to_excel(filename)
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
             f"unable to run the pivot with index={sorted(key_index)}, "
             f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
             f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
-            f"not unique={set(data.columns) - unique}"
-            f"\n--\n{not_unique.head(10)}"
+            f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
+            f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
         )
 
         # pivot
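A hedged sketch of the new DUPLICATE switch (the rerun step is illustrative, not taken from the diff): when the pivot aborts because some (index, key) combinations are duplicated, point DUPLICATE at a file name and run the failing view again so the offending rows are dumped before the assertion fires.

import os

# The duplicated subset is written with DataFrame.to_excel before the assert raises,
# so an Excel writer such as openpyxl must be installed.
os.environ["DUPLICATE"] = "duplicated_rows.xlsx"
# cube.view(view_def)  # hypothetical call that previously failed with "not unique=..."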
@@ -1000,8 +1006,12 @@ class CubeLogs:
         keys = set(self.keys_time) - {columns_to_fix}
         select = data[self.keys_time]
         select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
         assert select_agg[columns_to_fix].max() <= 1, (
-            f"Column {columns_to_fix!r} has two distinct values at least for one date\n"
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
             f"{select_agg[select_agg[columns_to_fix] > 1]}"
         )
 
@@ -1038,6 +1048,16 @@ class CubeLogs:
             f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
             f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
         )
+        select = res[self.keys_time]
+        select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
+        # assert select_agg[columns_to_fix].max() <= 1, (
+        #     f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+        #     f"max={select_agg[columns_to_fix].max()}\n"
+        #     f"{select_agg[select_agg[columns_to_fix] > 1]}"
+        # )
         return res
 
     def _dropna(
@@ -1977,7 +1997,9 @@ class CubeLogsPerformance(CubeLogs):
         * **cmd:** command lines
         * **raw-short:** raw data without all the unused columns
         """
-        fix_aggregation_change = ["model_speedup_input_set", "model_test_with"]
+        # This does not work.
+        # used to be ["model_speedup_input_set", "model_test_with"]
+        fix_aggregation_change = []  # type: ignore[var-annotated]
         fs = ["suite", "model_suite", "task", "model_name", "model_task"]
         index_cols = self._filter_column(fs, self.keys_time)
         assert index_cols, (
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
     :return: ModelProto
 
     The function raises an error if not supported.
+    An example:
+
+    .. code-block:: python
+
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_onnx_model_from_input_tensors,
+        )
+        import onnx
+
+        proto = create_onnx_model_from_input_tensors(
+            dict(
+                query_states=query_states,
+                key_states=key_states,
+                value_states=value_states,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
+                scaling=self.scaling,
+                attn_output=attn_output,
+            )
+        )
+        onnx.save(proto, "attention_inputs.onnx")
     """
     if switch_low_high is None:
         switch_low_high = sys.byteorder != "big"
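The docstring example above reuses tensors captured inside an attention layer (query_states, self.scaling, ...); a self-contained variant with made-up inputs, assuming only the documented behaviour (a dictionary of tensors or scalars in, a ModelProto out):

import torch
import onnx
from onnx_diagnostic.helpers.mini_onnx_builder import create_onnx_model_from_input_tensors

# Arbitrary shapes, only meant to show the call pattern.
proto = create_onnx_model_from_input_tensors(
    dict(
        query_states=torch.randn(2, 4, 3, 8),
        scaling=0.125,
    )
)
onnx.save(proto, "attention_inputs.onnx")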
@@ -461,7 +482,17 @@ def _unflatten(
         if spl[-1] == "array":
             return pos + 1, outputs[pos]
         if spl[-1] == "tensor":
-            return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            try:
+                return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            except TypeError:
+                # it should be more robust
+                import ml_dtypes
+
+                if outputs[pos].dtype == ml_dtypes.bfloat16:
+                    return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
+                        torch.bfloat16
+                    )
+                raise
         raise AssertionError(f"Unexpected name {name!r} in {names}")
 
     res: List[Any] = []
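The new fallback works around the fact that torch.from_numpy rejects arrays using ml_dtypes; a standalone sketch of the same conversion path:

import numpy as np
import ml_dtypes
import torch

a = np.array([1.5, -2.25], dtype=ml_dtypes.bfloat16)
# torch.from_numpy(a) raises TypeError, so the array goes through float64
# and is cast back to bfloat16 on the torch side.
t = torch.from_numpy(a.astype(float)).to(torch.bfloat16)
print(t.dtype)  # torch.bfloat16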
@@ -532,6 +563,12 @@ def _unflatten(
             return d
         return ty(res)
 
+    if end and len(res) == 1:
+        if res[0] is None:
+            return next_pos, ty()
+        if isinstance(res[0], tuple) and len(res[0]) == 2 and res[0] == ("dict.", None):
+            return next_pos, ty()
+        return next_pos, _make(ty, res)
     return next_pos, (
         ty() if len(res) == 1 and res[0] in (("dict.", None), None) else _make(ty, res)
     )
@@ -557,6 +594,19 @@ def create_input_tensors_from_onnx_model(
     :return: restored data
 
     See example :ref:`l-plot-intermediate-results` for an example.
+
+    .. code-block:: python
+
+        import os
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_input_tensors_from_onnx_model,
+        )
+        from onnx_diagnostic.helpers import string_type
+
+        restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
+        for k, v in restored.items():
+            print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
+
     """
     if engine == "ExtendedReferenceEvaluator":
         from ..reference import ExtendedReferenceEvaluator
@@ -595,6 +645,8 @@ def create_input_tensors_from_onnx_model(
             return float(output[0])
         if name == "tensor":
             return torch.from_numpy(output).to(device)
-        raise AssertionError(f"Unexpected name {name!r} in {names}")
+        assert name.startswith(
+            ("list_", "list.", "dict.", "tuple_", "tuple.")
+        ), f"Unexpected name {name!r} in {names}"
 
     return _unflatten(sep, names, got, device=device)[1]
@@ -671,21 +671,18 @@ def np_dtype_to_tensor_dtype(dt: np.dtype) -> int:  # noqa: F821
     try:
         return oh.np_dtype_to_tensor_dtype(dt)
     except ValueError:
-        try:
-            import ml_dtypes
-        except ImportError:
-            ml_dtypes = None  # type: ignore
-        if ml_dtypes is not None:
-            if dt == ml_dtypes.bfloat16:
-                return TensorProto.BFLOAT16
-            if dt == ml_dtypes.float8_e4m3fn:
-                return TensorProto.FLOAT8E4M3FN
-            if dt == ml_dtypes.float8_e4m3fnuz:
-                return TensorProto.FLOAT8E4M3FNUZ
-            if dt == ml_dtypes.float8_e5m2:
-                return TensorProto.FLOAT8E5M2
-            if dt == ml_dtypes.float8_e5m2fnuz:
-                return TensorProto.FLOAT8E5M2FNUZ
+        import ml_dtypes
+
+        if dt == ml_dtypes.bfloat16:
+            return TensorProto.BFLOAT16
+        if dt == ml_dtypes.float8_e4m3fn:
+            return TensorProto.FLOAT8E4M3FN
+        if dt == ml_dtypes.float8_e4m3fnuz:
+            return TensorProto.FLOAT8E4M3FNUZ
+        if dt == ml_dtypes.float8_e5m2:
+            return TensorProto.FLOAT8E5M2
+        if dt == ml_dtypes.float8_e5m2fnuz:
+            return TensorProto.FLOAT8E5M2FNUZ
         if dt == np.float32:
             return TensorProto.FLOAT
         if dt == np.float16:
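With this change ml_dtypes becomes a hard requirement on the fallback path instead of an optional import; a small check of the mapping (the module path onnx_diagnostic.helpers.onnx_helper is assumed from the hunk context):

import numpy as np
import ml_dtypes
from onnx import TensorProto
from onnx_diagnostic.helpers.onnx_helper import np_dtype_to_tensor_dtype  # assumed location

assert np_dtype_to_tensor_dtype(np.dtype(ml_dtypes.bfloat16)) == TensorProto.BFLOAT16
assert np_dtype_to_tensor_dtype(np.dtype(np.float32)) == TensorProto.FLOAT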