onnx-diagnostic 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +56 -3
  3. onnx_diagnostic/export/dynamic_shapes.py +24 -10
  4. onnx_diagnostic/export/shape_helper.py +6 -2
  5. onnx_diagnostic/ext_test_case.py +2 -0
  6. onnx_diagnostic/helpers/_log_helper.py +6 -6
  7. onnx_diagnostic/helpers/cache_helper.py +326 -18
  8. onnx_diagnostic/helpers/config_helper.py +10 -0
  9. onnx_diagnostic/helpers/helper.py +152 -11
  10. onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
  11. onnx_diagnostic/helpers/onnx_helper.py +13 -7
  12. onnx_diagnostic/helpers/torch_helper.py +33 -11
  13. onnx_diagnostic/reference/ops/op_cast_like.py +15 -11
  14. onnx_diagnostic/reference/torch_ops/__init__.py +1 -0
  15. onnx_diagnostic/reference/torch_ops/unary_ops.py +7 -0
  16. onnx_diagnostic/tasks/__init__.py +2 -0
  17. onnx_diagnostic/tasks/automatic_speech_recognition.py +6 -2
  18. onnx_diagnostic/tasks/feature_extraction.py +7 -3
  19. onnx_diagnostic/tasks/fill_mask.py +6 -2
  20. onnx_diagnostic/tasks/image_classification.py +6 -2
  21. onnx_diagnostic/tasks/image_text_to_text.py +289 -62
  22. onnx_diagnostic/tasks/mask_generation.py +143 -0
  23. onnx_diagnostic/tasks/mixture_of_expert.py +2 -2
  24. onnx_diagnostic/tasks/object_detection.py +6 -2
  25. onnx_diagnostic/tasks/sentence_similarity.py +6 -2
  26. onnx_diagnostic/tasks/summarization.py +7 -2
  27. onnx_diagnostic/tasks/text2text_generation.py +7 -2
  28. onnx_diagnostic/tasks/text_classification.py +6 -2
  29. onnx_diagnostic/tasks/text_generation.py +14 -16
  30. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +3 -3
  31. onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
  32. onnx_diagnostic/torch_export_patches/patch_inputs.py +5 -2
  33. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -4
  34. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +428 -129
  35. onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +60 -41
  36. onnx_diagnostic/torch_models/hghub/hub_data.py +5 -0
  37. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
  38. onnx_diagnostic/torch_models/validate.py +1 -0
  39. {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/METADATA +2 -2
  40. {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/RECORD +43 -42
  41. {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/WHEEL +0 -0
  42. {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/licenses/LICENSE.txt +0 -0
  43. {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/top_level.txt +0 -0
onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py
@@ -1,15 +1,25 @@
 from typing import Any, List, Set, Tuple
 import torch
-import transformers
 from transformers.cache_utils import (
     DynamicCache,
-    MambaCache,
     EncoderDecoderCache,
+    HybridCache,
     SlidingWindowCache,
     StaticCache,
 )
+
+try:
+    from transformers.models.mamba.modeling_mamba import MambaCache
+except ImportError:
+    from transformers.cache_utils import MambaCache
 from transformers.modeling_outputs import BaseModelOutput
-from ...helpers.cache_helper import make_static_cache
+from ...helpers.cache_helper import (
+    make_dynamic_cache,
+    make_hybrid_cache,
+    make_sliding_window_cache,
+    make_static_cache,
+    CacheKeyValue,
+)
 from . import make_serialization_function_for_dataclass

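The new imports come from onnx_diagnostic.helpers.cache_helper (its own diff above adds 326 lines). Judging from how they are used later in this file, the make_*_cache constructors take one (key, value) tensor pair per layer; a minimal, hedged sketch of that call convention (tensor shapes are illustrative only):

    import torch
    from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache

    # one (key, value) pair per layer, shaped (batch, num_heads, seq_len, head_dim)
    past = make_dynamic_cache(
        [(torch.randn(1, 2, 6, 8), torch.randn(1, 2, 6, 8)) for _ in range(3)]
    )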
@@ -29,6 +39,12 @@ def flatten_mamba_cache(
     mamba_cache: MambaCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
+    assert isinstance(mamba_cache.conv_states, list) and isinstance(
+        mamba_cache.ssm_states, list
+    ), (
+        f"Unexpected types for conv_states and ssm_states {type(mamba_cache.conv_states)}, "
+        f"{type(mamba_cache.ssm_states)}"
+    )
     flat = [
         ("conv_states", mamba_cache.conv_states),
         ("ssm_states", mamba_cache.ssm_states),
@@ -85,9 +101,8 @@ def flatten_dynamic_cache(
     dynamic_cache: DynamicCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-    if hasattr(transformers.cache_utils, "_flatten_dynamic_cache"):
-        return transformers.cache_utils._flatten_dynamic_cache(dynamic_cache)
-    flat = [("key_cache", dynamic_cache.key_cache), ("value_cache", dynamic_cache.value_cache)]
+    ca = CacheKeyValue(dynamic_cache)
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
     return [f[1] for f in flat], [f[0] for f in flat]


@@ -95,8 +110,6 @@ def flatten_with_keys_dynamic_cache(
     dynamic_cache: DynamicCache,
 ) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-    if hasattr(transformers.cache_utils, "_flatten_with_keys_dynamic_cache"):
-        return transformers.cache_utils._flatten_with_keys_dynamic_cache(dynamic_cache)
     values, context = flatten_dynamic_cache(dynamic_cache)
     return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context

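CacheKeyValue replaces the old hasattr probes on transformers.cache_utils: recent transformers releases no longer store DynamicCache contents as plain key_cache/value_cache lists, so the serializers go through one adapter instead of per-version checks. An illustrative sketch of what such an adapter has to do (this is not the package's actual implementation, and the layers/keys/values attribute names are assumptions about newer transformers layouts):

    class CacheKeyValueSketch:
        """Expose key_cache / value_cache lists regardless of the cache layout."""

        def __init__(self, cache):
            if hasattr(cache, "layers"):  # assumed newer per-layer storage
                self.key_cache = [layer.keys for layer in cache.layers]
                self.value_cache = [layer.values for layer in cache.layers]
            else:  # older flat-list layout
                self.key_cache = list(cache.key_cache)
                self.value_cache = list(cache.value_cache)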
@@ -105,15 +118,36 @@ def unflatten_dynamic_cache(
     values: List[Any], context: torch.utils._pytree.Context, output_type=None
 ) -> DynamicCache:
     """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
-    if hasattr(transformers.cache_utils, "_unflatten_dynamic_cache"):
-        assert output_type is None, f"output_type={output_type} not supported"
-        return transformers.cache_utils._unflatten_dynamic_cache(values, context)
+    return make_dynamic_cache(list(zip(values[0], values[1])))

-    cache = transformers.cache_utils.DynamicCache()
-    values = dict(zip(context, values))
-    for k, v in values.items():
-        setattr(cache, k, v)
-    return cache
+
+#############
+# HybridCache
+#############
+
+
+def flatten_hybrid_cache(
+    cache: HybridCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.HybridCache` with python objects."""
+    ca = CacheKeyValue(cache)
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
+    return [f[1] for f in flat], [f[0] for f in flat]
+
+
+def flatten_with_keys_hybrid_cache(
+    cache: HybridCache,
+) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.HybridCache` with python objects."""
+    values, context = flatten_hybrid_cache(cache)
+    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
+
+
+def unflatten_hybrid_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> HybridCache:
+    """Restores a :class:`transformers.cache_utils.HybridCache` from python objects."""
+    return make_hybrid_cache(list(zip(values[0], values[1])))


#############
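The HybridCache trio mirrors the other caches: flatten returns ([keys, values], ["key_cache", "value_cache"]) and unflatten rebuilds the cache through make_hybrid_cache. For these functions to take effect during torch.export they have to be registered with torch's pytree machinery, which the package does elsewhere (presumably in onnx_export_serialization.py, also touched in this release); a minimal sketch of such a registration with the torch API:

    import torch

    torch.utils._pytree.register_pytree_node(
        HybridCache,
        flatten_hybrid_cache,
        unflatten_hybrid_cache,
        serialized_type_name="transformers.cache_utils.HybridCache",
        flatten_with_keys_fn=flatten_with_keys_hybrid_cache,
    )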
@@ -125,12 +159,13 @@ def flatten_static_cache(
     cache: StaticCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.StaticCache` with python objects."""
-    assert not cache.key_cache or cache.max_cache_len == cache.key_cache[0].shape[2], (
+    ca = CacheKeyValue(cache)
+    assert not ca.key_cache or cache.max_cache_len == ca.key_cache[0].shape[2], (
         f"Serialization does not work when "
         f"cache.max_cache_len={cache.max_cache_len} != "
-        f"cache.key_cache[0].shape[2]={cache.key_cache[0].shape[2]}"
+        f"cache.key_cache[0].shape[2]={ca.key_cache[0].shape[2]}"
     )
-    flat = [("key_cache", cache.key_cache), ("value_cache", cache.value_cache)]
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
     return [f[1] for f in flat], [f[0] for f in flat]

@@ -163,7 +198,8 @@ def flatten_sliding_window_cache(
     Serializes a :class:`transformers.cache_utils.SlidingWindowCache`
     with python objects.
     """
-    flat = [("key_cache", cache.key_cache), ("value_cache", cache.value_cache)]
+    ca = CacheKeyValue(cache)
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
     return [f[1] for f in flat], [f[0] for f in flat]

@@ -183,26 +219,7 @@ def unflatten_sliding_window_cache(
 ) -> SlidingWindowCache:
     """Restores a :class:`transformers.cache_utils.SlidingWindowCache` from python objects."""
     key_cache, value_cache = values
-
-    class _config:
-        def __init__(self):
-            self.head_dim = key_cache[0].shape[-1]
-            self.num_attention_heads = key_cache[0].shape[1]
-            self.num_hidden_layers = len(key_cache)
-            self.sliding_window = key_cache[0].shape[2]
-
-    cache = SlidingWindowCache(
-        _config(),
-        max_batch_size=key_cache[0].shape[0],
-        max_cache_len=key_cache[0].shape[2],  # sligding window
-        device=key_cache[0].device,
-        dtype=key_cache[0].dtype,
-    )
-
-    values = dict(zip(context, values))
-    for k, v in values.items():
-        setattr(cache, k, v)
-    return cache
+    return make_sliding_window_cache(list(zip(values[0], values[1])))


#####################
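The hand-built _config shim and the attribute patching are gone; unflatten now delegates to make_sliding_window_cache, which can infer batch size, head count and window length from the tensors themselves. A hedged round-trip sketch (shapes are illustrative; make_sliding_window_cache is assumed to accept (key, value) pairs like the other constructors in cache_helper):

    import torch
    from onnx_diagnostic.helpers.cache_helper import make_sliding_window_cache

    pairs = [(torch.randn(1, 2, 4, 8), torch.randn(1, 2, 4, 8)) for _ in range(2)]
    cache = make_sliding_window_cache(pairs)
    values, context = flatten_sliding_window_cache(cache)
    assert context == ["key_cache", "value_cache"]
    restored = unflatten_sliding_window_cache(values, context)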
@@ -244,7 +261,9 @@ def unflatten_encoder_decoder_cache(
 ) -> EncoderDecoderCache:
     """Restores a :class:`transformers.cache_utils.EncoderDecoderCache` from python objects."""
     dictionary = torch.utils._pytree._dict_unflatten(values, context)
-    return EncoderDecoderCache(**dictionary)
+    return EncoderDecoderCache(
+        dictionary["self_attention_cache"], dictionary["cross_attention_cache"]
+    )


#############
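Rebuilding EncoderDecoderCache positionally avoids relying on keyword names that could drift across transformers versions; the pytree context still carries the two keys, so the values are picked out of the unflattened dictionary explicitly. A short usage sketch of the constructor this relies on:

    from transformers.cache_utils import DynamicCache, EncoderDecoderCache

    self_attn = DynamicCache()   # illustrative empty caches
    cross_attn = DynamicCache()
    cache = EncoderDecoderCache(self_attn, cross_attn)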
onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -37,6 +37,7 @@ __data_arch__ = textwrap.dedent(
     DebertaModel,feature-extraction
     DebertaV2Model,feature-extraction
     DecisionTransformerModel,reinforcement-learning
+    DeepseekV3ForCausalLM,text-generation
     DeiTModel,image-feature-extraction
     DetrModel,image-feature-extraction
     Dinov2Model,image-feature-extraction
@@ -52,9 +53,12 @@ __data_arch__ = textwrap.dedent(
     GPTJModel,feature-extraction
     GPTNeoModel,feature-extraction
     GPTNeoXForCausalLM,text-generation
+    GptOssForCausalLM,text-generation
     GemmaForCausalLM,text-generation
     Gemma2ForCausalLM,text-generation
     Gemma3ForConditionalGeneration,image-text-to-text
+    Gemma3ForCausalLM,text-generation
+    Glm4vMoeForConditionalGeneration,image-text-to-text
     GraniteForCausalLM,text-generation
     GroupViTModel,feature-extraction
     HieraForImageClassification,image-classification
@@ -107,6 +111,7 @@ __data_arch__ = textwrap.dedent(
     PvtForImageClassification,image-classification
     Qwen2ForCausalLM,text-generation
     Qwen2_5_VLForConditionalGeneration,image-text-to-text
+    Qwen3MoeForCausalLM,text-generation
     RTDetrForObjectDetection,object-detection
     RegNetModel,image-feature-extraction
     RemBertModel,feature-extraction
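hub_data.py gains task mappings for DeepseekV3ForCausalLM, GptOssForCausalLM, Gemma3ForCausalLM, Glm4vMoeForConditionalGeneration and Qwen3MoeForCausalLM. The table is a plain "Architecture,task" CSV held in a dedented string; an illustrative sketch of turning such a block into a lookup dict (only the __data_arch__ name and line format come from this file, the parsing code does not):

    import textwrap

    __data_arch__ = textwrap.dedent(
        """
        DeepseekV3ForCausalLM,text-generation
        Gemma3ForCausalLM,text-generation
        Qwen3MoeForCausalLM,text-generation
        """
    )

    arch_to_task = dict(
        line.strip().split(",", maxsplit=1)
        for line in __data_arch__.splitlines()
        if line.strip()
    )
    assert arch_to_task["Gemma3ForCausalLM"] == "text-generation"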
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py
@@ -1366,6 +1366,236 @@ def _ccached_fxmarty_tiny_random_gemmaforcausallm():
     )


+def _ccached_fxmarty_sam_vit_tiny_random():
+    "fxmarty/sam-vit-tiny-random"
+    return transformers.SamConfig(
+        **{
+            "_commit_hash": "a7c34ea5d2b33a3bc34d34dc9a7b2417c0eaa809",
+            "_name_or_path": "facebook/sam-vit-base",
+            "architectures": ["SamModel"],
+            "initializer_range": 0.02,
+            "mask_decoder_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "attention_downsample_rate": 2,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "hidden_act": "relu",
+                "hidden_size": 32,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "iou_head_depth": 3,
+                "iou_head_hidden_dim": 256,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "max_length": 20,
+                "min_length": 0,
+                "mlp_dim": 2048,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_attention_heads": 8,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_hidden_layers": 2,
+                "num_multimask_outputs": 3,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "prefix": null,
+                "problem_type": null,
+                "pruned_heads": {},
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_bfloat16": false,
+            },
+            "model_type": "sam",
+            "prompt_encoder_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "hidden_act": "gelu",
+                "hidden_size": 32,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "image_embedding_size": 64,
+                "image_size": 1024,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "mask_input_channels": 16,
+                "max_length": 20,
+                "min_length": 0,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_point_embeddings": 4,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "patch_size": 16,
+                "prefix": null,
+                "problem_type": null,
+                "pruned_heads": {},
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_bfloat16": false,
+            },
+            "torch_dtype": "float32",
+            "transformers_version": null,
+            "vision_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "attention_dropout": 0.0,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "dropout": 0.0,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "global_attn_indexes": [2, 5, 8, 11],
+                "hidden_act": "gelu",
+                "hidden_size": 96,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "image_size": 1024,
+                "initializer_factor": 1.0,
+                "initializer_range": 1e-10,
+                "intermediate_size": 768,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "max_length": 20,
+                "min_length": 0,
+                "mlp_dim": 384,
+                "mlp_ratio": 4.0,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_attention_heads": 1,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_channels": 3,
+                "num_hidden_layers": 12,
+                "num_pos_feats": 16,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_channels": 32,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "patch_size": 16,
+                "prefix": null,
+                "problem_type": null,
+                "projection_dim": 64,
+                "pruned_heads": {},
+                "qkv_bias": true,
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_abs_pos": true,
+                "use_bfloat16": false,
+                "use_rel_pos": true,
+                "window_size": 14,
+            },
+        }
+    )
+
+
 def _ccached_hf_internal_testing_tiny_random_gptneoxforcausallm():
     "hf-internal-testing/tiny-random-GPTNeoXForCausalLM"
     return transformers.GPTNeoXConfig(
@@ -4330,3 +4560,61 @@ def _ccached_diffusers_tiny_torch_full_checker_unet():
         "up_block_types": ["CrossAttnUpBlock2D", "UpBlock2D"],
         "use_linear_projection": false,
     }
+
+
+def _ccached_riny_random_gemma_3():
+    "tiny-random/gemma-3"
+    return transformers.Gemma3Config(
+        **{
+            "architectures": ["Gemma3ForConditionalGeneration"],
+            "boi_token_index": 255999,
+            "eoi_token_index": 256000,
+            "eos_token_id": [1, 106],
+            "image_token_index": 262144,
+            "initializer_range": 0.02,
+            "mm_tokens_per_image": 256,
+            "model_type": "gemma3",
+            "text_config": {
+                "attention_bias": false,
+                "attention_dropout": 0.0,
+                "attn_logit_softcapping": null,
+                "cache_implementation": "hybrid",
+                "final_logit_softcapping": null,
+                "head_dim": 32,
+                "hidden_activation": "gelu_pytorch_tanh",
+                "hidden_size": 32,
+                "initializer_range": 0.02,
+                "intermediate_size": 128,
+                "max_position_embeddings": 131072,
+                "model_type": "gemma3_text",
+                "num_attention_heads": 1,
+                "num_hidden_layers": 2,
+                "num_key_value_heads": 1,
+                "query_pre_attn_scalar": 168,
+                "rms_norm_eps": 1e-06,
+                "rope_local_base_freq": 10000.0,
+                "rope_scaling": {"factor": 8.0, "rope_type": "linear"},
+                "rope_theta": 1000000.0,
+                "sliding_window": 1024,
+                "sliding_window_pattern": 2,
+                "use_cache": true,
+                "vocab_size": 262208,
+            },
+            "torch_dtype": "bfloat16",
+            "transformers_version": "4.50.0.dev0",
+            "vision_config": {
+                "attention_dropout": 0.0,
+                "hidden_act": "gelu_pytorch_tanh",
+                "hidden_size": 32,
+                "image_size": 896,
+                "intermediate_size": 128,
+                "layer_norm_eps": 1e-06,
+                "model_type": "siglip_vision_model",
+                "num_attention_heads": 1,
+                "num_channels": 3,
+                "num_hidden_layers": 2,
+                "patch_size": 14,
+                "vision_use_head": false,
+            },
+        }
+    )
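Cached configurations such as these let the test helpers build tiny, untrained models without downloading anything from the Hub. A hedged sketch of how such a helper could be consumed (class availability depends on the installed transformers version; the function name is taken verbatim from this diff):

    import transformers
    from onnx_diagnostic.torch_models.hghub.hub_data_cached_configs import (
        _ccached_riny_random_gemma_3,
    )

    config = _ccached_riny_random_gemma_3()  # a transformers.Gemma3Config
    model = transformers.Gemma3ForConditionalGeneration(config)  # random weights, no download
    print(sum(p.numel() for p in model.parameters()))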
onnx_diagnostic/torch_models/validate.py
@@ -426,6 +426,7 @@ def validate_model(
             print(f"[validate_model] validate model id {model_id!r}, subfolder={subfolder!r}")
         else:
             print(f"[validate_model] validate model id {model_id!r}")
+        print(f"[validate_model] patch={patch!r}")
         if model_options:
            print(f"[validate_model] model_options={model_options!r}")
         print(f"[validate_model] get dummy inputs with input_options={input_options}...")
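validate_model now also logs the patch argument up front, which makes it easier to tell whether the export patches were active in a given run. A hedged invocation sketch (model_id and patch appear in this diff; the remaining keyword names and the return value are assumptions about the validate helper):

    from onnx_diagnostic.torch_models.validate import validate_model

    summary, data = validate_model(
        "arnir0/Tiny-LLM",   # illustrative tiny model id
        do_run=True,         # assumed: run the model on dummy inputs
        patch=True,          # apply torch_export_patches during export
        verbose=1,           # assumed
    )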
{onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: onnx-diagnostic
-Version: 0.7.5
+Version: 0.7.7
 Summary: Investigate ONNX models
 Home-page: https://github.com/sdpython/onnx-diagnostic
 Author: Xavier Dupré
@@ -27,7 +27,7 @@ Requires-Dist: numpy
 Requires-Dist: onnx>=1.16.0
 Requires-Dist: onnxruntime>=1.21
 Requires-Dist: optree
-Requires-Dist: torch>=2.7
+Requires-Dist: torch>=2.8
 Requires-Dist: torch_geometric
 Dynamic: author
 Dynamic: author-email