onnx-diagnostic 0.7.5__py3-none-any.whl → 0.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +56 -3
- onnx_diagnostic/export/dynamic_shapes.py +24 -10
- onnx_diagnostic/export/shape_helper.py +6 -2
- onnx_diagnostic/ext_test_case.py +2 -0
- onnx_diagnostic/helpers/_log_helper.py +6 -6
- onnx_diagnostic/helpers/cache_helper.py +326 -18
- onnx_diagnostic/helpers/config_helper.py +10 -0
- onnx_diagnostic/helpers/helper.py +152 -11
- onnx_diagnostic/helpers/mini_onnx_builder.py +7 -2
- onnx_diagnostic/helpers/onnx_helper.py +13 -7
- onnx_diagnostic/helpers/torch_helper.py +33 -11
- onnx_diagnostic/reference/ops/op_cast_like.py +15 -11
- onnx_diagnostic/reference/torch_ops/__init__.py +1 -0
- onnx_diagnostic/reference/torch_ops/unary_ops.py +7 -0
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +6 -2
- onnx_diagnostic/tasks/feature_extraction.py +7 -3
- onnx_diagnostic/tasks/fill_mask.py +6 -2
- onnx_diagnostic/tasks/image_classification.py +6 -2
- onnx_diagnostic/tasks/image_text_to_text.py +289 -62
- onnx_diagnostic/tasks/mask_generation.py +143 -0
- onnx_diagnostic/tasks/mixture_of_expert.py +2 -2
- onnx_diagnostic/tasks/object_detection.py +6 -2
- onnx_diagnostic/tasks/sentence_similarity.py +6 -2
- onnx_diagnostic/tasks/summarization.py +7 -2
- onnx_diagnostic/tasks/text2text_generation.py +7 -2
- onnx_diagnostic/tasks/text_classification.py +6 -2
- onnx_diagnostic/tasks/text_generation.py +14 -16
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +3 -3
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +17 -1
- onnx_diagnostic/torch_export_patches/patch_inputs.py +5 -2
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -4
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +428 -129
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +60 -41
- onnx_diagnostic/torch_models/hghub/hub_data.py +5 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +288 -0
- onnx_diagnostic/torch_models/validate.py +1 -0
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/RECORD +43 -42
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/top_level.txt +0 -0
onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py

@@ -1,15 +1,25 @@
 from typing import Any, List, Set, Tuple
 import torch
-import transformers
 from transformers.cache_utils import (
     DynamicCache,
-    MambaCache,
     EncoderDecoderCache,
+    HybridCache,
     SlidingWindowCache,
     StaticCache,
 )
+
+try:
+    from transformers.models.mamba.modeling_mamba import MambaCache
+except ImportError:
+    from transformers.cache_utils import MambaCache
 from transformers.modeling_outputs import BaseModelOutput
-from ...helpers.cache_helper import
+from ...helpers.cache_helper import (
+    make_dynamic_cache,
+    make_hybrid_cache,
+    make_sliding_window_cache,
+    make_static_cache,
+    CacheKeyValue,
+)
 from . import make_serialization_function_for_dataclass
 
 
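The new import block pulls cache constructors from onnx_diagnostic.helpers.cache_helper. A minimal usage sketch, not code from this diff: it assumes make_dynamic_cache takes a list of (key, value) tensor pairs, as unflatten_dynamic_cache below suggests, and that the handlers in this module have been registered (see onnx_export_serialization.py in the file list):

import torch
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache

# Two layers of illustrative (key, value) tensors.
past = make_dynamic_cache(
    [(torch.randn(1, 2, 4, 8), torch.randn(1, 2, 4, 8)) for _ in range(2)]
)
# With the pytree handlers registered, the cache flattens to its leaf tensors.
flat, spec = torch.utils._pytree.tree_flatten(past)
print(len(flat))  # 4 leaves: key and value for each of the 2 layers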
@@ -29,6 +39,12 @@ def flatten_mamba_cache(
     mamba_cache: MambaCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
+    assert isinstance(mamba_cache.conv_states, list) and isinstance(
+        mamba_cache.ssm_states, list
+    ), (
+        f"Unexpected types for conv_states and ssm_states {type(mamba_cache.conv_states)}, "
+        f"{type(mamba_cache.ssm_states)}"
+    )
     flat = [
         ("conv_states", mamba_cache.conv_states),
         ("ssm_states", mamba_cache.ssm_states),
@@ -85,9 +101,8 @@ def flatten_dynamic_cache(
     dynamic_cache: DynamicCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-
-
-    flat = [("key_cache", dynamic_cache.key_cache), ("value_cache", dynamic_cache.value_cache)]
+    ca = CacheKeyValue(dynamic_cache)
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
     return [f[1] for f in flat], [f[0] for f in flat]
 
 
@@ -95,8 +110,6 @@ def flatten_with_keys_dynamic_cache(
     dynamic_cache: DynamicCache,
 ) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
-    if hasattr(transformers.cache_utils, "_flatten_with_keys_dynamic_cache"):
-        return transformers.cache_utils._flatten_with_keys_dynamic_cache(dynamic_cache)
     values, context = flatten_dynamic_cache(dynamic_cache)
     return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
 
@@ -105,15 +118,36 @@ def unflatten_dynamic_cache(
     values: List[Any], context: torch.utils._pytree.Context, output_type=None
 ) -> DynamicCache:
     """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
-
-    assert output_type is None, f"output_type={output_type} not supported"
-    return transformers.cache_utils._unflatten_dynamic_cache(values, context)
+    return make_dynamic_cache(list(zip(values[0], values[1])))
 
-
-
-
-
-
+
+#############
+# HybridCache
+#############
+
+
+def flatten_hybrid_cache(
+    cache: HybridCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.HybridCache` with python objects."""
+    ca = CacheKeyValue(cache)
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
+    return [f[1] for f in flat], [f[0] for f in flat]
+
+
+def flatten_with_keys_hybrid_cache(
+    cache: HybridCache,
+) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.HybridCache` with python objects."""
+    values, context = flatten_hybrid_cache(cache)
+    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
+
+
+def unflatten_hybrid_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> HybridCache:
+    """Restores a :class:`transformers.cache_utils.HybridCache` from python objects."""
+    return make_hybrid_cache(list(zip(values[0], values[1])))
 
 
 #############
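The new HybridCache block follows the same flatten / flatten-with-keys / unflatten triple as the other caches. A sketch of how such a triple is typically wired into torch's pytree registry; the package's actual registration lives in onnx_export_serialization.py and may differ in details:

import torch
from transformers.cache_utils import HybridCache
from onnx_diagnostic.torch_export_patches.serialization.transformers_impl import (
    flatten_hybrid_cache,
    flatten_with_keys_hybrid_cache,
    unflatten_hybrid_cache,
)

# Registering the triple lets torch.export treat HybridCache as a container.
torch.utils._pytree.register_pytree_node(
    HybridCache,
    flatten_hybrid_cache,
    unflatten_hybrid_cache,
    serialized_type_name="transformers.cache_utils.HybridCache",
    flatten_with_keys_fn=flatten_with_keys_hybrid_cache,
)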
@@ -125,12 +159,13 @@ def flatten_static_cache(
     cache: StaticCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.StaticCache` with python objects."""
-
+    ca = CacheKeyValue(cache)
+    assert not ca.key_cache or cache.max_cache_len == ca.key_cache[0].shape[2], (
         f"Serialization does not work when "
         f"cache.max_cache_len={cache.max_cache_len} != "
-        f"cache.key_cache[0].shape[2]={
+        f"cache.key_cache[0].shape[2]={ca.key_cache[0].shape[2]}"
     )
-    flat = [("key_cache",
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
     return [f[1] for f in flat], [f[0] for f in flat]
 
 
@@ -163,7 +198,8 @@ def flatten_sliding_window_cache(
     Serializes a :class:`transformers.cache_utils.SlidingWindowCache`
     with python objects.
     """
-
+    ca = CacheKeyValue(cache)
+    flat = [("key_cache", ca.key_cache), ("value_cache", ca.value_cache)]
     return [f[1] for f in flat], [f[0] for f in flat]
 
 
@@ -183,26 +219,7 @@ def unflatten_sliding_window_cache(
 ) -> SlidingWindowCache:
     """Restores a :class:`transformers.cache_utils.SlidingWindowCache` from python objects."""
     key_cache, value_cache = values
-
-    class _config:
-        def __init__(self):
-            self.head_dim = key_cache[0].shape[-1]
-            self.num_attention_heads = key_cache[0].shape[1]
-            self.num_hidden_layers = len(key_cache)
-            self.sliding_window = key_cache[0].shape[2]
-
-    cache = SlidingWindowCache(
-        _config(),
-        max_batch_size=key_cache[0].shape[0],
-        max_cache_len=key_cache[0].shape[2],  # sliding window
-        device=key_cache[0].device,
-        dtype=key_cache[0].dtype,
-    )
-
-    values = dict(zip(context, values))
-    for k, v in values.items():
-        setattr(cache, k, v)
-    return cache
+    return make_sliding_window_cache(list(zip(values[0], values[1])))
 
 
 #####################
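The twenty-line _config shim is gone because make_sliding_window_cache now owns cache construction. A round-trip sketch, under the assumption (taken from the unflatten call above) that it accepts a list of (key, value) pairs:

import torch
from onnx_diagnostic.helpers.cache_helper import make_sliding_window_cache
from onnx_diagnostic.torch_export_patches.serialization.transformers_impl import (
    flatten_sliding_window_cache,
    unflatten_sliding_window_cache,
)

cache = make_sliding_window_cache(
    [(torch.randn(1, 1, 16, 8), torch.randn(1, 1, 16, 8))]  # one layer, shapes illustrative
)
values, context = flatten_sliding_window_cache(cache)
restored = unflatten_sliding_window_cache(values, context)
# Flatten/unflatten should reproduce the same key tensors.
assert torch.equal(values[0][0], flatten_sliding_window_cache(restored)[0][0][0])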
@@ -244,7 +261,9 @@ def unflatten_encoder_decoder_cache(
 ) -> EncoderDecoderCache:
     """Restores a :class:`transformers.cache_utils.EncoderDecoderCache` from python objects."""
     dictionary = torch.utils._pytree._dict_unflatten(values, context)
-    return EncoderDecoderCache(
+    return EncoderDecoderCache(
+        dictionary["self_attention_cache"], dictionary["cross_attention_cache"]
+    )
 
 
 #############
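The rewrite passes the two sub-caches positionally, matching EncoderDecoderCache's (self_attention_cache, cross_attention_cache) constructor. A small construction sketch with illustrative tensor shapes:

import torch
from transformers.cache_utils import EncoderDecoderCache
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache

self_attn = make_dynamic_cache([(torch.randn(1, 1, 2, 4), torch.randn(1, 1, 2, 4))])
cross_attn = make_dynamic_cache([(torch.randn(1, 1, 3, 4), torch.randn(1, 1, 3, 4))])
cache = EncoderDecoderCache(self_attn, cross_attn)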
onnx_diagnostic/torch_models/hghub/hub_data.py

@@ -37,6 +37,7 @@ __data_arch__ = textwrap.dedent(
     DebertaModel,feature-extraction
     DebertaV2Model,feature-extraction
     DecisionTransformerModel,reinforcement-learning
+    DeepseekV3ForCausalLM,text-generation
     DeiTModel,image-feature-extraction
     DetrModel,image-feature-extraction
     Dinov2Model,image-feature-extraction
@@ -52,9 +53,12 @@ __data_arch__ = textwrap.dedent(
     GPTJModel,feature-extraction
     GPTNeoModel,feature-extraction
     GPTNeoXForCausalLM,text-generation
+    GptOssForCausalLM,text-generation
     GemmaForCausalLM,text-generation
     Gemma2ForCausalLM,text-generation
     Gemma3ForConditionalGeneration,image-text-to-text
+    Gemma3ForCausalLM,text-generation
+    Glm4vMoeForConditionalGeneration,image-text-to-text
     GraniteForCausalLM,text-generation
     GroupViTModel,feature-extraction
     HieraForImageClassification,image-classification
@@ -107,6 +111,7 @@ __data_arch__ = textwrap.dedent(
     PvtForImageClassification,image-classification
     Qwen2ForCausalLM,text-generation
     Qwen2_5_VLForConditionalGeneration,image-text-to-text
+    Qwen3MoeForCausalLM,text-generation
     RTDetrForObjectDetection,object-detection
     RegNetModel,image-feature-extraction
     RemBertModel,feature-extraction
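Each added row maps a transformers architecture to its default task. A hypothetical lookup sketch over the same CSV format (the real accessor in hub_data.py is not shown in this diff):

import textwrap

__data_arch__ = textwrap.dedent(
    """
    DeepseekV3ForCausalLM,text-generation
    GptOssForCausalLM,text-generation
    Qwen3MoeForCausalLM,text-generation
    """
)
# One "Architecture,task" pair per line.
arch2task = dict(line.split(",") for line in __data_arch__.strip().splitlines())
print(arch2task["Qwen3MoeForCausalLM"])  # text-generation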
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py

@@ -1366,6 +1366,236 @@ def _ccached_fxmarty_tiny_random_gemmaforcausallm():
     )
 
 
+def _ccached_fxmarty_sam_vit_tiny_random():
+    "fxmarty/sam-vit-tiny-random"
+    return transformers.SamConfig(
+        **{
+            "_commit_hash": "a7c34ea5d2b33a3bc34d34dc9a7b2417c0eaa809",
+            "_name_or_path": "facebook/sam-vit-base",
+            "architectures": ["SamModel"],
+            "initializer_range": 0.02,
+            "mask_decoder_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "attention_downsample_rate": 2,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "hidden_act": "relu",
+                "hidden_size": 32,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "iou_head_depth": 3,
+                "iou_head_hidden_dim": 256,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "max_length": 20,
+                "min_length": 0,
+                "mlp_dim": 2048,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_attention_heads": 8,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_hidden_layers": 2,
+                "num_multimask_outputs": 3,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "prefix": null,
+                "problem_type": null,
+                "pruned_heads": {},
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_bfloat16": false,
+            },
+            "model_type": "sam",
+            "prompt_encoder_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "hidden_act": "gelu",
+                "hidden_size": 32,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "image_embedding_size": 64,
+                "image_size": 1024,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "mask_input_channels": 16,
+                "max_length": 20,
+                "min_length": 0,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_point_embeddings": 4,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "patch_size": 16,
+                "prefix": null,
+                "problem_type": null,
+                "pruned_heads": {},
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_bfloat16": false,
+            },
+            "torch_dtype": "float32",
+            "transformers_version": null,
+            "vision_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "attention_dropout": 0.0,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "dropout": 0.0,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "global_attn_indexes": [2, 5, 8, 11],
+                "hidden_act": "gelu",
+                "hidden_size": 96,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "image_size": 1024,
+                "initializer_factor": 1.0,
+                "initializer_range": 1e-10,
+                "intermediate_size": 768,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "max_length": 20,
+                "min_length": 0,
+                "mlp_dim": 384,
+                "mlp_ratio": 4.0,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_attention_heads": 1,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_channels": 3,
+                "num_hidden_layers": 12,
+                "num_pos_feats": 16,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_channels": 32,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "patch_size": 16,
+                "prefix": null,
+                "problem_type": null,
+                "projection_dim": 64,
+                "pruned_heads": {},
+                "qkv_bias": true,
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_abs_pos": true,
+                "use_bfloat16": false,
+                "use_rel_pos": true,
+                "window_size": 14,
+            },
+        }
+    )
+
+
 def _ccached_hf_internal_testing_tiny_random_gptneoxforcausallm():
     "hf-internal-testing/tiny-random-GPTNeoXForCausalLM"
     return transformers.GPTNeoXConfig(
@@ -4330,3 +4560,61 @@ def _ccached_diffusers_tiny_torch_full_checker_unet():
         "up_block_types": ["CrossAttnUpBlock2D", "UpBlock2D"],
         "use_linear_projection": false,
     }
+
+
+def _ccached_riny_random_gemma_3():
+    "tiny-random/gemma-3"
+    return transformers.Gemma3Config(
+        **{
+            "architectures": ["Gemma3ForConditionalGeneration"],
+            "boi_token_index": 255999,
+            "eoi_token_index": 256000,
+            "eos_token_id": [1, 106],
+            "image_token_index": 262144,
+            "initializer_range": 0.02,
+            "mm_tokens_per_image": 256,
+            "model_type": "gemma3",
+            "text_config": {
+                "attention_bias": false,
+                "attention_dropout": 0.0,
+                "attn_logit_softcapping": null,
+                "cache_implementation": "hybrid",
+                "final_logit_softcapping": null,
+                "head_dim": 32,
+                "hidden_activation": "gelu_pytorch_tanh",
+                "hidden_size": 32,
+                "initializer_range": 0.02,
+                "intermediate_size": 128,
+                "max_position_embeddings": 131072,
+                "model_type": "gemma3_text",
+                "num_attention_heads": 1,
+                "num_hidden_layers": 2,
+                "num_key_value_heads": 1,
+                "query_pre_attn_scalar": 168,
+                "rms_norm_eps": 1e-06,
+                "rope_local_base_freq": 10000.0,
+                "rope_scaling": {"factor": 8.0, "rope_type": "linear"},
+                "rope_theta": 1000000.0,
+                "sliding_window": 1024,
+                "sliding_window_pattern": 2,
+                "use_cache": true,
+                "vocab_size": 262208,
+            },
+            "torch_dtype": "bfloat16",
+            "transformers_version": "4.50.0.dev0",
+            "vision_config": {
+                "attention_dropout": 0.0,
+                "hidden_act": "gelu_pytorch_tanh",
+                "hidden_size": 32,
+                "image_size": 896,
+                "intermediate_size": 128,
+                "layer_norm_eps": 1e-06,
+                "model_type": "siglip_vision_model",
+                "num_attention_heads": 1,
+                "num_channels": 3,
+                "num_hidden_layers": 2,
+                "patch_size": 14,
+                "vision_use_head": false,
+            },
+        }
+    )
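Both added helpers return fully materialized configs so tests can build tiny models without touching the Hub. A usage sketch; the attribute access is inferred from the config body above:

from onnx_diagnostic.torch_models.hghub.hub_data_cached_configs import (
    _ccached_riny_random_gemma_3,  # function name as shipped in the wheel
)

config = _ccached_riny_random_gemma_3()
print(type(config).__name__)           # Gemma3Config
print(config.text_config.hidden_size)  # 32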
onnx_diagnostic/torch_models/validate.py

@@ -426,6 +426,7 @@ def validate_model(
         print(f"[validate_model] validate model id {model_id!r}, subfolder={subfolder!r}")
     else:
         print(f"[validate_model] validate model id {model_id!r}")
+    print(f"[validate_model] patch={patch!r}")
     if model_options:
         print(f"[validate_model] model_options={model_options!r}")
     print(f"[validate_model] get dummy inputs with input_options={input_options}...")
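The new print surfaces the patch argument early in the log. A hypothetical call sketch; only the parameters echoed by the prints above (model_id, patch, model_options, input_options) are grounded in this diff, the rest of the signature is assumed:

from onnx_diagnostic.torch_models.validate import validate_model

# verbose and the (summary, data) return shape are assumptions.
summary, data = validate_model(
    "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
    patch=True,
    verbose=1,
)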
{onnx_diagnostic-0.7.5.dist-info → onnx_diagnostic-0.7.7.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: onnx-diagnostic
-Version: 0.7.5
+Version: 0.7.7
 Summary: Investigate ONNX models
 Home-page: https://github.com/sdpython/onnx-diagnostic
 Author: Xavier Dupré
@@ -27,7 +27,7 @@ Requires-Dist: numpy
 Requires-Dist: onnx>=1.16.0
 Requires-Dist: onnxruntime>=1.21
 Requires-Dist: optree
-Requires-Dist: torch>=2.
+Requires-Dist: torch>=2.8
 Requires-Dist: torch_geometric
 Dynamic: author
 Dynamic: author-email