onnx-diagnostic 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +2 -2
- onnx_diagnostic/helpers/_log_helper.py +4 -2
- onnx_diagnostic/helpers/cache_helper.py +4 -1
- onnx_diagnostic/helpers/config_helper.py +47 -0
- onnx_diagnostic/helpers/log_helper.py +7 -1
- onnx_diagnostic/helpers/model_builder_helper.py +5 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +1 -1
- onnx_diagnostic/tasks/feature_extraction.py +1 -1
- onnx_diagnostic/tasks/fill_mask.py +1 -1
- onnx_diagnostic/tasks/image_text_to_text.py +17 -4
- onnx_diagnostic/tasks/sentence_similarity.py +1 -1
- onnx_diagnostic/tasks/summarization.py +1 -1
- onnx_diagnostic/tasks/text2text_generation.py +1 -1
- onnx_diagnostic/tasks/text_classification.py +1 -1
- onnx_diagnostic/tasks/text_generation.py +1 -1
- onnx_diagnostic/tasks/zero_shot_image_classification.py +1 -1
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +4 -1
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +108 -1
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +70 -1
- onnx_diagnostic/torch_models/hghub/model_inputs.py +13 -1
- onnx_diagnostic/torch_models/validate.py +25 -3
- {onnx_diagnostic-0.7.7.dist-info → onnx_diagnostic-0.7.9.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.7.dist-info → onnx_diagnostic-0.7.9.dist-info}/RECORD +27 -27
- {onnx_diagnostic-0.7.7.dist-info → onnx_diagnostic-0.7.9.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.7.dist-info → onnx_diagnostic-0.7.9.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.7.dist-info → onnx_diagnostic-0.7.9.dist-info}/top_level.txt +0 -0
onnx_diagnostic/__init__.py
CHANGED
|
@@ -850,13 +850,13 @@ def get_parser_agg() -> ArgumentParser:
|
|
|
850
850
|
"--filter-in",
|
|
851
851
|
default="",
|
|
852
852
|
help="adds a filter to filter in data, syntax is\n"
|
|
853
|
-
'``"<column1>:<value1>;<value2
|
|
853
|
+
'``"<column1>:<value1>;<value2>//<column2>:<value3>"`` ...',
|
|
854
854
|
)
|
|
855
855
|
parser.add_argument(
|
|
856
856
|
"--filter-out",
|
|
857
857
|
default="",
|
|
858
858
|
help="adds a filter to filter out data, syntax is\n"
|
|
859
|
-
'``"<column1>:<value1>;<value2
|
|
859
|
+
'``"<column1>:<value1>;<value2>//<column2>:<value3>"`` ...',
|
|
860
860
|
)
|
|
861
861
|
parser.add_argument(
|
|
862
862
|
"--sbs",
|
|
@@ -118,9 +118,11 @@ def filter_data(
|
|
|
118
118
|
if isinstance(fmt, str):
|
|
119
119
|
cols = fmt.split("//")
|
|
120
120
|
for c in cols:
|
|
121
|
-
assert ":" in c, f"Unexpected value {c!r} in fmt={fmt!r}"
|
|
121
|
+
assert ":" in c, f"Unexpected value {c!r} in fmt={fmt!r}, cols={cols!r}"
|
|
122
122
|
spl = c.split(":")
|
|
123
|
-
assert
|
|
123
|
+
assert (
|
|
124
|
+
len(spl) == 2
|
|
125
|
+
), f"Unexpected value {c!r} in fmt={fmt!r}, spl={spl}, cols={cols}"
|
|
124
126
|
name, fil = spl
|
|
125
127
|
cond[name] = set(fil.split(";"))
|
|
126
128
|
return cond
|
|
@@ -96,13 +96,16 @@ def flatten_unflatten_for_dynamic_shapes(
|
|
|
96
96
|
return tuple(subtrees)
|
|
97
97
|
if spec.type is list:
|
|
98
98
|
return list(subtrees)
|
|
99
|
+
if spec.type is None and not subtrees:
|
|
100
|
+
return None
|
|
99
101
|
if spec.context:
|
|
100
102
|
# This is a custom class with attributes.
|
|
101
103
|
# It is returned as a list.
|
|
102
104
|
return list(subtrees)
|
|
103
105
|
raise ValueError(
|
|
104
106
|
f"Unable to interpret spec type {spec.type} "
|
|
105
|
-
f"(type is {type(spec.type)}, context is {spec.context})
|
|
107
|
+
f"(type is {type(spec.type)}, context is {spec.context}), "
|
|
108
|
+
f"spec={spec}, subtrees={subtrees}"
|
|
106
109
|
)
|
|
107
110
|
# This is a list.
|
|
108
111
|
return subtrees
|
|
@@ -119,4 +119,51 @@ def default_num_hidden_layers():
|
|
|
119
119
|
It is lower when the unit tests are running
|
|
120
120
|
when ``UNITTEST_GOING=1``.
|
|
121
121
|
"""
|
|
122
|
+
import torch
|
|
123
|
+
|
|
124
|
+
if torch.cuda.is_available():
|
|
125
|
+
capa = torch.cuda.get_device_capability(0)
|
|
126
|
+
if capa[0] < 9:
|
|
127
|
+
return 2
|
|
122
128
|
return 2 if os.environ.get("UNITTEST_GOING", "0") == "1" else 4
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def build_diff_config(config0, config1):
|
|
132
|
+
"""
|
|
133
|
+
Returns all the modified values between two configuration
|
|
134
|
+
"""
|
|
135
|
+
import torch
|
|
136
|
+
|
|
137
|
+
diff = {}
|
|
138
|
+
for k in config0:
|
|
139
|
+
assert isinstance(k, str), f"k={k!r}, wrong type in {config0}"
|
|
140
|
+
if k not in config1:
|
|
141
|
+
v0 = getattr(config0, k) if hasattr(config0, k) else config0[k]
|
|
142
|
+
diff[k] = f"-{v0}"
|
|
143
|
+
for k in config1:
|
|
144
|
+
assert isinstance(k, str), f"k={k!r}, wrong type in {config1}"
|
|
145
|
+
if k not in config0:
|
|
146
|
+
v1 = getattr(config1, k) if hasattr(config1, k) else config1[k]
|
|
147
|
+
diff[k] = f"+{v1}"
|
|
148
|
+
for k in config0:
|
|
149
|
+
if k not in config1:
|
|
150
|
+
continue
|
|
151
|
+
v0 = getattr(config0, k) if hasattr(config0, k) else config0[k]
|
|
152
|
+
v1 = getattr(config1, k) if hasattr(config1, k) else config1[k]
|
|
153
|
+
if (
|
|
154
|
+
v0 is None
|
|
155
|
+
or v1 is None
|
|
156
|
+
or isinstance(v1, (float, int, bool, str, list, tuple, torch.dtype))
|
|
157
|
+
or (
|
|
158
|
+
isinstance(v0, dict)
|
|
159
|
+
and isinstance(v1, dict)
|
|
160
|
+
and all(isinstance(k, int) for k in v1)
|
|
161
|
+
)
|
|
162
|
+
):
|
|
163
|
+
if v1 != v0:
|
|
164
|
+
diff[k] = f"{v0} -> {v1}"
|
|
165
|
+
else:
|
|
166
|
+
d = build_diff_config(v0, v1)
|
|
167
|
+
if d:
|
|
168
|
+
diff[k] = d
|
|
169
|
+
return diff
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import enum
|
|
2
2
|
import io
|
|
3
|
+
import os
|
|
3
4
|
import pprint
|
|
4
5
|
import re
|
|
5
6
|
import warnings
|
|
@@ -270,6 +271,10 @@ class CubePlot:
|
|
|
270
271
|
def _to_images_bar(
|
|
271
272
|
self, verbose: int = 0, merge: bool = True, title_suffix: Optional[str] = None
|
|
272
273
|
) -> List[bytes]:
|
|
274
|
+
"""
|
|
275
|
+
Environment variable ``FIGSIZEH`` can be set to increase the
|
|
276
|
+
graph height. Default is 1.0.
|
|
277
|
+
"""
|
|
273
278
|
assert merge, f"merge={merge} not implemented yet"
|
|
274
279
|
import matplotlib.pyplot as plt
|
|
275
280
|
|
|
@@ -279,7 +284,8 @@ class CubePlot:
|
|
|
279
284
|
n_cols = 3
|
|
280
285
|
nn = df.shape[1] // n_cols
|
|
281
286
|
nn += int(df.shape[1] % n_cols != 0)
|
|
282
|
-
|
|
287
|
+
ratio = float(os.environ.get("FIGSIZEH", "1"))
|
|
288
|
+
fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * df.shape[0] / 3 * ratio))
|
|
283
289
|
pos = 0
|
|
284
290
|
imgs = []
|
|
285
291
|
for c in self._make_loop(df.columns, verbose):
|
|
@@ -201,10 +201,12 @@ def create_model_builder(
|
|
|
201
201
|
arch_map = {
|
|
202
202
|
"ChatGLMForConditionalGeneration": builder.ChatGLMModel,
|
|
203
203
|
"ChatGLMModel": builder.ChatGLMModel,
|
|
204
|
+
"Ernie4_5_ForCausalLM": builder.ErnieModel,
|
|
204
205
|
"GemmaForCausalLM": builder.Gemma2Model,
|
|
205
206
|
"Gemma3ForCausalLM": builder.Gemma3Model,
|
|
206
207
|
"Gemma3ForConditionalGeneration": builder.Gemma3Model,
|
|
207
208
|
"GraniteForCausalLM": builder.GraniteModel,
|
|
209
|
+
"GptOssForCausalLM": builder.GPTOSSModel,
|
|
208
210
|
"LlamaForCausalLM": builder.LlamaModel,
|
|
209
211
|
"MistralForCausalLM": builder.MistralModel,
|
|
210
212
|
"NemotronForCausalLM": builder.NemotronModel,
|
|
@@ -235,6 +237,7 @@ def create_model_builder(
|
|
|
235
237
|
"Phi4MMForCausalLM": builder.Phi4MMModel,
|
|
236
238
|
"Qwen2ForCausalLM": builder.QwenModel,
|
|
237
239
|
"Qwen3ForCausalLM": builder.Qwen3Model,
|
|
240
|
+
"SmolLM3ForCausalLM": builder.SmolLM3Model,
|
|
238
241
|
}
|
|
239
242
|
|
|
240
243
|
assert config.architectures[0] in arch_map, (
|
|
@@ -276,6 +279,8 @@ def create_model_builder(
|
|
|
276
279
|
for key in text_config:
|
|
277
280
|
if not hasattr(config, key):
|
|
278
281
|
setattr(config, key, getattr(text_config, key))
|
|
282
|
+
elif config.architectures[0] == "GptOssForCausalLM":
|
|
283
|
+
delattr(config, "quantization_config")
|
|
279
284
|
elif (
|
|
280
285
|
config.architectures[0] == "PhiMoEForCausalLM"
|
|
281
286
|
and config.max_position_embeddings != config.original_max_position_embeddings
|
|
@@ -47,7 +47,7 @@ def get_inputs(
|
|
|
47
47
|
assert (
|
|
48
48
|
"cls_cache" not in kwargs
|
|
49
49
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
50
|
-
batch =
|
|
50
|
+
batch = "batch"
|
|
51
51
|
seq_length = "sequence_length"
|
|
52
52
|
shapes = {
|
|
53
53
|
"input_ids": {0: batch, 1: seq_length},
|
|
@@ -42,7 +42,7 @@ def get_inputs(
|
|
|
42
42
|
assert (
|
|
43
43
|
"cls_cache" not in kwargs
|
|
44
44
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
45
|
-
batch =
|
|
45
|
+
batch = "batch"
|
|
46
46
|
seq_length = "sequence_length"
|
|
47
47
|
shapes = {
|
|
48
48
|
"input_ids": {0: batch, 1: seq_length},
|
|
@@ -23,14 +23,20 @@ def reduce_model_config(config: Any) -> Dict[str, Any]:
|
|
|
23
23
|
config.vision_config.num_hidden_layers = min(
|
|
24
24
|
config.vision_config.num_hidden_layers, 2
|
|
25
25
|
)
|
|
26
|
+
if hasattr(config.vision_config, "num_heads"):
|
|
27
|
+
config.vision_config.num_heads = min(config.vision_config.num_heads, 4)
|
|
26
28
|
if hasattr(config.vision_config, "image_size"):
|
|
27
|
-
config.vision_config.image_size = min(config.vision_config.image_size,
|
|
29
|
+
config.vision_config.image_size = min(config.vision_config.image_size, 168 // 2)
|
|
28
30
|
if hasattr(config.vision_config, "intermediate_size"):
|
|
29
31
|
config.vision_config.intermediate_size = min(
|
|
30
32
|
config.vision_config.intermediate_size, 1076
|
|
31
33
|
)
|
|
32
34
|
if hasattr(config.vision_config, "patch_size"):
|
|
33
|
-
config.vision_config.patch_size = min(config.vision_config.patch_size,
|
|
35
|
+
config.vision_config.patch_size = min(config.vision_config.patch_size, 1)
|
|
36
|
+
if hasattr(config.vision_config, "temporal_patch_size"):
|
|
37
|
+
config.vision_config.temporal_patch_size = min(
|
|
38
|
+
config.vision_config.temporal_patch_size, 8
|
|
39
|
+
)
|
|
34
40
|
if hasattr(config.vision_config, "hidden_size"):
|
|
35
41
|
config.vision_config.hidden_size = min(config.vision_config.hidden_size, 16)
|
|
36
42
|
if hasattr(config, "text_config"):
|
|
@@ -101,7 +107,7 @@ def _get_inputs_gemma3(
|
|
|
101
107
|
assert (
|
|
102
108
|
"cls_cache" not in kwargs
|
|
103
109
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
104
|
-
batch =
|
|
110
|
+
batch = "batch"
|
|
105
111
|
seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
|
|
106
112
|
# cache_length = "cache_length" # torch.export.Dim("cache_length", min=1, max=4096)
|
|
107
113
|
|
|
@@ -224,7 +230,7 @@ def get_inputs(
|
|
|
224
230
|
assert (
|
|
225
231
|
"cls_cache" not in kwargs
|
|
226
232
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
227
|
-
batch =
|
|
233
|
+
batch = "batch"
|
|
228
234
|
batch_img = torch.export.Dim("batch_img", min=1, max=1024)
|
|
229
235
|
seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
|
|
230
236
|
cache_length = "cache_length" # torch.export.Dim("cache_length", min=1, max=4096)
|
|
@@ -245,6 +251,7 @@ def get_inputs(
|
|
|
245
251
|
else {0: batch_img}
|
|
246
252
|
),
|
|
247
253
|
"image_attention_mask": {0: batch, 1: seq_length, 2: images},
|
|
254
|
+
"image_grid_thw": {0: batch},
|
|
248
255
|
"use_cache": None,
|
|
249
256
|
}
|
|
250
257
|
|
|
@@ -256,6 +263,11 @@ def get_inputs(
|
|
|
256
263
|
# input_ids[input_ids == image_token_index] = pad_token_id
|
|
257
264
|
token_type_ids = torch.zeros_like(input_ids)
|
|
258
265
|
token_type_ids[input_ids == image_token_index] = 1
|
|
266
|
+
image_grid_thw = torch.zeros((n_images, 3), dtype=torch.int64)
|
|
267
|
+
image_grid_thw[:, 1] = height
|
|
268
|
+
image_grid_thw[:, 2] = width
|
|
269
|
+
image_grid_thw[0, :] //= 2
|
|
270
|
+
image_grid_thw[:, 0] = torch.arange(n_images, dtype=image_grid_thw.dtype)
|
|
259
271
|
|
|
260
272
|
inputs = dict(
|
|
261
273
|
input_ids=input_ids,
|
|
@@ -291,6 +303,7 @@ def get_inputs(
|
|
|
291
303
|
torch.int64
|
|
292
304
|
),
|
|
293
305
|
token_type_ids=token_type_ids,
|
|
306
|
+
image_grid_thw=image_grid_thw,
|
|
294
307
|
use_cache=True, # Gemma3 does not set this value to true when a cache is provided
|
|
295
308
|
)
|
|
296
309
|
res = dict(inputs=inputs, dynamic_shapes=shapes)
|
|
@@ -42,7 +42,7 @@ def get_inputs(
|
|
|
42
42
|
assert (
|
|
43
43
|
"cls_cache" not in kwargs
|
|
44
44
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
45
|
-
batch =
|
|
45
|
+
batch = "batch"
|
|
46
46
|
seq_length = "seq_length"
|
|
47
47
|
shapes = {
|
|
48
48
|
"input_ids": {0: batch, 1: seq_length},
|
|
@@ -70,7 +70,7 @@ def get_inputs(
|
|
|
70
70
|
assert (
|
|
71
71
|
"cls_cache" not in kwargs
|
|
72
72
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
73
|
-
batch =
|
|
73
|
+
batch = "batch"
|
|
74
74
|
seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
|
|
75
75
|
cache_length = "cache_length_key" # torch.export.Dim("cache_length", min=1, max=4096)
|
|
76
76
|
cache_length2 = "cache_length_val" # torch.export.Dim("cache_length2", min=1, max=4096)
|
|
@@ -72,7 +72,7 @@ def get_inputs(
|
|
|
72
72
|
assert (
|
|
73
73
|
"cls_cache" not in kwargs
|
|
74
74
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
75
|
-
batch =
|
|
75
|
+
batch = "batch"
|
|
76
76
|
seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
|
|
77
77
|
cache_length = "cache_length_key"
|
|
78
78
|
cache_length2 = "cache_length_val"
|
|
@@ -42,7 +42,7 @@ def get_inputs(
|
|
|
42
42
|
assert (
|
|
43
43
|
"cls_cache" not in kwargs
|
|
44
44
|
), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
|
|
45
|
-
batch =
|
|
45
|
+
batch = "batch"
|
|
46
46
|
seq_length = "seq_length" # torch.export.Dim("sequence_length", min=1, max=1024)
|
|
47
47
|
shapes = {
|
|
48
48
|
"input_ids": {0: batch, 1: seq_length},
|
|
@@ -83,7 +83,7 @@ def get_inputs(
|
|
|
83
83
|
:class:`transformers.cache_utils.DynamicCache`
|
|
84
84
|
:return: dictionary
|
|
85
85
|
"""
|
|
86
|
-
batch =
|
|
86
|
+
batch = "batch"
|
|
87
87
|
seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
|
|
88
88
|
cache_length = "cache_length" # torch.export.Dim("cache_length", min=1, max=4096)
|
|
89
89
|
|
|
@@ -65,7 +65,7 @@ def get_inputs(
|
|
|
65
65
|
input_width, int
|
|
66
66
|
), f"Unexpected type for input_height {type(input_height)}{config}"
|
|
67
67
|
|
|
68
|
-
batch =
|
|
68
|
+
batch = "batch"
|
|
69
69
|
seq_length = "seq_length" # torch.export.Dim("seq_length", min=1, max=4096)
|
|
70
70
|
shapes = {
|
|
71
71
|
"input_ids": {0: batch, 1: seq_length},
|
|
@@ -205,7 +205,10 @@ class patched_ShapeEnv:
|
|
|
205
205
|
# Precondition: a == tgt
|
|
206
206
|
assert isinstance(a, sympy.Symbol)
|
|
207
207
|
|
|
208
|
-
if
|
|
208
|
+
if (
|
|
209
|
+
getattr(self, "allow_complex_guards_as_runtime_asserts", False)
|
|
210
|
+
or getattr(self, "prefer_deferred_runtime_asserts_over_guards", False)
|
|
211
|
+
) and not _is_supported_equivalence(tgt):
|
|
209
212
|
# continuing leads to placeholder shapes
|
|
210
213
|
# having complex expressions that we can't resolve
|
|
211
214
|
return
|
|
@@ -1032,7 +1032,8 @@ def patched_modeling_marian_eager_attention_forward(
|
|
|
1032
1032
|
|
|
1033
1033
|
|
|
1034
1034
|
class common_RotaryEmbedding(torch.nn.Module):
|
|
1035
|
-
|
|
1035
|
+
# This may cause some issues.
|
|
1036
|
+
# @torch.no_grad()
|
|
1036
1037
|
@patched_dynamic_rope_update
|
|
1037
1038
|
def forward(self, x, position_ids):
|
|
1038
1039
|
inv_freq_expanded = (
|
|
@@ -1482,3 +1483,109 @@ class patched_VisionAttention(torch.nn.Module):
|
|
|
1482
1483
|
attn_output = attn_output.reshape(seq_length, -1)
|
|
1483
1484
|
attn_output = self.proj(attn_output)
|
|
1484
1485
|
return attn_output
|
|
1486
|
+
|
|
1487
|
+
|
|
1488
|
+
try:
|
|
1489
|
+
import transformers.models.qwen3_moe
|
|
1490
|
+
|
|
1491
|
+
patch_qwen3 = True
|
|
1492
|
+
except ImportError:
|
|
1493
|
+
patch_qwen3 = False
|
|
1494
|
+
|
|
1495
|
+
if patch_qwen3:
|
|
1496
|
+
|
|
1497
|
+
class patched_Qwen3MoeSparseMoeBlock(torch.nn.Module):
|
|
1498
|
+
_PATCHES_ = ["forward", "_forward_expert_loop"]
|
|
1499
|
+
_PATCHED_CLASS_ = (
|
|
1500
|
+
transformers.models.qwen3_moe.modeling_qwen3_moe.Qwen3MoeSparseMoeBlock
|
|
1501
|
+
)
|
|
1502
|
+
|
|
1503
|
+
def _forward_expert_loop(
|
|
1504
|
+
self,
|
|
1505
|
+
final_hidden_states,
|
|
1506
|
+
expert_mask_idx,
|
|
1507
|
+
hidden_states,
|
|
1508
|
+
routing_weights,
|
|
1509
|
+
expert_idx: int,
|
|
1510
|
+
):
|
|
1511
|
+
# idx, top_x = torch.where(expert_mask_idx.squeeze(0))
|
|
1512
|
+
idx, top_x = torch.nonzero(expert_mask_idx, as_tuple=True)
|
|
1513
|
+
hidden_dim = hidden_states.shape[-1]
|
|
1514
|
+
current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
|
|
1515
|
+
expert_current_state = self.experts[expert_idx](current_state)
|
|
1516
|
+
current_hidden_states = expert_current_state * routing_weights[top_x, idx, None]
|
|
1517
|
+
return final_hidden_states.index_add(
|
|
1518
|
+
0, top_x, current_hidden_states.to(hidden_states.dtype)
|
|
1519
|
+
)
|
|
1520
|
+
|
|
1521
|
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
|
1522
|
+
""" """
|
|
1523
|
+
batch_size, sequence_length, hidden_dim = hidden_states.shape
|
|
1524
|
+
hidden_states = hidden_states.view(-1, hidden_dim)
|
|
1525
|
+
# router_logits: (batch * sequence_length, n_experts)
|
|
1526
|
+
router_logits = self.gate(hidden_states)
|
|
1527
|
+
|
|
1528
|
+
routing_weights = torch.nn.functional.softmax(
|
|
1529
|
+
router_logits, dim=1, dtype=torch.float
|
|
1530
|
+
)
|
|
1531
|
+
routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
|
|
1532
|
+
if self.norm_topk_prob: # only diff with mixtral sparse moe block!
|
|
1533
|
+
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
|
|
1534
|
+
# we cast back to the input dtype
|
|
1535
|
+
routing_weights = routing_weights.to(hidden_states.dtype)
|
|
1536
|
+
|
|
1537
|
+
final_hidden_states = torch.zeros(
|
|
1538
|
+
(batch_size * sequence_length, hidden_dim),
|
|
1539
|
+
dtype=hidden_states.dtype,
|
|
1540
|
+
device=hidden_states.device,
|
|
1541
|
+
)
|
|
1542
|
+
|
|
1543
|
+
# One hot encode the selected experts to create an expert mask
|
|
1544
|
+
# this will be used to easily index which expert is going to be sollicitated
|
|
1545
|
+
expert_mask = torch.nn.functional.one_hot(
|
|
1546
|
+
selected_experts, num_classes=self.num_experts
|
|
1547
|
+
).permute(2, 1, 0)
|
|
1548
|
+
|
|
1549
|
+
# Loop over all available experts in the model
|
|
1550
|
+
# and perform the computation on each expert
|
|
1551
|
+
expert_sum = expert_mask.sum(dim=(-1, -2))
|
|
1552
|
+
# expert_hit = torch.greater(expert_sum, 0).nonzero()
|
|
1553
|
+
# for expert_idx in expert_hit:
|
|
1554
|
+
for expert_idx in range(self.num_experts):
|
|
1555
|
+
# initial code has a squeeze but it is not possible to do that.
|
|
1556
|
+
# expert_mask_idx = expert_mask[expert_idx].squeeze(0)
|
|
1557
|
+
expert_mask_idx = expert_mask[expert_idx]
|
|
1558
|
+
final_hidden_states = torch.cond(
|
|
1559
|
+
(expert_sum[expert_idx] > 0).item(),
|
|
1560
|
+
lambda final_hidden_states, expert_mask, hidden_states, routing_weights, _i=expert_idx: self._forward_expert_loop( # noqa: E501
|
|
1561
|
+
final_hidden_states,
|
|
1562
|
+
expert_mask,
|
|
1563
|
+
hidden_states,
|
|
1564
|
+
routing_weights,
|
|
1565
|
+
expert_idx=_i,
|
|
1566
|
+
),
|
|
1567
|
+
lambda final_hidden_states, *args: final_hidden_states.clone(),
|
|
1568
|
+
[final_hidden_states, expert_mask_idx, hidden_states, routing_weights],
|
|
1569
|
+
)
|
|
1570
|
+
|
|
1571
|
+
# if expert_sum[expert_idx] > 0:
|
|
1572
|
+
# idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
|
|
1573
|
+
|
|
1574
|
+
# Index the correct hidden states and compute the expert hidden state for
|
|
1575
|
+
# the current expert. We need to make sure to multiply the output hidden
|
|
1576
|
+
# states by `routing_weights` on the corresponding tokens (top-1 and top-2)
|
|
1577
|
+
# current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
|
|
1578
|
+
# current_hidden_states = (
|
|
1579
|
+
# expert_layer(current_state) * routing_weights[top_x, idx, None]
|
|
1580
|
+
# )
|
|
1581
|
+
|
|
1582
|
+
# However `index_add_` only support torch tensors for indexing so we'll use
|
|
1583
|
+
# the `top_x` tensor here.
|
|
1584
|
+
# final_hidden_states.index_add_(
|
|
1585
|
+
# 0, top_x, current_hidden_states.to(hidden_states.dtype)
|
|
1586
|
+
# )
|
|
1587
|
+
|
|
1588
|
+
final_hidden_states = final_hidden_states.reshape(
|
|
1589
|
+
batch_size, sequence_length, hidden_dim
|
|
1590
|
+
)
|
|
1591
|
+
return final_hidden_states, router_logits
|
|
@@ -4562,7 +4562,7 @@ def _ccached_diffusers_tiny_torch_full_checker_unet():
|
|
|
4562
4562
|
}
|
|
4563
4563
|
|
|
4564
4564
|
|
|
4565
|
-
def
|
|
4565
|
+
def _ccached_tiny_random_gemma_3():
|
|
4566
4566
|
"tiny-random/gemma-3"
|
|
4567
4567
|
return transformers.Gemma3Config(
|
|
4568
4568
|
**{
|
|
@@ -4618,3 +4618,72 @@ def _ccached_riny_random_gemma_3():
|
|
|
4618
4618
|
},
|
|
4619
4619
|
}
|
|
4620
4620
|
)
|
|
4621
|
+
|
|
4622
|
+
|
|
4623
|
+
def _ccached_zai_glm_45():
|
|
4624
|
+
"zai-org/GLM-4.5V"
|
|
4625
|
+
return transformers.Glm4vMoeConfig(
|
|
4626
|
+
**{
|
|
4627
|
+
"architectures": ["Glm4vMoeForConditionalGeneration"],
|
|
4628
|
+
"model_type": "glm4v_moe",
|
|
4629
|
+
"text_config": {
|
|
4630
|
+
"pad_token_id": 151329,
|
|
4631
|
+
"vocab_size": 151552,
|
|
4632
|
+
"eos_token_id": [151329, 151336, 151338],
|
|
4633
|
+
"image_end_token_id": 151340,
|
|
4634
|
+
"image_start_token_id": 151339,
|
|
4635
|
+
"image_token_id": 151363,
|
|
4636
|
+
"head_dim": 128,
|
|
4637
|
+
"attention_bias": true,
|
|
4638
|
+
"attention_dropout": 0.0,
|
|
4639
|
+
"first_k_dense_replace": 1,
|
|
4640
|
+
"hidden_act": "silu",
|
|
4641
|
+
"hidden_size": 4096,
|
|
4642
|
+
"initializer_range": 0.02,
|
|
4643
|
+
"intermediate_size": 10944,
|
|
4644
|
+
"max_position_embeddings": 65536,
|
|
4645
|
+
"model_type": "glm4v_moe_text",
|
|
4646
|
+
"moe_intermediate_size": 1408,
|
|
4647
|
+
"n_group": 1,
|
|
4648
|
+
"n_routed_experts": 128,
|
|
4649
|
+
"n_shared_experts": 1,
|
|
4650
|
+
"norm_topk_prob": true,
|
|
4651
|
+
"num_attention_heads": 96,
|
|
4652
|
+
"num_experts_per_tok": 8,
|
|
4653
|
+
"num_hidden_layers": 46,
|
|
4654
|
+
"num_key_value_heads": 8,
|
|
4655
|
+
"partial_rotary_factor": 0.5,
|
|
4656
|
+
"rms_norm_eps": 1e-05,
|
|
4657
|
+
"torch_dtype": "bfloat16",
|
|
4658
|
+
"rope_scaling": {"rope_type": "default", "mrope_section": [8, 12, 12]},
|
|
4659
|
+
"rope_theta": 10000.0,
|
|
4660
|
+
"routed_scaling_factor": 1.0,
|
|
4661
|
+
"topk_group": 1,
|
|
4662
|
+
"use_cache": true,
|
|
4663
|
+
"use_qk_norm": false,
|
|
4664
|
+
},
|
|
4665
|
+
"torch_dtype": "bfloat16",
|
|
4666
|
+
"transformers_version": "4.55.0.dev0",
|
|
4667
|
+
"video_end_token_id": 151342,
|
|
4668
|
+
"video_start_token_id": 151341,
|
|
4669
|
+
"video_token_id": 151364,
|
|
4670
|
+
"vision_config": {
|
|
4671
|
+
"attention_bias": false,
|
|
4672
|
+
"attention_dropout": 0.0,
|
|
4673
|
+
"depth": 24,
|
|
4674
|
+
"hidden_act": "silu",
|
|
4675
|
+
"hidden_size": 1536,
|
|
4676
|
+
"image_size": 336,
|
|
4677
|
+
"in_channels": 3,
|
|
4678
|
+
"initializer_range": 0.02,
|
|
4679
|
+
"intermediate_size": 10944,
|
|
4680
|
+
"model_type": "glm4v_moe",
|
|
4681
|
+
"num_heads": 12,
|
|
4682
|
+
"out_hidden_size": 4096,
|
|
4683
|
+
"patch_size": 14,
|
|
4684
|
+
"rms_norm_eps": 1e-05,
|
|
4685
|
+
"spatial_merge_size": 2,
|
|
4686
|
+
"temporal_patch_size": 2,
|
|
4687
|
+
},
|
|
4688
|
+
}
|
|
4689
|
+
)
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import inspect
|
|
2
3
|
import os
|
|
3
4
|
import pprint
|
|
4
5
|
from typing import Any, Dict, Optional, Tuple
|
|
5
6
|
import torch
|
|
6
7
|
import transformers
|
|
7
|
-
from ...helpers.config_helper import update_config
|
|
8
|
+
from ...helpers.config_helper import update_config, build_diff_config
|
|
8
9
|
from ...tasks import reduce_model_config, random_input_kwargs
|
|
9
10
|
from .hub_api import task_from_arch, task_from_id, get_pretrained_config, download_code_modelid
|
|
10
11
|
|
|
@@ -121,6 +122,7 @@ def get_untrained_model_with_inputs(
|
|
|
121
122
|
)
|
|
122
123
|
|
|
123
124
|
# updating the configuration
|
|
125
|
+
config0 = copy.deepcopy(config)
|
|
124
126
|
mkwargs = reduce_model_config(config, task) if not same_as_pretrained else {}
|
|
125
127
|
if model_kwargs:
|
|
126
128
|
for k, v in model_kwargs.items():
|
|
@@ -133,6 +135,15 @@ def get_untrained_model_with_inputs(
|
|
|
133
135
|
mkwargs[k] = v
|
|
134
136
|
if mkwargs:
|
|
135
137
|
update_config(config, mkwargs)
|
|
138
|
+
try:
|
|
139
|
+
diff_config = build_diff_config(config0, config)
|
|
140
|
+
except (ValueError, AttributeError, TypeError) as e:
|
|
141
|
+
diff_config = f"DIFF CONFIG ERROR {e}"
|
|
142
|
+
if verbose:
|
|
143
|
+
if diff_config:
|
|
144
|
+
print("[get_untrained_model_with_inputs] -- updated config")
|
|
145
|
+
pprint.pprint(diff_config)
|
|
146
|
+
print("[get_untrained_model_with_inputs] --")
|
|
136
147
|
|
|
137
148
|
# SDPA
|
|
138
149
|
if model_kwargs and "attn_implementation" in model_kwargs:
|
|
@@ -232,6 +243,7 @@ def get_untrained_model_with_inputs(
|
|
|
232
243
|
|
|
233
244
|
res["input_kwargs"] = kwargs
|
|
234
245
|
res["model_kwargs"] = mkwargs
|
|
246
|
+
res["dump_info"] = dict(config_diff=diff_config)
|
|
235
247
|
|
|
236
248
|
sizes = compute_model_size(model)
|
|
237
249
|
res["model"] = model
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import inspect
|
|
3
3
|
import os
|
|
4
|
+
import pprint
|
|
4
5
|
import sys
|
|
5
6
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
6
7
|
import time
|
|
@@ -467,6 +468,21 @@ def validate_model(
|
|
|
467
468
|
f"inputs2 is True but second set is missing in data for "
|
|
468
469
|
f"model id {model_id!r}: {sorted(data)}"
|
|
469
470
|
)
|
|
471
|
+
if dump_folder:
|
|
472
|
+
with open(os.path.join(dump_folder, "model_config.txt"), "w") as f:
|
|
473
|
+
f.write(f"model_id: {model_id}\n------\n")
|
|
474
|
+
f.write(
|
|
475
|
+
pprint.pformat(
|
|
476
|
+
data["configuration"]
|
|
477
|
+
if type(data["configuration"]) is dict
|
|
478
|
+
else data["configuration"].to_dict()
|
|
479
|
+
)
|
|
480
|
+
)
|
|
481
|
+
dump_info = data.get("dump_info", None)
|
|
482
|
+
if dump_info:
|
|
483
|
+
with open(os.path.join(dump_folder, "model_dump_info.txt"), "w") as f:
|
|
484
|
+
f.write(f"model_id: {model_id}\n------\n")
|
|
485
|
+
f.write(pprint.pformat(dump_info))
|
|
470
486
|
|
|
471
487
|
if exporter == "modelbuilder":
|
|
472
488
|
# Models used with ModelBuilder do not like batch size > 1.
|
|
@@ -480,9 +496,15 @@ def validate_model(
|
|
|
480
496
|
cpl = CoupleInputsDynamicShapes(
|
|
481
497
|
tuple(), data[k], dynamic_shapes=data["dynamic_shapes"]
|
|
482
498
|
)
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
499
|
+
if patch_kwargs.get("patch", False):
|
|
500
|
+
with torch_export_patches(**patch_kwargs): # type: ignore[arg-type]
|
|
501
|
+
data[k] = cpl.change_dynamic_dimensions(
|
|
502
|
+
desired_values=dict(batch=1), only_desired=True
|
|
503
|
+
)
|
|
504
|
+
else:
|
|
505
|
+
data[k] = cpl.change_dynamic_dimensions(
|
|
506
|
+
desired_values=dict(batch=1), only_desired=True
|
|
507
|
+
)
|
|
486
508
|
if verbose:
|
|
487
509
|
print(f"[validate_model] batch=1 --> {string_type(data[k], with_shape=True)}")
|
|
488
510
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
onnx_diagnostic/__init__.py,sha256=
|
|
1
|
+
onnx_diagnostic/__init__.py,sha256=kVcl-JnGE4IT1aVApD12HyIKRM7Rq6QRFtmH09JgMwY,173
|
|
2
2
|
onnx_diagnostic/__main__.py,sha256=YmyV_Aq_ianDlHyKLHMa6h8YK3ZmFPpLVHLKjM91aCk,79
|
|
3
|
-
onnx_diagnostic/_command_lines_parser.py,sha256=
|
|
3
|
+
onnx_diagnostic/_command_lines_parser.py,sha256=TVPlDjsWZd_Zb9DzN3zj0OGxd8nz_nUsjQyGkmyMNsA,32939
|
|
4
4
|
onnx_diagnostic/api.py,sha256=BhCl_yCd78N7TlVtPOHjeYv1QBEy39TjZ647rcHqLh0,345
|
|
5
5
|
onnx_diagnostic/doc.py,sha256=t3RELgfooYnVMAi0JSpggWkQEgUsREz8NmRvn0TnLI8,2829
|
|
6
6
|
onnx_diagnostic/ext_test_case.py,sha256=emfQGiQSz5FVDhyJ1Acsv_Tast7tWl426TjtpNqxDBU,43558
|
|
@@ -9,18 +9,18 @@ onnx_diagnostic/export/dynamic_shapes.py,sha256=Go4_sIwiolCy_m1djQ3U_bX6C1EFw4al
|
|
|
9
9
|
onnx_diagnostic/export/shape_helper.py,sha256=PI_SgE1MNRKSrQ414eYoBZ54QGZbYisHSvqi9tstL2s,7795
|
|
10
10
|
onnx_diagnostic/export/validate.py,sha256=_PGUql2DJhIgGKo0WjTGUc5AgsZUx8fEs00MePy-w98,6043
|
|
11
11
|
onnx_diagnostic/helpers/__init__.py,sha256=GJ2GT7cgnlIveVUwMZhuvUwidbTJaKv8CsSIOpZDsJg,83
|
|
12
|
-
onnx_diagnostic/helpers/_log_helper.py,sha256=
|
|
12
|
+
onnx_diagnostic/helpers/_log_helper.py,sha256=OTwQH0OIxs9B6nrSvR7MoxMimSw_8mU0mj133NvLk5o,16832
|
|
13
13
|
onnx_diagnostic/helpers/args_helper.py,sha256=SRWnqC7EENg09RZlA50B_PcdiIhdbgA4C3ACfzl5nMs,4419
|
|
14
14
|
onnx_diagnostic/helpers/bench_run.py,sha256=CGA6VMJZMH2gDhVueT9ypNm4PMcjGrrGFYp08nhWj9k,16539
|
|
15
|
-
onnx_diagnostic/helpers/cache_helper.py,sha256=
|
|
16
|
-
onnx_diagnostic/helpers/config_helper.py,sha256=
|
|
15
|
+
onnx_diagnostic/helpers/cache_helper.py,sha256=dFiKPnD3qT_rel9C7Az9AEnbV2drfSMSdXBRotJJUU4,24686
|
|
16
|
+
onnx_diagnostic/helpers/config_helper.py,sha256=H2mOcMXfrcolFnt8EuqmRFkpQ3YdNRDfvm9ToI1vNH0,5618
|
|
17
17
|
onnx_diagnostic/helpers/doc_helper.py,sha256=pl5MZd3_FaE8BqQnqoBuSBxoNCFcd2OJd3eITUSku5c,5897
|
|
18
18
|
onnx_diagnostic/helpers/graph_helper.py,sha256=hevQT5a7_QuriVPQcbT5qe18n99Doyl5h3-qshx1-uk,14093
|
|
19
19
|
onnx_diagnostic/helpers/helper.py,sha256=OsQz2um10DgGiX3fgOulTDFQop0wCMX6shPonQgN71w,62940
|
|
20
|
-
onnx_diagnostic/helpers/log_helper.py,sha256=
|
|
20
|
+
onnx_diagnostic/helpers/log_helper.py,sha256=ODtMLFfJvkyss9PJwEZFd5_8bLcliaMq0A17t0dSIFA,82771
|
|
21
21
|
onnx_diagnostic/helpers/memory_peak.py,sha256=OT6mz0muBbBZY0pjgW2_eCk_lOtFRo-5w4jFo2Z6Kok,6380
|
|
22
22
|
onnx_diagnostic/helpers/mini_onnx_builder.py,sha256=FgK-Kws1WpSYdYJCPyONwQYY3AjbgUHimZlaYyiNUfE,21286
|
|
23
|
-
onnx_diagnostic/helpers/model_builder_helper.py,sha256=
|
|
23
|
+
onnx_diagnostic/helpers/model_builder_helper.py,sha256=tJi4VkP0TS2yyDSxQPNu9WRoSnPCAjr6L0J49X2LdXk,12810
|
|
24
24
|
onnx_diagnostic/helpers/onnx_helper.py,sha256=GApd3fmweLZ85GjEqo49ZCiOUSJ7vtXCBs-Tp3WlydI,39825
|
|
25
25
|
onnx_diagnostic/helpers/ort_session.py,sha256=UgUUeUslDxEFBc6w6f3HMq_a7bn4TBlItmojqWquSj4,29281
|
|
26
26
|
onnx_diagnostic/helpers/rt_helper.py,sha256=qbV6zyMs-iH6H65WHC2tu4h0psnHg0TX5fwfO_k-glg,4623
|
|
@@ -73,21 +73,21 @@ onnx_diagnostic/reference/torch_ops/sequence_ops.py,sha256=3EiVKpGfN4d1Iry4hgnr3
|
|
|
73
73
|
onnx_diagnostic/reference/torch_ops/shape_ops.py,sha256=pJrNR2UB4PlWl6cv4EDl1uGl8YTBUUMQkhJcsh5K4sA,4291
|
|
74
74
|
onnx_diagnostic/reference/torch_ops/unary_ops.py,sha256=dwu6HPr4V_roxu85U3VLTtDLx5bfxKalT_-zlQxZ5wc,1850
|
|
75
75
|
onnx_diagnostic/tasks/__init__.py,sha256=uWFP7HIr-VnxmXD5i_QAfXnLXc1HwUq2e8v9cKLqraQ,2492
|
|
76
|
-
onnx_diagnostic/tasks/automatic_speech_recognition.py,sha256=
|
|
77
|
-
onnx_diagnostic/tasks/feature_extraction.py,sha256=
|
|
78
|
-
onnx_diagnostic/tasks/fill_mask.py,sha256=
|
|
76
|
+
onnx_diagnostic/tasks/automatic_speech_recognition.py,sha256=umZmjGW1gDUFkqvBJnQyaL7D7-HqiwlQpsq6Ip187Dg,7150
|
|
77
|
+
onnx_diagnostic/tasks/feature_extraction.py,sha256=Zh9p_Q8FqEO2_aqI0cCiq8OXuM3WUZbwItlLOmLnNl8,5537
|
|
78
|
+
onnx_diagnostic/tasks/fill_mask.py,sha256=5Gt6zlj0p6vuifox7Wmj-TpHXJvPS0CEH8evgdBHDNA,2640
|
|
79
79
|
onnx_diagnostic/tasks/image_classification.py,sha256=nLpBBB1Gkog3Fk6pu2waiHcuQr4ILPptc9FhQ-pn460,4682
|
|
80
|
-
onnx_diagnostic/tasks/image_text_to_text.py,sha256=
|
|
80
|
+
onnx_diagnostic/tasks/image_text_to_text.py,sha256=wkFrUaEvQAW-D-jql2xSnae1XvQBl-sSbhmAmJ76qGo,17428
|
|
81
81
|
onnx_diagnostic/tasks/mask_generation.py,sha256=fjdD3rd-O-mFL0hQy3la3JXKth_0bH2HL7Eelq-3Dbs,5057
|
|
82
82
|
onnx_diagnostic/tasks/mixture_of_expert.py,sha256=al4tk1BrHidtRiHlAaiflWiJaAte0d5M8WcBioANG9k,2808
|
|
83
83
|
onnx_diagnostic/tasks/object_detection.py,sha256=3FiT8ya5FCd9lwjQCRXhAwXspNwYTlAD3Gpk8aAcG5w,4279
|
|
84
|
-
onnx_diagnostic/tasks/sentence_similarity.py,sha256=
|
|
85
|
-
onnx_diagnostic/tasks/summarization.py,sha256=
|
|
86
|
-
onnx_diagnostic/tasks/text2text_generation.py,sha256
|
|
87
|
-
onnx_diagnostic/tasks/text_classification.py,sha256=
|
|
88
|
-
onnx_diagnostic/tasks/text_generation.py,sha256=
|
|
84
|
+
onnx_diagnostic/tasks/sentence_similarity.py,sha256=vPqNZgAnIvY0rKWPUTs0IlU3RFQDkXAHL7IVfRFmilY,2655
|
|
85
|
+
onnx_diagnostic/tasks/summarization.py,sha256=8vB_JiRzDEacIvr8CYTuVQTH73xG_jNkndoS9RHJTSs,8292
|
|
86
|
+
onnx_diagnostic/tasks/text2text_generation.py,sha256=35eF_RlSeMdLTZPooLMAnszs-z0bkKZ34Iej3JgA96A,8602
|
|
87
|
+
onnx_diagnostic/tasks/text_classification.py,sha256=CGc72SpXFzTUyzAHEMPgyy_s187DaYGsRdrosxG80_Q,2711
|
|
88
|
+
onnx_diagnostic/tasks/text_generation.py,sha256=hV-oK1bWjtepxkA491Va_0CWrELZbfP4E3N8xQ950zk,12823
|
|
89
89
|
onnx_diagnostic/tasks/text_to_image.py,sha256=mOS3Ruosi3hzRMxXLDN7ZkAbi7NnQb7MWwQP_okGVHs,2962
|
|
90
|
-
onnx_diagnostic/tasks/zero_shot_image_classification.py,sha256=
|
|
90
|
+
onnx_diagnostic/tasks/zero_shot_image_classification.py,sha256=jJCMWuOqGv5ahCfjrcqxuYCJFhTgHV5KUf2yyv2yxYA,4624
|
|
91
91
|
onnx_diagnostic/torch_export_patches/__init__.py,sha256=0SaZedwznm1hQUCvXZsGZORV5vby954wEExr5faepGg,720
|
|
92
92
|
onnx_diagnostic/torch_export_patches/onnx_export_errors.py,sha256=Nx3HLII-KIemfMydraTRlwK9O0kgVug57SiLT9y9KOY,23749
|
|
93
93
|
onnx_diagnostic/torch_export_patches/onnx_export_serialization.py,sha256=klvqiMjccwGhiRnLRVbwTi5WWkMfvtnOV5ycirPcAdA,11354
|
|
@@ -98,27 +98,27 @@ onnx_diagnostic/torch_export_patches/patch_module_helper.py,sha256=2U0AdyZuU0W54
|
|
|
98
98
|
onnx_diagnostic/torch_export_patches/eval/__init__.py,sha256=57x62uZNA80XiWgkG8Fe0_8YJcIVrvKLPqvwLDPJwgc,24008
|
|
99
99
|
onnx_diagnostic/torch_export_patches/eval/model_cases.py,sha256=DTvdHPtNQh25Akv5o3D4Jxf1L1-SJ7w14tgvj8AAns8,26577
|
|
100
100
|
onnx_diagnostic/torch_export_patches/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
101
|
-
onnx_diagnostic/torch_export_patches/patches/patch_torch.py,sha256=
|
|
102
|
-
onnx_diagnostic/torch_export_patches/patches/patch_transformers.py,sha256=
|
|
101
|
+
onnx_diagnostic/torch_export_patches/patches/patch_torch.py,sha256=TFjuw--sTYPCoVEaYlYLJuElx_CUynJR6s6ypoZtRWw,18956
|
|
102
|
+
onnx_diagnostic/torch_export_patches/patches/patch_transformers.py,sha256=tcDNJzOIivyOM6XbTm4munHKHAmVrOKE6nbqIdl-4dg,66290
|
|
103
103
|
onnx_diagnostic/torch_export_patches/serialization/__init__.py,sha256=BHLdRPtNAtNPAS-bPKEj3-foGSPvwAbZXrHzGGPDLEw,1876
|
|
104
104
|
onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py,sha256=drq3EH_yjcSuIWYsVeUWm8Cx6YCZFU6bP_1PLtPfY5I,945
|
|
105
105
|
onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py,sha256=dAKi4zujlBxDvxvaVI_qH4qW9AlpVFMtCkvGTNCJCUY,9353
|
|
106
106
|
onnx_diagnostic/torch_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
107
|
onnx_diagnostic/torch_models/llms.py,sha256=soyg4yC87ptGoeulJhKqw5opGmuLvH1pn_ZDXZ4Jr8E,90
|
|
108
|
-
onnx_diagnostic/torch_models/validate.py,sha256=
|
|
108
|
+
onnx_diagnostic/torch_models/validate.py,sha256=IkWyuwKmIqetMN5ziD9jPwSgRAMzJnQqPElIQFJiJwc,65907
|
|
109
109
|
onnx_diagnostic/torch_models/hghub/__init__.py,sha256=vi1Q7YHdddj1soiBN42MSvJdFqe2_KUoWafHISjwOu8,58
|
|
110
110
|
onnx_diagnostic/torch_models/hghub/hub_api.py,sha256=Bvr-sTAhS6s6UCkt-KsY_7Mdai08-AQzvHrzbYCSuvk,13186
|
|
111
111
|
onnx_diagnostic/torch_models/hghub/hub_data.py,sha256=W05mciqUqhaYEfYNHtUeuwOMOZoQTuDidRLEIx4z1CE,8523
|
|
112
|
-
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py,sha256=
|
|
113
|
-
onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=
|
|
112
|
+
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py,sha256=mboN04WTZMPgfw_JOP01aINWjmq6qmOKQhDE28Fc_zY,282283
|
|
113
|
+
onnx_diagnostic/torch_models/hghub/model_inputs.py,sha256=h6Pi0dkUFXpDGudJ5mQQ9NSQCOjpF6Pm-J6_shsWiH4,11546
|
|
114
114
|
onnx_diagnostic/torch_models/untrained/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
115
|
onnx_diagnostic/torch_models/untrained/llm_phi2.py,sha256=ynBTDHJHCk44NjLT_t6OiFDBdPP0rFGPteiONDxvztw,3708
|
|
116
116
|
onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py,sha256=QXw_Bs2SzfeiQMf-tmtVl83SmVOL4-Um7Qy-f0E48QI,2507
|
|
117
117
|
onnx_diagnostic/torch_onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
118
|
onnx_diagnostic/torch_onnx/runtime_info.py,sha256=1g9F_Jf9AAgYQU4stbsrFXwQl-30mWlQrFbQ7val8Ps,9268
|
|
119
119
|
onnx_diagnostic/torch_onnx/sbs.py,sha256=1EL25DeYFzlBSiFG_XjePBLvsiItRXbdDrr5-QZW2mA,16878
|
|
120
|
-
onnx_diagnostic-0.7.
|
|
121
|
-
onnx_diagnostic-0.7.
|
|
122
|
-
onnx_diagnostic-0.7.
|
|
123
|
-
onnx_diagnostic-0.7.
|
|
124
|
-
onnx_diagnostic-0.7.
|
|
120
|
+
onnx_diagnostic-0.7.9.dist-info/licenses/LICENSE.txt,sha256=Vv6TXglX6Rc0d-f8aREhayhT-6PMQXEyOmI2NKlUCMc,1045
|
|
121
|
+
onnx_diagnostic-0.7.9.dist-info/METADATA,sha256=UIT85yMNIqhtCArUezpyfFnbkz1KY4Q11EjKCBKZVWs,7431
|
|
122
|
+
onnx_diagnostic-0.7.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
123
|
+
onnx_diagnostic-0.7.9.dist-info/top_level.txt,sha256=KwNkXewmcobM3ZT1DJLVWH6ebJzA5qKg7cWqKfpGNT4,16
|
|
124
|
+
onnx_diagnostic-0.7.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|