onnx-diagnostic 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +154 -3
  3. onnx_diagnostic/ci_models/__init__.py +0 -0
  4. onnx_diagnostic/ci_models/ci_helpers.py +435 -0
  5. onnx_diagnostic/ci_models/export_phi4_mm.py +1062 -0
  6. onnx_diagnostic/ci_models/export_qwen25_vl.py +568 -0
  7. onnx_diagnostic/export/api.py +1 -0
  8. onnx_diagnostic/export/cf_simple_loop_for.py +537 -0
  9. onnx_diagnostic/export/control_flow_onnx.py +23 -17
  10. onnx_diagnostic/ext_test_case.py +23 -2
  11. onnx_diagnostic/helpers/bench_run.py +1 -1
  12. onnx_diagnostic/helpers/log_helper.py +1 -3
  13. onnx_diagnostic/helpers/optim_helper.py +116 -0
  14. onnx_diagnostic/tasks/image_text_to_text.py +15 -5
  15. onnx_diagnostic/tasks/text2text_generation.py +84 -48
  16. onnx_diagnostic/tasks/text_generation.py +3 -0
  17. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +44 -2
  18. onnx_diagnostic/torch_export_patches/patch_expressions.py +4 -1
  19. onnx_diagnostic/torch_export_patches/patch_module.py +31 -23
  20. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_funnel.py +80 -0
  21. onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +86 -3
  22. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +15 -0
  23. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +23 -24
  24. onnx_diagnostic/torch_models/hghub/hub_api.py +11 -0
  25. onnx_diagnostic/torch_models/hghub/hub_data.py +9 -1
  26. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +29 -8
  27. onnx_diagnostic/torch_models/hghub/model_inputs.py +24 -19
  28. onnx_diagnostic/torch_onnx/compare.py +357 -0
  29. {onnx_diagnostic-0.8.5.dist-info → onnx_diagnostic-0.8.7.dist-info}/METADATA +1 -1
  30. {onnx_diagnostic-0.8.5.dist-info → onnx_diagnostic-0.8.7.dist-info}/RECORD +33 -27
  31. onnx_diagnostic/export/control_flow.py +0 -214
  32. onnx_diagnostic/export/control_flow_research.py +0 -140
  33. {onnx_diagnostic-0.8.5.dist-info → onnx_diagnostic-0.8.7.dist-info}/WHEEL +0 -0
  34. {onnx_diagnostic-0.8.5.dist-info → onnx_diagnostic-0.8.7.dist-info}/licenses/LICENSE.txt +0 -0
  35. {onnx_diagnostic-0.8.5.dist-info → onnx_diagnostic-0.8.7.dist-info}/top_level.txt +0 -0
onnx_diagnostic/ci_models/export_qwen25_vl.py
@@ -0,0 +1,568 @@
+ r"""
+ Export visual and embedding parts of Qwen/Qwen2.5-VL-7B-Instruct
+ ================================================================
+
+ Requirements
+ ++++++++++++
+
+ ::
+
+     git+https://github.com/sdpython/experimental-experiment.git # optional
+     huggingface_hub
+     onnx-diagnostic>=0.8.6
+     onnxruntime>=1.23
+     torch>=2.10 # weekly is better
+     tqdm
+     transformers>=4.57
+
+ Examples
+ ++++++++
+
+ .. code-block:: bash
+
+     python -m onnx_diagnostic.ci_models.export_qwen25_vl \
+         -m Qwen/Qwen2.5-VL-7B-Instruct \
+         --device cpu --dtype float32 --exporter onnx-dynamo --pretrained --second-input --zip
+
+ To choose a specific Attention schema:
+
+ .. code-block:: bash
+
+     QWEN25ATTENTION=LOOPMHA python -m onnx_diagnostic.ci_models.export_qwen25_vl \
+         -m Qwen/Qwen2.5-VL-7B-Instruct \
+         --device cpu --dtype float32 --exporter onnx-dynamo --pretrained --second-input --zip
+
+ Cheat sheet for tar commands. To make a tar:
+ ``tar -czvf model.tar.gz model.onnx model.data``
+ And to untar:
+ ``tar -xzvf model.tar.gz``.
+
+ Rewritings
+ ++++++++++
+
+ * `overview <https://sdpython.github.io/doc/onnx-diagnostic/dev/status/patches_diff.html#auto-patch-transformers-qwen2-5-vlforconditionalgeneration-prepare-inputs-for-generation-patched-qwen2-5-vlforconditionalgeneration-prepare-inputs-for-generation>`_
+ * code: `_patch_transformers_qwen2_5.py <https://github.com/sdpython/onnx-diagnostic/blob/main/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py>`_
+
+ Attention
+ +++++++++
+
+ The attention is implemented either with ``MultiHeadAttention`` in a loop,
+ or with ``PackedMultiHeadAttention``. The choice is made based on the device.
+ It is possible to override this by setting the environment variable
+ ``QWEN25ATTENTION`` to:
+
+ * ``PACKED``: PackedMultiHeadAttention
+ * ``LOOPMHA``: Loop over MultiHeadAttention
+ * ``LOOPA23``: Loop over Attention(23), needs opset 23+.
+ """
+
+ import os
+ import sys
+ import time
+ import warnings
+ from typing import Any, Dict, List, Tuple
+ from .ci_helpers import (
+     check_for_discrepancies_and_log_everything_into_a_json_file,
+     compute_expected_outputs,
+     get_parser,
+     get_torch_dtype_from_command_line_args,
+     remove_inplace_body_last_input_output_type_for_loop_because_they_might_be_sequences,
+     simplify_model_id_for_a_filename,
+     zip_model_and_data_into_a_single_file,
+ )
+
+
+ def get_untrained_model(model_id: str, second_input: bool, verbose: int) -> Dict[str, Any]:
+     """
+     Returns an untrained model.
+
+     :param model_id: model id
+     :param second_input: second input set
+     :param verbose: verbosity
+     :return: model and data
+     """
+     from ..torch_models.hghub.model_inputs import get_untrained_model_with_inputs
+
+     if model_id == "arnir0/Tiny-LLM":
+         # used to run a unit test
+         _config_reduction = None
+     else:
+
+         def _config_reduction(config, task):
+             return {
+                 # "num_hidden_layers": 2,
+                 "vision_config": {"depth": 2},
+                 "text_config": {
+                     "num_hidden_layers": 2,
+                     "layer_types": ["full_attention", "full_attention"],
+                 },
+                 # "_attn_implementation": "flash_attention_2",
+                 "_attn_implementation": "sdpa",
+             }
+
+     config_reduction = _config_reduction
+     data = get_untrained_model_with_inputs(
+         model_id,
+         verbose=verbose,
+         add_second_input=second_input,
+         config_reduction=config_reduction,
+     )
+     return data
+
+
+ def get_inputs_for_part(
+     part: str,
+     torch_dtype: "torch.dtype",  # noqa: F821
+     device: str,
+     second_input: bool,
+     image_token_id: int,
+     bos_token_id: int,
+     eos_token_id: int,
+ ) -> Tuple[Dict[str, "torch.Tensor"], List[Dict[str, "torch.Tensor"]]]:  # noqa: F821
+     import torch
+
+     if part == "visual":
+         export_inputs = dict(
+             pixel_values=torch.randn((1292, 1176), dtype=torch_dtype).to(device),
+             image_grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
+         )
+         other_inputs = []
+         if second_input:
+             other_inputs = [
+                 dict(
+                     pixel_values=torch.randn((1292, 1176), dtype=torch_dtype).to(device),
+                     image_grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
+                 ),
+                 dict(
+                     pixel_values=torch.rand((1292, 1176), dtype=torch_dtype).to(device),
+                     image_grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
+                 ),
+                 dict(
+                     pixel_values=torch.randn((14308, 1176), dtype=torch_dtype).to(device),
+                     image_grid_thw=torch.tensor([[1, 98, 146]], dtype=torch.int64).to(device),
+                 ),
+                 dict(
+                     pixel_values=torch.rand((14308, 1176), dtype=torch_dtype).to(device),
+                     image_grid_thw=torch.tensor([[1, 98, 146]], dtype=torch.int64).to(device),
+                 ),
+             ]
+         return export_inputs, other_inputs
+
+     if part == "embedding":
+
+         def fix(inputs):
+             img_start_index = 3
+             img_end_index = img_start_index + patches_per_image  # 3 + 3577 = 3580
+
+             # Fill in with image token index
+             inputs["input_ids"][0][2] = bos_token_id  # <start_of_image>
+             inputs["input_ids"][0][img_start_index:img_end_index] = image_token_id  # <image>
+             inputs["input_ids"][0][img_end_index] = eos_token_id  # <end_of_image>
+
+             inputs["input_ids"][1][2] = bos_token_id  # <start_of_image>
+             inputs["input_ids"][1][img_start_index:img_end_index] = image_token_id  # <image>
+             inputs["input_ids"][1][img_end_index] = eos_token_id  # <end_of_image>
+             return inputs
+
+         batch_size, sequence_length, patches_per_image, out_hidden_size = (2, 3606, 3577, 3584)
+         num_logical_patches = batch_size * patches_per_image
+
+         def draw():
+             return {
+                 "input_ids": torch.randint(
+                     low=0,
+                     high=image_token_id,
+                     size=(batch_size, sequence_length),
+                     dtype=torch.int64,
+                 ).to(device),
+                 "image_features": torch.randn(
+                     num_logical_patches, out_hidden_size, dtype=torch_dtype
+                 ).to(device),
+             }
+
+         return fix(draw()), ([fix(draw()), fix(draw())] if second_input else [])
+
+     raise NotImplementedError(f"No inputs implemented yet for part={part!r}")
+
+
+ def main(
+     model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct",
+     device: str = "cpu",
+     dtype: str = "float32",
+     exporter: str = "onnx-dynamo",
+     pretrained: bool = True,
+     second_input: bool = True,
+     make_zip: bool = False,
+     output_folder: str = "dump_models",
+     existing_onnx: str | None = None,
+     part: str = "visual",
+     atol: float = 0.01,
+     mismatch01: float = 0.1,
+     profile_exporter: bool = False,
+ ):
+     """
+     Exports model Qwen/Qwen2.5-VL-7B-Instruct or pieces of it.
+     The script applies as well to other models based on the same architecture.
+
+     The function saves everything on disk. It does not generate new inputs
+     on the second run but reuses the saved ones. The same goes for the expected
+     outputs, which are also saved on disk.
+
+     :param model_id: model id
+     :param device: device
+     :param dtype: dtype
+     :param exporter: exporter to use
+     :param pretrained: pretrained=False is usually used to test
+     :param second_input: checks discrepancies on more examples
+     :param make_zip: creates a zip at the end
+     :param output_folder: output folder
+     :param part: "" to export the whole model, ``"visual"`` for visual part,
+         ``"embedding"`` for the embedding part
+     :param atol: raises an exception if tolerance is above that threshold
+     :param mismatch01: raises an exception if the ratio of mismatches
+         is above that threshold
+     :param profile_exporter: profiles the exporter
+     """
+     prefix = simplify_model_id_for_a_filename(model_id)
+     if "QWEN25ATTENTION" in os.environ:
+         prefix = f"{prefix}.{os.environ['QWEN25ATTENTION']}"
+     basename = os.path.join(
+         output_folder, f"model.{prefix}.{part}.{device}.{dtype}.{exporter}"
+     )
+     filename = f"{basename}.onnx"
+     stat_file = f"{basename}.stats"
+
+     print("------------------------------------------------------------------")
+     print(f"-- model_id={model_id}")
+     print(f"-- part={part}")
+     print(f"-- device={device}")
+     print(f"-- dtype={dtype}")
+     print(f"-- exporter={exporter}")
+     print(f"-- pretrained={pretrained}")
+     print(f"-- second_input={second_input}")
+     print(f"-- make_zip={make_zip}")
+     print(f"-- output_folder={output_folder}")
+     print(f"-- atol={atol}")
+     print(f"-- mismatch01={mismatch01}")
+     print(f"-- profile_exporter={profile_exporter}")
+     print("------------------------------------------------------------------")
+     print(f"-- prefix={prefix}")
+     print(f"-- export in {filename!r}")
+     print("------------------------------------------------------------------")
+
+     if os.path.exists(stat_file) and not existing_onnx:
+         print(f"-- skipping because {stat_file!r} already exists")
+         return
+
+     print("-- import torch and others")
+     import torch
+     from transformers import AutoModel, AutoProcessor
+     from ..helpers import string_type
+     from ..torch_export_patches.patches._patch_transformers_qwen2_5 import (
+         PLUGS,
+     )
+     from ..torch_export_patches import torch_export_patches
+     from ..export.api import to_onnx
+
+     if output_folder and output_folder != ".":
+         os.makedirs(output_folder, exist_ok=True)
+
+     print(f"-- create model {model_id!r}")
+     print(
+         f"-- device={device!r}, dtype={dtype!r}, exporter={exporter!r}, "
+         f"pretrained={pretrained!r}"
+     )
+     torch_dtype = get_torch_dtype_from_command_line_args(dtype)
+
+     if pretrained:
+         print("-- pretrained model")
+         model = AutoModel.from_pretrained(
+             model_id, device_map=device, dtype=torch_dtype, attn_implementation="sdpa"
+         ).eval()
+         data = dict(model=model)
+         config = model.config
+         if not hasattr(config, "bos_token_id") or not config.bos_token_id:
+             config.bos_token_id = 151643
+         if not hasattr(config, "eos_token_id") or not config.eos_token_id:
+             config.eos_token_id = 151645
+     else:
+         print("-- random model")
+         data = get_untrained_model(model_id, second_input=second_input, verbose=1)
+         model = data["model"]
+         config = data["configuration"]
+
+     assert (
+         hasattr(config, "bos_token_id") and config.bos_token_id
+     ), f"missing 'bos_token_id' from config\n{config}"
+     assert (
+         hasattr(config, "eos_token_id") and config.eos_token_id
+     ), f"missing 'eos_token_id' from config\n{config}"
+     model = model.to(device).to(getattr(torch, dtype))
+
+     print(f"-- config._attn_implementation={model.config._attn_implementation}")
+     print(f"-- model.dtype={model.dtype}")
+     print(f"-- model.device={model.device}")
+     try:
+         processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
+     except OSError as e:
+         warnings.warn(f"Unable to access internet due to {e!r}", ResourceWarning, stacklevel=0)
+         return
+     print(f"-- processor={type(processor)}")
+
+     export_inputs, other_inputs = None, None
+     if not part:
+         # used to unit test
+         from ..helpers.torch_helper import to_any
+
+         assert "inputs" in data, f"key 'inputs' is missing from data (available {set(data)})"
+         model_to_export = data["model"]
+         export_inputs = to_any(to_any(data["inputs"], device), torch_dtype)
+         other_inputs = [
+             v for k, v in data.items() if k.startswith("inputs_") if k != "inputs_prompt"
+         ]
+         dynamic_shapes = data["dynamic_shapes"]
+         assert other_inputs, f"No other inputs were found in data (available {set(data)})"
+
+     elif part == "visual":
+
+         class VisualPart(torch.nn.Module):
+             def __init__(self, model):
+                 super().__init__()
+                 self.model = model
+
+             def forward(self, pixel_values, image_grid_thw):
+                 return model.get_image_features(pixel_values, image_grid_thw)
+
+         assert hasattr(
+             model, "get_image_features"
+         ), f"get_image_features not found in class {type(model)}"
+         model_to_export = VisualPart(model)
+
+         dynamic_shapes = dict(
+             pixel_values={0: "hidden_width", 1: "hidden_height"},
+             image_grid_thw={},  # {0: "n_images"}, # TODO: fix
+         )
+
+     elif part == "embedding":
+
+         class EmbeddingPart(torch.nn.Module):
+             def __init__(self, model):
+                 super().__init__()
+                 self.model = model
+
+             def forward(self, input_ids, image_features):
+                 inputs_embeds = None
+
+                 if inputs_embeds is None:
+                     inputs_embeds = self.model.get_input_embeddings()(input_ids)
+
+                 def process_image(inputs_embeds, image_features):
+                     image_embeds = image_features
+                     image_embeds = torch.cat((image_embeds,), dim=0).to(
+                         inputs_embeds.device, inputs_embeds.dtype
+                     )
+                     image_mask, _ = self.model.model.get_placeholder_mask(
+                         input_ids, inputs_embeds=inputs_embeds, image_features=image_embeds
+                     )
+                     inputs_embeds = inputs_embeds.masked_scatter(image_mask, image_embeds)
+                     return inputs_embeds
+
+                 return torch.cond(
+                     image_features.shape[0] == 0,
+                     (lambda embs, _imgf: embs.clone()),
+                     process_image,
+                     [inputs_embeds, image_features],
+                 )
+
+         assert hasattr(
+             model, "get_image_features"
+         ), f"get_image_features not found in class {type(model)}"
+         model_to_export = EmbeddingPart(model)
+
+         dynamic_shapes = {
+             "input_ids": {0: "batch_size", 1: "sequence_length"},
+             "image_features": {0: "num_logical_patches"},
+         }
+
+     else:
+         raise NotImplementedError(f"no export yet for part={part!r}")
+
+     print(f"-- part={part!r}")
+     print(f"-- model_to_export={type(model_to_export)}")
+     print(f"-- dynamic_shapes={dynamic_shapes}")
+     print("-- ############")
+     print("-- INPUT/OUTPUT")
+     print("-- ############")
+
+     input_filename = os.path.join(output_folder, f"inputs.{prefix}.{part}.{device}.{dtype}.pt")
+     if os.path.exists(input_filename):
+         print(f"-- restore inputs from {input_filename!r}")
+         data = torch.load(input_filename, weights_only=False)
+         export_inputs = data["export_inputs"]
+         other_inputs = data["other_inputs"]
+         dynamic_shapes = data["dynamic_shapes"]
+     elif export_inputs is not None:
+         data = dict(
+             export_inputs=export_inputs,
+             other_inputs=other_inputs,
+             dynamic_shapes=dynamic_shapes,
+         )
+         print(f"-- dump inputs into {input_filename!r}")
+         torch.save(data, input_filename)
+     else:
+         export_inputs, other_inputs = get_inputs_for_part(
+             part,
+             torch_dtype,
+             device,
+             second_input,
+             image_token_id=config.image_token_id,
+             bos_token_id=config.bos_token_id,
+             eos_token_id=config.eos_token_id,
+         )
+         data = dict(
+             export_inputs=export_inputs,
+             other_inputs=other_inputs,
+             dynamic_shapes=dynamic_shapes,
+         )
+         print(f"-- dump inputs into {input_filename!r}")
+         torch.save(data, input_filename)
+
+     print(f"-- export_inputs={string_type(export_inputs, with_shape=True, with_device=True)}")
+     print(f"-- other_inputs={string_type(other_inputs, with_shape=True, with_device=True)}")
+     print(f"-- dynamic_shapes={dynamic_shapes}")
+     output_filename = os.path.join(
+         output_folder, f"expected.{prefix}.visual.{device}.{dtype}.pt"
+     )
+
+     print("-- ##################")
+     print("-- # EXPECTED_OUTPUTS")
+     print("-- ##################")
+
+     compute_expected_outputs(output_filename, model_to_export, input_filename)
+
+     if existing_onnx and os.path.exists(existing_onnx):
+         print("-- ######")
+         print(f"-- USING EXISTING ONNX {existing_onnx!r}")
+         print("-- ######")
+
+         exporter = existing_onnx
+         filename = existing_onnx
+         target_opset = None
+     else:
+         print("-- ######")
+         print("-- EXPORT")
+         print("-- ######")
+
+         if exporter != "custom":
+             import packaging.version as pv
+
+             try:
+                 import onnxscript
+
+                 v_onnxscript = onnxscript.__version__
+                 if pv.Version(v_onnxscript) <= pv.Version("0.5.6"):
+                     print(f"-- onnxscript=={v_onnxscript} not recent enough")
+                     print("-- stop.")
+                     return
+             except AttributeError:
+                 pass
+             except ImportError:
+                 print("-- missing onnxscript, cannot continue")
+                 print("-- stop.")
+                 return
+
+         begin = time.perf_counter()
+
+         target_opset = 22
+         if (
+             exporter == "onnx-dynamo"
+             and device == "cuda"
+             and "QWEN25ATTENTION" not in os.environ
+         ):
+             os.environ["QWEN25ATTENTION"] = "PACKED"
+         elif "QWEN25ATTENTION" in os.environ and os.environ["QWEN25ATTENTION"] == "LOOPA23":
+             target_opset = 23
+
+         with torch_export_patches(
+             patch_torch=False,
+             patch_sympy=False,
+             patch_transformers=True,
+             verbose=1,
+             stop_if_static=2,
+             profile=(f"{basename}.profile.html" if profile_exporter else None),
+         ):
+             to_onnx(
+                 model_to_export,
+                 kwargs=export_inputs,
+                 dynamic_shapes=dynamic_shapes,
+                 filename=filename,
+                 exporter=exporter,
+                 verbose=1,
+                 save_ep=None,
+                 target_opset=target_opset,
+                 optimize=True,
+                 onnx_plugs=PLUGS,
+             )
+         export_duration = time.perf_counter() - begin
+
+         if exporter == "onnx-dynamo":
+             # onnx-dynamo fails at producing function body with sequences as input / output.
+             # They are replaced by tensor type one step in the model.
+             print("-- remove_body_last_input_output_for_loop")
+             remove_inplace_body_last_input_output_type_for_loop_because_they_might_be_sequences(
+                 filename
+             )
+             print("-- done.")
+
+     print("-- ###############")
+     print("-- # DISCREPANCIES")
+     print("-- ###############")
+
+     info = {
+         "model_id": model_id,
+         "part": part,
+         "device": device,
+         "dtype": dtype,
+         "exporter": exporter,
+         "pretrained": pretrained,
+         "attention": os.environ.get("QWEN25ATTENTION", "default"),
+     }
+
+     check_for_discrepancies_and_log_everything_into_a_json_file(
+         agg_stat_file=os.path.join(output_folder, "collection_statistics.js"),
+         stat_file=stat_file,
+         export_duration=export_duration,
+         device=device,
+         model_file=filename,
+         cached_inputs=input_filename,
+         cached_expected_outputs=output_filename,
+         main_info=info,
+         atol=atol,
+         mismatch01=mismatch01,
+     )
+
+     if make_zip:
+         print("-- #####")
+         print("-- # ZIP")
+         print("-- #####")
+         zip_model_and_data_into_a_single_file(f"{basename}.zip", filename)
+
+
+ if __name__ == "__main__":
+     parser = get_parser("qwen25")
+     args = parser.parse_args(sys.argv[1:])
+     main(
+         model_id=args.mid,
+         device=args.device,
+         dtype=args.dtype,
+         exporter=args.exporter,
+         pretrained=args.pretrained,
+         second_input=args.second_input,
+         make_zip=args.zip,
+         output_folder=args.output_folder,
+         existing_onnx=args.existing_onnx,
+         part=args.part,
+         atol=args.atol,
+         mismatch01=args.mismatch01,
+         profile_exporter=args.profile_exporter,
+     )
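
The new module is primarily a command-line script (see the ``__main__`` block above), but ``main()`` can also be called directly. A minimal sketch, using only parameters that appear in the signature above (``pretrained=False`` selects the reduced, untrained configuration intended for testing):

    # Programmatic equivalent of the CLI example in the module docstring.
    from onnx_diagnostic.ci_models.export_qwen25_vl import main

    main(
        model_id="Qwen/Qwen2.5-VL-7B-Instruct",
        device="cpu",
        dtype="float32",
        exporter="onnx-dynamo",
        pretrained=False,       # untrained reduced config, as used for tests
        second_input=True,      # check discrepancies on extra input sets
        make_zip=False,
        output_folder="dump_models",
        part="visual",          # "" exports the whole model, or "visual" / "embedding"
    )

The function writes the ONNX file, the cached inputs, and the expected outputs into ``output_folder``, and skips the export if the statistics file already exists.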
onnx_diagnostic/export/api.py
@@ -154,6 +154,7 @@ def to_onnx(
         options=options,
         inline=inline,
         dispatcher=main_dispatcher,
+        optimize=optimize,
         **(exporter_kwargs or {}),
     )
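
The one-line change above forwards the ``optimize`` argument of ``to_onnx`` to the selected exporter backend; ``export_qwen25_vl.py`` relies on it by passing ``optimize=True``. A minimal sketch of the call with a toy module (``Tiny`` is purely illustrative):

    import torch
    from onnx_diagnostic.export.api import to_onnx

    class Tiny(torch.nn.Module):
        def forward(self, x):
            return x.sin() + 1

    to_onnx(
        Tiny(),
        kwargs=dict(x=torch.randn(2, 3)),
        dynamic_shapes=dict(x={0: "batch"}),
        filename="tiny.onnx",
        exporter="onnx-dynamo",
        optimize=True,  # now forwarded to the backend thanks to this change
    )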