onnx_diagnostic-0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +7 -0
- onnx_diagnostic/__main__.py +4 -0
- onnx_diagnostic/_command_lines_parser.py +1141 -0
- onnx_diagnostic/api.py +15 -0
- onnx_diagnostic/doc.py +100 -0
- onnx_diagnostic/export/__init__.py +2 -0
- onnx_diagnostic/export/api.py +124 -0
- onnx_diagnostic/export/dynamic_shapes.py +1083 -0
- onnx_diagnostic/export/shape_helper.py +296 -0
- onnx_diagnostic/export/validate.py +173 -0
- onnx_diagnostic/ext_test_case.py +1290 -0
- onnx_diagnostic/helpers/__init__.py +1 -0
- onnx_diagnostic/helpers/_log_helper.py +463 -0
- onnx_diagnostic/helpers/args_helper.py +132 -0
- onnx_diagnostic/helpers/bench_run.py +450 -0
- onnx_diagnostic/helpers/cache_helper.py +687 -0
- onnx_diagnostic/helpers/config_helper.py +170 -0
- onnx_diagnostic/helpers/doc_helper.py +163 -0
- onnx_diagnostic/helpers/fake_tensor_helper.py +273 -0
- onnx_diagnostic/helpers/graph_helper.py +386 -0
- onnx_diagnostic/helpers/helper.py +1707 -0
- onnx_diagnostic/helpers/log_helper.py +2245 -0
- onnx_diagnostic/helpers/memory_peak.py +249 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +600 -0
- onnx_diagnostic/helpers/model_builder_helper.py +469 -0
- onnx_diagnostic/helpers/onnx_helper.py +1200 -0
- onnx_diagnostic/helpers/ort_session.py +736 -0
- onnx_diagnostic/helpers/rt_helper.py +476 -0
- onnx_diagnostic/helpers/torch_helper.py +987 -0
- onnx_diagnostic/reference/__init__.py +4 -0
- onnx_diagnostic/reference/evaluator.py +254 -0
- onnx_diagnostic/reference/ops/__init__.py +1 -0
- onnx_diagnostic/reference/ops/op_add_add_mul_mul.py +68 -0
- onnx_diagnostic/reference/ops/op_attention.py +60 -0
- onnx_diagnostic/reference/ops/op_average_pool_grad.py +63 -0
- onnx_diagnostic/reference/ops/op_bias_softmax.py +16 -0
- onnx_diagnostic/reference/ops/op_cast_like.py +46 -0
- onnx_diagnostic/reference/ops/op_complex.py +26 -0
- onnx_diagnostic/reference/ops/op_concat.py +15 -0
- onnx_diagnostic/reference/ops/op_constant_of_shape.py +67 -0
- onnx_diagnostic/reference/ops/op_fused_matmul.py +31 -0
- onnx_diagnostic/reference/ops/op_gather.py +29 -0
- onnx_diagnostic/reference/ops/op_gather_elements.py +45 -0
- onnx_diagnostic/reference/ops/op_gather_grad.py +12 -0
- onnx_diagnostic/reference/ops/op_memcpy_host.py +11 -0
- onnx_diagnostic/reference/ops/op_mul_sigmoid.py +23 -0
- onnx_diagnostic/reference/ops/op_negxplus1.py +8 -0
- onnx_diagnostic/reference/ops/op_qlinear_average_pool.py +40 -0
- onnx_diagnostic/reference/ops/op_qlinear_conv.py +102 -0
- onnx_diagnostic/reference/ops/op_quick_gelu.py +23 -0
- onnx_diagnostic/reference/ops/op_replace_zero.py +13 -0
- onnx_diagnostic/reference/ops/op_rotary.py +19 -0
- onnx_diagnostic/reference/ops/op_scan.py +65 -0
- onnx_diagnostic/reference/ops/op_scatter_elements.py +107 -0
- onnx_diagnostic/reference/ops/op_scatternd_of_shape.py +22 -0
- onnx_diagnostic/reference/ops/op_simplified_layer_normalization.py +8 -0
- onnx_diagnostic/reference/ops/op_skip_layer_normalization.py +13 -0
- onnx_diagnostic/reference/ops/op_slice.py +20 -0
- onnx_diagnostic/reference/ops/op_transpose_cast.py +16 -0
- onnx_diagnostic/reference/ops/op_tri_matrix.py +17 -0
- onnx_diagnostic/reference/ort_evaluator.py +652 -0
- onnx_diagnostic/reference/quantized_tensor.py +46 -0
- onnx_diagnostic/reference/report_results_comparison.py +95 -0
- onnx_diagnostic/reference/torch_evaluator.py +669 -0
- onnx_diagnostic/reference/torch_ops/__init__.py +56 -0
- onnx_diagnostic/reference/torch_ops/_op_run.py +335 -0
- onnx_diagnostic/reference/torch_ops/access_ops.py +94 -0
- onnx_diagnostic/reference/torch_ops/binary_ops.py +108 -0
- onnx_diagnostic/reference/torch_ops/controlflow_ops.py +121 -0
- onnx_diagnostic/reference/torch_ops/generator_ops.py +36 -0
- onnx_diagnostic/reference/torch_ops/nn_ops.py +196 -0
- onnx_diagnostic/reference/torch_ops/other_ops.py +106 -0
- onnx_diagnostic/reference/torch_ops/reduce_ops.py +130 -0
- onnx_diagnostic/reference/torch_ops/sequence_ops.py +65 -0
- onnx_diagnostic/reference/torch_ops/shape_ops.py +121 -0
- onnx_diagnostic/reference/torch_ops/unary_ops.py +93 -0
- onnx_diagnostic/tasks/__init__.py +90 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +188 -0
- onnx_diagnostic/tasks/data/__init__.py +13 -0
- onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx +0 -0
- onnx_diagnostic/tasks/feature_extraction.py +162 -0
- onnx_diagnostic/tasks/fill_mask.py +89 -0
- onnx_diagnostic/tasks/image_classification.py +144 -0
- onnx_diagnostic/tasks/image_text_to_text.py +581 -0
- onnx_diagnostic/tasks/image_to_video.py +127 -0
- onnx_diagnostic/tasks/mask_generation.py +143 -0
- onnx_diagnostic/tasks/mixture_of_expert.py +79 -0
- onnx_diagnostic/tasks/object_detection.py +134 -0
- onnx_diagnostic/tasks/sentence_similarity.py +89 -0
- onnx_diagnostic/tasks/summarization.py +227 -0
- onnx_diagnostic/tasks/text2text_generation.py +230 -0
- onnx_diagnostic/tasks/text_classification.py +89 -0
- onnx_diagnostic/tasks/text_generation.py +352 -0
- onnx_diagnostic/tasks/text_to_image.py +95 -0
- onnx_diagnostic/tasks/zero_shot_image_classification.py +128 -0
- onnx_diagnostic/torch_export_patches/__init__.py +21 -0
- onnx_diagnostic/torch_export_patches/eval/__init__.py +725 -0
- onnx_diagnostic/torch_export_patches/eval/model_cases.py +898 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +1098 -0
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +311 -0
- onnx_diagnostic/torch_export_patches/patch_details.py +340 -0
- onnx_diagnostic/torch_export_patches/patch_expressions.py +108 -0
- onnx_diagnostic/torch_export_patches/patch_inputs.py +211 -0
- onnx_diagnostic/torch_export_patches/patch_module.py +1047 -0
- onnx_diagnostic/torch_export_patches/patch_module_helper.py +184 -0
- onnx_diagnostic/torch_export_patches/patches/__init__.py +0 -0
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +1090 -0
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +2139 -0
- onnx_diagnostic/torch_export_patches/serialization/__init__.py +46 -0
- onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py +34 -0
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +313 -0
- onnx_diagnostic/torch_models/__init__.py +0 -0
- onnx_diagnostic/torch_models/code_sample.py +343 -0
- onnx_diagnostic/torch_models/hghub/__init__.py +1 -0
- onnx_diagnostic/torch_models/hghub/hub_api.py +422 -0
- onnx_diagnostic/torch_models/hghub/hub_data.py +234 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +4905 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +388 -0
- onnx_diagnostic/torch_models/hghub/model_specific.py +76 -0
- onnx_diagnostic/torch_models/llms.py +2 -0
- onnx_diagnostic/torch_models/untrained/__init__.py +0 -0
- onnx_diagnostic/torch_models/untrained/llm_phi2.py +113 -0
- onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py +76 -0
- onnx_diagnostic/torch_models/validate.py +2124 -0
- onnx_diagnostic/torch_onnx/__init__.py +0 -0
- onnx_diagnostic/torch_onnx/runtime_info.py +289 -0
- onnx_diagnostic/torch_onnx/sbs.py +440 -0
- onnx_diagnostic-0.8.0.dist-info/METADATA +213 -0
- onnx_diagnostic-0.8.0.dist-info/RECORD +132 -0
- onnx_diagnostic-0.8.0.dist-info/WHEEL +5 -0
- onnx_diagnostic-0.8.0.dist-info/licenses/LICENSE.txt +19 -0
- onnx_diagnostic-0.8.0.dist-info/top_level.txt +1 -0
onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -0,0 +1,388 @@
import copy
import inspect
import os
import pprint
import time
from typing import Any, Dict, Optional, Tuple
import torch
import transformers
from ...helpers.config_helper import update_config, build_diff_config
from ...tasks import reduce_model_config, random_input_kwargs
from .hub_api import (
    task_from_arch,
    task_from_id,
    get_pretrained_config,
    download_code_modelid,
    architecture_from_config,
    find_package_source,
)
from .model_specific import HANDLED_MODELS, load_specific_model, instantiate_specific_model


def _code_needing_rewriting(model: Any) -> Any:
    from onnx_diagnostic.torch_export_patches.patch_module_helper import code_needing_rewriting

    return code_needing_rewriting(model)


def _preprocess_model_id(
    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
) -> Tuple[str, Optional[str], bool, bool]:
    if subfolder or "//" not in model_id:
        return model_id, subfolder, same_as_pretrained, use_pretrained
    spl = model_id.split("//")
    if spl[-1] == "pretrained":
        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
    if spl[-1] in {"transformer", "vae"}:
        # known subfolder
        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
    return model_id, subfolder, same_as_pretrained, use_pretrained


def get_untrained_model_with_inputs(
    model_id: str,
    config: Optional[Any] = None,
    task: Optional[str] = "",
    inputs_kwargs: Optional[Dict[str, Any]] = None,
    model_kwargs: Optional[Dict[str, Any]] = None,
    verbose: int = 0,
    dynamic_rope: Optional[bool] = None,
    use_pretrained: bool = False,
    same_as_pretrained: bool = False,
    use_preinstalled: bool = True,
    add_second_input: int = 1,
    subfolder: Optional[str] = None,
    use_only_preinstalled: bool = False,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model similar to the original model
    based on the model id given to the function.
    The model size is reduced compared to the original model.
    No weight is downloaded, only the configuration file sometimes.

    :param model_id: model id, ex: :epkg:`arnir0/Tiny-LLM`
    :param config: to overwrite the configuration
    :param task: model task, can be overwritten, otherwise it is automatically determined
    :param inputs_kwargs: parameters sent to input generation
    :param model_kwargs: to change the model generation
    :param verbose: display found information
    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
    :param same_as_pretrained: if True, do not change the default values
        to get a smaller model
    :param use_pretrained: download the pretrained weights as well
    :param use_preinstalled: use preinstalled configurations
    :param add_second_input: provides other inputs to check that a model
        supports different shapes
    :param subfolder: subfolder to use for this model id
    :param use_only_preinstalled: use only the preinstalled version
    :return: dictionary with a model, inputs, dynamic shapes, and the configuration,
        as well as any necessary rewriting

    Example:

    .. runpython::
        :showcode:

        import pprint
        from onnx_diagnostic.helpers import string_type
        from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs

        data = get_untrained_model_with_inputs("arnir0/Tiny-LLM", verbose=1)

        print("-- model size:", data['size'])
        print("-- number of parameters:", data['n_weights'])
        print("-- inputs:", string_type(data['inputs'], with_shape=True))
        print("-- dynamic shapes:", pprint.pformat(data['dynamic_shapes']))
        print("-- configuration:", pprint.pformat(data['configuration']))
    """
    if task == "":
        task = None
    assert not use_preinstalled or not use_only_preinstalled, (
        f"model_id={model_id!r}, preinstalled model is only available "
        f"if use_only_preinstalled is False."
    )
    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
        model_id,
        subfolder,
        same_as_pretrained=same_as_pretrained,
        use_pretrained=use_pretrained,
    )
    if verbose:
        print(
            f"[get_untrained_model_with_inputs] model_id={model_id!r}, subfolder={subfolder!r}"
        )
        if use_preinstalled:
            print(f"[get_untrained_model_with_inputs] use preinstalled {model_id!r}")
    if config is None:
        config = get_pretrained_config(
            model_id,
            use_preinstalled=use_preinstalled,
            use_only_preinstalled=use_only_preinstalled,
            subfolder=subfolder,
            **(model_kwargs or {}),
        )

    model, task_, mkwargs, diff_config = None, None, {}, None
    if use_pretrained and same_as_pretrained:
        if model_id in HANDLED_MODELS:
            model, task_, config = load_specific_model(model_id, verbose=verbose)

    if task is None:
        task = task_
    if model is None:
        arch = architecture_from_config(config)
        if task is None and arch is None:
            task = task_from_id(model_id, subfolder=subfolder)
        assert task is not None or arch is not None, (
            f"Unable to determine the architecture for model {model_id!r}, "
            f"archs={arch!r}, conf={config}"
        )
        if verbose:
            print(f"[get_untrained_model_with_inputs] architecture={arch!r}")
            print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
        if task is None:
            task = task_from_arch(arch, model_id=model_id, subfolder=subfolder)
        if verbose:
            print(f"[get_untrained_model_with_inputs] task={task!r}")

        # model kwargs
        if dynamic_rope is not None:
            assert (
                type(config) is not dict
            ), f"Unable to set dynamic_rope if the configuration is a dictionary\n{config}"
            assert hasattr(config, "rope_scaling"), f"Missing 'rope_scaling' in\n{config}"
            config.rope_scaling = (
                {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None
            )

        # updating the configuration
        config0 = copy.deepcopy(config)
        mkwargs = reduce_model_config(config, task) if not same_as_pretrained else {}
        if model_kwargs:
            for k, v in model_kwargs.items():
                if isinstance(v, dict):
                    if k in mkwargs:
                        mkwargs[k].update(v)
                    else:
                        mkwargs[k] = v
                else:
                    mkwargs[k] = v
        if mkwargs:
            update_config(config, mkwargs)
        try:
            diff_config = build_diff_config(config0, config)
        except (ValueError, AttributeError, TypeError) as e:
            diff_config = f"DIFF CONFIG ERROR {e}"
        if verbose:
            if diff_config:
                print("[get_untrained_model_with_inputs] -- updated config")
                pprint.pprint(diff_config)
                print("[get_untrained_model_with_inputs] --")

        # SDPA
        if model_kwargs and "attn_implementation" in model_kwargs:
            if hasattr(config, "_attn_implementation_autoset"):
                config._attn_implementation_autoset = False
            config._attn_implementation = model_kwargs["attn_implementation"]  # type: ignore[union-attr]
            if verbose:
                print(
                    f"[get_untrained_model_with_inputs] config._attn_implementation="
                    f"{config._attn_implementation!r}"  # type: ignore[union-attr]
                )
        elif verbose:
            print(
                f"[get_untrained_model_with_inputs] default config._attn_implementation="
                f"{getattr(config, '_attn_implementation', '?')!r}"  # type: ignore[union-attr]
            )

        if find_package_source(config) == "diffusers":
            import diffusers

            package_source = diffusers
        else:
            package_source = transformers

        if verbose:
            print(
                f"[get_untrained_model_with_inputs] package_source={package_source.__name__} "
                f"from {package_source.__file__}"
            )
        if use_pretrained:
            begin = time.perf_counter()
            if verbose:
                print(
                    f"[get_untrained_model_with_inputs] pretrained model_id {model_id!r}, "
                    f"subfolder={subfolder!r}"
                )
            model = transformers.AutoModel.from_pretrained(
                model_id, subfolder=subfolder or "", trust_remote_code=True, **mkwargs
            )
            if verbose:
                print(
                    f"[get_untrained_model_with_inputs] -- done(1) in "
                    f"{time.perf_counter() - begin}s"
                )
        else:
            begin = time.perf_counter()
            if verbose:
                print(
                    f"[get_untrained_model_with_inputs] instantiate model_id {model_id!r}, "
                    f"subfolder={subfolder!r}"
                )
            if arch is not None:
                try:
                    cls_model = getattr(package_source, arch)
                except AttributeError as e:
                    # The code of the models is not in transformers but in the
                    # repository of the model. We need to download it.
                    pyfiles = download_code_modelid(model_id, verbose=verbose)
                    if pyfiles:
                        if "." in arch:
                            cls_name = arch
                        else:
                            modeling = [_ for _ in pyfiles if "/modeling_" in _]
                            assert len(modeling) == 1, (
                                f"Unable to guess the main file implementing class "
                                f"{arch!r} from {pyfiles}, found={modeling}."
                            )
                            last_name = os.path.splitext(os.path.split(modeling[0])[-1])[0]
                            cls_name = f"{last_name}.{arch}"
                        if verbose:
                            print(
                                f"[get_untrained_model_with_inputs] "
                                f"custom code for {cls_name!r}"
                            )
                            print(
                                f"[get_untrained_model_with_inputs] from folder "
                                f"{os.path.split(pyfiles[0])[0]!r}"
                            )
                        cls_model = (
                            transformers.dynamic_module_utils.get_class_from_dynamic_module(
                                cls_name,
                                pretrained_model_name_or_path=os.path.split(pyfiles[0])[0],
                            )
                        )
                    else:
                        raise AttributeError(
                            f"Unable to find class 'transformers.{arch}'. "
                            f"The code needs to be downloaded, config="
                            f"\n{pprint.pformat(config)}."
                        ) from e
            else:
                assert same_as_pretrained and use_pretrained, (
                    f"Model {model_id!r} cannot be built from the configuration alone. "
                    f"It must be downloaded. Use same_as_pretrained=True "
                    f"and use_pretrained=True, arch={arch!r}, config={config}"
                )
            if verbose:
                print(
                    f"[get_untrained_model_with_inputs] -- done(2) in "
                    f"{time.perf_counter() - begin}s"
                )

            seed = int(os.environ.get("SEED", "17"))
            torch.manual_seed(seed)

            if verbose:
                begin = time.perf_counter()
                print(
                    f"[get_untrained_model_with_inputs] "
                    f"instantiate_specific_model {cls_model}"
                )

            model = instantiate_specific_model(cls_model, config)

            if verbose:
                print(
                    f"[get_untrained_model_with_inputs] -- done(3) in "
                    f"{time.perf_counter() - begin}s (model is {type(model)})"
                )

            if model is None:

                if verbose:
                    print(
                        f"[get_untrained_model_with_inputs] "
                        f"instantiate_specific_model(2) {cls_model}"
                    )

                try:
                    if type(config) is dict:
                        model = cls_model(**config)
                    else:
                        model = cls_model(config)
                except RuntimeError as e:
                    raise RuntimeError(
                        f"Unable to instantiate class {cls_model.__name__} with\n{config}"
                    ) from e

                if verbose:
                    print(
                        f"[get_untrained_model_with_inputs] -- done(4) in "
                        f"{time.perf_counter() - begin}s (model is {type(model)})"
                    )

    # input kwargs
    seed = int(os.environ.get("SEED", "17")) + 1
    torch.manual_seed(seed)
    kwargs, fct = random_input_kwargs(config, task)  # type: ignore[arg-type]
    if verbose:
        print(f"[get_untrained_model_with_inputs] use fct={fct}")
        if os.environ.get("PRINT_CONFIG") in (1, "1"):
            print(f"-- input kwargs for task {task!r}")
            pprint.pprint(kwargs)
    if inputs_kwargs:
        kwargs.update(inputs_kwargs)

    # This line is important. Some models may produce different
    # outputs even with the same inputs in training mode.
    model.eval()  # type: ignore[union-attr]
    res = fct(model, config, add_second_input=add_second_input, **kwargs)

    res["input_kwargs"] = kwargs
    res["model_kwargs"] = mkwargs
    if diff_config is not None:
        res["dump_info"] = dict(config_diff=diff_config)

    sizes = compute_model_size(model)
    res["model"] = model
    res["configuration"] = config
    res["size"] = sizes[0]
    res["n_weights"] = sizes[1]
    res["task"] = task

    update = {}
    for k, v in res.items():
        if k.startswith(("inputs", "dynamic_shapes")) and isinstance(v, dict):
            update[k] = filter_out_unexpected_inputs(model, v, verbose=verbose)
    res.update(update)

    rewrite = _code_needing_rewriting(model.__class__.__name__)
    if rewrite:
        res["rewrite"] = rewrite
    return res


def filter_out_unexpected_inputs(
    model: torch.nn.Module, kwargs: Dict[str, Any], verbose: int = 0
):
    """
    Removes input names from ``kwargs`` when no matching parameter name
    is found in the signature of ``model.forward``.
    """
    sig = inspect.signature(model.forward)
    allowed = set(sig.parameters)
    new_kwargs = {k: v for k, v in kwargs.items() if k in allowed}
    diff = set(kwargs) - set(new_kwargs)
    if diff and verbose:
        print(f"[filter_out_unexpected_inputs] removed {diff}")
    return new_kwargs


def compute_model_size(model: torch.nn.Module) -> Tuple[int, int]:
    """Returns the size of the model (weights only) and the number of parameters."""
    param_size = 0
    nparams = 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
        nparams += param.nelement()
    return param_size, nparams
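The two helpers at the end of this file are self-contained. A minimal sketch of their behavior (editor's illustration, not part of the package; the module path is assumed from the wheel layout listed above):

    import torch

    from onnx_diagnostic.torch_models.hghub.model_inputs import (
        compute_model_size,
        filter_out_unexpected_inputs,
    )


    class TinyNet(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 2)

        def forward(self, x):
            return self.linear(x)


    model = TinyNet()
    # Linear(4, 2) holds 4*2 weights + 2 biases = 10 float32 parameters = 40 bytes.
    size, n_weights = compute_model_size(model)
    assert (size, n_weights) == (40, 10)

    # "unused" does not appear in TinyNet.forward's signature, so it is dropped.
    kept = filter_out_unexpected_inputs(model, {"x": torch.randn(1, 4), "unused": 0})
    assert set(kept) == {"x"}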
onnx_diagnostic/torch_models/hghub/model_specific.py
@@ -0,0 +1,76 @@
from typing import Any, Dict, Tuple


def instantiate_specific_model(cls_model: type, config: Any) -> object:
    """Instantiates a model that requires some specific code."""
    if cls_model.__name__ == "CosmosTransformer3DModel":
        return instantiate_CosmosTransformer3DModel(cls_model, config)
    return None


def instantiate_CosmosTransformer3DModel(cls_model: type, config: Any) -> object:
    kwargs = dict(
        in_channels=config.in_channels,
        out_channels=config.out_channels,
        attention_head_dim=config.attention_head_dim,
        mlp_ratio=config.mlp_ratio,
        num_layers=config.num_layers,
        text_embed_dim=config.text_embed_dim,
        adaln_lora_dim=config.adaln_lora_dim,
        max_size=config.max_size,
        patch_size=config.patch_size,
        rope_scale=config.rope_scale,
        concat_padding_mask=config.concat_padding_mask,
        extra_pos_embed_type=config.extra_pos_embed_type,
    )
    return cls_model(**kwargs)


class SpecificConfig:
    """Creates a specific configuration for the loaded model."""

    def __init__(self, **kwargs):
        self._atts = set(kwargs)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def to_dict(self) -> Dict[str, Any]:
        return {k: getattr(self, k) for k in self._atts if k != "_atts"}


def load_specific_model(
    model_id: str, verbose: int = 0, **kwargs
) -> Tuple[Any, str, SpecificConfig]:
    """
    Some models cannot be loaded through a generic API.
    This function dispatches to the loader registered for the given model id.

    :param model_id: model id
    :param verbose: verbosity
    :param kwargs: additional parameters
    :return: the model, the task associated to it, a configuration
    """
    assert model_id in HANDLED_MODELS, (
        f"Unable to load model_id={model_id!r}, "
        f"no function is mapped to this id in {sorted(HANDLED_MODELS)}"
    )
    return HANDLED_MODELS[model_id](model_id, verbose=verbose, **kwargs)


def _load_bingsu_adetailer(model_id: str, verbose: int = 0) -> Tuple[Any, str, SpecificConfig]:
    """See `Bingsu/adetailer <https://huggingface.co/Bingsu/adetailer>`_."""
    from huggingface_hub import hf_hub_download
    from ultralytics import YOLO

    path = hf_hub_download("Bingsu/adetailer", "face_yolov8n.pt")
    model = YOLO(path)
    return (
        model,
        "object-detection",
        SpecificConfig(architecture=type(model), image_size=224, num_channels=3),
    )


HANDLED_MODELS = {"Bingsu/adetailer": _load_bingsu_adetailer}
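``HANDLED_MODELS`` is a plain dictionary, so an extra model-specific loader can be registered next to ``Bingsu/adetailer``. A hypothetical sketch ("my-org/my-model" and ``_load_my_model`` are illustrative names only, not part of the package):

    import torch

    from onnx_diagnostic.torch_models.hghub.model_specific import (
        HANDLED_MODELS,
        SpecificConfig,
        load_specific_model,
    )


    def _load_my_model(model_id: str, verbose: int = 0):
        # Stand-in for a real loader that would download and build the model.
        model = torch.nn.Linear(8, 8)
        return model, "feature-extraction", SpecificConfig(architecture=type(model))


    HANDLED_MODELS["my-org/my-model"] = _load_my_model
    model, task, config = load_specific_model("my-org/my-model")
    assert task == "feature-extraction"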
File without changes
onnx_diagnostic/torch_models/untrained/llm_phi2.py
@@ -0,0 +1,113 @@
from typing import Any, Dict
import torch
import transformers
from ...helpers.cache_helper import make_dynamic_cache


def get_phi2(
    batch_size: int = 1,
    sequence_length: int = 30,
    sequence_length2: int = 3,
    dynamic_rope: bool = False,
    use_dim_not_dynamic: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model similar to :epkg:`microsoft/phi-2`.

    :param batch_size: batch size
    :param sequence_length: sequence length
    :param sequence_length2: new sequence length
    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
    :param use_dim_not_dynamic: uses ``torch.export.Dim`` and not a string for the batch size,
        the sequence length and the cache length
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See :ref:`l-plot-tiny-llm-export-patched` for an example with a similar model.
    """
    config = {
        "_name_or_path": "microsoft/phi-2",
        "architectures": ["PhiForCausalLM"],
        "attention_dropout": 0.0,
        "bos_token_id": 50256,
        "embd_pdrop": 0.0,
        "eos_token_id": 50256,
        "hidden_act": "gelu_new",
        "hidden_size": 2560,
        "initializer_range": 0.02,
        "intermediate_size": 10240,
        "layer_norm_eps": 1e-05,
        "max_position_embeddings": 2048,
        "model_type": "phi",
        "num_attention_heads": 32,
        "num_hidden_layers": 32,
        "num_key_value_heads": 32,
        "partial_rotary_factor": 0.4,
        "qk_layernorm": False,
        "resid_pdrop": 0.1,
        "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
        "rope_theta": 10000.0,
        "tie_word_embeddings": False,
        "torch_dtype": "float16",
        "transformers_version": "4.37.0",
        "use_cache": True,
        "vocab_size": 51200,
    }
    config.update(**kwargs)
    conf = transformers.PhiConfig(**config)
    model = transformers.PhiForCausalLM(conf)
    model.eval()

    # now the inputs
    cache_last_dim = 80
    max_token_id = config["vocab_size"] - 1
    n_layers = config["num_hidden_layers"]
    num_key_value_heads = config["num_key_value_heads"]

    if use_dim_not_dynamic:
        batch = torch.export.Dim("batch", min=1, max=1024)
        seq_length = torch.export.Dim("seq_length", min=1, max=4096)
        cache_length = torch.export.Dim("cache_length", min=1, max=4096)
    else:
        batch = "batch"
        seq_length = "seq_length"
        cache_length = "cache_length"

    shapes = {
        "input_ids": {0: batch, 1: seq_length},
        "position_ids": {
            0: batch,
            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
        },
        "attention_mask": {
            0: batch,
            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
        },
        "past_key_values": [{0: batch, 2: cache_length} for _ in range(n_layers * 2)],
    }
    inputs = dict(
        input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
            torch.int64
        ),
        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
            torch.int64
        ),
        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
        .to(torch.int64)
        .expand((batch_size, -1)),
        past_key_values=make_dynamic_cache(
            [
                (
                    torch.randn(
                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
                    ),
                    torch.randn(
                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
                    ),
                )
                for i in range(n_layers)
            ]
        ),
    )
    return dict(inputs=inputs, model=model, dynamic_shapes=shapes, configuration=conf)
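A minimal sketch of how ``get_phi2`` might be used, assuming the module path from the listing above. ``num_hidden_layers=1`` keeps the untrained clone small; ``hidden_size`` is left at 2560 because ``cache_last_dim = 80`` is hardcoded to match ``head_dim = 2560 / 32``:

    import torch

    from onnx_diagnostic.torch_models.untrained.llm_phi2 import get_phi2

    data = get_phi2(num_hidden_layers=1, batch_size=1)
    model, inputs = data["model"], data["inputs"]
    with torch.no_grad():
        out = model(**inputs)
    # logits: (batch_size, sequence_length2, vocab_size) = (1, 3, 51200)
    print(out.logits.shape)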
onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py
@@ -0,0 +1,76 @@
from typing import Any, Dict
import transformers


def get_tiny_llm(
    batch_size: int = 2,
    sequence_length: int = 30,
    sequence_length2: int = 3,
    dynamic_rope: bool = False,
    use_static_cache: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """
    Gets a non-initialized model similar to :epkg:`arnir0/Tiny-LLM`.

    :param batch_size: batch size
    :param sequence_length: sequence length
    :param sequence_length2: new sequence length
    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
    :param use_static_cache: use StaticCache instead of DynamicCache
    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
    :return: dictionary

    See :ref:`l-plot-tiny-llm-export` or :ref:`l-plot-tiny-llm-export-patched` for examples.
    """
    from ...tasks.text_generation import get_inputs

    config = {
        "architectures": ["LlamaForCausalLM"],
        "bos_token_id": 1,
        "eos_token_id": 2,
        "hidden_act": "silu",
        "hidden_size": 192,
        "initializer_range": 0.02,
        "intermediate_size": 1024,
        "max_position_embeddings": 1024,
        "model_type": "llama",
        "num_attention_heads": 2,
        "num_hidden_layers": 1,
        "num_key_value_heads": 1,
        "pretraining_tp": 1,
        "rms_norm_eps": 1e-05,
        "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
        "tie_word_embeddings": False,
        "torch_dtype": "float32",
        "transformers_version": "4.31.0.dev0",
        "use_cache": True,
        "vocab_size": 32000,
    }

    config.update(**kwargs)
    conf = transformers.LlamaConfig(**config)
    if use_static_cache:
        conf.cache_implementation = "static"
    model = transformers.LlamaForCausalLM(conf)
    model.eval()

    res = get_inputs(
        model,
        conf,
        dummy_max_token_id=config["vocab_size"],  # type: ignore[arg-type]
        num_hidden_layers=config["num_hidden_layers"],  # type: ignore[arg-type]
        batch_size=batch_size,
        sequence_length=sequence_length,
        sequence_length2=sequence_length2,
        dynamic_rope=dynamic_rope,
        num_key_value_heads=config["num_key_value_heads"],  # type: ignore[arg-type]
        cls_cache="StaticCache" if use_static_cache else "DynamicCache",
    )

    return dict(
        inputs=res["inputs"],
        model=model,
        dynamic_shapes=res["dynamic_shapes"],
        configuration=conf,
    )
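A sketch of exporting the returned model with the returned dynamic shapes. It assumes the ``torch_export_patches`` context manager exported by ``onnx_diagnostic.torch_export_patches`` (listed in this wheel) registers the DynamicCache serialization that ``torch.export`` needs:

    import torch

    from onnx_diagnostic.torch_export_patches import torch_export_patches
    from onnx_diagnostic.torch_models.untrained.llm_tiny_llm import get_tiny_llm

    data = get_tiny_llm()
    model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
    with torch_export_patches(patch_transformers=True):
        ep = torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds)
    print(ep)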