oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/cli.py +7 -5
- ads/aqua/common/entities.py +88 -29
- ads/aqua/common/enums.py +6 -0
- ads/aqua/common/errors.py +5 -0
- ads/aqua/common/utils.py +49 -7
- ads/aqua/constants.py +3 -0
- ads/aqua/extension/deployment_handler.py +36 -0
- ads/aqua/modeldeployment/constants.py +1 -0
- ads/aqua/modeldeployment/deployment.py +83 -12
- ads/aqua/modeldeployment/entities.py +3 -0
- ads/aqua/resources/gpu_shapes_index.json +315 -26
- ads/aqua/shaperecommend/__init__.py +6 -0
- ads/aqua/shaperecommend/constants.py +116 -0
- ads/aqua/shaperecommend/estimator.py +384 -0
- ads/aqua/shaperecommend/llm_config.py +283 -0
- ads/aqua/shaperecommend/recommend.py +493 -0
- ads/aqua/shaperecommend/shape_report.py +233 -0
- ads/aqua/version.json +1 -1
- ads/cli.py +9 -1
- ads/jobs/builders/infrastructure/dsc_job.py +1 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
- ads/model/service/oci_datascience_model_deployment.py +46 -19
- ads/opctl/operator/lowcode/common/data.py +7 -2
- ads/opctl/operator/lowcode/common/transformations.py +207 -0
- ads/opctl/operator/lowcode/common/utils.py +8 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
- ads/opctl/operator/lowcode/forecast/const.py +2 -0
- ads/opctl/operator/lowcode/forecast/errors.py +5 -0
- ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
- ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
- ads/pipeline/ads_pipeline.py +13 -9
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +42 -35
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,94 +1,383 @@
|
|
1
1
|
{
|
2
2
|
"shapes": {
|
3
3
|
"BM.GPU.A10.4": {
|
4
|
+
"cpu_count": 64,
|
5
|
+
"cpu_memory_in_gbs": 1024,
|
4
6
|
"gpu_count": 4,
|
5
7
|
"gpu_memory_in_gbs": 96,
|
6
|
-
"gpu_type": "A10"
|
8
|
+
"gpu_type": "A10",
|
9
|
+
"quantization": [
|
10
|
+
"awq",
|
11
|
+
"gptq",
|
12
|
+
"marlin",
|
13
|
+
"int8",
|
14
|
+
"bitblas",
|
15
|
+
"aqlm",
|
16
|
+
"bitsandbytes",
|
17
|
+
"deepspeedfp",
|
18
|
+
"gguf"
|
19
|
+
],
|
20
|
+
"ranking": {
|
21
|
+
"cost": 50,
|
22
|
+
"performance": 50
|
23
|
+
}
|
7
24
|
},
|
8
25
|
"BM.GPU.A100-V2.8": {
|
26
|
+
"cpu_count": 128,
|
27
|
+
"cpu_memory_in_gbs": 2048,
|
9
28
|
"gpu_count": 8,
|
10
29
|
"gpu_memory_in_gbs": 640,
|
11
|
-
"gpu_type": "A100"
|
30
|
+
"gpu_type": "A100",
|
31
|
+
"quantization": [
|
32
|
+
"awq",
|
33
|
+
"gptq",
|
34
|
+
"marlin",
|
35
|
+
"int8",
|
36
|
+
"bitblas",
|
37
|
+
"aqlm",
|
38
|
+
"bitsandbytes",
|
39
|
+
"deepspeedfp",
|
40
|
+
"gguf"
|
41
|
+
],
|
42
|
+
"ranking": {
|
43
|
+
"cost": 80,
|
44
|
+
"performance": 70
|
45
|
+
}
|
46
|
+
},
|
47
|
+
"BM.GPU.B200.8": {
|
48
|
+
"cpu_count": 128,
|
49
|
+
"cpu_memory_in_gbs": 4096,
|
50
|
+
"gpu_count": 8,
|
51
|
+
"gpu_memory_in_gbs": 1440,
|
52
|
+
"gpu_type": "B200",
|
53
|
+
"quantization": [
|
54
|
+
"fp4",
|
55
|
+
"fp8",
|
56
|
+
"fp16",
|
57
|
+
"bf16",
|
58
|
+
"tf32",
|
59
|
+
"int8",
|
60
|
+
"fp64"
|
61
|
+
],
|
62
|
+
"ranking": {
|
63
|
+
"cost": 120,
|
64
|
+
"performance": 130
|
65
|
+
}
|
12
66
|
},
|
13
67
|
"BM.GPU.B4.8": {
|
68
|
+
"cpu_count": 64,
|
69
|
+
"cpu_memory_in_gbs": 2048,
|
14
70
|
"gpu_count": 8,
|
15
71
|
"gpu_memory_in_gbs": 320,
|
16
|
-
"gpu_type": "A100"
|
72
|
+
"gpu_type": "A100",
|
73
|
+
"quantization": [
|
74
|
+
"awq",
|
75
|
+
"gptq",
|
76
|
+
"marlin",
|
77
|
+
"int8",
|
78
|
+
"bitblas",
|
79
|
+
"aqlm",
|
80
|
+
"bitsandbytes",
|
81
|
+
"deepspeedfp",
|
82
|
+
"gguf"
|
83
|
+
],
|
84
|
+
"ranking": {
|
85
|
+
"cost": 70,
|
86
|
+
"performance": 60
|
87
|
+
}
|
88
|
+
},
|
89
|
+
"BM.GPU.GB200.4": {
|
90
|
+
"cpu_count": 144,
|
91
|
+
"cpu_memory_in_gbs": 1024,
|
92
|
+
"gpu_count": 4,
|
93
|
+
"gpu_memory_in_gbs": 768,
|
94
|
+
"gpu_type": "GB200",
|
95
|
+
"quantization": [
|
96
|
+
"fp4",
|
97
|
+
"fp8",
|
98
|
+
"fp6",
|
99
|
+
"int8",
|
100
|
+
"fp16",
|
101
|
+
"bf16",
|
102
|
+
"tf32",
|
103
|
+
"fp64"
|
104
|
+
],
|
105
|
+
"ranking": {
|
106
|
+
"cost": 110,
|
107
|
+
"performance": 120
|
108
|
+
}
|
17
109
|
},
|
18
110
|
"BM.GPU.H100.8": {
|
111
|
+
"cpu_count": 112,
|
112
|
+
"cpu_memory_in_gbs": 2048,
|
19
113
|
"gpu_count": 8,
|
20
114
|
"gpu_memory_in_gbs": 640,
|
21
|
-
"gpu_type": "H100"
|
115
|
+
"gpu_type": "H100",
|
116
|
+
"quantization": [
|
117
|
+
"awq",
|
118
|
+
"gptq",
|
119
|
+
"marlin",
|
120
|
+
"fp8",
|
121
|
+
"int8",
|
122
|
+
"bitblas",
|
123
|
+
"aqlm",
|
124
|
+
"bitsandbytes",
|
125
|
+
"deepspeedfp",
|
126
|
+
"gguf"
|
127
|
+
],
|
128
|
+
"ranking": {
|
129
|
+
"cost": 100,
|
130
|
+
"performance": 100
|
131
|
+
}
|
22
132
|
},
|
23
133
|
"BM.GPU.H200.8": {
|
134
|
+
"cpu_count": 112,
|
135
|
+
"cpu_memory_in_gbs": 3072,
|
24
136
|
"gpu_count": 8,
|
25
137
|
"gpu_memory_in_gbs": 1128,
|
26
|
-
"gpu_type": "H200"
|
138
|
+
"gpu_type": "H200",
|
139
|
+
"quantization": [
|
140
|
+
"awq",
|
141
|
+
"gptq",
|
142
|
+
"marlin",
|
143
|
+
"fp8",
|
144
|
+
"int8",
|
145
|
+
"bitblas",
|
146
|
+
"aqlm",
|
147
|
+
"bitsandbytes",
|
148
|
+
"deepspeedfp",
|
149
|
+
"gguf"
|
150
|
+
],
|
151
|
+
"ranking": {
|
152
|
+
"cost": 100,
|
153
|
+
"performance": 110
|
154
|
+
}
|
27
155
|
},
|
28
156
|
"BM.GPU.L40S-NC.4": {
|
157
|
+
"cpu_count": 112,
|
158
|
+
"cpu_memory_in_gbs": 1024,
|
29
159
|
"gpu_count": 4,
|
30
160
|
"gpu_memory_in_gbs": 192,
|
31
|
-
"gpu_type": "L40S"
|
161
|
+
"gpu_type": "L40S",
|
162
|
+
"quantization": [
|
163
|
+
"awq",
|
164
|
+
"gptq",
|
165
|
+
"marlin",
|
166
|
+
"fp8",
|
167
|
+
"int8",
|
168
|
+
"bitblas",
|
169
|
+
"aqlm",
|
170
|
+
"bitsandbytes",
|
171
|
+
"deepspeedfp",
|
172
|
+
"gguf"
|
173
|
+
],
|
174
|
+
"ranking": {
|
175
|
+
"cost": 60,
|
176
|
+
"performance": 80
|
177
|
+
}
|
32
178
|
},
|
33
179
|
"BM.GPU.L40S.4": {
|
180
|
+
"cpu_count": 112,
|
181
|
+
"cpu_memory_in_gbs": 1024,
|
34
182
|
"gpu_count": 4,
|
35
183
|
"gpu_memory_in_gbs": 192,
|
36
|
-
"gpu_type": "L40S"
|
184
|
+
"gpu_type": "L40S",
|
185
|
+
"quantization": [
|
186
|
+
"awq",
|
187
|
+
"gptq",
|
188
|
+
"marlin",
|
189
|
+
"fp8",
|
190
|
+
"int8",
|
191
|
+
"bitblas",
|
192
|
+
"aqlm",
|
193
|
+
"bitsandbytes",
|
194
|
+
"deepspeedfp",
|
195
|
+
"gguf"
|
196
|
+
],
|
197
|
+
"ranking": {
|
198
|
+
"cost": 60,
|
199
|
+
"performance": 80
|
200
|
+
}
|
37
201
|
},
|
38
202
|
"BM.GPU.MI300X.8": {
|
203
|
+
"cpu_count": 112,
|
204
|
+
"cpu_memory_in_gbs": 2048,
|
39
205
|
"gpu_count": 8,
|
40
206
|
"gpu_memory_in_gbs": 1536,
|
41
|
-
"gpu_type": "MI300X"
|
207
|
+
"gpu_type": "MI300X",
|
208
|
+
"quantization": [
|
209
|
+
"fp8",
|
210
|
+
"gguf"
|
211
|
+
],
|
212
|
+
"ranking": {
|
213
|
+
"cost": 90,
|
214
|
+
"performance": 90
|
215
|
+
}
|
42
216
|
},
|
43
217
|
"BM.GPU2.2": {
|
218
|
+
"cpu_count": 28,
|
219
|
+
"cpu_memory_in_gbs": 192,
|
44
220
|
"gpu_count": 2,
|
45
221
|
"gpu_memory_in_gbs": 32,
|
46
|
-
"gpu_type": "P100"
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
"
|
51
|
-
|
222
|
+
"gpu_type": "P100",
|
223
|
+
"quantization": [
|
224
|
+
"fp16"
|
225
|
+
],
|
226
|
+
"ranking": {
|
227
|
+
"cost": 30,
|
228
|
+
"performance": 20
|
229
|
+
}
|
52
230
|
},
|
53
231
|
"BM.GPU4.8": {
|
232
|
+
"cpu_count": 64,
|
233
|
+
"cpu_memory_in_gbs": 2048,
|
54
234
|
"gpu_count": 8,
|
55
235
|
"gpu_memory_in_gbs": 320,
|
56
|
-
"gpu_type": "A100"
|
236
|
+
"gpu_type": "A100",
|
237
|
+
"quantization": [
|
238
|
+
"int8",
|
239
|
+
"fp16",
|
240
|
+
"bf16",
|
241
|
+
"tf32"
|
242
|
+
],
|
243
|
+
"ranking": {
|
244
|
+
"cost": 57,
|
245
|
+
"performance": 65
|
246
|
+
}
|
57
247
|
},
|
58
248
|
"VM.GPU.A10.1": {
|
249
|
+
"cpu_count": 15,
|
250
|
+
"cpu_memory_in_gbs": 240,
|
59
251
|
"gpu_count": 1,
|
60
252
|
"gpu_memory_in_gbs": 24,
|
61
|
-
"gpu_type": "A10"
|
253
|
+
"gpu_type": "A10",
|
254
|
+
"quantization": [
|
255
|
+
"awq",
|
256
|
+
"gptq",
|
257
|
+
"marlin",
|
258
|
+
"int8",
|
259
|
+
"bitblas",
|
260
|
+
"aqlm",
|
261
|
+
"bitsandbytes",
|
262
|
+
"deepspeedfp",
|
263
|
+
"gguf"
|
264
|
+
],
|
265
|
+
"ranking": {
|
266
|
+
"cost": 20,
|
267
|
+
"performance": 30
|
268
|
+
}
|
62
269
|
},
|
63
270
|
"VM.GPU.A10.2": {
|
271
|
+
"cpu_count": 30,
|
272
|
+
"cpu_memory_in_gbs": 480,
|
64
273
|
"gpu_count": 2,
|
65
274
|
"gpu_memory_in_gbs": 48,
|
66
|
-
"gpu_type": "A10"
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
275
|
+
"gpu_type": "A10",
|
276
|
+
"quantization": [
|
277
|
+
"awq",
|
278
|
+
"gptq",
|
279
|
+
"marlin",
|
280
|
+
"int8",
|
281
|
+
"bitblas",
|
282
|
+
"aqlm",
|
283
|
+
"bitsandbytes",
|
284
|
+
"deepspeedfp",
|
285
|
+
"gguf"
|
286
|
+
],
|
287
|
+
"ranking": {
|
288
|
+
"cost": 40,
|
289
|
+
"performance": 40
|
290
|
+
}
|
72
291
|
},
|
73
292
|
"VM.GPU2.1": {
|
293
|
+
"cpu_count": 12,
|
294
|
+
"cpu_memory_in_gbs": 72,
|
74
295
|
"gpu_count": 1,
|
75
296
|
"gpu_memory_in_gbs": 16,
|
76
|
-
"gpu_type": "P100"
|
297
|
+
"gpu_type": "P100",
|
298
|
+
"quantization": [
|
299
|
+
"fp16"
|
300
|
+
],
|
301
|
+
"ranking": {
|
302
|
+
"cost": 10,
|
303
|
+
"performance": 10
|
304
|
+
}
|
77
305
|
},
|
78
306
|
"VM.GPU3.1": {
|
307
|
+
"cpu_count": 6,
|
308
|
+
"cpu_memory_in_gbs": 90,
|
79
309
|
"gpu_count": 1,
|
80
310
|
"gpu_memory_in_gbs": 16,
|
81
|
-
"gpu_type": "V100"
|
311
|
+
"gpu_type": "V100",
|
312
|
+
"quantization": [
|
313
|
+
"gptq",
|
314
|
+
"bitblas",
|
315
|
+
"aqlm",
|
316
|
+
"bitsandbytes",
|
317
|
+
"deepspeedfp",
|
318
|
+
"gguf"
|
319
|
+
],
|
320
|
+
"ranking": {
|
321
|
+
"cost": 35,
|
322
|
+
"performance": 10
|
323
|
+
}
|
82
324
|
},
|
83
325
|
"VM.GPU3.2": {
|
326
|
+
"cpu_count": 12,
|
327
|
+
"cpu_memory_in_gbs": 180,
|
84
328
|
"gpu_count": 2,
|
85
329
|
"gpu_memory_in_gbs": 32,
|
86
|
-
"gpu_type": "V100"
|
330
|
+
"gpu_type": "V100",
|
331
|
+
"quantization": [
|
332
|
+
"gptq",
|
333
|
+
"bitblas",
|
334
|
+
"aqlm",
|
335
|
+
"bitsandbytes",
|
336
|
+
"deepspeedfp",
|
337
|
+
"gguf"
|
338
|
+
],
|
339
|
+
"ranking": {
|
340
|
+
"cost": 45,
|
341
|
+
"performance": 20
|
342
|
+
}
|
87
343
|
},
|
88
344
|
"VM.GPU3.4": {
|
345
|
+
"cpu_count": 24,
|
346
|
+
"cpu_memory_in_gbs": 360,
|
89
347
|
"gpu_count": 4,
|
90
348
|
"gpu_memory_in_gbs": 64,
|
91
|
-
"gpu_type": "V100"
|
349
|
+
"gpu_type": "V100",
|
350
|
+
"quantization": [
|
351
|
+
"gptq",
|
352
|
+
"bitblas",
|
353
|
+
"aqlm",
|
354
|
+
"bitsandbytes",
|
355
|
+
"deepspeedfp",
|
356
|
+
"gguf"
|
357
|
+
],
|
358
|
+
"ranking": {
|
359
|
+
"cost": 55,
|
360
|
+
"performance": 45
|
361
|
+
}
|
362
|
+
},
|
363
|
+
"VM.GPU3.8": {
|
364
|
+
"cpu_count": 24,
|
365
|
+
"cpu_memory_in_gbs": 768,
|
366
|
+
"gpu_count": 8,
|
367
|
+
"gpu_memory_in_gbs": 128,
|
368
|
+
"gpu_type": "V100",
|
369
|
+
"quantization": [
|
370
|
+
"gptq",
|
371
|
+
"bitblas",
|
372
|
+
"aqlm",
|
373
|
+
"bitsandbytes",
|
374
|
+
"deepspeedfp",
|
375
|
+
"gguf"
|
376
|
+
],
|
377
|
+
"ranking": {
|
378
|
+
"cost": 56,
|
379
|
+
"performance": 46
|
380
|
+
}
|
92
381
|
}
|
93
382
|
}
|
94
383
|
}
|
@@ -0,0 +1,6 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# Copyright (c) 2025 Oracle and/or its affiliates.
|
3
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
|
+
from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
|
5
|
+
|
6
|
+
__all__ = ["AquaShapeRecommend"]
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
|
3
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
|
+
|
5
|
+
"""
|
6
|
+
aqua.shaperecommend.constants
|
7
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
8
|
+
|
9
|
+
This module contains constants used in Aqua GPU Recommendation for Models.
|
10
|
+
|
11
|
+
LLAMA_REQUIRED_FIELDS refers to the fields necessary for calculating model memory for GQA architecture models.
|
12
|
+
|
13
|
+
MOE_REQUIRED_FIELDS refers to the fields necessary for Mixture of Experts (MoE) architecture models.
|
14
|
+
|
15
|
+
NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)
|
16
|
+
"""
|
17
|
+
|
18
|
+
LLAMA_REQUIRED_FIELDS = [
|
19
|
+
"num_hidden_layers",
|
20
|
+
"hidden_size",
|
21
|
+
"num_attention_heads",
|
22
|
+
"num_key_value_heads",
|
23
|
+
"head_dim",
|
24
|
+
"intermediate_size",
|
25
|
+
"vocab_size",
|
26
|
+
]
|
27
|
+
|
28
|
+
# "intermediate_size" is already part of LLAMA_REQUIRED_FIELDS, so only the MoE-specific field is appended (avoids a duplicate entry).
MOE_REQUIRED_FIELDS = LLAMA_REQUIRED_FIELDS + ["num_local_experts"]
|
29
|
+
|
30
|
+
NEXT_QUANT = {
|
31
|
+
"float32": ["8bit", "4bit"],
|
32
|
+
"bfloat16": ["8bit", "4bit"],
|
33
|
+
"float16": ["8bit", "4bit"],
|
34
|
+
"int8": ["4bit"],
|
35
|
+
"fp8": ["4bit"],
|
36
|
+
"8bit": ["4bit"],
|
37
|
+
"int4": ["No smaller quantization available"],
|
38
|
+
"4bit": ["No smaller quantization available"],
|
39
|
+
}
|
40
|
+
|
41
|
+
|
42
|
+
TEXT_GENERATION = "text_generation"
|
43
|
+
SAFETENSORS = "safetensors"
|
44
|
+
|
45
|
+
QUANT_METHODS = [
|
46
|
+
"aqlm",
|
47
|
+
"awq",
|
48
|
+
"deepspeedfp",
|
49
|
+
"tpu_int8",
|
50
|
+
"fp8",
|
51
|
+
"ptpc_fp8",
|
52
|
+
"fbgemm_fp8",
|
53
|
+
"modelopt",
|
54
|
+
"modelopt_fp4",
|
55
|
+
"marlin",
|
56
|
+
"bitblas",
|
57
|
+
"gguf",
|
58
|
+
"gptq_marlin_24",
|
59
|
+
"gptq_marlin",
|
60
|
+
"gptq_bitblas",
|
61
|
+
"awq_marlin",
|
62
|
+
"gptq",
|
63
|
+
"compressed-tensors",
|
64
|
+
"bitsandbytes",
|
65
|
+
"qqq",
|
66
|
+
"hqq",
|
67
|
+
"experts_int8",
|
68
|
+
"neuron_quant",
|
69
|
+
"ipex",
|
70
|
+
"quark",
|
71
|
+
"moe_wna16",
|
72
|
+
"torchao",
|
73
|
+
"auto-round",
|
74
|
+
"rtn",
|
75
|
+
"inc",
|
76
|
+
"mxfp4",
|
77
|
+
]
|
78
|
+
|
79
|
+
IN_FLIGHT_QUANTIZATION = {"4bit"} # vLLM only supports 4bit in-flight-quantization
|
80
|
+
|
81
|
+
TROUBLESHOOT_MSG = "The selected model is too large to fit on standard GPU shapes with the current configuration.\nAs troubleshooting, we have suggested the two largest available GPU shapes using the smallest quantization level ('4bit') to maximize chances of fitting the model. "
|
82
|
+
|
83
|
+
VLLM_PARAMS = {
|
84
|
+
"max_model_len": "--max-model-len",
|
85
|
+
"in_flight_quant": "--quantization bitsandbytes --load-format bitsandbytes",
|
86
|
+
}
|
87
|
+
|
88
|
+
DEFAULT_WEIGHT_SIZE = "float32"
|
89
|
+
|
90
|
+
BITS_AND_BYTES_8BIT = "8bit"
|
91
|
+
BITS_AND_BYTES_4BIT = "4bit"
|
92
|
+
|
93
|
+
BITSANDBYTES = "bitsandbytes"
|
94
|
+
|
95
|
+
|
96
|
+
QUANT_MAPPING = {
|
97
|
+
"float32": 4,
|
98
|
+
"bfloat16": 2,
|
99
|
+
"float16": 2,
|
100
|
+
"fp16": 2,
|
101
|
+
"half": 2,
|
102
|
+
"int8": 1,
|
103
|
+
"fp8": 1,
|
104
|
+
"8bit": 1,
|
105
|
+
"4bit": 0.5,
|
106
|
+
"int4": 0.5,
|
107
|
+
}
|
108
|
+
|
109
|
+
SHAPE_MAP = {
|
110
|
+
"NVIDIA_GPU": "GPU",
|
111
|
+
"AMD_ROME": "CPU",
|
112
|
+
"GENERIC": "CPU",
|
113
|
+
"LEGACY": "CPU",
|
114
|
+
"ARM": "CPU",
|
115
|
+
"UNKNOWN_ENUM_VALUE": "N/A",
|
116
|
+
}
|