oracle-ads 2.13.18rc0 → 2.13.19 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +195 -48
  3. ads/aqua/common/enums.py +6 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +157 -66
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/constants.py +1 -0
  9. ads/aqua/modeldeployment/deployment.py +95 -14
  10. ads/aqua/modeldeployment/entities.py +3 -0
  11. ads/aqua/modeldeployment/model_group_config.py +3 -3
  12. ads/aqua/resources/gpu_shapes_index.json +315 -26
  13. ads/aqua/shaperecommend/__init__.py +6 -0
  14. ads/aqua/shaperecommend/constants.py +116 -0
  15. ads/aqua/shaperecommend/estimator.py +384 -0
  16. ads/aqua/shaperecommend/llm_config.py +283 -0
  17. ads/aqua/shaperecommend/recommend.py +493 -0
  18. ads/aqua/shaperecommend/shape_report.py +233 -0
  19. ads/aqua/version.json +1 -1
  20. ads/cli.py +9 -1
  21. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  22. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  23. ads/model/service/oci_datascience_model_deployment.py +46 -19
  24. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +4 -3
  25. ads/pipeline/ads_pipeline.py +13 -9
  26. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/METADATA +1 -1
  27. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/RECORD +30 -24
  28. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/WHEEL +0 -0
  29. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/entry_points.txt +0 -0
  30. {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/resources/gpu_shapes_index.json
@@ -1,94 +1,383 @@
  {
  "shapes": {
  "BM.GPU.A10.4": {
+ "cpu_count": 64,
+ "cpu_memory_in_gbs": 1024,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 96,
- "gpu_type": "A10"
+ "gpu_type": "A10",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 50,
+ "performance": 50
+ }
  },
  "BM.GPU.A100-V2.8": {
+ "cpu_count": 128,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 640,
- "gpu_type": "A100"
+ "gpu_type": "A100",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 80,
+ "performance": 70
+ }
+ },
+ "BM.GPU.B200.8": {
+ "cpu_count": 128,
+ "cpu_memory_in_gbs": 4096,
+ "gpu_count": 8,
+ "gpu_memory_in_gbs": 1440,
+ "gpu_type": "B200",
+ "quantization": [
+ "fp4",
+ "fp8",
+ "fp16",
+ "bf16",
+ "tf32",
+ "int8",
+ "fp64"
+ ],
+ "ranking": {
+ "cost": 120,
+ "performance": 130
+ }
  },
  "BM.GPU.B4.8": {
+ "cpu_count": 64,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 320,
- "gpu_type": "A100"
+ "gpu_type": "A100",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 70,
+ "performance": 60
+ }
+ },
+ "BM.GPU.GB200.4": {
+ "cpu_count": 144,
+ "cpu_memory_in_gbs": 1024,
+ "gpu_count": 4,
+ "gpu_memory_in_gbs": 768,
+ "gpu_type": "GB200",
+ "quantization": [
+ "fp4",
+ "fp8",
+ "fp6",
+ "int8",
+ "fp16",
+ "bf16",
+ "tf32",
+ "fp64"
+ ],
+ "ranking": {
+ "cost": 110,
+ "performance": 120
+ }
  },
  "BM.GPU.H100.8": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 640,
- "gpu_type": "H100"
+ "gpu_type": "H100",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 100,
+ "performance": 100
+ }
  },
  "BM.GPU.H200.8": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 3072,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 1128,
- "gpu_type": "H200"
+ "gpu_type": "H200",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 100,
+ "performance": 110
+ }
  },
  "BM.GPU.L40S-NC.4": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 1024,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 192,
- "gpu_type": "L40S"
+ "gpu_type": "L40S",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 60,
+ "performance": 80
+ }
  },
  "BM.GPU.L40S.4": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 1024,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 192,
- "gpu_type": "L40S"
+ "gpu_type": "L40S",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "fp8",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 60,
+ "performance": 80
+ }
  },
  "BM.GPU.MI300X.8": {
+ "cpu_count": 112,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 1536,
- "gpu_type": "MI300X"
+ "gpu_type": "MI300X",
+ "quantization": [
+ "fp8",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 90,
+ "performance": 90
+ }
  },
  "BM.GPU2.2": {
+ "cpu_count": 28,
+ "cpu_memory_in_gbs": 192,
  "gpu_count": 2,
  "gpu_memory_in_gbs": 32,
- "gpu_type": "P100"
- },
- "BM.GPU3.8": {
- "gpu_count": 8,
- "gpu_memory_in_gbs": 128,
- "gpu_type": "V100"
+ "gpu_type": "P100",
+ "quantization": [
+ "fp16"
+ ],
+ "ranking": {
+ "cost": 30,
+ "performance": 20
+ }
  },
  "BM.GPU4.8": {
+ "cpu_count": 64,
+ "cpu_memory_in_gbs": 2048,
  "gpu_count": 8,
  "gpu_memory_in_gbs": 320,
- "gpu_type": "A100"
+ "gpu_type": "A100",
+ "quantization": [
+ "int8",
+ "fp16",
+ "bf16",
+ "tf32"
+ ],
+ "ranking": {
+ "cost": 57,
+ "performance": 65
+ }
  },
  "VM.GPU.A10.1": {
+ "cpu_count": 15,
+ "cpu_memory_in_gbs": 240,
  "gpu_count": 1,
  "gpu_memory_in_gbs": 24,
- "gpu_type": "A10"
+ "gpu_type": "A10",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 20,
+ "performance": 30
+ }
  },
  "VM.GPU.A10.2": {
+ "cpu_count": 30,
+ "cpu_memory_in_gbs": 480,
  "gpu_count": 2,
  "gpu_memory_in_gbs": 48,
- "gpu_type": "A10"
- },
- "VM.GPU.A10.4": {
- "gpu_count": 4,
- "gpu_memory_in_gbs": 96,
- "gpu_type": "A10"
+ "gpu_type": "A10",
+ "quantization": [
+ "awq",
+ "gptq",
+ "marlin",
+ "int8",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 40,
+ "performance": 40
+ }
  },
  "VM.GPU2.1": {
+ "cpu_count": 12,
+ "cpu_memory_in_gbs": 72,
  "gpu_count": 1,
  "gpu_memory_in_gbs": 16,
- "gpu_type": "P100"
+ "gpu_type": "P100",
+ "quantization": [
+ "fp16"
+ ],
+ "ranking": {
+ "cost": 10,
+ "performance": 10
+ }
  },
  "VM.GPU3.1": {
+ "cpu_count": 6,
+ "cpu_memory_in_gbs": 90,
  "gpu_count": 1,
  "gpu_memory_in_gbs": 16,
- "gpu_type": "V100"
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 35,
+ "performance": 10
+ }
  },
  "VM.GPU3.2": {
+ "cpu_count": 12,
+ "cpu_memory_in_gbs": 180,
  "gpu_count": 2,
  "gpu_memory_in_gbs": 32,
- "gpu_type": "V100"
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 45,
+ "performance": 20
+ }
  },
  "VM.GPU3.4": {
+ "cpu_count": 24,
+ "cpu_memory_in_gbs": 360,
  "gpu_count": 4,
  "gpu_memory_in_gbs": 64,
- "gpu_type": "V100"
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 55,
+ "performance": 45
+ }
+ },
+ "VM.GPU3.8": {
+ "cpu_count": 24,
+ "cpu_memory_in_gbs": 768,
+ "gpu_count": 8,
+ "gpu_memory_in_gbs": 128,
+ "gpu_type": "V100",
+ "quantization": [
+ "gptq",
+ "bitblas",
+ "aqlm",
+ "bitsandbytes",
+ "deepspeedfp",
+ "gguf"
+ ],
+ "ranking": {
+ "cost": 56,
+ "performance": 46
+ }
  }
  }
  }
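Each shape entry in the updated index now carries CPU core count, CPU memory, the quantization formats the GPU family supports, and a relative cost/performance ranking. Below is a minimal sketch of how such an index could be queried; the candidate_shapes helper and the direct json.load of the resource path are illustrative assumptions, not the way oracle-ads itself consumes the file.

import json

# Minimal sketch: read the shapes index and list shapes that can hold a given
# model, cheapest (by the relative "cost" ranking) first. The path mirrors the
# package layout shown above; adjust it to wherever the file lives locally.
with open("ads/aqua/resources/gpu_shapes_index.json") as f:
    shapes = json.load(f)["shapes"]

def candidate_shapes(required_gpu_memory_gb: float, quant: str = "fp8"):
    # Keep shapes with enough total GPU memory that also support the requested
    # quantization format, then sort by the index's cost ranking (lower first).
    fits = [
        (name, spec)
        for name, spec in shapes.items()
        if spec["gpu_memory_in_gbs"] >= required_gpu_memory_gb
        and quant in spec.get("quantization", [])
    ]
    return sorted(fits, key=lambda item: item[1]["ranking"]["cost"])

for name, spec in candidate_shapes(300, quant="fp8"):
    print(name, spec["gpu_count"], "GPUs,", spec["gpu_memory_in_gbs"], "GB")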
ads/aqua/shaperecommend/__init__.py
@@ -0,0 +1,6 @@
+ #!/usr/bin/env python
+ # Copyright (c) 2025 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+ from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+
+ __all__ = ["AquaShapeRecommend"]
ads/aqua/shaperecommend/constants.py
@@ -0,0 +1,116 @@
+ #!/usr/bin/env python
+ # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+ """
+ aqua.shaperecommend.constants
+ ~~~~~~~~~~~~~~
+
+ This module contains constants used in Aqua GPU Recommendation for Models.
+
+ LLAMA_REQUIRED_FIELDS refer to fields necessary for calculating model memory for GQA Architecture Models
+
+ MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models
+
+ NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)
+ """
+
+ LLAMA_REQUIRED_FIELDS = [
+ "num_hidden_layers",
+ "hidden_size",
+ "num_attention_heads",
+ "num_key_value_heads",
+ "head_dim",
+ "intermediate_size",
+ "vocab_size",
+ ]
+
+ MOE_REQUIRED_FIELDS = LLAMA_REQUIRED_FIELDS + ["num_local_experts", "intermediate_size"]
+
+ NEXT_QUANT = {
+ "float32": ["8bit", "4bit"],
+ "bfloat16": ["8bit", "4bit"],
+ "float16": ["8bit", "4bit"],
+ "int8": ["4bit"],
+ "fp8": ["4bit"],
+ "8bit": ["4bit"],
+ "int4": ["No smaller quantization available"],
+ "4bit": ["No smaller quantization available"],
+ }
+
+
+ TEXT_GENERATION = "text_generation"
+ SAFETENSORS = "safetensors"
+
+ QUANT_METHODS = [
+ "aqlm",
+ "awq",
+ "deepspeedfp",
+ "tpu_int8",
+ "fp8",
+ "ptpc_fp8",
+ "fbgemm_fp8",
+ "modelopt",
+ "modelopt_fp4",
+ "marlin",
+ "bitblas",
+ "gguf",
+ "gptq_marlin_24",
+ "gptq_marlin",
+ "gptq_bitblas",
+ "awq_marlin",
+ "gptq",
+ "compressed-tensors",
+ "bitsandbytes",
+ "qqq",
+ "hqq",
+ "experts_int8",
+ "neuron_quant",
+ "ipex",
+ "quark",
+ "moe_wna16",
+ "torchao",
+ "auto-round",
+ "rtn",
+ "inc",
+ "mxfp4",
+ ]
+
+ IN_FLIGHT_QUANTIZATION = {"4bit"} # vLLM only supports 4bit in-flight-quantization
+
+ TROUBLESHOOT_MSG = "The selected model is too large to fit on standard GPU shapes with the current configuration.\nAs troubleshooting, we have suggested the two largest available GPU shapes using the smallest quantization level ('4bit') to maximize chances of fitting the model. "
+
+ VLLM_PARAMS = {
+ "max_model_len": "--max-model-len",
+ "in_flight_quant": "--quantization bitsandbytes --load-format bitsandbytes",
+ }
+
+ DEFAULT_WEIGHT_SIZE = "float32"
+
+ BITS_AND_BYTES_8BIT = "8bit"
+ BITS_AND_BYTES_4BIT = "4bit"
+
+ BITSANDBYTES = "bitsandbytes"
+
+
+ QUANT_MAPPING = {
+ "float32": 4,
+ "bfloat16": 2,
+ "float16": 2,
+ "fp16": 2,
+ "half": 2,
+ "int8": 1,
+ "fp8": 1,
+ "8bit": 1,
+ "4bit": 0.5,
+ "int4": 0.5,
+ }
+
+ SHAPE_MAP = {
+ "NVIDIA_GPU": "GPU",
+ "AMD_ROME": "CPU",
+ "GENERIC": "CPU",
+ "LEGACY": "CPU",
+ "ARM": "CPU",
+ "UNKNOWN_ENUM_VALUE": "N/A",
+ }
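QUANT_MAPPING is effectively a bytes-per-parameter table, so a weights-only memory estimate is just the parameter count times the mapped byte width, and NEXT_QUANT names the quantization levels to fall back to when that estimate does not fit a shape. A minimal sketch under those assumptions follows; the weight_memory_gb helper is illustrative only, and the shipped estimator.py additionally models KV-cache and runtime overheads, which this omits.

from ads.aqua.shaperecommend.constants import NEXT_QUANT, QUANT_MAPPING

def weight_memory_gb(num_params: float, weight_dtype: str = "float32") -> float:
    """Rough GB needed just to hold the model weights (params x bytes per param)."""
    return num_params * QUANT_MAPPING[weight_dtype] / 1e9

# A 70B-parameter model in bfloat16 is roughly 140 GB of weights alone, so
# NEXT_QUANT["bfloat16"] suggests retrying at "8bit" or "4bit".
print(round(weight_memory_gb(70e9, "bfloat16")), NEXT_QUANT["bfloat16"])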