oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +88 -29
  3. ads/aqua/common/enums.py +6 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +49 -7
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/constants.py +1 -0
  9. ads/aqua/modeldeployment/deployment.py +83 -12
  10. ads/aqua/modeldeployment/entities.py +3 -0
  11. ads/aqua/resources/gpu_shapes_index.json +315 -26
  12. ads/aqua/shaperecommend/__init__.py +6 -0
  13. ads/aqua/shaperecommend/constants.py +116 -0
  14. ads/aqua/shaperecommend/estimator.py +384 -0
  15. ads/aqua/shaperecommend/llm_config.py +283 -0
  16. ads/aqua/shaperecommend/recommend.py +493 -0
  17. ads/aqua/shaperecommend/shape_report.py +233 -0
  18. ads/aqua/version.json +1 -1
  19. ads/cli.py +9 -1
  20. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  21. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  22. ads/model/service/oci_datascience_model_deployment.py +46 -19
  23. ads/opctl/operator/lowcode/common/data.py +7 -2
  24. ads/opctl/operator/lowcode/common/transformations.py +207 -0
  25. ads/opctl/operator/lowcode/common/utils.py +8 -0
  26. ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
  27. ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
  28. ads/opctl/operator/lowcode/forecast/const.py +2 -0
  29. ads/opctl/operator/lowcode/forecast/errors.py +5 -0
  30. ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
  31. ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
  32. ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
  33. ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
  34. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
  35. ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
  36. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
  37. ads/pipeline/ads_pipeline.py +13 -9
  38. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
  39. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +42 -35
  40. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
  41. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
  42. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,94 +1,383 @@
1
1
  {
2
2
  "shapes": {
3
3
  "BM.GPU.A10.4": {
4
+ "cpu_count": 64,
5
+ "cpu_memory_in_gbs": 1024,
4
6
  "gpu_count": 4,
5
7
  "gpu_memory_in_gbs": 96,
6
- "gpu_type": "A10"
8
+ "gpu_type": "A10",
9
+ "quantization": [
10
+ "awq",
11
+ "gptq",
12
+ "marlin",
13
+ "int8",
14
+ "bitblas",
15
+ "aqlm",
16
+ "bitsandbytes",
17
+ "deepspeedfp",
18
+ "gguf"
19
+ ],
20
+ "ranking": {
21
+ "cost": 50,
22
+ "performance": 50
23
+ }
7
24
  },
8
25
  "BM.GPU.A100-V2.8": {
26
+ "cpu_count": 128,
27
+ "cpu_memory_in_gbs": 2048,
9
28
  "gpu_count": 8,
10
29
  "gpu_memory_in_gbs": 640,
11
- "gpu_type": "A100"
30
+ "gpu_type": "A100",
31
+ "quantization": [
32
+ "awq",
33
+ "gptq",
34
+ "marlin",
35
+ "int8",
36
+ "bitblas",
37
+ "aqlm",
38
+ "bitsandbytes",
39
+ "deepspeedfp",
40
+ "gguf"
41
+ ],
42
+ "ranking": {
43
+ "cost": 80,
44
+ "performance": 70
45
+ }
46
+ },
47
+ "BM.GPU.B200.8": {
48
+ "cpu_count": 128,
49
+ "cpu_memory_in_gbs": 4096,
50
+ "gpu_count": 8,
51
+ "gpu_memory_in_gbs": 1440,
52
+ "gpu_type": "B200",
53
+ "quantization": [
54
+ "fp4",
55
+ "fp8",
56
+ "fp16",
57
+ "bf16",
58
+ "tf32",
59
+ "int8",
60
+ "fp64"
61
+ ],
62
+ "ranking": {
63
+ "cost": 120,
64
+ "performance": 130
65
+ }
12
66
  },
13
67
  "BM.GPU.B4.8": {
68
+ "cpu_count": 64,
69
+ "cpu_memory_in_gbs": 2048,
14
70
  "gpu_count": 8,
15
71
  "gpu_memory_in_gbs": 320,
16
- "gpu_type": "A100"
72
+ "gpu_type": "A100",
73
+ "quantization": [
74
+ "awq",
75
+ "gptq",
76
+ "marlin",
77
+ "int8",
78
+ "bitblas",
79
+ "aqlm",
80
+ "bitsandbytes",
81
+ "deepspeedfp",
82
+ "gguf"
83
+ ],
84
+ "ranking": {
85
+ "cost": 70,
86
+ "performance": 60
87
+ }
88
+ },
89
+ "BM.GPU.GB200.4": {
90
+ "cpu_count": 144,
91
+ "cpu_memory_in_gbs": 1024,
92
+ "gpu_count": 4,
93
+ "gpu_memory_in_gbs": 768,
94
+ "gpu_type": "GB200",
95
+ "quantization": [
96
+ "fp4",
97
+ "fp8",
98
+ "fp6",
99
+ "int8",
100
+ "fp16",
101
+ "bf16",
102
+ "tf32",
103
+ "fp64"
104
+ ],
105
+ "ranking": {
106
+ "cost": 110,
107
+ "performance": 120
108
+ }
17
109
  },
18
110
  "BM.GPU.H100.8": {
111
+ "cpu_count": 112,
112
+ "cpu_memory_in_gbs": 2048,
19
113
  "gpu_count": 8,
20
114
  "gpu_memory_in_gbs": 640,
21
- "gpu_type": "H100"
115
+ "gpu_type": "H100",
116
+ "quantization": [
117
+ "awq",
118
+ "gptq",
119
+ "marlin",
120
+ "fp8",
121
+ "int8",
122
+ "bitblas",
123
+ "aqlm",
124
+ "bitsandbytes",
125
+ "deepspeedfp",
126
+ "gguf"
127
+ ],
128
+ "ranking": {
129
+ "cost": 100,
130
+ "performance": 100
131
+ }
22
132
  },
23
133
  "BM.GPU.H200.8": {
134
+ "cpu_count": 112,
135
+ "cpu_memory_in_gbs": 3072,
24
136
  "gpu_count": 8,
25
137
  "gpu_memory_in_gbs": 1128,
26
- "gpu_type": "H200"
138
+ "gpu_type": "H200",
139
+ "quantization": [
140
+ "awq",
141
+ "gptq",
142
+ "marlin",
143
+ "fp8",
144
+ "int8",
145
+ "bitblas",
146
+ "aqlm",
147
+ "bitsandbytes",
148
+ "deepspeedfp",
149
+ "gguf"
150
+ ],
151
+ "ranking": {
152
+ "cost": 100,
153
+ "performance": 110
154
+ }
27
155
  },
28
156
  "BM.GPU.L40S-NC.4": {
157
+ "cpu_count": 112,
158
+ "cpu_memory_in_gbs": 1024,
29
159
  "gpu_count": 4,
30
160
  "gpu_memory_in_gbs": 192,
31
- "gpu_type": "L40S"
161
+ "gpu_type": "L40S",
162
+ "quantization": [
163
+ "awq",
164
+ "gptq",
165
+ "marlin",
166
+ "fp8",
167
+ "int8",
168
+ "bitblas",
169
+ "aqlm",
170
+ "bitsandbytes",
171
+ "deepspeedfp",
172
+ "gguf"
173
+ ],
174
+ "ranking": {
175
+ "cost": 60,
176
+ "performance": 80
177
+ }
32
178
  },
33
179
  "BM.GPU.L40S.4": {
180
+ "cpu_count": 112,
181
+ "cpu_memory_in_gbs": 1024,
34
182
  "gpu_count": 4,
35
183
  "gpu_memory_in_gbs": 192,
36
- "gpu_type": "L40S"
184
+ "gpu_type": "L40S",
185
+ "quantization": [
186
+ "awq",
187
+ "gptq",
188
+ "marlin",
189
+ "fp8",
190
+ "int8",
191
+ "bitblas",
192
+ "aqlm",
193
+ "bitsandbytes",
194
+ "deepspeedfp",
195
+ "gguf"
196
+ ],
197
+ "ranking": {
198
+ "cost": 60,
199
+ "performance": 80
200
+ }
37
201
  },
38
202
  "BM.GPU.MI300X.8": {
203
+ "cpu_count": 112,
204
+ "cpu_memory_in_gbs": 2048,
39
205
  "gpu_count": 8,
40
206
  "gpu_memory_in_gbs": 1536,
41
- "gpu_type": "MI300X"
207
+ "gpu_type": "MI300X",
208
+ "quantization": [
209
+ "fp8",
210
+ "gguf"
211
+ ],
212
+ "ranking": {
213
+ "cost": 90,
214
+ "performance": 90
215
+ }
42
216
  },
43
217
  "BM.GPU2.2": {
218
+ "cpu_count": 28,
219
+ "cpu_memory_in_gbs": 192,
44
220
  "gpu_count": 2,
45
221
  "gpu_memory_in_gbs": 32,
46
- "gpu_type": "P100"
47
- },
48
- "BM.GPU3.8": {
49
- "gpu_count": 8,
50
- "gpu_memory_in_gbs": 128,
51
- "gpu_type": "V100"
222
+ "gpu_type": "P100",
223
+ "quantization": [
224
+ "fp16"
225
+ ],
226
+ "ranking": {
227
+ "cost": 30,
228
+ "performance": 20
229
+ }
52
230
  },
53
231
  "BM.GPU4.8": {
232
+ "cpu_count": 64,
233
+ "cpu_memory_in_gbs": 2048,
54
234
  "gpu_count": 8,
55
235
  "gpu_memory_in_gbs": 320,
56
- "gpu_type": "A100"
236
+ "gpu_type": "A100",
237
+ "quantization": [
238
+ "int8",
239
+ "fp16",
240
+ "bf16",
241
+ "tf32"
242
+ ],
243
+ "ranking": {
244
+ "cost": 57,
245
+ "performance": 65
246
+ }
57
247
  },
58
248
  "VM.GPU.A10.1": {
249
+ "cpu_count": 15,
250
+ "cpu_memory_in_gbs": 240,
59
251
  "gpu_count": 1,
60
252
  "gpu_memory_in_gbs": 24,
61
- "gpu_type": "A10"
253
+ "gpu_type": "A10",
254
+ "quantization": [
255
+ "awq",
256
+ "gptq",
257
+ "marlin",
258
+ "int8",
259
+ "bitblas",
260
+ "aqlm",
261
+ "bitsandbytes",
262
+ "deepspeedfp",
263
+ "gguf"
264
+ ],
265
+ "ranking": {
266
+ "cost": 20,
267
+ "performance": 30
268
+ }
62
269
  },
63
270
  "VM.GPU.A10.2": {
271
+ "cpu_count": 30,
272
+ "cpu_memory_in_gbs": 480,
64
273
  "gpu_count": 2,
65
274
  "gpu_memory_in_gbs": 48,
66
- "gpu_type": "A10"
67
- },
68
- "VM.GPU.A10.4": {
69
- "gpu_count": 4,
70
- "gpu_memory_in_gbs": 96,
71
- "gpu_type": "A10"
275
+ "gpu_type": "A10",
276
+ "quantization": [
277
+ "awq",
278
+ "gptq",
279
+ "marlin",
280
+ "int8",
281
+ "bitblas",
282
+ "aqlm",
283
+ "bitsandbytes",
284
+ "deepspeedfp",
285
+ "gguf"
286
+ ],
287
+ "ranking": {
288
+ "cost": 40,
289
+ "performance": 40
290
+ }
72
291
  },
73
292
  "VM.GPU2.1": {
293
+ "cpu_count": 12,
294
+ "cpu_memory_in_gbs": 72,
74
295
  "gpu_count": 1,
75
296
  "gpu_memory_in_gbs": 16,
76
- "gpu_type": "P100"
297
+ "gpu_type": "P100",
298
+ "quantization": [
299
+ "fp16"
300
+ ],
301
+ "ranking": {
302
+ "cost": 10,
303
+ "performance": 10
304
+ }
77
305
  },
78
306
  "VM.GPU3.1": {
307
+ "cpu_count": 6,
308
+ "cpu_memory_in_gbs": 90,
79
309
  "gpu_count": 1,
80
310
  "gpu_memory_in_gbs": 16,
81
- "gpu_type": "V100"
311
+ "gpu_type": "V100",
312
+ "quantization": [
313
+ "gptq",
314
+ "bitblas",
315
+ "aqlm",
316
+ "bitsandbytes",
317
+ "deepspeedfp",
318
+ "gguf"
319
+ ],
320
+ "ranking": {
321
+ "cost": 35,
322
+ "performance": 10
323
+ }
82
324
  },
83
325
  "VM.GPU3.2": {
326
+ "cpu_count": 12,
327
+ "cpu_memory_in_gbs": 180,
84
328
  "gpu_count": 2,
85
329
  "gpu_memory_in_gbs": 32,
86
- "gpu_type": "V100"
330
+ "gpu_type": "V100",
331
+ "quantization": [
332
+ "gptq",
333
+ "bitblas",
334
+ "aqlm",
335
+ "bitsandbytes",
336
+ "deepspeedfp",
337
+ "gguf"
338
+ ],
339
+ "ranking": {
340
+ "cost": 45,
341
+ "performance": 20
342
+ }
87
343
  },
88
344
  "VM.GPU3.4": {
345
+ "cpu_count": 24,
346
+ "cpu_memory_in_gbs": 360,
89
347
  "gpu_count": 4,
90
348
  "gpu_memory_in_gbs": 64,
91
- "gpu_type": "V100"
349
+ "gpu_type": "V100",
350
+ "quantization": [
351
+ "gptq",
352
+ "bitblas",
353
+ "aqlm",
354
+ "bitsandbytes",
355
+ "deepspeedfp",
356
+ "gguf"
357
+ ],
358
+ "ranking": {
359
+ "cost": 55,
360
+ "performance": 45
361
+ }
362
+ },
363
+ "VM.GPU3.8": {
364
+ "cpu_count": 24,
365
+ "cpu_memory_in_gbs": 768,
366
+ "gpu_count": 8,
367
+ "gpu_memory_in_gbs": 128,
368
+ "gpu_type": "V100",
369
+ "quantization": [
370
+ "gptq",
371
+ "bitblas",
372
+ "aqlm",
373
+ "bitsandbytes",
374
+ "deepspeedfp",
375
+ "gguf"
376
+ ],
377
+ "ranking": {
378
+ "cost": 56,
379
+ "performance": 46
380
+ }
92
381
  }
93
382
  }
94
383
  }
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python
2
+ # Copyright (c) 2025 Oracle and/or its affiliates.
3
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
+ from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
5
+
6
+ __all__ = ["AquaShapeRecommend"]
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env python
2
+ # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
3
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
+
5
+ """
6
+ aqua.shaperecommend.constants
7
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8
+
9
+ This module contains constants used in Aqua GPU Recommendation for Models.
10
+
11
+ LLAMA_REQUIRED_FIELDS refers to the fields necessary for calculating model memory for GQA architecture models.
12
+
13
+ MOE_REQUIRED_FIELDS refers to the fields necessary for Mixture of Experts (MoE) architecture models.
14
+
15
+ NEXT_QUANT suggests the next (smaller) quantization levels based on the current quantization (if applied) or on the data type of the model weights (if no quantization has been applied yet).
16
+ """
17
+
18
+ LLAMA_REQUIRED_FIELDS = [
19
+ "num_hidden_layers",
20
+ "hidden_size",
21
+ "num_attention_heads",
22
+ "num_key_value_heads",
23
+ "head_dim",
24
+ "intermediate_size",
25
+ "vocab_size",
26
+ ]
27
+
28
+ MOE_REQUIRED_FIELDS = LLAMA_REQUIRED_FIELDS + ["num_local_experts", "intermediate_size"]
29
+
30
+ NEXT_QUANT = {
31
+ "float32": ["8bit", "4bit"],
32
+ "bfloat16": ["8bit", "4bit"],
33
+ "float16": ["8bit", "4bit"],
34
+ "int8": ["4bit"],
35
+ "fp8": ["4bit"],
36
+ "8bit": ["4bit"],
37
+ "int4": ["No smaller quantization available"],
38
+ "4bit": ["No smaller quantization available"],
39
+ }
40
+
41
+
42
+ TEXT_GENERATION = "text_generation"
43
+ SAFETENSORS = "safetensors"
44
+
45
+ QUANT_METHODS = [
46
+ "aqlm",
47
+ "awq",
48
+ "deepspeedfp",
49
+ "tpu_int8",
50
+ "fp8",
51
+ "ptpc_fp8",
52
+ "fbgemm_fp8",
53
+ "modelopt",
54
+ "modelopt_fp4",
55
+ "marlin",
56
+ "bitblas",
57
+ "gguf",
58
+ "gptq_marlin_24",
59
+ "gptq_marlin",
60
+ "gptq_bitblas",
61
+ "awq_marlin",
62
+ "gptq",
63
+ "compressed-tensors",
64
+ "bitsandbytes",
65
+ "qqq",
66
+ "hqq",
67
+ "experts_int8",
68
+ "neuron_quant",
69
+ "ipex",
70
+ "quark",
71
+ "moe_wna16",
72
+ "torchao",
73
+ "auto-round",
74
+ "rtn",
75
+ "inc",
76
+ "mxfp4",
77
+ ]
78
+
79
+ IN_FLIGHT_QUANTIZATION = {"4bit"} # vLLM only supports 4bit in-flight-quantization
80
+
81
+ TROUBLESHOOT_MSG = "The selected model is too large to fit on standard GPU shapes with the current configuration.\nAs troubleshooting, we have suggested the two largest available GPU shapes using the smallest quantization level ('4bit') to maximize chances of fitting the model. "
82
+
83
+ VLLM_PARAMS = {
84
+ "max_model_len": "--max-model-len",
85
+ "in_flight_quant": "--quantization bitsandbytes --load-format bitsandbytes",
86
+ }
87
+
88
+ DEFAULT_WEIGHT_SIZE = "float32"
89
+
90
+ BITS_AND_BYTES_8BIT = "8bit"
91
+ BITS_AND_BYTES_4BIT = "4bit"
92
+
93
+ BITSANDBYTES = "bitsandbytes"
94
+
95
+
96
+ QUANT_MAPPING = {
97
+ "float32": 4,
98
+ "bfloat16": 2,
99
+ "float16": 2,
100
+ "fp16": 2,
101
+ "half": 2,
102
+ "int8": 1,
103
+ "fp8": 1,
104
+ "8bit": 1,
105
+ "4bit": 0.5,
106
+ "int4": 0.5,
107
+ }
108
+
109
+ SHAPE_MAP = {
110
+ "NVIDIA_GPU": "GPU",
111
+ "AMD_ROME": "CPU",
112
+ "GENERIC": "CPU",
113
+ "LEGACY": "CPU",
114
+ "ARM": "CPU",
115
+ "UNKNOWN_ENUM_VALUE": "N/A",
116
+ }