oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/cli.py +7 -5
- ads/aqua/common/entities.py +88 -29
- ads/aqua/common/enums.py +7 -0
- ads/aqua/common/errors.py +5 -0
- ads/aqua/common/utils.py +87 -7
- ads/aqua/constants.py +3 -0
- ads/aqua/extension/deployment_handler.py +36 -0
- ads/aqua/modeldeployment/config_loader.py +10 -0
- ads/aqua/modeldeployment/constants.py +1 -0
- ads/aqua/modeldeployment/deployment.py +99 -22
- ads/aqua/modeldeployment/entities.py +4 -0
- ads/aqua/resources/gpu_shapes_index.json +315 -26
- ads/aqua/shaperecommend/__init__.py +6 -0
- ads/aqua/shaperecommend/constants.py +116 -0
- ads/aqua/shaperecommend/estimator.py +384 -0
- ads/aqua/shaperecommend/llm_config.py +283 -0
- ads/aqua/shaperecommend/recommend.py +493 -0
- ads/aqua/shaperecommend/shape_report.py +233 -0
- ads/aqua/version.json +1 -1
- ads/cli.py +9 -1
- ads/jobs/builders/infrastructure/dsc_job.py +1 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
- ads/model/service/oci_datascience_model_deployment.py +46 -19
- ads/opctl/operator/lowcode/common/data.py +7 -2
- ads/opctl/operator/lowcode/common/transformations.py +207 -0
- ads/opctl/operator/lowcode/common/utils.py +8 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
- ads/opctl/operator/lowcode/forecast/const.py +2 -0
- ads/opctl/operator/lowcode/forecast/errors.py +5 -0
- ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
- ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
- ads/pipeline/ads_pipeline.py +13 -9
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +43 -36
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
@@ -8,11 +8,12 @@ import re
|
|
8
8
|
import shlex
|
9
9
|
import threading
|
10
10
|
from datetime import datetime, timedelta
|
11
|
-
from typing import Dict, List, Optional
|
11
|
+
from typing import Dict, List, Optional, Union
|
12
12
|
|
13
13
|
from cachetools import TTLCache, cached
|
14
14
|
from oci.data_science.models import ModelDeploymentShapeSummary
|
15
15
|
from pydantic import ValidationError
|
16
|
+
from rich.table import Table
|
16
17
|
|
17
18
|
from ads.aqua.app import AquaApp, logger
|
18
19
|
from ads.aqua.common.entities import (
|
@@ -27,6 +28,7 @@ from ads.aqua.common.utils import (
|
|
27
28
|
build_pydantic_error_message,
|
28
29
|
find_restricted_params,
|
29
30
|
get_combined_params,
|
31
|
+
get_container_env_type,
|
30
32
|
get_container_params_type,
|
31
33
|
get_ocid_substring,
|
32
34
|
get_params_list,
|
@@ -43,8 +45,11 @@ from ads.aqua.constants import (
|
|
43
45
|
AQUA_MODEL_TYPE_SERVICE,
|
44
46
|
AQUA_MULTI_MODEL_CONFIG,
|
45
47
|
MODEL_BY_REFERENCE_OSS_PATH_KEY,
|
48
|
+
MODEL_GROUP,
|
46
49
|
MODEL_NAME_DELIMITER,
|
50
|
+
SINGLE_MODEL_FLEX,
|
47
51
|
UNKNOWN_DICT,
|
52
|
+
UNKNOWN_ENUM_VALUE,
|
48
53
|
)
|
49
54
|
from ads.aqua.data import AquaResourceIdentifier
|
50
55
|
from ads.aqua.model import AquaModelApp
|
@@ -71,6 +76,11 @@ from ads.aqua.modeldeployment.entities import (
|
|
71
76
|
CreateModelDeploymentDetails,
|
72
77
|
)
|
73
78
|
from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
|
79
|
+
from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
|
80
|
+
from ads.aqua.shaperecommend.shape_report import (
|
81
|
+
RequestRecommend,
|
82
|
+
ShapeRecommendationReport,
|
83
|
+
)
|
74
84
|
from ads.common.object_storage_details import ObjectStorageDetails
|
75
85
|
from ads.common.utils import UNKNOWN, get_log_links
|
76
86
|
from ads.common.work_request import DataScienceWorkRequest
|
@@ -199,7 +209,7 @@ class AquaDeploymentApp(AquaApp):
|
|
199
209
|
if create_deployment_details.instance_shape.lower() not in available_shapes:
|
200
210
|
raise AquaValueError(
|
201
211
|
f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' "
|
202
|
-
f"is not
|
212
|
+
f"is not supported in the {self.region} region. Please choose another shape to deploy the model."
|
203
213
|
)
|
204
214
|
|
205
215
|
# Get container config
|
@@ -381,6 +391,7 @@ class AquaDeploymentApp(AquaApp):
|
|
381
391
|
Tags.AQUA_SERVICE_MODEL_TAG,
|
382
392
|
Tags.AQUA_FINE_TUNED_MODEL_TAG,
|
383
393
|
Tags.AQUA_TAG,
|
394
|
+
Tags.BASE_MODEL_CUSTOM,
|
384
395
|
]:
|
385
396
|
if tag in aqua_model.freeform_tags:
|
386
397
|
tags[tag] = aqua_model.freeform_tags[tag]
|
@@ -515,6 +526,7 @@ class AquaDeploymentApp(AquaApp):
|
|
515
526
|
|
516
527
|
# validate user provided params
|
517
528
|
user_params = env_var.get("PARAMS", UNKNOWN)
|
529
|
+
|
518
530
|
if user_params:
|
519
531
|
# todo: remove this check in the future version, logic to be moved to container_index
|
520
532
|
if (
|
@@ -540,6 +552,18 @@ class AquaDeploymentApp(AquaApp):
|
|
540
552
|
deployment_params = get_combined_params(config_params, user_params)
|
541
553
|
|
542
554
|
params = f"{params} {deployment_params}".strip()
|
555
|
+
|
556
|
+
if create_deployment_details.model_name:
|
557
|
+
# Replace existing --served-model-name argument if present, otherwise add it
|
558
|
+
if "--served-model-name" in params:
|
559
|
+
params = re.sub(
|
560
|
+
r"--served-model-name\s+\S+",
|
561
|
+
f"--served-model-name {create_deployment_details.model_name}",
|
562
|
+
params,
|
563
|
+
)
|
564
|
+
else:
|
565
|
+
params += f" --served-model-name {create_deployment_details.model_name}"
|
566
|
+
|
543
567
|
if params:
|
544
568
|
env_var.update({"PARAMS": params})
|
545
569
|
env_vars = container_spec.env_vars if container_spec else []
|
@@ -862,21 +886,26 @@ class AquaDeploymentApp(AquaApp):
|
|
862
886
|
|
863
887
|
if oci_aqua:
|
864
888
|
# skipping the AQUA model deployments that are created from model group
|
865
|
-
# TODO: remove this checker after AQUA deployment is integrated with model group
|
866
|
-
aqua_model_id = model_deployment.freeform_tags.get(
|
867
|
-
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
|
868
|
-
)
|
869
889
|
if (
|
870
|
-
|
871
|
-
|
872
|
-
== "UNKNOWN_ENUM_VALUE"
|
890
|
+
model_deployment.model_deployment_configuration_details.deployment_type
|
891
|
+
in [UNKNOWN_ENUM_VALUE, MODEL_GROUP, SINGLE_MODEL_FLEX]
|
873
892
|
):
|
874
893
|
continue
|
875
|
-
|
876
|
-
|
877
|
-
|
894
|
+
try:
|
895
|
+
results.append(
|
896
|
+
AquaDeployment.from_oci_model_deployment(
|
897
|
+
model_deployment, self.region
|
898
|
+
)
|
878
899
|
)
|
879
|
-
|
900
|
+
except Exception as e:
|
901
|
+
logger.error(
|
902
|
+
f"There was an issue processing the list of model deployments . Error: {str(e)}",
|
903
|
+
exc_info=True,
|
904
|
+
)
|
905
|
+
raise AquaRuntimeError(
|
906
|
+
f"There was an issue processing the list of model deployments . Error: {str(e)}"
|
907
|
+
) from e
|
908
|
+
|
880
909
|
# log telemetry if MD is in active or failed state
|
881
910
|
deployment_id = model_deployment.id
|
882
911
|
state = model_deployment.lifecycle_state.upper()
|
@@ -1042,6 +1071,7 @@ class AquaDeploymentApp(AquaApp):
|
|
1042
1071
|
config = self.get_config_from_metadata(
|
1043
1072
|
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
|
1044
1073
|
).config
|
1074
|
+
|
1045
1075
|
if config:
|
1046
1076
|
logger.info(
|
1047
1077
|
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
|
@@ -1126,7 +1156,7 @@ class AquaDeploymentApp(AquaApp):
|
|
1126
1156
|
model_id: str,
|
1127
1157
|
instance_shape: str,
|
1128
1158
|
gpu_count: int = None,
|
1129
|
-
) -> List[str]:
|
1159
|
+
) -> Dict:
|
1130
1160
|
"""Gets the default params set in the deployment configs for the given model and instance shape.
|
1131
1161
|
|
1132
1162
|
Parameters
|
@@ -1148,6 +1178,7 @@ class AquaDeploymentApp(AquaApp):
|
|
1148
1178
|
|
1149
1179
|
"""
|
1150
1180
|
default_params = []
|
1181
|
+
default_envs = {}
|
1151
1182
|
config_params = {}
|
1152
1183
|
model = DataScienceModel.from_id(model_id)
|
1153
1184
|
try:
|
@@ -1157,19 +1188,15 @@ class AquaDeploymentApp(AquaApp):
|
|
1157
1188
|
except ValueError:
|
1158
1189
|
container_type_key = UNKNOWN
|
1159
1190
|
logger.debug(
|
1160
|
-
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
|
1191
|
+
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
|
1192
|
+
f"custom metadata field for model {model_id}."
|
1161
1193
|
)
|
1162
1194
|
|
1163
|
-
if (
|
1164
|
-
container_type_key
|
1165
|
-
and container_type_key in InferenceContainerTypeFamily.values()
|
1166
|
-
):
|
1195
|
+
if container_type_key:
|
1167
1196
|
deployment_config = self.get_deployment_config(model_id)
|
1168
|
-
|
1169
1197
|
instance_shape_config = deployment_config.configuration.get(
|
1170
1198
|
instance_shape, ConfigurationItem()
|
1171
1199
|
)
|
1172
|
-
|
1173
1200
|
if instance_shape_config.multi_model_deployment and gpu_count:
|
1174
1201
|
gpu_params = instance_shape_config.multi_model_deployment
|
1175
1202
|
|
@@ -1178,12 +1205,18 @@ class AquaDeploymentApp(AquaApp):
|
|
1178
1205
|
config_params = gpu_config.parameters.get(
|
1179
1206
|
get_container_params_type(container_type_key), UNKNOWN
|
1180
1207
|
)
|
1208
|
+
default_envs = instance_shape_config.env.get(
|
1209
|
+
get_container_env_type(container_type_key), {}
|
1210
|
+
)
|
1181
1211
|
break
|
1182
1212
|
|
1183
1213
|
else:
|
1184
1214
|
config_params = instance_shape_config.parameters.get(
|
1185
1215
|
get_container_params_type(container_type_key), UNKNOWN
|
1186
1216
|
)
|
1217
|
+
default_envs = instance_shape_config.env.get(
|
1218
|
+
get_container_env_type(container_type_key), {}
|
1219
|
+
)
|
1187
1220
|
|
1188
1221
|
if config_params:
|
1189
1222
|
params_list = get_params_list(config_params)
|
@@ -1196,7 +1229,7 @@ class AquaDeploymentApp(AquaApp):
|
|
1196
1229
|
if params.split()[0] not in restricted_params_set:
|
1197
1230
|
default_params.append(params)
|
1198
1231
|
|
1199
|
-
return default_params
|
1232
|
+
return {"data": default_params, "env": default_envs}
|
1200
1233
|
|
1201
1234
|
def validate_deployment_params(
|
1202
1235
|
self,
|
@@ -1243,6 +1276,50 @@ class AquaDeploymentApp(AquaApp):
|
|
1243
1276
|
)
|
1244
1277
|
return {"valid": True}
|
1245
1278
|
|
1279
|
+
def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
|
1280
|
+
"""
|
1281
|
+
For the CLI (set generate_table = True), generates the table (in rich diff) with valid
|
1282
|
+
GPU deployment shapes for the provided model and configuration.
|
1283
|
+
|
1284
|
+
For the API (set generate_table = False), generates the JSON with valid
|
1285
|
+
GPU deployment shapes for the provided model and configuration.
|
1286
|
+
|
1287
|
+
Validates if recommendations are generated, calls method to construct the rich diff
|
1288
|
+
table with the recommendation data.
|
1289
|
+
|
1290
|
+
Parameters
|
1291
|
+
----------
|
1292
|
+
model_ocid : str
|
1293
|
+
OCID of the model to recommend feasible compute shapes.
|
1294
|
+
|
1295
|
+
Returns
|
1296
|
+
-------
|
1297
|
+
Table (generate_table = True)
|
1298
|
+
A table format for the recommendation report with compatible deployment shapes
|
1299
|
+
or troubleshooting info citing the largest shapes if no shape is suitable.
|
1300
|
+
|
1301
|
+
ShapeRecommendationReport (generate_table = False)
|
1302
|
+
A recommendation report with compatible deployment shapes, or troubleshooting info
|
1303
|
+
citing the largest shapes if no shape is suitable.
|
1304
|
+
|
1305
|
+
Raises
|
1306
|
+
------
|
1307
|
+
AquaValueError
|
1308
|
+
If model type is unsupported by tool (no recommendation report generated)
|
1309
|
+
"""
|
1310
|
+
try:
|
1311
|
+
request = RequestRecommend(**kwargs)
|
1312
|
+
except ValidationError as e:
|
1313
|
+
custom_error = build_pydantic_error_message(e)
|
1314
|
+
raise AquaValueError( # noqa: B904
|
1315
|
+
f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
|
1316
|
+
)
|
1317
|
+
|
1318
|
+
shape_recommend = AquaShapeRecommend()
|
1319
|
+
shape_recommend_report = shape_recommend.which_shapes(request)
|
1320
|
+
|
1321
|
+
return shape_recommend_report
|
1322
|
+
|
1246
1323
|
@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
|
1247
1324
|
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
|
1248
1325
|
def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
|
@@ -233,6 +233,10 @@ class CreateModelDeploymentDetails(BaseModel):
|
|
233
233
|
None, description="The description of the deployment."
|
234
234
|
)
|
235
235
|
model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
|
236
|
+
model_name: Optional[str] = Field(
|
237
|
+
None, description="The model name specified by user to deploy."
|
238
|
+
)
|
239
|
+
|
236
240
|
models: Optional[List[AquaMultiModelRef]] = Field(
|
237
241
|
None, description="List of models for multimodel deployment."
|
238
242
|
)
|
@@ -1,94 +1,383 @@
|
|
1
1
|
{
|
2
2
|
"shapes": {
|
3
3
|
"BM.GPU.A10.4": {
|
4
|
+
"cpu_count": 64,
|
5
|
+
"cpu_memory_in_gbs": 1024,
|
4
6
|
"gpu_count": 4,
|
5
7
|
"gpu_memory_in_gbs": 96,
|
6
|
-
"gpu_type": "A10"
|
8
|
+
"gpu_type": "A10",
|
9
|
+
"quantization": [
|
10
|
+
"awq",
|
11
|
+
"gptq",
|
12
|
+
"marlin",
|
13
|
+
"int8",
|
14
|
+
"bitblas",
|
15
|
+
"aqlm",
|
16
|
+
"bitsandbytes",
|
17
|
+
"deepspeedfp",
|
18
|
+
"gguf"
|
19
|
+
],
|
20
|
+
"ranking": {
|
21
|
+
"cost": 50,
|
22
|
+
"performance": 50
|
23
|
+
}
|
7
24
|
},
|
8
25
|
"BM.GPU.A100-V2.8": {
|
26
|
+
"cpu_count": 128,
|
27
|
+
"cpu_memory_in_gbs": 2048,
|
9
28
|
"gpu_count": 8,
|
10
29
|
"gpu_memory_in_gbs": 640,
|
11
|
-
"gpu_type": "A100"
|
30
|
+
"gpu_type": "A100",
|
31
|
+
"quantization": [
|
32
|
+
"awq",
|
33
|
+
"gptq",
|
34
|
+
"marlin",
|
35
|
+
"int8",
|
36
|
+
"bitblas",
|
37
|
+
"aqlm",
|
38
|
+
"bitsandbytes",
|
39
|
+
"deepspeedfp",
|
40
|
+
"gguf"
|
41
|
+
],
|
42
|
+
"ranking": {
|
43
|
+
"cost": 80,
|
44
|
+
"performance": 70
|
45
|
+
}
|
46
|
+
},
|
47
|
+
"BM.GPU.B200.8": {
|
48
|
+
"cpu_count": 128,
|
49
|
+
"cpu_memory_in_gbs": 4096,
|
50
|
+
"gpu_count": 8,
|
51
|
+
"gpu_memory_in_gbs": 1440,
|
52
|
+
"gpu_type": "B200",
|
53
|
+
"quantization": [
|
54
|
+
"fp4",
|
55
|
+
"fp8",
|
56
|
+
"fp16",
|
57
|
+
"bf16",
|
58
|
+
"tf32",
|
59
|
+
"int8",
|
60
|
+
"fp64"
|
61
|
+
],
|
62
|
+
"ranking": {
|
63
|
+
"cost": 120,
|
64
|
+
"performance": 130
|
65
|
+
}
|
12
66
|
},
|
13
67
|
"BM.GPU.B4.8": {
|
68
|
+
"cpu_count": 64,
|
69
|
+
"cpu_memory_in_gbs": 2048,
|
14
70
|
"gpu_count": 8,
|
15
71
|
"gpu_memory_in_gbs": 320,
|
16
|
-
"gpu_type": "A100"
|
72
|
+
"gpu_type": "A100",
|
73
|
+
"quantization": [
|
74
|
+
"awq",
|
75
|
+
"gptq",
|
76
|
+
"marlin",
|
77
|
+
"int8",
|
78
|
+
"bitblas",
|
79
|
+
"aqlm",
|
80
|
+
"bitsandbytes",
|
81
|
+
"deepspeedfp",
|
82
|
+
"gguf"
|
83
|
+
],
|
84
|
+
"ranking": {
|
85
|
+
"cost": 70,
|
86
|
+
"performance": 60
|
87
|
+
}
|
88
|
+
},
|
89
|
+
"BM.GPU.GB200.4": {
|
90
|
+
"cpu_count": 144,
|
91
|
+
"cpu_memory_in_gbs": 1024,
|
92
|
+
"gpu_count": 4,
|
93
|
+
"gpu_memory_in_gbs": 768,
|
94
|
+
"gpu_type": "GB200",
|
95
|
+
"quantization": [
|
96
|
+
"fp4",
|
97
|
+
"fp8",
|
98
|
+
"fp6",
|
99
|
+
"int8",
|
100
|
+
"fp16",
|
101
|
+
"bf16",
|
102
|
+
"tf32",
|
103
|
+
"fp64"
|
104
|
+
],
|
105
|
+
"ranking": {
|
106
|
+
"cost": 110,
|
107
|
+
"performance": 120
|
108
|
+
}
|
17
109
|
},
|
18
110
|
"BM.GPU.H100.8": {
|
111
|
+
"cpu_count": 112,
|
112
|
+
"cpu_memory_in_gbs": 2048,
|
19
113
|
"gpu_count": 8,
|
20
114
|
"gpu_memory_in_gbs": 640,
|
21
|
-
"gpu_type": "H100"
|
115
|
+
"gpu_type": "H100",
|
116
|
+
"quantization": [
|
117
|
+
"awq",
|
118
|
+
"gptq",
|
119
|
+
"marlin",
|
120
|
+
"fp8",
|
121
|
+
"int8",
|
122
|
+
"bitblas",
|
123
|
+
"aqlm",
|
124
|
+
"bitsandbytes",
|
125
|
+
"deepspeedfp",
|
126
|
+
"gguf"
|
127
|
+
],
|
128
|
+
"ranking": {
|
129
|
+
"cost": 100,
|
130
|
+
"performance": 100
|
131
|
+
}
|
22
132
|
},
|
23
133
|
"BM.GPU.H200.8": {
|
134
|
+
"cpu_count": 112,
|
135
|
+
"cpu_memory_in_gbs": 3072,
|
24
136
|
"gpu_count": 8,
|
25
137
|
"gpu_memory_in_gbs": 1128,
|
26
|
-
"gpu_type": "H200"
|
138
|
+
"gpu_type": "H200",
|
139
|
+
"quantization": [
|
140
|
+
"awq",
|
141
|
+
"gptq",
|
142
|
+
"marlin",
|
143
|
+
"fp8",
|
144
|
+
"int8",
|
145
|
+
"bitblas",
|
146
|
+
"aqlm",
|
147
|
+
"bitsandbytes",
|
148
|
+
"deepspeedfp",
|
149
|
+
"gguf"
|
150
|
+
],
|
151
|
+
"ranking": {
|
152
|
+
"cost": 100,
|
153
|
+
"performance": 110
|
154
|
+
}
|
27
155
|
},
|
28
156
|
"BM.GPU.L40S-NC.4": {
|
157
|
+
"cpu_count": 112,
|
158
|
+
"cpu_memory_in_gbs": 1024,
|
29
159
|
"gpu_count": 4,
|
30
160
|
"gpu_memory_in_gbs": 192,
|
31
|
-
"gpu_type": "L40S"
|
161
|
+
"gpu_type": "L40S",
|
162
|
+
"quantization": [
|
163
|
+
"awq",
|
164
|
+
"gptq",
|
165
|
+
"marlin",
|
166
|
+
"fp8",
|
167
|
+
"int8",
|
168
|
+
"bitblas",
|
169
|
+
"aqlm",
|
170
|
+
"bitsandbytes",
|
171
|
+
"deepspeedfp",
|
172
|
+
"gguf"
|
173
|
+
],
|
174
|
+
"ranking": {
|
175
|
+
"cost": 60,
|
176
|
+
"performance": 80
|
177
|
+
}
|
32
178
|
},
|
33
179
|
"BM.GPU.L40S.4": {
|
180
|
+
"cpu_count": 112,
|
181
|
+
"cpu_memory_in_gbs": 1024,
|
34
182
|
"gpu_count": 4,
|
35
183
|
"gpu_memory_in_gbs": 192,
|
36
|
-
"gpu_type": "L40S"
|
184
|
+
"gpu_type": "L40S",
|
185
|
+
"quantization": [
|
186
|
+
"awq",
|
187
|
+
"gptq",
|
188
|
+
"marlin",
|
189
|
+
"fp8",
|
190
|
+
"int8",
|
191
|
+
"bitblas",
|
192
|
+
"aqlm",
|
193
|
+
"bitsandbytes",
|
194
|
+
"deepspeedfp",
|
195
|
+
"gguf"
|
196
|
+
],
|
197
|
+
"ranking": {
|
198
|
+
"cost": 60,
|
199
|
+
"performance": 80
|
200
|
+
}
|
37
201
|
},
|
38
202
|
"BM.GPU.MI300X.8": {
|
203
|
+
"cpu_count": 112,
|
204
|
+
"cpu_memory_in_gbs": 2048,
|
39
205
|
"gpu_count": 8,
|
40
206
|
"gpu_memory_in_gbs": 1536,
|
41
|
-
"gpu_type": "MI300X"
|
207
|
+
"gpu_type": "MI300X",
|
208
|
+
"quantization": [
|
209
|
+
"fp8",
|
210
|
+
"gguf"
|
211
|
+
],
|
212
|
+
"ranking": {
|
213
|
+
"cost": 90,
|
214
|
+
"performance": 90
|
215
|
+
}
|
42
216
|
},
|
43
217
|
"BM.GPU2.2": {
|
218
|
+
"cpu_count": 28,
|
219
|
+
"cpu_memory_in_gbs": 192,
|
44
220
|
"gpu_count": 2,
|
45
221
|
"gpu_memory_in_gbs": 32,
|
46
|
-
"gpu_type": "P100"
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
"
|
51
|
-
|
222
|
+
"gpu_type": "P100",
|
223
|
+
"quantization": [
|
224
|
+
"fp16"
|
225
|
+
],
|
226
|
+
"ranking": {
|
227
|
+
"cost": 30,
|
228
|
+
"performance": 20
|
229
|
+
}
|
52
230
|
},
|
53
231
|
"BM.GPU4.8": {
|
232
|
+
"cpu_count": 64,
|
233
|
+
"cpu_memory_in_gbs": 2048,
|
54
234
|
"gpu_count": 8,
|
55
235
|
"gpu_memory_in_gbs": 320,
|
56
|
-
"gpu_type": "A100"
|
236
|
+
"gpu_type": "A100",
|
237
|
+
"quantization": [
|
238
|
+
"int8",
|
239
|
+
"fp16",
|
240
|
+
"bf16",
|
241
|
+
"tf32"
|
242
|
+
],
|
243
|
+
"ranking": {
|
244
|
+
"cost": 57,
|
245
|
+
"performance": 65
|
246
|
+
}
|
57
247
|
},
|
58
248
|
"VM.GPU.A10.1": {
|
249
|
+
"cpu_count": 15,
|
250
|
+
"cpu_memory_in_gbs": 240,
|
59
251
|
"gpu_count": 1,
|
60
252
|
"gpu_memory_in_gbs": 24,
|
61
|
-
"gpu_type": "A10"
|
253
|
+
"gpu_type": "A10",
|
254
|
+
"quantization": [
|
255
|
+
"awq",
|
256
|
+
"gptq",
|
257
|
+
"marlin",
|
258
|
+
"int8",
|
259
|
+
"bitblas",
|
260
|
+
"aqlm",
|
261
|
+
"bitsandbytes",
|
262
|
+
"deepspeedfp",
|
263
|
+
"gguf"
|
264
|
+
],
|
265
|
+
"ranking": {
|
266
|
+
"cost": 20,
|
267
|
+
"performance": 30
|
268
|
+
}
|
62
269
|
},
|
63
270
|
"VM.GPU.A10.2": {
|
271
|
+
"cpu_count": 30,
|
272
|
+
"cpu_memory_in_gbs": 480,
|
64
273
|
"gpu_count": 2,
|
65
274
|
"gpu_memory_in_gbs": 48,
|
66
|
-
"gpu_type": "A10"
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
275
|
+
"gpu_type": "A10",
|
276
|
+
"quantization": [
|
277
|
+
"awq",
|
278
|
+
"gptq",
|
279
|
+
"marlin",
|
280
|
+
"int8",
|
281
|
+
"bitblas",
|
282
|
+
"aqlm",
|
283
|
+
"bitsandbytes",
|
284
|
+
"deepspeedfp",
|
285
|
+
"gguf"
|
286
|
+
],
|
287
|
+
"ranking": {
|
288
|
+
"cost": 40,
|
289
|
+
"performance": 40
|
290
|
+
}
|
72
291
|
},
|
73
292
|
"VM.GPU2.1": {
|
293
|
+
"cpu_count": 12,
|
294
|
+
"cpu_memory_in_gbs": 72,
|
74
295
|
"gpu_count": 1,
|
75
296
|
"gpu_memory_in_gbs": 16,
|
76
|
-
"gpu_type": "P100"
|
297
|
+
"gpu_type": "P100",
|
298
|
+
"quantization": [
|
299
|
+
"fp16"
|
300
|
+
],
|
301
|
+
"ranking": {
|
302
|
+
"cost": 10,
|
303
|
+
"performance": 10
|
304
|
+
}
|
77
305
|
},
|
78
306
|
"VM.GPU3.1": {
|
307
|
+
"cpu_count": 6,
|
308
|
+
"cpu_memory_in_gbs": 90,
|
79
309
|
"gpu_count": 1,
|
80
310
|
"gpu_memory_in_gbs": 16,
|
81
|
-
"gpu_type": "V100"
|
311
|
+
"gpu_type": "V100",
|
312
|
+
"quantization": [
|
313
|
+
"gptq",
|
314
|
+
"bitblas",
|
315
|
+
"aqlm",
|
316
|
+
"bitsandbytes",
|
317
|
+
"deepspeedfp",
|
318
|
+
"gguf"
|
319
|
+
],
|
320
|
+
"ranking": {
|
321
|
+
"cost": 35,
|
322
|
+
"performance": 10
|
323
|
+
}
|
82
324
|
},
|
83
325
|
"VM.GPU3.2": {
|
326
|
+
"cpu_count": 12,
|
327
|
+
"cpu_memory_in_gbs": 180,
|
84
328
|
"gpu_count": 2,
|
85
329
|
"gpu_memory_in_gbs": 32,
|
86
|
-
"gpu_type": "V100"
|
330
|
+
"gpu_type": "V100",
|
331
|
+
"quantization": [
|
332
|
+
"gptq",
|
333
|
+
"bitblas",
|
334
|
+
"aqlm",
|
335
|
+
"bitsandbytes",
|
336
|
+
"deepspeedfp",
|
337
|
+
"gguf"
|
338
|
+
],
|
339
|
+
"ranking": {
|
340
|
+
"cost": 45,
|
341
|
+
"performance": 20
|
342
|
+
}
|
87
343
|
},
|
88
344
|
"VM.GPU3.4": {
|
345
|
+
"cpu_count": 24,
|
346
|
+
"cpu_memory_in_gbs": 360,
|
89
347
|
"gpu_count": 4,
|
90
348
|
"gpu_memory_in_gbs": 64,
|
91
|
-
"gpu_type": "V100"
|
349
|
+
"gpu_type": "V100",
|
350
|
+
"quantization": [
|
351
|
+
"gptq",
|
352
|
+
"bitblas",
|
353
|
+
"aqlm",
|
354
|
+
"bitsandbytes",
|
355
|
+
"deepspeedfp",
|
356
|
+
"gguf"
|
357
|
+
],
|
358
|
+
"ranking": {
|
359
|
+
"cost": 55,
|
360
|
+
"performance": 45
|
361
|
+
}
|
362
|
+
},
|
363
|
+
"VM.GPU3.8": {
|
364
|
+
"cpu_count": 24,
|
365
|
+
"cpu_memory_in_gbs": 768,
|
366
|
+
"gpu_count": 8,
|
367
|
+
"gpu_memory_in_gbs": 128,
|
368
|
+
"gpu_type": "V100",
|
369
|
+
"quantization": [
|
370
|
+
"gptq",
|
371
|
+
"bitblas",
|
372
|
+
"aqlm",
|
373
|
+
"bitsandbytes",
|
374
|
+
"deepspeedfp",
|
375
|
+
"gguf"
|
376
|
+
],
|
377
|
+
"ranking": {
|
378
|
+
"cost": 56,
|
379
|
+
"performance": 46
|
380
|
+
}
|
92
381
|
}
|
93
382
|
}
|
94
383
|
}
|
@@ -0,0 +1,6 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# Copyright (c) 2025 Oracle and/or its affiliates.
|
3
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
|
+
from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
|
5
|
+
|
6
|
+
__all__ = ["AquaShapeRecommend"]
|