oracle-ads 2.13.17rc0__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +88 -29
  3. ads/aqua/common/enums.py +7 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +87 -7
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/config_loader.py +10 -0
  9. ads/aqua/modeldeployment/constants.py +1 -0
  10. ads/aqua/modeldeployment/deployment.py +99 -22
  11. ads/aqua/modeldeployment/entities.py +4 -0
  12. ads/aqua/resources/gpu_shapes_index.json +315 -26
  13. ads/aqua/shaperecommend/__init__.py +6 -0
  14. ads/aqua/shaperecommend/constants.py +116 -0
  15. ads/aqua/shaperecommend/estimator.py +384 -0
  16. ads/aqua/shaperecommend/llm_config.py +283 -0
  17. ads/aqua/shaperecommend/recommend.py +493 -0
  18. ads/aqua/shaperecommend/shape_report.py +233 -0
  19. ads/aqua/version.json +1 -1
  20. ads/cli.py +9 -1
  21. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  22. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  23. ads/model/service/oci_datascience_model_deployment.py +46 -19
  24. ads/opctl/operator/lowcode/common/data.py +7 -2
  25. ads/opctl/operator/lowcode/common/transformations.py +207 -0
  26. ads/opctl/operator/lowcode/common/utils.py +8 -0
  27. ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
  28. ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
  29. ads/opctl/operator/lowcode/forecast/const.py +2 -0
  30. ads/opctl/operator/lowcode/forecast/errors.py +5 -0
  31. ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
  32. ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
  33. ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
  34. ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
  35. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
  36. ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
  37. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
  38. ads/pipeline/ads_pipeline.py +13 -9
  39. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
  40. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +43 -36
  41. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
  42. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
  43. {oracle_ads-2.13.17rc0.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/modeldeployment/deployment.py
@@ -8,11 +8,12 @@ import re
 import shlex
 import threading
 from datetime import datetime, timedelta
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 from cachetools import TTLCache, cached
 from oci.data_science.models import ModelDeploymentShapeSummary
 from pydantic import ValidationError
+from rich.table import Table
 
 from ads.aqua.app import AquaApp, logger
 from ads.aqua.common.entities import (
@@ -27,6 +28,7 @@ from ads.aqua.common.utils import (
     build_pydantic_error_message,
     find_restricted_params,
     get_combined_params,
+    get_container_env_type,
     get_container_params_type,
     get_ocid_substring,
     get_params_list,
@@ -43,8 +45,11 @@ from ads.aqua.constants import (
     AQUA_MODEL_TYPE_SERVICE,
     AQUA_MULTI_MODEL_CONFIG,
     MODEL_BY_REFERENCE_OSS_PATH_KEY,
+    MODEL_GROUP,
     MODEL_NAME_DELIMITER,
+    SINGLE_MODEL_FLEX,
     UNKNOWN_DICT,
+    UNKNOWN_ENUM_VALUE,
 )
 from ads.aqua.data import AquaResourceIdentifier
 from ads.aqua.model import AquaModelApp
@@ -71,6 +76,11 @@ from ads.aqua.modeldeployment.entities import (
     CreateModelDeploymentDetails,
 )
 from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
+from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+from ads.aqua.shaperecommend.shape_report import (
+    RequestRecommend,
+    ShapeRecommendationReport,
+)
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.common.utils import UNKNOWN, get_log_links
 from ads.common.work_request import DataScienceWorkRequest
@@ -199,7 +209,7 @@ class AquaDeploymentApp(AquaApp):
         if create_deployment_details.instance_shape.lower() not in available_shapes:
             raise AquaValueError(
                 f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' "
-                f"is not available in the {self.region} region. Please choose another shape to deploy the model."
+                f"is not supported in the {self.region} region. Please choose another shape to deploy the model."
             )
 
         # Get container config
@@ -381,6 +391,7 @@ class AquaDeploymentApp(AquaApp):
             Tags.AQUA_SERVICE_MODEL_TAG,
             Tags.AQUA_FINE_TUNED_MODEL_TAG,
             Tags.AQUA_TAG,
+            Tags.BASE_MODEL_CUSTOM,
         ]:
             if tag in aqua_model.freeform_tags:
                 tags[tag] = aqua_model.freeform_tags[tag]
@@ -515,6 +526,7 @@ class AquaDeploymentApp(AquaApp):
 
         # validate user provided params
         user_params = env_var.get("PARAMS", UNKNOWN)
+
         if user_params:
             # todo: remove this check in the future version, logic to be moved to container_index
             if (
@@ -540,6 +552,18 @@ class AquaDeploymentApp(AquaApp):
             deployment_params = get_combined_params(config_params, user_params)
 
         params = f"{params} {deployment_params}".strip()
+
+        if create_deployment_details.model_name:
+            # Replace existing --served-model-name argument if present, otherwise add it
+            if "--served-model-name" in params:
+                params = re.sub(
+                    r"--served-model-name\s+\S+",
+                    f"--served-model-name {create_deployment_details.model_name}",
+                    params,
+                )
+            else:
+                params += f" --served-model-name {create_deployment_details.model_name}"
+
         if params:
             env_var.update({"PARAMS": params})
         env_vars = container_spec.env_vars if container_spec else []
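The hunk above replaces an existing `--served-model-name` argument with the user-supplied model name, or appends one if none is present. A minimal standalone sketch of that substitution (the variable values are illustrative, not taken from the package):

```python
import re

params = "--max-model-len 4096 --served-model-name old-name"  # assembled container PARAMS string
model_name = "my-model"                                       # stands in for create_deployment_details.model_name

if "--served-model-name" in params:
    # Swap only the value that follows the flag.
    params = re.sub(
        r"--served-model-name\s+\S+",
        f"--served-model-name {model_name}",
        params,
    )
else:
    params += f" --served-model-name {model_name}"

print(params)  # --max-model-len 4096 --served-model-name my-model
```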
@@ -862,21 +886,26 @@ class AquaDeploymentApp(AquaApp):
 
             if oci_aqua:
                 # skipping the AQUA model deployments that are created from model group
-                # TODO: remove this checker after AQUA deployment is integrated with model group
-                aqua_model_id = model_deployment.freeform_tags.get(
-                    Tags.AQUA_MODEL_ID_TAG, UNKNOWN
-                )
                 if (
-                    "datasciencemodelgroup" in aqua_model_id
-                    or model_deployment.model_deployment_configuration_details.deployment_type
-                    == "UNKNOWN_ENUM_VALUE"
+                    model_deployment.model_deployment_configuration_details.deployment_type
+                    in [UNKNOWN_ENUM_VALUE, MODEL_GROUP, SINGLE_MODEL_FLEX]
                 ):
                     continue
-                results.append(
-                    AquaDeployment.from_oci_model_deployment(
-                        model_deployment, self.region
+                try:
+                    results.append(
+                        AquaDeployment.from_oci_model_deployment(
+                            model_deployment, self.region
+                        )
                     )
-                )
+                except Exception as e:
+                    logger.error(
+                        f"There was an issue processing the list of model deployments . Error: {str(e)}",
+                        exc_info=True,
+                    )
+                    raise AquaRuntimeError(
+                        f"There was an issue processing the list of model deployments . Error: {str(e)}"
+                    ) from e
+
                 # log telemetry if MD is in active or failed state
                 deployment_id = model_deployment.id
                 state = model_deployment.lifecycle_state.upper()
@@ -1042,6 +1071,7 @@ class AquaDeploymentApp(AquaApp):
         config = self.get_config_from_metadata(
             model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
         ).config
+
         if config:
             logger.info(
                 f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1126,7 +1156,7 @@ class AquaDeploymentApp(AquaApp):
         model_id: str,
         instance_shape: str,
         gpu_count: int = None,
-    ) -> List[str]:
+    ) -> Dict:
         """Gets the default params set in the deployment configs for the given model and instance shape.
 
         Parameters
@@ -1148,6 +1178,7 @@
 
         """
         default_params = []
+        default_envs = {}
         config_params = {}
         model = DataScienceModel.from_id(model_id)
         try:
@@ -1157,19 +1188,15 @@
         except ValueError:
             container_type_key = UNKNOWN
             logger.debug(
-                f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
+                f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
+                f"custom metadata field for model {model_id}."
             )
 
-        if (
-            container_type_key
-            and container_type_key in InferenceContainerTypeFamily.values()
-        ):
+        if container_type_key:
             deployment_config = self.get_deployment_config(model_id)
-
             instance_shape_config = deployment_config.configuration.get(
                 instance_shape, ConfigurationItem()
             )
-
             if instance_shape_config.multi_model_deployment and gpu_count:
                 gpu_params = instance_shape_config.multi_model_deployment
 
@@ -1178,12 +1205,18 @@
                         config_params = gpu_config.parameters.get(
                             get_container_params_type(container_type_key), UNKNOWN
                         )
+                        default_envs = instance_shape_config.env.get(
+                            get_container_env_type(container_type_key), {}
+                        )
                         break
 
             else:
                 config_params = instance_shape_config.parameters.get(
                     get_container_params_type(container_type_key), UNKNOWN
                 )
+                default_envs = instance_shape_config.env.get(
+                    get_container_env_type(container_type_key), {}
+                )
 
         if config_params:
             params_list = get_params_list(config_params)
@@ -1196,7 +1229,7 @@
                 if params.split()[0] not in restricted_params_set:
                     default_params.append(params)
 
-        return default_params
+        return {"data": default_params, "env": default_envs}
 
     def validate_deployment_params(
         self,
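With this change, `get_deployment_default_params` returns a dict rather than a flat list of parameters. A hypothetical caller-side sketch of how the new structure would be unpacked (the OCID and shape are placeholders):

```python
from ads.aqua.modeldeployment.deployment import AquaDeploymentApp

app = AquaDeploymentApp()
defaults = app.get_deployment_default_params(
    model_id="ocid1.datasciencemodel.oc1..<unique_id>",  # placeholder OCID
    instance_shape="VM.GPU.A10.2",
)
params = defaults["data"]  # list of default CLI parameters; previously the entire return value
envs = defaults["env"]     # new: default container environment variables, may be empty
```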
@@ -1243,6 +1276,50 @@
             )
         return {"valid": True}
 
+    def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
+        """
+        For the CLI (set generate_table = True), generates the table (in rich diff) with valid
+        GPU deployment shapes for the provided model and configuration.
+
+        For the API (set generate_table = False), generates the JSON with valid
+        GPU deployment shapes for the provided model and configuration.
+
+        Validates if recommendations are generated, calls method to construct the rich diff
+        table with the recommendation data.
+
+        Parameters
+        ----------
+        model_ocid : str
+            OCID of the model to recommend feasible compute shapes.
+
+        Returns
+        -------
+        Table (generate_table = True)
+            A table format for the recommendation report with compatible deployment shapes
+            or troubleshooting info citing the largest shapes if no shape is suitable.
+
+        ShapeRecommendationReport (generate_table = False)
+            A recommendation report with compatible deployment shapes, or troubleshooting info
+            citing the largest shapes if no shape is suitable.
+
+        Raises
+        ------
+        AquaValueError
+            If model type is unsupported by tool (no recommendation report generated)
+        """
+        try:
+            request = RequestRecommend(**kwargs)
+        except ValidationError as e:
+            custom_error = build_pydantic_error_message(e)
+            raise AquaValueError(  # noqa: B904
+                f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
+            )
+
+        shape_recommend = AquaShapeRecommend()
+        shape_recommend_report = shape_recommend.which_shapes(request)
+
+        return shape_recommend_report
+
     @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
     @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
     def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
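The new `recommend_shape` method validates its keyword arguments through `RequestRecommend` and delegates to `AquaShapeRecommend.which_shapes`. A hypothetical invocation (the OCID is a placeholder, and `generate_table` is assumed to be a `RequestRecommend` field based on the docstring above):

```python
from ads.aqua.modeldeployment.deployment import AquaDeploymentApp

app = AquaDeploymentApp()
report = app.recommend_shape(
    model_ocid="ocid1.datasciencemodel.oc1..<unique_id>",  # placeholder OCID
    generate_table=False,  # False -> ShapeRecommendationReport; True -> rich Table for the CLI
)
```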
ads/aqua/modeldeployment/entities.py
@@ -233,6 +233,10 @@ class CreateModelDeploymentDetails(BaseModel):
         None, description="The description of the deployment."
     )
     model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
+    model_name: Optional[str] = Field(
+        None, description="The model name specified by user to deploy."
+    )
+
     models: Optional[List[AquaMultiModelRef]] = Field(
         None, description="List of models for multimodel deployment."
     )
ads/aqua/resources/gpu_shapes_index.json
@@ -1,94 +1,383 @@
 {
   "shapes": {
     "BM.GPU.A10.4": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 50,
+        "performance": 50
+      }
     },
     "BM.GPU.A100-V2.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 80,
+        "performance": 70
+      }
+    },
+    "BM.GPU.B200.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 4096,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1440,
+      "gpu_type": "B200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "int8",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 120,
+        "performance": 130
+      }
     },
     "BM.GPU.B4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 70,
+        "performance": 60
+      }
+    },
+    "BM.GPU.GB200.4": {
+      "cpu_count": 144,
+      "cpu_memory_in_gbs": 1024,
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 768,
+      "gpu_type": "GB200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp6",
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 110,
+        "performance": 120
+      }
     },
     "BM.GPU.H100.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "H100"
+      "gpu_type": "H100",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 100,
+        "performance": 100
+      }
     },
     "BM.GPU.H200.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 3072,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1128,
-      "gpu_type": "H200"
+      "gpu_type": "H200",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 100,
+        "performance": 110
+      }
     },
     "BM.GPU.L40S-NC.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.L40S.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "fp8",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.MI300X.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1536,
-      "gpu_type": "MI300X"
+      "gpu_type": "MI300X",
+      "quantization": [
+        "fp8",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 90,
+        "performance": 90
+      }
     },
     "BM.GPU2.2": {
+      "cpu_count": 28,
+      "cpu_memory_in_gbs": 192,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "P100"
-    },
-    "BM.GPU3.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 128,
-      "gpu_type": "V100"
+      "gpu_type": "P100",
+      "quantization": [
+        "fp16"
+      ],
+      "ranking": {
+        "cost": 30,
+        "performance": 20
+      }
     },
     "BM.GPU4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "A100",
+      "quantization": [
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32"
+      ],
+      "ranking": {
+        "cost": 57,
+        "performance": 65
+      }
     },
     "VM.GPU.A10.1": {
+      "cpu_count": 15,
+      "cpu_memory_in_gbs": 240,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 24,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 20,
+        "performance": 30
+      }
     },
     "VM.GPU.A10.2": {
+      "cpu_count": 30,
+      "cpu_memory_in_gbs": 480,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 48,
-      "gpu_type": "A10"
-    },
-    "VM.GPU.A10.4": {
-      "gpu_count": 4,
-      "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": [
+        "awq",
+        "gptq",
+        "marlin",
+        "int8",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 40,
+        "performance": 40
+      }
     },
     "VM.GPU2.1": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 72,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "P100"
+      "gpu_type": "P100",
+      "quantization": [
+        "fp16"
+      ],
+      "ranking": {
+        "cost": 10,
+        "performance": 10
+      }
     },
     "VM.GPU3.1": {
+      "cpu_count": 6,
+      "cpu_memory_in_gbs": 90,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 35,
+        "performance": 10
+      }
     },
     "VM.GPU3.2": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 180,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 45,
+        "performance": 20
+      }
     },
     "VM.GPU3.4": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 360,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 64,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 55,
+        "performance": 45
+      }
+    },
+    "VM.GPU3.8": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 768,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 128,
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 56,
+        "performance": 46
+      }
     }
   }
 }
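The shape index now carries `cpu_count`, `cpu_memory_in_gbs`, `quantization`, and `ranking` for each shape. A small illustrative reader (the file path and selection criteria are assumptions for the example, not how ADS itself consumes the file):

```python
import json

# Load the enriched GPU shape index shipped with the wheel.
with open("ads/aqua/resources/gpu_shapes_index.json") as f:
    shapes = json.load(f)["shapes"]

# Example: shapes with at least 600 GB of aggregate GPU memory, ordered by the new performance ranking.
big = {name: spec for name, spec in shapes.items() if spec["gpu_memory_in_gbs"] >= 600}
for name, spec in sorted(big.items(), key=lambda kv: -kv[1]["ranking"]["performance"]):
    print(name, spec["gpu_type"], spec["gpu_memory_in_gbs"], spec["ranking"])
```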
ads/aqua/shaperecommend/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
+
+__all__ = ["AquaShapeRecommend"]