oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. ads/aqua/cli.py +7 -5
  2. ads/aqua/common/entities.py +88 -29
  3. ads/aqua/common/enums.py +6 -0
  4. ads/aqua/common/errors.py +5 -0
  5. ads/aqua/common/utils.py +49 -7
  6. ads/aqua/constants.py +3 -0
  7. ads/aqua/extension/deployment_handler.py +36 -0
  8. ads/aqua/modeldeployment/constants.py +1 -0
  9. ads/aqua/modeldeployment/deployment.py +83 -12
  10. ads/aqua/modeldeployment/entities.py +3 -0
  11. ads/aqua/resources/gpu_shapes_index.json +315 -26
  12. ads/aqua/shaperecommend/__init__.py +6 -0
  13. ads/aqua/shaperecommend/constants.py +116 -0
  14. ads/aqua/shaperecommend/estimator.py +384 -0
  15. ads/aqua/shaperecommend/llm_config.py +283 -0
  16. ads/aqua/shaperecommend/recommend.py +493 -0
  17. ads/aqua/shaperecommend/shape_report.py +233 -0
  18. ads/aqua/version.json +1 -1
  19. ads/cli.py +9 -1
  20. ads/jobs/builders/infrastructure/dsc_job.py +1 -0
  21. ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
  22. ads/model/service/oci_datascience_model_deployment.py +46 -19
  23. ads/opctl/operator/lowcode/common/data.py +7 -2
  24. ads/opctl/operator/lowcode/common/transformations.py +207 -0
  25. ads/opctl/operator/lowcode/common/utils.py +8 -0
  26. ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
  27. ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
  28. ads/opctl/operator/lowcode/forecast/const.py +2 -0
  29. ads/opctl/operator/lowcode/forecast/errors.py +5 -0
  30. ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
  31. ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
  32. ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
  33. ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
  34. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
  35. ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
  36. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
  37. ads/pipeline/ads_pipeline.py +13 -9
  38. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
  39. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +42 -35
  40. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
  41. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
  42. {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/cli.py CHANGED
@@ -96,18 +96,20 @@ class AquaCommand:
96
96
  "If you intend to chain a function call to the result, please separate the "
97
97
  "flag and the subsequent function call with separator `-`."
98
98
  )
99
-
99
+
100
100
  @staticmethod
101
101
  def install():
102
102
  """Install ADS Aqua Extension from wheel file. Set environment variable `AQUA_EXTENSTION_PATH` to change the wheel file path.
103
103
 
104
- Return
104
+ Return
105
105
  ------
106
106
  int:
107
107
  Installation status.
108
108
  """
109
109
  import subprocess
110
110
 
111
- wheel_file_path = os.environ.get("AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl")
112
- status = subprocess.run(f"pip install {wheel_file_path}",shell=True)
113
- return status.check_returncode
111
+ wheel_file_path = os.environ.get(
112
+ "AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
113
+ )
114
+ status = subprocess.run(f"pip install {wheel_file_path}", shell=True, check=False)
115
+ return status.check_returncode
@@ -47,19 +47,76 @@ class ModelConfigResult(BaseModel):
47
47
  protected_namespaces = ()
48
48
 
49
49
 
50
- class GPUSpecs(Serializable):
50
+ class ComputeRank(Serializable):
51
51
  """
52
- Represents the GPU specifications for a compute instance.
52
+ Represents the cost and performance rankings for a specific compute shape.
53
+ These rankings help compare different shapes based on their relative pricing
54
+ and computational capabilities.
53
55
  """
54
56
 
55
- gpu_memory_in_gbs: Optional[int] = Field(
56
- default=None, description="The amount of GPU memory available (in GB)."
57
+ cost: Optional[int] = Field(
58
+ None,
59
+ description=(
60
+ "Relative cost ranking of the compute shape. "
61
+ "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
62
+ "Lower values indicate cheaper compute options."
63
+ ),
64
+ )
65
+
66
+ performance: Optional[int] = Field(
67
+ None,
68
+ description=(
69
+ "Relative performance ranking of the compute shape. "
70
+ "Value ranges from 10 (lowest performance) to 110 (highest performance). "
71
+ "Higher values indicate better compute performance."
72
+ ),
57
73
  )
74
+
75
+
76
+ class GPUSpecs(Serializable):
77
+ """
78
+ Represents the specifications and capabilities of a GPU-enabled compute shape.
79
+ Includes details about GPU and CPU resources, supported quantization formats, and
80
+ relative rankings for cost and performance.
81
+ """
82
+
58
83
  gpu_count: Optional[int] = Field(
59
- default=None, description="The number of GPUs available."
84
+ default=None,
85
+ description="Number of physical GPUs available on the compute shape.",
86
+ )
87
+
88
+ gpu_memory_in_gbs: Optional[int] = Field(
89
+ default=None, description="Total GPU memory available in gigabytes (GB)."
60
90
  )
91
+
61
92
  gpu_type: Optional[str] = Field(
62
- default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
93
+ default=None,
94
+ description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
95
+ )
96
+
97
+ quantization: Optional[List[str]] = Field(
98
+ default_factory=list,
99
+ description=(
100
+ "List of supported quantization formats for the GPU. "
101
+ "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
102
+ ),
103
+ )
104
+
105
+ cpu_count: Optional[int] = Field(
106
+ default=None, description="Number of CPU cores available on the shape."
107
+ )
108
+
109
+ cpu_memory_in_gbs: Optional[int] = Field(
110
+ default=None, description="Total CPU memory available in gigabytes (GB)."
111
+ )
112
+
113
+ ranking: Optional[ComputeRank] = Field(
114
+ default=None,
115
+ description=(
116
+ "Relative cost and performance rankings of this shape. "
117
+ "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
118
+ "and performance from 10 (lowest) to 100+ (highest)."
119
+ ),
63
120
  )
64
121
 
65
122
 
@@ -80,46 +137,49 @@ class GPUShapesIndex(Serializable):
80
137
 
81
138
  class ComputeShapeSummary(Serializable):
82
139
  """
83
- Represents the specifications of a compute instance shape,
84
- including CPU, memory, and optional GPU characteristics.
140
+ Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
85
141
  """
86
142
 
143
+ available: Optional[bool] = Field(
144
+ default=False,
145
+ description="True if the shape is available in the user's tenancy/region.",
146
+ )
147
+
87
148
  core_count: Optional[int] = Field(
88
- default=None,
89
- description="Total number of CPU cores available for the compute shape.",
149
+ default=None, description="Number of vCPUs available for the compute shape."
90
150
  )
151
+
91
152
  memory_in_gbs: Optional[int] = Field(
92
- default=None,
93
- description="Amount of memory (in GB) available for the compute shape.",
153
+ default=None, description="Total CPU memory available for the shape (in GB)."
94
154
  )
155
+
95
156
  name: Optional[str] = Field(
96
- default=None,
97
- description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
157
+ default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
98
158
  )
159
+
99
160
  shape_series: Optional[str] = Field(
100
161
  default=None,
101
- description="Shape family or series, e.g., 'GPU', 'Standard', etc.",
162
+ description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
102
163
  )
164
+
103
165
  gpu_specs: Optional[GPUSpecs] = Field(
104
- default=None,
105
- description="Optional GPU specifications associated with the shape.",
166
+ default=None, description="GPU configuration for the shape, if applicable."
106
167
  )
107
168
 
108
169
  @model_validator(mode="after")
109
170
  @classmethod
110
- def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
171
+ def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
111
172
  """
112
- Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
173
+ Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
113
174
 
114
- - If the shape_series contains "GPU", the validator first checks if the shape name exists
115
- in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
116
- - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
117
- using a regex pattern (looking for a number following a dot at the end of the name).
118
-
119
- The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
175
+ Logic:
176
+ - If `shape_series` includes 'GPU' and `gpu_specs` is None:
177
+ - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
178
+ - Fallback is based on suffix numeric group (e.g., '.2' -> gpu_count=2).
179
+ - If extraction fails, logs debug-level error but does not raise.
120
180
 
121
181
  Returns:
122
- ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
182
+ ComputeShapeSummary: The updated model instance.
123
183
  """
124
184
  try:
125
185
  if (
@@ -128,16 +188,15 @@ class ComputeShapeSummary(Serializable):
128
188
  and model.name
129
189
  and not model.gpu_specs
130
190
  ):
131
- # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
132
191
  match = re.search(r"\.(\d+)$", model.name)
133
192
  if match:
134
193
  gpu_count = int(match.group(1))
135
194
  model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
136
195
  except Exception as err:
137
196
  logger.debug(
138
- f"Error occurred in attempt to extract GPU specification for the f{model.name}. "
139
- f"Details: {err}"
197
+ f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
140
198
  )
199
+
141
200
  return model
142
201
 
143
202
 
ads/aqua/common/enums.py CHANGED
@@ -123,6 +123,12 @@ class Platform(ExtendedEnum):
123
123
  # - Key: The preferred container family to use when multiple compatible families are selected.
124
124
  # - Value: A list of all compatible families (including the preferred one).
125
125
  CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
126
+ InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY: [
127
+ InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
128
+ InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
129
+ InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
130
+ InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
131
+ ],
126
132
  InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
127
133
  InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
128
134
  InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
ads/aqua/common/errors.py CHANGED
@@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
55
55
  def __init__(self, reason, status=403, service_payload=None):
56
56
  super().__init__(reason, status, service_payload)
57
57
 
58
+ class AquaRecommendationError(AquaError):
59
+ """Exception raised for models incompatible with shape recommendation tool."""
60
+
61
+ def __init__(self, reason, status=400, service_payload=None):
62
+ super().__init__(reason, status, service_payload)
58
63
 
59
64
  class AquaFileNotFoundError(AquaError, FileNotFoundError):
60
65
  """Exception raised for missing target file."""
ads/aqua/common/utils.py CHANGED
@@ -1267,10 +1267,10 @@ def load_gpu_shapes_index(
1267
1267
  auth: Optional[Dict[str, Any]] = None,
1268
1268
  ) -> GPUShapesIndex:
1269
1269
  """
1270
- Load the GPU shapes index, preferring the OS bucket copy over the local one.
1270
+ Load the GPU shapes index, merging based on freshness.
1271
1271
 
1272
- Attempts to read `gpu_shapes_index.json` from OCI Object Storage first;
1273
- if that succeeds, those entries will override the local defaults.
1272
+ Compares last-modified timestamps of local and remote files,
1273
+ merging the shapes from the fresher file on top of the older one.
1274
1274
 
1275
1275
  Parameters
1276
1276
  ----------
@@ -1291,7 +1291,9 @@ def load_gpu_shapes_index(
1291
1291
  file_name = "gpu_shapes_index.json"
1292
1292
 
1293
1293
  # Try remote load
1294
- remote_data: Dict[str, Any] = {}
1294
+ local_data, remote_data = {}, {}
1295
+ local_mtime, remote_mtime = None, None
1296
+
1295
1297
  if CONDA_BUCKET_NS:
1296
1298
  try:
1297
1299
  auth = auth or authutil.default_signer()
@@ -1301,8 +1303,24 @@ def load_gpu_shapes_index(
1301
1303
  logger.debug(
1302
1304
  "Loading GPU shapes index from Object Storage: %s", storage_path
1303
1305
  )
1304
- with fsspec.open(storage_path, mode="r", **auth) as f:
1306
+
1307
+ fs = fsspec.filesystem("oci", **auth)
1308
+ with fs.open(storage_path, mode="r") as f:
1305
1309
  remote_data = json.load(f)
1310
+
1311
+ remote_info = fs.info(storage_path)
1312
+ remote_mtime_str = remote_info.get("timeModified", None)
1313
+ if remote_mtime_str:
1314
+ # Convert OCI timestamp (e.g., 'Mon, 04 Aug 2025 06:37:13 GMT') to epoch time
1315
+ remote_mtime = datetime.strptime(
1316
+ remote_mtime_str, "%a, %d %b %Y %H:%M:%S %Z"
1317
+ ).timestamp()
1318
+
1319
+ logger.debug(
1320
+ "Remote GPU shapes last-modified time: %s",
1321
+ datetime.fromtimestamp(remote_mtime).strftime("%Y-%m-%d %H:%M:%S"),
1322
+ )
1323
+
1306
1324
  logger.debug(
1307
1325
  "Loaded %d shapes from Object Storage",
1308
1326
  len(remote_data.get("shapes", {})),
@@ -1311,12 +1329,19 @@ def load_gpu_shapes_index(
1311
1329
  logger.debug("Remote load failed (%s); falling back to local", ex)
1312
1330
 
1313
1331
  # Load local copy
1314
- local_data: Dict[str, Any] = {}
1315
1332
  local_path = os.path.join(os.path.dirname(__file__), "../resources", file_name)
1316
1333
  try:
1317
1334
  logger.debug("Loading GPU shapes index from local file: %s", local_path)
1318
1335
  with open(local_path) as f:
1319
1336
  local_data = json.load(f)
1337
+
1338
+ local_mtime = os.path.getmtime(local_path)
1339
+
1340
+ logger.debug(
1341
+ "Local GPU shapes last-modified time: %s",
1342
+ datetime.fromtimestamp(local_mtime).strftime("%Y-%m-%d %H:%M:%S"),
1343
+ )
1344
+
1320
1345
  logger.debug(
1321
1346
  "Loaded %d shapes from local file", len(local_data.get("shapes", {}))
1322
1347
  )
@@ -1326,7 +1351,24 @@ def load_gpu_shapes_index(
1326
1351
  # Merge: shapes from the fresher source override the older one (remote wins on a tie)
1327
1352
  local_shapes = local_data.get("shapes", {})
1328
1353
  remote_shapes = remote_data.get("shapes", {})
1329
- merged_shapes = {**local_shapes, **remote_shapes}
1354
+ merged_shapes = {}
1355
+
1356
+ if local_mtime and remote_mtime:
1357
+ if remote_mtime >= local_mtime:
1358
+ logger.debug("Remote data is fresher or equal; merging remote over local.")
1359
+ merged_shapes = {**local_shapes, **remote_shapes}
1360
+ else:
1361
+ logger.debug("Local data is fresher; merging local over remote.")
1362
+ merged_shapes = {**remote_shapes, **local_shapes}
1363
+ elif remote_shapes:
1364
+ logger.debug("Only remote shapes available.")
1365
+ merged_shapes = remote_shapes
1366
+ elif local_shapes:
1367
+ logger.debug("Only local shapes available.")
1368
+ merged_shapes = local_shapes
1369
+ else:
1370
+ logger.error("No GPU shapes data found in either source.")
1371
+ merged_shapes = {}
1330
1372
 
1331
1373
  return GPUShapesIndex(shapes=merged_shapes)
1332
1374
 
ads/aqua/constants.py CHANGED
@@ -56,6 +56,9 @@ SUPPORTED_FILE_FORMATS = ["jsonl"]
56
56
  MODEL_BY_REFERENCE_OSS_PATH_KEY = "artifact_location"
57
57
 
58
58
  AQUA_CHAT_TEMPLATE_METADATA_KEY = "chat_template"
59
+ UNKNOWN_ENUM_VALUE = "UNKNOWN_ENUM_VALUE"
60
+ MODEL_GROUP = "MODEL_GROUP"
61
+ SINGLE_MODEL_FLEX = "SINGLE_MODEL_FLEX"
59
62
 
60
63
  CONSOLE_LINK_RESOURCE_TYPE_MAPPING = {
61
64
  "datasciencemodel": "models",
@@ -57,6 +57,15 @@ class AquaDeploymentHandler(AquaAPIhandler):
57
57
  return self.get_deployment_config(
58
58
  model_id=id.split(",") if "," in id else id
59
59
  )
60
+ elif paths.startswith("aqua/deployments/recommend_shapes"):
61
+ if not id or not isinstance(id, str):
62
+ raise HTTPError(
63
+ 400,
64
+ f"Invalid request format for {self.request.path}. "
65
+ "Expected a single model OCID specified as --model_id",
66
+ )
67
+ id = id.replace(" ", "")
68
+ return self.get_recommend_shape(model_id=id)
60
69
  elif paths.startswith("aqua/deployments/shapes"):
61
70
  return self.list_shapes()
62
71
  elif paths.startswith("aqua/deployments"):
@@ -161,6 +170,32 @@ class AquaDeploymentHandler(AquaAPIhandler):
161
170
 
162
171
  return self.finish(deployment_config)
163
172
 
173
+ def get_recommend_shape(self, model_id: str):
174
+ """
175
+ Retrieves the valid shape and deployment parameter configuration for one Aqua Model.
176
+
177
+ Parameters
178
+ ----------
179
+ model_id : str
180
+ A single model ID (str).
181
+
182
+ Returns
183
+ -------
184
+ None
185
+ The function sends the ShapeRecommendationReport as the response (this handler always requests generate_table = False).
186
+ """
187
+ app = AquaDeploymentApp()
188
+
189
+ compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID)
190
+
191
+ recommend_report = app.recommend_shape(
192
+ model_id=model_id,
193
+ compartment_id=compartment_id,
194
+ generate_table=False,
195
+ )
196
+
197
+ return self.finish(recommend_report)
198
+
164
199
  def list_shapes(self):
165
200
  """
166
201
  Lists the valid model deployment shapes.
@@ -408,6 +443,7 @@ __handlers__ = [
408
443
  ("deployments/?([^/]*)/params", AquaDeploymentParamsHandler),
409
444
  ("deployments/config/?([^/]*)", AquaDeploymentHandler),
410
445
  ("deployments/shapes/?([^/]*)", AquaDeploymentHandler),
446
+ ("deployments/recommend_shapes/?([^/]*)", AquaDeploymentHandler),
411
447
  ("deployments/?([^/]*)", AquaDeploymentHandler),
412
448
  ("deployments/?([^/]*)/activate", AquaDeploymentHandler),
413
449
  ("deployments/?([^/]*)/deactivate", AquaDeploymentHandler),
@@ -11,3 +11,4 @@ This module contains constants used in Aqua Model Deployment.
11
11
 
12
12
  DEFAULT_WAIT_TIME = 12000
13
13
  DEFAULT_POLL_INTERVAL = 10
14
+
@@ -8,11 +8,12 @@ import re
8
8
  import shlex
9
9
  import threading
10
10
  from datetime import datetime, timedelta
11
- from typing import Dict, List, Optional
11
+ from typing import Dict, List, Optional, Union
12
12
 
13
13
  from cachetools import TTLCache, cached
14
14
  from oci.data_science.models import ModelDeploymentShapeSummary
15
15
  from pydantic import ValidationError
16
+ from rich.table import Table
16
17
 
17
18
  from ads.aqua.app import AquaApp, logger
18
19
  from ads.aqua.common.entities import (
@@ -44,8 +45,11 @@ from ads.aqua.constants import (
44
45
  AQUA_MODEL_TYPE_SERVICE,
45
46
  AQUA_MULTI_MODEL_CONFIG,
46
47
  MODEL_BY_REFERENCE_OSS_PATH_KEY,
48
+ MODEL_GROUP,
47
49
  MODEL_NAME_DELIMITER,
50
+ SINGLE_MODEL_FLEX,
48
51
  UNKNOWN_DICT,
52
+ UNKNOWN_ENUM_VALUE,
49
53
  )
50
54
  from ads.aqua.data import AquaResourceIdentifier
51
55
  from ads.aqua.model import AquaModelApp
@@ -72,6 +76,11 @@ from ads.aqua.modeldeployment.entities import (
72
76
  CreateModelDeploymentDetails,
73
77
  )
74
78
  from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
79
+ from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
80
+ from ads.aqua.shaperecommend.shape_report import (
81
+ RequestRecommend,
82
+ ShapeRecommendationReport,
83
+ )
75
84
  from ads.common.object_storage_details import ObjectStorageDetails
76
85
  from ads.common.utils import UNKNOWN, get_log_links
77
86
  from ads.common.work_request import DataScienceWorkRequest
@@ -517,6 +526,7 @@ class AquaDeploymentApp(AquaApp):
517
526
 
518
527
  # validate user provided params
519
528
  user_params = env_var.get("PARAMS", UNKNOWN)
529
+
520
530
  if user_params:
521
531
  # todo: remove this check in the future version, logic to be moved to container_index
522
532
  if (
@@ -542,6 +552,18 @@ class AquaDeploymentApp(AquaApp):
542
552
  deployment_params = get_combined_params(config_params, user_params)
543
553
 
544
554
  params = f"{params} {deployment_params}".strip()
555
+
556
+ if create_deployment_details.model_name:
557
+ # Replace existing --served-model-name argument if present, otherwise add it
558
+ if "--served-model-name" in params:
559
+ params = re.sub(
560
+ r"--served-model-name\s+\S+",
561
+ f"--served-model-name {create_deployment_details.model_name}",
562
+ params,
563
+ )
564
+ else:
565
+ params += f" --served-model-name {create_deployment_details.model_name}"
566
+
545
567
  if params:
546
568
  env_var.update({"PARAMS": params})
547
569
  env_vars = container_spec.env_vars if container_spec else []
@@ -864,21 +886,26 @@ class AquaDeploymentApp(AquaApp):
864
886
 
865
887
  if oci_aqua:
866
888
  # skipping the AQUA model deployments that are created from model group
867
- # TODO: remove this checker after AQUA deployment is integrated with model group
868
- aqua_model_id = model_deployment.freeform_tags.get(
869
- Tags.AQUA_MODEL_ID_TAG, UNKNOWN
870
- )
871
889
  if (
872
- "datasciencemodelgroup" in aqua_model_id
873
- or model_deployment.model_deployment_configuration_details.deployment_type
874
- == "UNKNOWN_ENUM_VALUE"
890
+ model_deployment.model_deployment_configuration_details.deployment_type
891
+ in [UNKNOWN_ENUM_VALUE, MODEL_GROUP, SINGLE_MODEL_FLEX]
875
892
  ):
876
893
  continue
877
- results.append(
878
- AquaDeployment.from_oci_model_deployment(
879
- model_deployment, self.region
894
+ try:
895
+ results.append(
896
+ AquaDeployment.from_oci_model_deployment(
897
+ model_deployment, self.region
898
+ )
880
899
  )
881
- )
900
+ except Exception as e:
901
+ logger.error(
902
+ f"There was an issue processing the list of model deployments . Error: {str(e)}",
903
+ exc_info=True,
904
+ )
905
+ raise AquaRuntimeError(
906
+ f"There was an issue processing the list of model deployments . Error: {str(e)}"
907
+ ) from e
908
+
882
909
  # log telemetry if MD is in active or failed state
883
910
  deployment_id = model_deployment.id
884
911
  state = model_deployment.lifecycle_state.upper()
@@ -1249,6 +1276,50 @@ class AquaDeploymentApp(AquaApp):
1249
1276
  )
1250
1277
  return {"valid": True}
1251
1278
 
1279
+ def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
1280
+ """
1281
+ For the CLI (set generate_table = True), generates the table (in rich diff) with valid
1282
+ GPU deployment shapes for the provided model and configuration.
1283
+
1284
+ For the API (set generate_table = False), generates the JSON with valid
1285
+ GPU deployment shapes for the provided model and configuration.
1286
+
1287
+ Validates if recommendations are generated, calls method to construct the rich diff
1288
+ table with the recommendation data.
1289
+
1290
+ Parameters
1291
+ ----------
1292
+ model_id : str
1293
+ OCID of the model to recommend feasible compute shapes.
1294
+
1295
+ Returns
1296
+ -------
1297
+ Table (generate_table = True)
1298
+ A table format for the recommendation report with compatible deployment shapes
1299
+ or troubleshooting info citing the largest shapes if no shape is suitable.
1300
+
1301
+ ShapeRecommendationReport (generate_table = False)
1302
+ A recommendation report with compatible deployment shapes, or troubleshooting info
1303
+ citing the largest shapes if no shape is suitable.
1304
+
1305
+ Raises
1306
+ ------
1307
+ AquaValueError
1308
+ If model type is unsupported by tool (no recommendation report generated)
1309
+ """
1310
+ try:
1311
+ request = RequestRecommend(**kwargs)
1312
+ except ValidationError as e:
1313
+ custom_error = build_pydantic_error_message(e)
1314
+ raise AquaValueError( # noqa: B904
1315
+ f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
1316
+ )
1317
+
1318
+ shape_recommend = AquaShapeRecommend()
1319
+ shape_recommend_report = shape_recommend.which_shapes(request)
1320
+
1321
+ return shape_recommend_report
1322
+
1252
1323
  @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
1253
1324
  @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
1254
1325
  def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
@@ -233,6 +233,9 @@ class CreateModelDeploymentDetails(BaseModel):
233
233
  None, description="The description of the deployment."
234
234
  )
235
235
  model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
236
+ model_name: Optional[str] = Field(
237
+ None, description="The model name specified by user to deploy."
238
+ )
236
239
 
237
240
  models: Optional[List[AquaMultiModelRef]] = Field(
238
241
  None, description="List of models for multimodel deployment."