oracle-ads 2.13.17__py3-none-any.whl → 2.13.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/cli.py +7 -5
- ads/aqua/common/entities.py +88 -29
- ads/aqua/common/enums.py +6 -0
- ads/aqua/common/errors.py +5 -0
- ads/aqua/common/utils.py +49 -7
- ads/aqua/constants.py +3 -0
- ads/aqua/extension/deployment_handler.py +36 -0
- ads/aqua/modeldeployment/constants.py +1 -0
- ads/aqua/modeldeployment/deployment.py +83 -12
- ads/aqua/modeldeployment/entities.py +3 -0
- ads/aqua/resources/gpu_shapes_index.json +315 -26
- ads/aqua/shaperecommend/__init__.py +6 -0
- ads/aqua/shaperecommend/constants.py +116 -0
- ads/aqua/shaperecommend/estimator.py +384 -0
- ads/aqua/shaperecommend/llm_config.py +283 -0
- ads/aqua/shaperecommend/recommend.py +493 -0
- ads/aqua/shaperecommend/shape_report.py +233 -0
- ads/aqua/version.json +1 -1
- ads/cli.py +9 -1
- ads/jobs/builders/infrastructure/dsc_job.py +1 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
- ads/model/service/oci_datascience_model_deployment.py +46 -19
- ads/opctl/operator/lowcode/common/data.py +7 -2
- ads/opctl/operator/lowcode/common/transformations.py +207 -0
- ads/opctl/operator/lowcode/common/utils.py +8 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +3 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +53 -3
- ads/opctl/operator/lowcode/forecast/const.py +2 -0
- ads/opctl/operator/lowcode/forecast/errors.py +5 -0
- ads/opctl/operator/lowcode/forecast/meta_selector.py +310 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/base_model.py +119 -30
- ads/opctl/operator/lowcode/forecast/model/factory.py +33 -2
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +54 -17
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +6 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -0
- ads/pipeline/ads_pipeline.py +13 -9
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/RECORD +42 -35
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.17.dist-info → oracle_ads-2.13.18.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/cli.py
CHANGED
@@ -96,18 +96,20 @@ class AquaCommand:
|
|
96
96
|
"If you intend to chain a function call to the result, please separate the "
|
97
97
|
"flag and the subsequent function call with separator `-`."
|
98
98
|
)
|
99
|
-
|
99
|
+
|
100
100
|
@staticmethod
|
101
101
|
def install():
|
102
102
|
"""Install ADS Aqua Extension from wheel file. Set enviroment variable `AQUA_EXTENSTION_PATH` to change the wheel file path.
|
103
103
|
|
104
|
-
Return
|
104
|
+
Return
|
105
105
|
------
|
106
106
|
int:
|
107
107
|
Installatation status.
|
108
108
|
"""
|
109
109
|
import subprocess
|
110
110
|
|
111
|
-
wheel_file_path = os.environ.get(
|
112
|
-
|
113
|
-
|
111
|
+
wheel_file_path = os.environ.get(
|
112
|
+
"AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
|
113
|
+
)
|
114
|
+
status = subprocess.run(f"pip install {wheel_file_path}", shell=True, check=False)
|
115
|
+
return status.check_returncode
|
ads/aqua/common/entities.py
CHANGED
@@ -47,19 +47,76 @@ class ModelConfigResult(BaseModel):
|
|
47
47
|
protected_namespaces = ()
|
48
48
|
|
49
49
|
|
50
|
-
class
|
50
|
+
class ComputeRank(Serializable):
|
51
51
|
"""
|
52
|
-
Represents the
|
52
|
+
Represents the cost and performance rankings for a specific compute shape.
|
53
|
+
These rankings help compare different shapes based on their relative pricing
|
54
|
+
and computational capabilities.
|
53
55
|
"""
|
54
56
|
|
55
|
-
|
56
|
-
|
57
|
+
cost: Optional[int] = Field(
|
58
|
+
None,
|
59
|
+
description=(
|
60
|
+
"Relative cost ranking of the compute shape. "
|
61
|
+
"Value ranges from 10 (most cost-effective) to 100 (most expensive). "
|
62
|
+
"Lower values indicate cheaper compute options."
|
63
|
+
),
|
64
|
+
)
|
65
|
+
|
66
|
+
performance: Optional[int] = Field(
|
67
|
+
None,
|
68
|
+
description=(
|
69
|
+
"Relative performance ranking of the compute shape. "
|
70
|
+
"Value ranges from 10 (lowest performance) to 110 (highest performance). "
|
71
|
+
"Higher values indicate better compute performance."
|
72
|
+
),
|
57
73
|
)
|
74
|
+
|
75
|
+
|
76
|
+
class GPUSpecs(Serializable):
|
77
|
+
"""
|
78
|
+
Represents the specifications and capabilities of a GPU-enabled compute shape.
|
79
|
+
Includes details about GPU and CPU resources, supported quantization formats, and
|
80
|
+
relative rankings for cost and performance.
|
81
|
+
"""
|
82
|
+
|
58
83
|
gpu_count: Optional[int] = Field(
|
59
|
-
default=None,
|
84
|
+
default=None,
|
85
|
+
description="Number of physical GPUs available on the compute shape.",
|
86
|
+
)
|
87
|
+
|
88
|
+
gpu_memory_in_gbs: Optional[int] = Field(
|
89
|
+
default=None, description="Total GPU memory available in gigabytes (GB)."
|
60
90
|
)
|
91
|
+
|
61
92
|
gpu_type: Optional[str] = Field(
|
62
|
-
default=None,
|
93
|
+
default=None,
|
94
|
+
description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
|
95
|
+
)
|
96
|
+
|
97
|
+
quantization: Optional[List[str]] = Field(
|
98
|
+
default_factory=list,
|
99
|
+
description=(
|
100
|
+
"List of supported quantization formats for the GPU. "
|
101
|
+
"Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
|
102
|
+
),
|
103
|
+
)
|
104
|
+
|
105
|
+
cpu_count: Optional[int] = Field(
|
106
|
+
default=None, description="Number of CPU cores available on the shape."
|
107
|
+
)
|
108
|
+
|
109
|
+
cpu_memory_in_gbs: Optional[int] = Field(
|
110
|
+
default=None, description="Total CPU memory available in gigabytes (GB)."
|
111
|
+
)
|
112
|
+
|
113
|
+
ranking: Optional[ComputeRank] = Field(
|
114
|
+
default=None,
|
115
|
+
description=(
|
116
|
+
"Relative cost and performance rankings of this shape. "
|
117
|
+
"Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
|
118
|
+
"and performance from 10 (lowest) to 100+ (highest)."
|
119
|
+
),
|
63
120
|
)
|
64
121
|
|
65
122
|
|
@@ -80,46 +137,49 @@ class GPUShapesIndex(Serializable):
|
|
80
137
|
|
81
138
|
class ComputeShapeSummary(Serializable):
|
82
139
|
"""
|
83
|
-
Represents
|
84
|
-
including CPU, memory, and optional GPU characteristics.
|
140
|
+
Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
|
85
141
|
"""
|
86
142
|
|
143
|
+
available: Optional[bool] = Field(
|
144
|
+
default=False,
|
145
|
+
description="True if the shape is available in the user's tenancy/region.",
|
146
|
+
)
|
147
|
+
|
87
148
|
core_count: Optional[int] = Field(
|
88
|
-
default=None,
|
89
|
-
description="Total number of CPU cores available for the compute shape.",
|
149
|
+
default=None, description="Number of vCPUs available for the compute shape."
|
90
150
|
)
|
151
|
+
|
91
152
|
memory_in_gbs: Optional[int] = Field(
|
92
|
-
default=None,
|
93
|
-
description="Amount of memory (in GB) available for the compute shape.",
|
153
|
+
default=None, description="Total CPU memory available for the shape (in GB)."
|
94
154
|
)
|
155
|
+
|
95
156
|
name: Optional[str] = Field(
|
96
|
-
default=None,
|
97
|
-
description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
|
157
|
+
default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
|
98
158
|
)
|
159
|
+
|
99
160
|
shape_series: Optional[str] = Field(
|
100
161
|
default=None,
|
101
|
-
description="
|
162
|
+
description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
|
102
163
|
)
|
164
|
+
|
103
165
|
gpu_specs: Optional[GPUSpecs] = Field(
|
104
|
-
default=None,
|
105
|
-
description="Optional GPU specifications associated with the shape.",
|
166
|
+
default=None, description="GPU configuration for the shape, if applicable."
|
106
167
|
)
|
107
168
|
|
108
169
|
@model_validator(mode="after")
|
109
170
|
@classmethod
|
110
|
-
def
|
171
|
+
def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
|
111
172
|
"""
|
112
|
-
|
173
|
+
Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
|
113
174
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
|
175
|
+
Logic:
|
176
|
+
- If `shape_series` includes 'GPU' and `gpu_specs` is None:
|
177
|
+
- Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
|
178
|
+
- Fallback is based on suffix numeric group (e.g., '.2' → gpu_count=2).
|
179
|
+
- If extraction fails, logs debug-level error but does not raise.
|
120
180
|
|
121
181
|
Returns:
|
122
|
-
ComputeShapeSummary: The updated instance
|
182
|
+
ComputeShapeSummary: The updated model instance.
|
123
183
|
"""
|
124
184
|
try:
|
125
185
|
if (
|
@@ -128,16 +188,15 @@ class ComputeShapeSummary(Serializable):
|
|
128
188
|
and model.name
|
129
189
|
and not model.gpu_specs
|
130
190
|
):
|
131
|
-
# Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
|
132
191
|
match = re.search(r"\.(\d+)$", model.name)
|
133
192
|
if match:
|
134
193
|
gpu_count = int(match.group(1))
|
135
194
|
model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
|
136
195
|
except Exception as err:
|
137
196
|
logger.debug(
|
138
|
-
f"
|
139
|
-
f"Details: {err}"
|
197
|
+
f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
|
140
198
|
)
|
199
|
+
|
141
200
|
return model
|
142
201
|
|
143
202
|
|
ads/aqua/common/enums.py
CHANGED
@@ -123,6 +123,12 @@ class Platform(ExtendedEnum):
|
|
123
123
|
# - Key: The preferred container family to use when multiple compatible families are selected.
|
124
124
|
# - Value: A list of all compatible families (including the preferred one).
|
125
125
|
CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
|
126
|
+
InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY: [
|
127
|
+
InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
|
128
|
+
InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
|
129
|
+
InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
|
130
|
+
InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
|
131
|
+
],
|
126
132
|
InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
|
127
133
|
InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
|
128
134
|
InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
|
ads/aqua/common/errors.py
CHANGED
@@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
|
|
55
55
|
def __init__(self, reason, status=403, service_payload=None):
|
56
56
|
super().__init__(reason, status, service_payload)
|
57
57
|
|
58
|
+
class AquaRecommendationError(AquaError):
|
59
|
+
"""Exception raised for models incompatible with shape recommendation tool."""
|
60
|
+
|
61
|
+
def __init__(self, reason, status=400, service_payload=None):
|
62
|
+
super().__init__(reason, status, service_payload)
|
58
63
|
|
59
64
|
class AquaFileNotFoundError(AquaError, FileNotFoundError):
|
60
65
|
"""Exception raised for missing target file."""
|
ads/aqua/common/utils.py
CHANGED
@@ -1267,10 +1267,10 @@ def load_gpu_shapes_index(
|
|
1267
1267
|
auth: Optional[Dict[str, Any]] = None,
|
1268
1268
|
) -> GPUShapesIndex:
|
1269
1269
|
"""
|
1270
|
-
Load the GPU shapes index,
|
1270
|
+
Load the GPU shapes index, merging based on freshness.
|
1271
1271
|
|
1272
|
-
|
1273
|
-
|
1272
|
+
Compares last-modified timestamps of local and remote files,
|
1273
|
+
merging the shapes from the fresher file on top of the older one.
|
1274
1274
|
|
1275
1275
|
Parameters
|
1276
1276
|
----------
|
@@ -1291,7 +1291,9 @@ def load_gpu_shapes_index(
|
|
1291
1291
|
file_name = "gpu_shapes_index.json"
|
1292
1292
|
|
1293
1293
|
# Try remote load
|
1294
|
-
|
1294
|
+
local_data, remote_data = {}, {}
|
1295
|
+
local_mtime, remote_mtime = None, None
|
1296
|
+
|
1295
1297
|
if CONDA_BUCKET_NS:
|
1296
1298
|
try:
|
1297
1299
|
auth = auth or authutil.default_signer()
|
@@ -1301,8 +1303,24 @@ def load_gpu_shapes_index(
|
|
1301
1303
|
logger.debug(
|
1302
1304
|
"Loading GPU shapes index from Object Storage: %s", storage_path
|
1303
1305
|
)
|
1304
|
-
|
1306
|
+
|
1307
|
+
fs = fsspec.filesystem("oci", **auth)
|
1308
|
+
with fs.open(storage_path, mode="r") as f:
|
1305
1309
|
remote_data = json.load(f)
|
1310
|
+
|
1311
|
+
remote_info = fs.info(storage_path)
|
1312
|
+
remote_mtime_str = remote_info.get("timeModified", None)
|
1313
|
+
if remote_mtime_str:
|
1314
|
+
# Convert OCI timestamp (e.g., 'Mon, 04 Aug 2025 06:37:13 GMT') to epoch time
|
1315
|
+
remote_mtime = datetime.strptime(
|
1316
|
+
remote_mtime_str, "%a, %d %b %Y %H:%M:%S %Z"
|
1317
|
+
).timestamp()
|
1318
|
+
|
1319
|
+
logger.debug(
|
1320
|
+
"Remote GPU shapes last-modified time: %s",
|
1321
|
+
datetime.fromtimestamp(remote_mtime).strftime("%Y-%m-%d %H:%M:%S"),
|
1322
|
+
)
|
1323
|
+
|
1306
1324
|
logger.debug(
|
1307
1325
|
"Loaded %d shapes from Object Storage",
|
1308
1326
|
len(remote_data.get("shapes", {})),
|
@@ -1311,12 +1329,19 @@ def load_gpu_shapes_index(
|
|
1311
1329
|
logger.debug("Remote load failed (%s); falling back to local", ex)
|
1312
1330
|
|
1313
1331
|
# Load local copy
|
1314
|
-
local_data: Dict[str, Any] = {}
|
1315
1332
|
local_path = os.path.join(os.path.dirname(__file__), "../resources", file_name)
|
1316
1333
|
try:
|
1317
1334
|
logger.debug("Loading GPU shapes index from local file: %s", local_path)
|
1318
1335
|
with open(local_path) as f:
|
1319
1336
|
local_data = json.load(f)
|
1337
|
+
|
1338
|
+
local_mtime = os.path.getmtime(local_path)
|
1339
|
+
|
1340
|
+
logger.debug(
|
1341
|
+
"Local GPU shapes last-modified time: %s",
|
1342
|
+
datetime.fromtimestamp(local_mtime).strftime("%Y-%m-%d %H:%M:%S"),
|
1343
|
+
)
|
1344
|
+
|
1320
1345
|
logger.debug(
|
1321
1346
|
"Loaded %d shapes from local file", len(local_data.get("shapes", {}))
|
1322
1347
|
)
|
@@ -1326,7 +1351,24 @@ def load_gpu_shapes_index(
|
|
1326
1351
|
# Merge: remote shapes override local
|
1327
1352
|
local_shapes = local_data.get("shapes", {})
|
1328
1353
|
remote_shapes = remote_data.get("shapes", {})
|
1329
|
-
merged_shapes = {
|
1354
|
+
merged_shapes = {}
|
1355
|
+
|
1356
|
+
if local_mtime and remote_mtime:
|
1357
|
+
if remote_mtime >= local_mtime:
|
1358
|
+
logger.debug("Remote data is fresher or equal; merging remote over local.")
|
1359
|
+
merged_shapes = {**local_shapes, **remote_shapes}
|
1360
|
+
else:
|
1361
|
+
logger.debug("Local data is fresher; merging local over remote.")
|
1362
|
+
merged_shapes = {**remote_shapes, **local_shapes}
|
1363
|
+
elif remote_shapes:
|
1364
|
+
logger.debug("Only remote shapes available.")
|
1365
|
+
merged_shapes = remote_shapes
|
1366
|
+
elif local_shapes:
|
1367
|
+
logger.debug("Only local shapes available.")
|
1368
|
+
merged_shapes = local_shapes
|
1369
|
+
else:
|
1370
|
+
logger.error("No GPU shapes data found in either source.")
|
1371
|
+
merged_shapes = {}
|
1330
1372
|
|
1331
1373
|
return GPUShapesIndex(shapes=merged_shapes)
|
1332
1374
|
|
ads/aqua/constants.py
CHANGED
@@ -56,6 +56,9 @@ SUPPORTED_FILE_FORMATS = ["jsonl"]
|
|
56
56
|
MODEL_BY_REFERENCE_OSS_PATH_KEY = "artifact_location"
|
57
57
|
|
58
58
|
AQUA_CHAT_TEMPLATE_METADATA_KEY = "chat_template"
|
59
|
+
UNKNOWN_ENUM_VALUE = "UNKNOWN_ENUM_VALUE"
|
60
|
+
MODEL_GROUP = "MODEL_GROUP"
|
61
|
+
SINGLE_MODEL_FLEX = "SINGLE_MODEL_FLEX"
|
59
62
|
|
60
63
|
CONSOLE_LINK_RESOURCE_TYPE_MAPPING = {
|
61
64
|
"datasciencemodel": "models",
|
ads/aqua/extension/deployment_handler.py
CHANGED
@@ -57,6 +57,15 @@ class AquaDeploymentHandler(AquaAPIhandler):
|
|
57
57
|
return self.get_deployment_config(
|
58
58
|
model_id=id.split(",") if "," in id else id
|
59
59
|
)
|
60
|
+
elif paths.startswith("aqua/deployments/recommend_shapes"):
|
61
|
+
if not id or not isinstance(id, str):
|
62
|
+
raise HTTPError(
|
63
|
+
400,
|
64
|
+
f"Invalid request format for {self.request.path}. "
|
65
|
+
"Expected a single model OCID specified as --model_id",
|
66
|
+
)
|
67
|
+
id = id.replace(" ", "")
|
68
|
+
return self.get_recommend_shape(model_id=id)
|
60
69
|
elif paths.startswith("aqua/deployments/shapes"):
|
61
70
|
return self.list_shapes()
|
62
71
|
elif paths.startswith("aqua/deployments"):
|
@@ -161,6 +170,32 @@ class AquaDeploymentHandler(AquaAPIhandler):
|
|
161
170
|
|
162
171
|
return self.finish(deployment_config)
|
163
172
|
|
173
|
+
def get_recommend_shape(self, model_id: str):
|
174
|
+
"""
|
175
|
+
Retrieves the valid shape and deployment parameter configuration for one Aqua Model.
|
176
|
+
|
177
|
+
Parameters
|
178
|
+
----------
|
179
|
+
model_id : str
|
180
|
+
A single model ID (str).
|
181
|
+
|
182
|
+
Returns
|
183
|
+
-------
|
184
|
+
None
|
185
|
+
The function sends the ShapeRecommendReport (generate_table = False) or Rich Diff Table (generate_table = True)
|
186
|
+
"""
|
187
|
+
app = AquaDeploymentApp()
|
188
|
+
|
189
|
+
compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID)
|
190
|
+
|
191
|
+
recommend_report = app.recommend_shape(
|
192
|
+
model_id=model_id,
|
193
|
+
compartment_id=compartment_id,
|
194
|
+
generate_table=False,
|
195
|
+
)
|
196
|
+
|
197
|
+
return self.finish(recommend_report)
|
198
|
+
|
164
199
|
def list_shapes(self):
|
165
200
|
"""
|
166
201
|
Lists the valid model deployment shapes.
|
@@ -408,6 +443,7 @@ __handlers__ = [
|
|
408
443
|
("deployments/?([^/]*)/params", AquaDeploymentParamsHandler),
|
409
444
|
("deployments/config/?([^/]*)", AquaDeploymentHandler),
|
410
445
|
("deployments/shapes/?([^/]*)", AquaDeploymentHandler),
|
446
|
+
("deployments/recommend_shapes/?([^/]*)", AquaDeploymentHandler),
|
411
447
|
("deployments/?([^/]*)", AquaDeploymentHandler),
|
412
448
|
("deployments/?([^/]*)/activate", AquaDeploymentHandler),
|
413
449
|
("deployments/?([^/]*)/deactivate", AquaDeploymentHandler),
|
ads/aqua/modeldeployment/deployment.py
CHANGED
@@ -8,11 +8,12 @@ import re
|
|
8
8
|
import shlex
|
9
9
|
import threading
|
10
10
|
from datetime import datetime, timedelta
|
11
|
-
from typing import Dict, List, Optional
|
11
|
+
from typing import Dict, List, Optional, Union
|
12
12
|
|
13
13
|
from cachetools import TTLCache, cached
|
14
14
|
from oci.data_science.models import ModelDeploymentShapeSummary
|
15
15
|
from pydantic import ValidationError
|
16
|
+
from rich.table import Table
|
16
17
|
|
17
18
|
from ads.aqua.app import AquaApp, logger
|
18
19
|
from ads.aqua.common.entities import (
|
@@ -44,8 +45,11 @@ from ads.aqua.constants import (
|
|
44
45
|
AQUA_MODEL_TYPE_SERVICE,
|
45
46
|
AQUA_MULTI_MODEL_CONFIG,
|
46
47
|
MODEL_BY_REFERENCE_OSS_PATH_KEY,
|
48
|
+
MODEL_GROUP,
|
47
49
|
MODEL_NAME_DELIMITER,
|
50
|
+
SINGLE_MODEL_FLEX,
|
48
51
|
UNKNOWN_DICT,
|
52
|
+
UNKNOWN_ENUM_VALUE,
|
49
53
|
)
|
50
54
|
from ads.aqua.data import AquaResourceIdentifier
|
51
55
|
from ads.aqua.model import AquaModelApp
|
@@ -72,6 +76,11 @@ from ads.aqua.modeldeployment.entities import (
|
|
72
76
|
CreateModelDeploymentDetails,
|
73
77
|
)
|
74
78
|
from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
|
79
|
+
from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
|
80
|
+
from ads.aqua.shaperecommend.shape_report import (
|
81
|
+
RequestRecommend,
|
82
|
+
ShapeRecommendationReport,
|
83
|
+
)
|
75
84
|
from ads.common.object_storage_details import ObjectStorageDetails
|
76
85
|
from ads.common.utils import UNKNOWN, get_log_links
|
77
86
|
from ads.common.work_request import DataScienceWorkRequest
|
@@ -517,6 +526,7 @@ class AquaDeploymentApp(AquaApp):
|
|
517
526
|
|
518
527
|
# validate user provided params
|
519
528
|
user_params = env_var.get("PARAMS", UNKNOWN)
|
529
|
+
|
520
530
|
if user_params:
|
521
531
|
# todo: remove this check in the future version, logic to be moved to container_index
|
522
532
|
if (
|
@@ -542,6 +552,18 @@ class AquaDeploymentApp(AquaApp):
|
|
542
552
|
deployment_params = get_combined_params(config_params, user_params)
|
543
553
|
|
544
554
|
params = f"{params} {deployment_params}".strip()
|
555
|
+
|
556
|
+
if create_deployment_details.model_name:
|
557
|
+
# Replace existing --served-model-name argument if present, otherwise add it
|
558
|
+
if "--served-model-name" in params:
|
559
|
+
params = re.sub(
|
560
|
+
r"--served-model-name\s+\S+",
|
561
|
+
f"--served-model-name {create_deployment_details.model_name}",
|
562
|
+
params,
|
563
|
+
)
|
564
|
+
else:
|
565
|
+
params += f" --served-model-name {create_deployment_details.model_name}"
|
566
|
+
|
545
567
|
if params:
|
546
568
|
env_var.update({"PARAMS": params})
|
547
569
|
env_vars = container_spec.env_vars if container_spec else []
|
@@ -864,21 +886,26 @@ class AquaDeploymentApp(AquaApp):
|
|
864
886
|
|
865
887
|
if oci_aqua:
|
866
888
|
# skipping the AQUA model deployments that are created from model group
|
867
|
-
# TODO: remove this checker after AQUA deployment is integrated with model group
|
868
|
-
aqua_model_id = model_deployment.freeform_tags.get(
|
869
|
-
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
|
870
|
-
)
|
871
889
|
if (
|
872
|
-
|
873
|
-
|
874
|
-
== "UNKNOWN_ENUM_VALUE"
|
890
|
+
model_deployment.model_deployment_configuration_details.deployment_type
|
891
|
+
in [UNKNOWN_ENUM_VALUE, MODEL_GROUP, SINGLE_MODEL_FLEX]
|
875
892
|
):
|
876
893
|
continue
|
877
|
-
|
878
|
-
|
879
|
-
|
894
|
+
try:
|
895
|
+
results.append(
|
896
|
+
AquaDeployment.from_oci_model_deployment(
|
897
|
+
model_deployment, self.region
|
898
|
+
)
|
880
899
|
)
|
881
|
-
|
900
|
+
except Exception as e:
|
901
|
+
logger.error(
|
902
|
+
f"There was an issue processing the list of model deployments . Error: {str(e)}",
|
903
|
+
exc_info=True,
|
904
|
+
)
|
905
|
+
raise AquaRuntimeError(
|
906
|
+
f"There was an issue processing the list of model deployments . Error: {str(e)}"
|
907
|
+
) from e
|
908
|
+
|
882
909
|
# log telemetry if MD is in active or failed state
|
883
910
|
deployment_id = model_deployment.id
|
884
911
|
state = model_deployment.lifecycle_state.upper()
|
@@ -1249,6 +1276,50 @@ class AquaDeploymentApp(AquaApp):
|
|
1249
1276
|
)
|
1250
1277
|
return {"valid": True}
|
1251
1278
|
|
1279
|
+
def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
|
1280
|
+
"""
|
1281
|
+
For the CLI (set generate_table = True), generates the table (in rich diff) with valid
|
1282
|
+
GPU deployment shapes for the provided model and configuration.
|
1283
|
+
|
1284
|
+
For the API (set generate_table = False), generates the JSON with valid
|
1285
|
+
GPU deployment shapes for the provided model and configuration.
|
1286
|
+
|
1287
|
+
Validates if recommendations are generated, calls method to construct the rich diff
|
1288
|
+
table with the recommendation data.
|
1289
|
+
|
1290
|
+
Parameters
|
1291
|
+
----------
|
1292
|
+
model_ocid : str
|
1293
|
+
OCID of the model to recommend feasible compute shapes.
|
1294
|
+
|
1295
|
+
Returns
|
1296
|
+
-------
|
1297
|
+
Table (generate_table = True)
|
1298
|
+
A table format for the recommendation report with compatible deployment shapes
|
1299
|
+
or troubleshooting info citing the largest shapes if no shape is suitable.
|
1300
|
+
|
1301
|
+
ShapeRecommendationReport (generate_table = False)
|
1302
|
+
A recommendation report with compatible deployment shapes, or troubleshooting info
|
1303
|
+
citing the largest shapes if no shape is suitable.
|
1304
|
+
|
1305
|
+
Raises
|
1306
|
+
------
|
1307
|
+
AquaValueError
|
1308
|
+
If model type is unsupported by tool (no recommendation report generated)
|
1309
|
+
"""
|
1310
|
+
try:
|
1311
|
+
request = RequestRecommend(**kwargs)
|
1312
|
+
except ValidationError as e:
|
1313
|
+
custom_error = build_pydantic_error_message(e)
|
1314
|
+
raise AquaValueError( # noqa: B904
|
1315
|
+
f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
|
1316
|
+
)
|
1317
|
+
|
1318
|
+
shape_recommend = AquaShapeRecommend()
|
1319
|
+
shape_recommend_report = shape_recommend.which_shapes(request)
|
1320
|
+
|
1321
|
+
return shape_recommend_report
|
1322
|
+
|
1252
1323
|
@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
|
1253
1324
|
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
|
1254
1325
|
def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
|
ads/aqua/modeldeployment/entities.py
CHANGED
@@ -233,6 +233,9 @@ class CreateModelDeploymentDetails(BaseModel):
|
|
233
233
|
None, description="The description of the deployment."
|
234
234
|
)
|
235
235
|
model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
|
236
|
+
model_name: Optional[str] = Field(
|
237
|
+
None, description="The model name specified by user to deploy."
|
238
|
+
)
|
236
239
|
|
237
240
|
models: Optional[List[AquaMultiModelRef]] = Field(
|
238
241
|
None, description="List of models for multimodel deployment."
|