oracle-ads 2.13.18rc0__py3-none-any.whl → 2.13.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/cli.py +7 -5
- ads/aqua/common/entities.py +195 -48
- ads/aqua/common/enums.py +6 -0
- ads/aqua/common/errors.py +5 -0
- ads/aqua/common/utils.py +157 -66
- ads/aqua/constants.py +3 -0
- ads/aqua/extension/deployment_handler.py +36 -0
- ads/aqua/modeldeployment/constants.py +1 -0
- ads/aqua/modeldeployment/deployment.py +95 -14
- ads/aqua/modeldeployment/entities.py +3 -0
- ads/aqua/modeldeployment/model_group_config.py +3 -3
- ads/aqua/resources/gpu_shapes_index.json +315 -26
- ads/aqua/shaperecommend/__init__.py +6 -0
- ads/aqua/shaperecommend/constants.py +116 -0
- ads/aqua/shaperecommend/estimator.py +384 -0
- ads/aqua/shaperecommend/llm_config.py +283 -0
- ads/aqua/shaperecommend/recommend.py +493 -0
- ads/aqua/shaperecommend/shape_report.py +233 -0
- ads/aqua/version.json +1 -1
- ads/cli.py +9 -1
- ads/jobs/builders/infrastructure/dsc_job.py +1 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +9 -1
- ads/model/service/oci_datascience_model_deployment.py +46 -19
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +4 -3
- ads/pipeline/ads_pipeline.py +13 -9
- {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/RECORD +30 -24
- {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.18rc0.dist-info → oracle_ads-2.13.19.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/common/utils.py
CHANGED
@@ -800,35 +800,49 @@ def is_service_managed_container(container):
|
|
800
800
|
|
801
801
|
|
802
802
|
def get_params_list(params: str) -> List[str]:
|
803
|
-
"""
|
803
|
+
"""
|
804
|
+
Parses a string of CLI-style double-dash parameters and returns them as a list.
|
804
805
|
|
805
806
|
Parameters
|
806
807
|
----------
|
807
|
-
params
|
808
|
-
string parameters
|
808
|
+
params : str
|
809
|
+
A single string containing parameters separated by the `--` delimiter.
|
809
810
|
|
810
811
|
Returns
|
811
812
|
-------
|
812
|
-
|
813
|
+
List[str]
|
814
|
+
A list of parameter strings, each starting with `--`.
|
813
815
|
|
816
|
+
Example
|
817
|
+
-------
|
818
|
+
>>> get_params_list("--max-model-len 65536 --enforce-eager")
|
819
|
+
['--max-model-len 65536', '--enforce-eager']
|
814
820
|
"""
|
815
821
|
if not params:
|
816
822
|
return []
|
817
|
-
return ["--
|
823
|
+
return [f"--{param.strip()}" for param in params.split("--") if param.strip()]
|
818
824
|
|
819
825
|
|
820
826
|
def get_params_dict(params: Union[str, List[str]]) -> dict:
|
821
|
-
"""
|
827
|
+
"""
|
828
|
+
Converts CLI-style double-dash parameters (as string or list) into a dictionary.
|
822
829
|
|
823
830
|
Parameters
|
824
831
|
----------
|
825
|
-
params:
|
826
|
-
|
832
|
+
params : Union[str, List[str]]
|
833
|
+
Parameters provided either as:
|
834
|
+
- a single string: "--key1 val1 --key2 val2"
|
835
|
+
- a list of strings: ["--key1 val1", "--key2 val2"]
|
827
836
|
|
828
837
|
Returns
|
829
838
|
-------
|
830
|
-
|
839
|
+
dict
|
840
|
+
A dictionary mapping parameter names to their values. If no value is found, uses `UNKNOWN`.
|
831
841
|
|
842
|
+
Example
|
843
|
+
-------
|
844
|
+
>>> get_params_dict("--max-model-len 65536 --enforce-eager")
|
845
|
+
{'--max-model-len': '65536', '--enforce-eager': ''}
|
832
846
|
"""
|
833
847
|
params_list = get_params_list(params) if isinstance(params, str) else params
|
834
848
|
return {
|
@@ -839,35 +853,43 @@ def get_params_dict(params: Union[str, List[str]]) -> dict:
|
|
839
853
|
}
|
840
854
|
|
841
855
|
|
842
|
-
def get_combined_params(
|
856
|
+
def get_combined_params(
|
857
|
+
params1: Optional[str] = None, params2: Optional[str] = None
|
858
|
+
) -> str:
|
843
859
|
"""
|
844
|
-
|
860
|
+
Merges two double-dash parameter strings (`--param value`) into one, with values from `params2`
|
861
|
+
overriding any duplicates from `params1`.
|
862
|
+
|
845
863
|
Parameters
|
846
864
|
----------
|
847
|
-
params1:
|
848
|
-
|
849
|
-
params2:
|
850
|
-
|
865
|
+
params1 : Optional[str]
|
866
|
+
The base parameter string. Can be None.
|
867
|
+
params2 : Optional[str]
|
868
|
+
The override parameter string. Parameters in this string will override those in `params1`.
|
851
869
|
|
852
870
|
Returns
|
853
871
|
-------
|
854
|
-
|
872
|
+
str
|
873
|
+
A combined parameter string with deduplicated keys and overridden values from `params2`.
|
855
874
|
"""
|
875
|
+
if not params1 and not params2:
|
876
|
+
return ""
|
877
|
+
|
878
|
+
# If only one string is provided, return it directly
|
856
879
|
if not params1:
|
857
|
-
return params2
|
880
|
+
return params2.strip()
|
858
881
|
if not params2:
|
859
|
-
return params1
|
860
|
-
|
861
|
-
#
|
862
|
-
|
863
|
-
f"{key} {value}" if value else key
|
864
|
-
for key, value in {
|
865
|
-
**get_params_dict(params1),
|
866
|
-
**get_params_dict(params2),
|
867
|
-
}.items()
|
868
|
-
]
|
882
|
+
return params1.strip()
|
883
|
+
|
884
|
+
# Combine both dictionaries, with params2 overriding params1
|
885
|
+
merged_dict = {**get_params_dict(params1), **get_params_dict(params2)}
|
869
886
|
|
870
|
-
|
887
|
+
# Reconstruct the string
|
888
|
+
combined = " ".join(
|
889
|
+
f"{key} {value}" if value else key for key, value in merged_dict.items()
|
890
|
+
)
|
891
|
+
|
892
|
+
return combined.strip()
|
871
893
|
|
872
894
|
|
873
895
|
def find_restricted_params(
|
@@ -905,28 +927,46 @@ def find_restricted_params(
|
|
905
927
|
return restricted_params
|
906
928
|
|
907
929
|
|
908
|
-
def build_params_string(params:
|
909
|
-
"""
|
930
|
+
def build_params_string(params: Optional[Dict[str, Any]]) -> str:
|
931
|
+
"""
|
932
|
+
Converts a dictionary of CLI parameters into a command-line friendly string.
|
933
|
+
|
934
|
+
This is typically used to transform framework-specific model parameters (e.g., vLLM or TGI flags)
|
935
|
+
into a space-separated string that can be passed to container startup commands.
|
910
936
|
|
911
937
|
Parameters
|
912
938
|
----------
|
913
|
-
params:
|
914
|
-
|
939
|
+
params : Optional[Dict[str, Any]]
|
940
|
+
Dictionary containing parameter name as keys (e.g., "--max-model-len") and their corresponding values.
|
941
|
+
If a parameter does not require a value (e.g., a boolean flag), its value can be None or an empty string.
|
915
942
|
|
916
943
|
Returns
|
917
944
|
-------
|
918
|
-
|
945
|
+
str
|
946
|
+
A space-separated string of CLI arguments.
|
947
|
+
Returns "<unknown>" if the input dictionary is None or empty.
|
948
|
+
|
949
|
+
Example
|
950
|
+
-------
|
951
|
+
>>> build_params_string({"--max-model-len": 4096, "--enforce-eager": None})
|
952
|
+
'--max-model-len 4096 --enforce-eager'
|
919
953
|
"""
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
954
|
+
if not params:
|
955
|
+
return UNKNOWN
|
956
|
+
|
957
|
+
parts = []
|
958
|
+
for key, value in params.items():
|
959
|
+
if value is None or value == "":
|
960
|
+
parts.append(str(key))
|
961
|
+
else:
|
962
|
+
parts.append(f"{key} {value}")
|
963
|
+
|
964
|
+
return " ".join(parts).strip()
|
927
965
|
|
928
966
|
|
929
|
-
def copy_model_config(
|
967
|
+
def copy_model_config(
|
968
|
+
artifact_path: str, os_path: str, auth: Optional[Dict[str, Any]] = None
|
969
|
+
):
|
930
970
|
"""Copies the aqua model config folder from the artifact path to the user provided object storage path.
|
931
971
|
The config folder is overwritten if the files already exist at the destination path.
|
932
972
|
|
@@ -1202,36 +1242,45 @@ def parse_cmd_var(cmd_list: List[str]) -> dict:
|
|
1202
1242
|
return parsed_cmd
|
1203
1243
|
|
1204
1244
|
|
1205
|
-
def validate_cmd_var(
|
1206
|
-
|
1207
|
-
|
1245
|
+
def validate_cmd_var(
|
1246
|
+
cmd_var: Optional[List[str]], overrides: Optional[List[str]]
|
1247
|
+
) -> List[str]:
|
1248
|
+
"""
|
1249
|
+
Validates and combines two lists of command-line parameters. Raises an error if any parameter
|
1250
|
+
key in the `overrides` list already exists in the `cmd_var` list, preventing unintended overrides.
|
1251
|
+
|
1208
1252
|
Parameters
|
1209
1253
|
----------
|
1210
|
-
cmd_var: List[str]
|
1211
|
-
|
1212
|
-
overrides: List[str]
|
1213
|
-
|
1254
|
+
cmd_var : Optional[List[str]]
|
1255
|
+
The default list of command-line parameters (e.g., ["--param1", "value1", "--flag"]).
|
1256
|
+
overrides : Optional[List[str]]
|
1257
|
+
The list of overriding command-line parameters.
|
1258
|
+
|
1214
1259
|
Returns
|
1215
1260
|
-------
|
1216
|
-
|
1261
|
+
List[str]
|
1262
|
+
A validated and combined list of parameters, with overrides appended.
|
1263
|
+
|
1264
|
+
Raises
|
1265
|
+
------
|
1266
|
+
AquaValueError
|
1267
|
+
If `overrides` contain any parameter keys that already exist in `cmd_var`.
|
1217
1268
|
"""
|
1218
|
-
cmd_var = [str(x) for x in cmd_var]
|
1219
|
-
|
1220
|
-
return cmd_var
|
1221
|
-
overrides = [str(x) for x in overrides]
|
1269
|
+
cmd_var = [str(x).strip() for x in cmd_var or []]
|
1270
|
+
overrides = [str(x).strip() for x in overrides or []]
|
1222
1271
|
|
1223
1272
|
cmd_dict = parse_cmd_var(cmd_var)
|
1224
1273
|
overrides_dict = parse_cmd_var(overrides)
|
1225
1274
|
|
1226
|
-
#
|
1227
|
-
|
1228
|
-
if
|
1275
|
+
# Check for conflicting keys
|
1276
|
+
conflicting_keys = set(cmd_dict.keys()) & set(overrides_dict.keys())
|
1277
|
+
if conflicting_keys:
|
1229
1278
|
raise AquaValueError(
|
1230
|
-
f"
|
1279
|
+
f"Cannot override the following model deployment parameters: {', '.join(sorted(conflicting_keys))}"
|
1231
1280
|
)
|
1232
1281
|
|
1233
|
-
|
1234
|
-
return
|
1282
|
+
combined_params = cmd_var + overrides
|
1283
|
+
return combined_params
|
1235
1284
|
|
1236
1285
|
|
1237
1286
|
def build_pydantic_error_message(ex: ValidationError):
|
@@ -1267,10 +1316,10 @@ def load_gpu_shapes_index(
|
|
1267
1316
|
auth: Optional[Dict[str, Any]] = None,
|
1268
1317
|
) -> GPUShapesIndex:
|
1269
1318
|
"""
|
1270
|
-
Load the GPU shapes index,
|
1319
|
+
Load the GPU shapes index, merging based on freshness.
|
1271
1320
|
|
1272
|
-
|
1273
|
-
|
1321
|
+
Compares last-modified timestamps of local and remote files,
|
1322
|
+
merging the shapes from the fresher file on top of the older one.
|
1274
1323
|
|
1275
1324
|
Parameters
|
1276
1325
|
----------
|
@@ -1291,7 +1340,9 @@ def load_gpu_shapes_index(
|
|
1291
1340
|
file_name = "gpu_shapes_index.json"
|
1292
1341
|
|
1293
1342
|
# Try remote load
|
1294
|
-
|
1343
|
+
local_data, remote_data = {}, {}
|
1344
|
+
local_mtime, remote_mtime = None, None
|
1345
|
+
|
1295
1346
|
if CONDA_BUCKET_NS:
|
1296
1347
|
try:
|
1297
1348
|
auth = auth or authutil.default_signer()
|
@@ -1301,8 +1352,24 @@ def load_gpu_shapes_index(
|
|
1301
1352
|
logger.debug(
|
1302
1353
|
"Loading GPU shapes index from Object Storage: %s", storage_path
|
1303
1354
|
)
|
1304
|
-
|
1355
|
+
|
1356
|
+
fs = fsspec.filesystem("oci", **auth)
|
1357
|
+
with fs.open(storage_path, mode="r") as f:
|
1305
1358
|
remote_data = json.load(f)
|
1359
|
+
|
1360
|
+
remote_info = fs.info(storage_path)
|
1361
|
+
remote_mtime_str = remote_info.get("timeModified", None)
|
1362
|
+
if remote_mtime_str:
|
1363
|
+
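# Convert OCI timestamp (e.g., 'Mon, 04 Aug 2025 06:37:13 GMT') to epoch time. NOTE(review): strptime with %Z returns a naive datetime, so .timestamp() interprets it in the local timezone rather than GMT - confirm.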
|
1364
|
+
remote_mtime = datetime.strptime(
|
1365
|
+
remote_mtime_str, "%a, %d %b %Y %H:%M:%S %Z"
|
1366
|
+
).timestamp()
|
1367
|
+
|
1368
|
+
logger.debug(
|
1369
|
+
"Remote GPU shapes last-modified time: %s",
|
1370
|
+
datetime.fromtimestamp(remote_mtime).strftime("%Y-%m-%d %H:%M:%S"),
|
1371
|
+
)
|
1372
|
+
|
1306
1373
|
logger.debug(
|
1307
1374
|
"Loaded %d shapes from Object Storage",
|
1308
1375
|
len(remote_data.get("shapes", {})),
|
@@ -1311,12 +1378,19 @@ def load_gpu_shapes_index(
|
|
1311
1378
|
logger.debug("Remote load failed (%s); falling back to local", ex)
|
1312
1379
|
|
1313
1380
|
# Load local copy
|
1314
|
-
local_data: Dict[str, Any] = {}
|
1315
1381
|
local_path = os.path.join(os.path.dirname(__file__), "../resources", file_name)
|
1316
1382
|
try:
|
1317
1383
|
logger.debug("Loading GPU shapes index from local file: %s", local_path)
|
1318
1384
|
with open(local_path) as f:
|
1319
1385
|
local_data = json.load(f)
|
1386
|
+
|
1387
|
+
local_mtime = os.path.getmtime(local_path)
|
1388
|
+
|
1389
|
+
logger.debug(
|
1390
|
+
"Local GPU shapes last-modified time: %s",
|
1391
|
+
datetime.fromtimestamp(local_mtime).strftime("%Y-%m-%d %H:%M:%S"),
|
1392
|
+
)
|
1393
|
+
|
1320
1394
|
logger.debug(
|
1321
1395
|
"Loaded %d shapes from local file", len(local_data.get("shapes", {}))
|
1322
1396
|
)
|
@@ -1326,7 +1400,24 @@ def load_gpu_shapes_index(
|
|
1326
1400
|
# Merge: the fresher source overrides the older one (falls back to remote, then local, when timestamps are unavailable)
|
1327
1401
|
local_shapes = local_data.get("shapes", {})
|
1328
1402
|
remote_shapes = remote_data.get("shapes", {})
|
1329
|
-
merged_shapes = {
|
1403
|
+
merged_shapes = {}
|
1404
|
+
|
1405
|
+
if local_mtime and remote_mtime:
|
1406
|
+
if remote_mtime >= local_mtime:
|
1407
|
+
logger.debug("Remote data is fresher or equal; merging remote over local.")
|
1408
|
+
merged_shapes = {**local_shapes, **remote_shapes}
|
1409
|
+
else:
|
1410
|
+
logger.debug("Local data is fresher; merging local over remote.")
|
1411
|
+
merged_shapes = {**remote_shapes, **local_shapes}
|
1412
|
+
elif remote_shapes:
|
1413
|
+
logger.debug("Only remote shapes available.")
|
1414
|
+
merged_shapes = remote_shapes
|
1415
|
+
elif local_shapes:
|
1416
|
+
logger.debug("Only local shapes available.")
|
1417
|
+
merged_shapes = local_shapes
|
1418
|
+
else:
|
1419
|
+
logger.error("No GPU shapes data found in either source.")
|
1420
|
+
merged_shapes = {}
|
1330
1421
|
|
1331
1422
|
return GPUShapesIndex(shapes=merged_shapes)
|
1332
1423
|
|
ads/aqua/constants.py
CHANGED
@@ -56,6 +56,9 @@ SUPPORTED_FILE_FORMATS = ["jsonl"]
|
|
56
56
|
MODEL_BY_REFERENCE_OSS_PATH_KEY = "artifact_location"
|
57
57
|
|
58
58
|
AQUA_CHAT_TEMPLATE_METADATA_KEY = "chat_template"
|
59
|
+
UNKNOWN_ENUM_VALUE = "UNKNOWN_ENUM_VALUE"
|
60
|
+
MODEL_GROUP = "MODEL_GROUP"
|
61
|
+
SINGLE_MODEL_FLEX = "SINGLE_MODEL_FLEX"
|
59
62
|
|
60
63
|
CONSOLE_LINK_RESOURCE_TYPE_MAPPING = {
|
61
64
|
"datasciencemodel": "models",
|
@@ -57,6 +57,15 @@ class AquaDeploymentHandler(AquaAPIhandler):
|
|
57
57
|
return self.get_deployment_config(
|
58
58
|
model_id=id.split(",") if "," in id else id
|
59
59
|
)
|
60
|
+
elif paths.startswith("aqua/deployments/recommend_shapes"):
|
61
|
+
if not id or not isinstance(id, str):
|
62
|
+
raise HTTPError(
|
63
|
+
400,
|
64
|
+
f"Invalid request format for {self.request.path}. "
|
65
|
+
"Expected a single model OCID specified as --model_id",
|
66
|
+
)
|
67
|
+
id = id.replace(" ", "")
|
68
|
+
return self.get_recommend_shape(model_id=id)
|
60
69
|
elif paths.startswith("aqua/deployments/shapes"):
|
61
70
|
return self.list_shapes()
|
62
71
|
elif paths.startswith("aqua/deployments"):
|
@@ -161,6 +170,32 @@ class AquaDeploymentHandler(AquaAPIhandler):
|
|
161
170
|
|
162
171
|
return self.finish(deployment_config)
|
163
172
|
|
173
|
+
def get_recommend_shape(self, model_id: str):
|
174
|
+
"""
|
175
|
+
Retrieves the valid shape and deployment parameter configuration for one Aqua Model.
|
176
|
+
|
177
|
+
Parameters
|
178
|
+
----------
|
179
|
+
model_id : str
|
180
|
+
A single model ID (str).
|
181
|
+
|
182
|
+
Returns
|
183
|
+
-------
|
184
|
+
None
|
185
|
+
The function sends the ShapeRecommendationReport as the response (this handler always requests generate_table = False).
|
186
|
+
"""
|
187
|
+
app = AquaDeploymentApp()
|
188
|
+
|
189
|
+
compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID)
|
190
|
+
|
191
|
+
recommend_report = app.recommend_shape(
|
192
|
+
model_id=model_id,
|
193
|
+
compartment_id=compartment_id,
|
194
|
+
generate_table=False,
|
195
|
+
)
|
196
|
+
|
197
|
+
return self.finish(recommend_report)
|
198
|
+
|
164
199
|
def list_shapes(self):
|
165
200
|
"""
|
166
201
|
Lists the valid model deployment shapes.
|
@@ -408,6 +443,7 @@ __handlers__ = [
|
|
408
443
|
("deployments/?([^/]*)/params", AquaDeploymentParamsHandler),
|
409
444
|
("deployments/config/?([^/]*)", AquaDeploymentHandler),
|
410
445
|
("deployments/shapes/?([^/]*)", AquaDeploymentHandler),
|
446
|
+
("deployments/recommend_shapes/?([^/]*)", AquaDeploymentHandler),
|
411
447
|
("deployments/?([^/]*)", AquaDeploymentHandler),
|
412
448
|
("deployments/?([^/]*)/activate", AquaDeploymentHandler),
|
413
449
|
("deployments/?([^/]*)/deactivate", AquaDeploymentHandler),
|
@@ -8,11 +8,12 @@ import re
|
|
8
8
|
import shlex
|
9
9
|
import threading
|
10
10
|
from datetime import datetime, timedelta
|
11
|
-
from typing import Dict, List, Optional
|
11
|
+
from typing import Dict, List, Optional, Union
|
12
12
|
|
13
13
|
from cachetools import TTLCache, cached
|
14
14
|
from oci.data_science.models import ModelDeploymentShapeSummary
|
15
15
|
from pydantic import ValidationError
|
16
|
+
from rich.table import Table
|
16
17
|
|
17
18
|
from ads.aqua.app import AquaApp, logger
|
18
19
|
from ads.aqua.common.entities import (
|
@@ -44,8 +45,11 @@ from ads.aqua.constants import (
|
|
44
45
|
AQUA_MODEL_TYPE_SERVICE,
|
45
46
|
AQUA_MULTI_MODEL_CONFIG,
|
46
47
|
MODEL_BY_REFERENCE_OSS_PATH_KEY,
|
48
|
+
MODEL_GROUP,
|
47
49
|
MODEL_NAME_DELIMITER,
|
50
|
+
SINGLE_MODEL_FLEX,
|
48
51
|
UNKNOWN_DICT,
|
52
|
+
UNKNOWN_ENUM_VALUE,
|
49
53
|
)
|
50
54
|
from ads.aqua.data import AquaResourceIdentifier
|
51
55
|
from ads.aqua.model import AquaModelApp
|
@@ -72,6 +76,11 @@ from ads.aqua.modeldeployment.entities import (
|
|
72
76
|
CreateModelDeploymentDetails,
|
73
77
|
)
|
74
78
|
from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
|
79
|
+
from ads.aqua.shaperecommend.recommend import AquaShapeRecommend
|
80
|
+
from ads.aqua.shaperecommend.shape_report import (
|
81
|
+
RequestRecommend,
|
82
|
+
ShapeRecommendationReport,
|
83
|
+
)
|
75
84
|
from ads.common.object_storage_details import ObjectStorageDetails
|
76
85
|
from ads.common.utils import UNKNOWN, get_log_links
|
77
86
|
from ads.common.work_request import DataScienceWorkRequest
|
@@ -511,12 +520,23 @@ class AquaDeploymentApp(AquaApp):
|
|
511
520
|
|
512
521
|
deployment_config = self.get_deployment_config(model_id=config_source_id)
|
513
522
|
|
523
|
+
# Loads framework-specific default params from the configuration
|
514
524
|
config_params = deployment_config.configuration.get(
|
515
525
|
create_deployment_details.instance_shape, ConfigurationItem()
|
516
526
|
).parameters.get(get_container_params_type(container_type_key), UNKNOWN)
|
517
527
|
|
528
|
+
# Loads default environment variables from the configuration
|
529
|
+
config_env = deployment_config.configuration.get(
|
530
|
+
create_deployment_details.instance_shape, ConfigurationItem()
|
531
|
+
).env.get(get_container_params_type(container_type_key), {})
|
532
|
+
|
533
|
+
# Merges user provided environment variables with the ones provided in the deployment config
|
534
|
+
# The values provided by user will override the ones provided by default config
|
535
|
+
env_var = {**config_env, **env_var}
|
536
|
+
|
518
537
|
# validate user provided params
|
519
538
|
user_params = env_var.get("PARAMS", UNKNOWN)
|
539
|
+
|
520
540
|
if user_params:
|
521
541
|
# todo: remove this check in the future version, logic to be moved to container_index
|
522
542
|
if (
|
@@ -542,6 +562,18 @@ class AquaDeploymentApp(AquaApp):
|
|
542
562
|
deployment_params = get_combined_params(config_params, user_params)
|
543
563
|
|
544
564
|
params = f"{params} {deployment_params}".strip()
|
565
|
+
|
566
|
+
if create_deployment_details.model_name:
|
567
|
+
# Replace existing --served-model-name argument if present, otherwise add it
|
568
|
+
if "--served-model-name" in params:
|
569
|
+
params = re.sub(
|
570
|
+
r"--served-model-name\s+\S+",
|
571
|
+
f"--served-model-name {create_deployment_details.model_name}",
|
572
|
+
params,
|
573
|
+
)
|
574
|
+
else:
|
575
|
+
params += f" --served-model-name {create_deployment_details.model_name}"
|
576
|
+
|
545
577
|
if params:
|
546
578
|
env_var.update({"PARAMS": params})
|
547
579
|
env_vars = container_spec.env_vars if container_spec else []
|
@@ -621,8 +653,8 @@ class AquaDeploymentApp(AquaApp):
|
|
621
653
|
|
622
654
|
env_var.update({AQUA_MULTI_MODEL_CONFIG: multi_model_config.model_dump_json()})
|
623
655
|
|
624
|
-
|
625
|
-
for env in
|
656
|
+
container_spec_env_vars = container_spec.env_vars if container_spec else []
|
657
|
+
for env in container_spec_env_vars:
|
626
658
|
if isinstance(env, dict):
|
627
659
|
env = {k: v for k, v in env.items() if v}
|
628
660
|
for key, _ in env.items():
|
@@ -864,21 +896,26 @@ class AquaDeploymentApp(AquaApp):
|
|
864
896
|
|
865
897
|
if oci_aqua:
|
866
898
|
# skipping the AQUA model deployments that are created from model group
|
867
|
-
# TODO: remove this checker after AQUA deployment is integrated with model group
|
868
|
-
aqua_model_id = model_deployment.freeform_tags.get(
|
869
|
-
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
|
870
|
-
)
|
871
899
|
if (
|
872
|
-
|
873
|
-
|
874
|
-
== "UNKNOWN_ENUM_VALUE"
|
900
|
+
model_deployment.model_deployment_configuration_details.deployment_type
|
901
|
+
in [UNKNOWN_ENUM_VALUE, MODEL_GROUP, SINGLE_MODEL_FLEX]
|
875
902
|
):
|
876
903
|
continue
|
877
|
-
|
878
|
-
|
879
|
-
|
904
|
+
try:
|
905
|
+
results.append(
|
906
|
+
AquaDeployment.from_oci_model_deployment(
|
907
|
+
model_deployment, self.region
|
908
|
+
)
|
880
909
|
)
|
881
|
-
|
910
|
+
except Exception as e:
|
911
|
+
logger.error(
|
912
|
+
f"There was an issue processing the list of model deployments . Error: {str(e)}",
|
913
|
+
exc_info=True,
|
914
|
+
)
|
915
|
+
raise AquaRuntimeError(
|
916
|
+
f"There was an issue processing the list of model deployments . Error: {str(e)}"
|
917
|
+
) from e
|
918
|
+
|
882
919
|
# log telemetry if MD is in active or failed state
|
883
920
|
deployment_id = model_deployment.id
|
884
921
|
state = model_deployment.lifecycle_state.upper()
|
@@ -1249,6 +1286,50 @@ class AquaDeploymentApp(AquaApp):
|
|
1249
1286
|
)
|
1250
1287
|
return {"valid": True}
|
1251
1288
|
|
1289
|
+
def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
|
1290
|
+
"""
|
1291
|
+
For the CLI (set generate_table = True), generates the table (in rich diff) with valid
|
1292
|
+
GPU deployment shapes for the provided model and configuration.
|
1293
|
+
|
1294
|
+
For the API (set generate_table = False), generates the JSON with valid
|
1295
|
+
GPU deployment shapes for the provided model and configuration.
|
1296
|
+
|
1297
|
+
Validates if recommendations are generated, calls method to construct the rich diff
|
1298
|
+
table with the recommendation data.
|
1299
|
+
|
1300
|
+
Parameters
|
1301
|
+
----------
|
1302
|
+
model_id : str
|
1303
|
+
OCID of the model to recommend feasible compute shapes.
|
1304
|
+
|
1305
|
+
Returns
|
1306
|
+
-------
|
1307
|
+
Table (generate_table = True)
|
1308
|
+
A table format for the recommendation report with compatible deployment shapes
|
1309
|
+
or troubleshooting info citing the largest shapes if no shape is suitable.
|
1310
|
+
|
1311
|
+
ShapeRecommendationReport (generate_table = False)
|
1312
|
+
A recommendation report with compatible deployment shapes, or troubleshooting info
|
1313
|
+
citing the largest shapes if no shape is suitable.
|
1314
|
+
|
1315
|
+
Raises
|
1316
|
+
------
|
1317
|
+
AquaValueError
|
1318
|
+
If model type is unsupported by tool (no recommendation report generated)
|
1319
|
+
"""
|
1320
|
+
try:
|
1321
|
+
request = RequestRecommend(**kwargs)
|
1322
|
+
except ValidationError as e:
|
1323
|
+
custom_error = build_pydantic_error_message(e)
|
1324
|
+
raise AquaValueError( # noqa: B904
|
1325
|
+
f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
|
1326
|
+
)
|
1327
|
+
|
1328
|
+
shape_recommend = AquaShapeRecommend()
|
1329
|
+
shape_recommend_report = shape_recommend.which_shapes(request)
|
1330
|
+
|
1331
|
+
return shape_recommend_report
|
1332
|
+
|
1252
1333
|
@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
|
1253
1334
|
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
|
1254
1335
|
def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
|
@@ -233,6 +233,9 @@ class CreateModelDeploymentDetails(BaseModel):
|
|
233
233
|
None, description="The description of the deployment."
|
234
234
|
)
|
235
235
|
model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
|
236
|
+
model_name: Optional[str] = Field(
|
237
|
+
None, description="The model name specified by user to deploy."
|
238
|
+
)
|
236
239
|
|
237
240
|
models: Optional[List[AquaMultiModelRef]] = Field(
|
238
241
|
None, description="List of models for multimodel deployment."
|
@@ -130,7 +130,7 @@ class ModelGroupConfig(Serializable):
|
|
130
130
|
Validates if user-provided parameters override pre-set parameters by AQUA.
|
131
131
|
Updates model name and TP size parameters to user-provided parameters.
|
132
132
|
"""
|
133
|
-
user_params = build_params_string(model.
|
133
|
+
user_params = build_params_string(model.params)
|
134
134
|
if user_params:
|
135
135
|
restricted_params = find_restricted_params(
|
136
136
|
container_params, user_params, container_type_key
|
@@ -138,8 +138,8 @@ class ModelGroupConfig(Serializable):
|
|
138
138
|
if restricted_params:
|
139
139
|
selected_model = model.model_name or model.model_id
|
140
140
|
raise AquaValueError(
|
141
|
-
f"Parameters {restricted_params} are set by
|
142
|
-
f"and cannot be overridden or are invalid."
|
141
|
+
f"Parameters {restricted_params} are set by AI Quick Actions "
|
142
|
+
f"and cannot be overridden or are invalid. "
|
143
143
|
f"Select other parameters for model {selected_model}."
|
144
144
|
)
|
145
145
|
|