oracle-ads 2.13.4__py3-none-any.whl → 2.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/app.py +6 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/entities.py +224 -2
- ads/aqua/common/enums.py +3 -0
- ads/aqua/common/utils.py +105 -3
- ads/aqua/config/container_config.py +9 -0
- ads/aqua/constants.py +29 -1
- ads/aqua/evaluation/entities.py +6 -1
- ads/aqua/evaluation/evaluation.py +191 -7
- ads/aqua/extension/aqua_ws_msg_handler.py +6 -36
- ads/aqua/extension/base_handler.py +13 -71
- ads/aqua/extension/deployment_handler.py +67 -76
- ads/aqua/extension/errors.py +19 -0
- ads/aqua/extension/utils.py +114 -2
- ads/aqua/finetuning/finetuning.py +50 -1
- ads/aqua/model/constants.py +3 -0
- ads/aqua/model/enums.py +5 -0
- ads/aqua/model/model.py +236 -24
- ads/aqua/modeldeployment/deployment.py +671 -152
- ads/aqua/modeldeployment/entities.py +551 -42
- ads/aqua/modeldeployment/inference.py +4 -5
- ads/aqua/modeldeployment/utils.py +525 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/RECORD +28 -25
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/licenses/LICENSE.txt +0 -0
ads/aqua/common/utils.py
CHANGED
@@ -17,8 +17,9 @@ from datetime import datetime, timedelta
|
|
17
17
|
from functools import wraps
|
18
18
|
from pathlib import Path
|
19
19
|
from string import Template
|
20
|
-
from typing import List, Union
|
20
|
+
from typing import Any, Dict, List, Optional, Union
|
21
21
|
|
22
|
+
import fsspec
|
22
23
|
import oci
|
23
24
|
from cachetools import TTLCache, cached
|
24
25
|
from huggingface_hub.constants import HF_HUB_CACHE
|
@@ -32,8 +33,9 @@ from huggingface_hub.utils import (
|
|
32
33
|
)
|
33
34
|
from oci.data_science.models import JobRun, Model
|
34
35
|
from oci.object_storage.models import ObjectSummary
|
35
|
-
from pydantic import ValidationError
|
36
|
+
from pydantic import BaseModel, ValidationError
|
36
37
|
|
38
|
+
from ads.aqua.common.entities import GPUShapesIndex
|
37
39
|
from ads.aqua.common.enums import (
|
38
40
|
InferenceContainerParamType,
|
39
41
|
InferenceContainerType,
|
@@ -61,6 +63,7 @@ from ads.aqua.constants import (
|
|
61
63
|
VLLM_INFERENCE_RESTRICTED_PARAMS,
|
62
64
|
)
|
63
65
|
from ads.aqua.data import AquaResourceIdentifier
|
66
|
+
from ads.common import auth as authutil
|
64
67
|
from ads.common.auth import AuthState, default_signer
|
65
68
|
from ads.common.decorator.threaded import threaded
|
66
69
|
from ads.common.extended_enum import ExtendedEnum
|
@@ -76,6 +79,7 @@ from ads.common.utils import (
|
|
76
79
|
from ads.config import (
|
77
80
|
AQUA_MODEL_DEPLOYMENT_FOLDER,
|
78
81
|
AQUA_SERVICE_MODELS_BUCKET,
|
82
|
+
CONDA_BUCKET_NAME,
|
79
83
|
CONDA_BUCKET_NS,
|
80
84
|
TENANCY_OCID,
|
81
85
|
)
|
@@ -247,7 +251,7 @@ def load_config(file_path: str, config_file_name: str, **kwargs) -> dict:
|
|
247
251
|
return config
|
248
252
|
|
249
253
|
|
250
|
-
def list_os_files_with_extension(oss_path: str, extension: str) -> [str]:
|
254
|
+
def list_os_files_with_extension(oss_path: str, extension: str) -> List[str]:
|
251
255
|
"""
|
252
256
|
List files in the specified directory with the given extension.
|
253
257
|
|
@@ -934,6 +938,25 @@ def get_combined_params(params1: str = None, params2: str = None) -> str:
|
|
934
938
|
return " ".join(combined_params)
|
935
939
|
|
936
940
|
|
941
|
+
def build_params_string(params: dict) -> str:
    """Render a parameter mapping as a single space-separated string.

    Parameters
    ----------
    params: dict
        Parameter dict with key-value pairs. Each pair is rendered as
        ``"<name> <value>"``.

    Returns
    -------
    str
        All rendered pairs joined by single spaces, with leading and trailing
        whitespace removed. ``UNKNOWN`` is returned when ``params`` is empty
        or otherwise falsy.
    """
    # Guard clause: nothing to render for an empty/None mapping.
    if not params:
        return UNKNOWN

    rendered_pairs = [f"{key} {val}" for key, val in params.items()]
    return " ".join(rendered_pairs).strip()
|
958
|
+
|
959
|
+
|
937
960
|
def copy_model_config(artifact_path: str, os_path: str, auth: dict = None):
|
938
961
|
"""Copies the aqua model config folder from the artifact path to the user provided object storage path.
|
939
962
|
The config folder is overwritten if the files already exist at the destination path.
|
@@ -1214,3 +1237,82 @@ def build_pydantic_error_message(ex: ValidationError):
|
|
1214
1237
|
for e in ex.errors()
|
1215
1238
|
if "loc" in e and e["loc"]
|
1216
1239
|
} or "; ".join(e["msg"] for e in ex.errors())
|
1240
|
+
|
1241
|
+
|
1242
|
+
def is_pydantic_model(obj: object) -> bool:
    """
    Tell whether ``obj`` is a Pydantic model class or an instance of one.

    Args:
        obj: The object or class to check.

    Returns:
        bool: True if obj is a subclass or instance of BaseModel, False otherwise.
    """
    # Normalise to a class first so that classes and instances share one check.
    if isinstance(obj, type):
        candidate = obj
    else:
        candidate = type(obj)
    return issubclass(candidate, BaseModel)
|
1254
|
+
|
1255
|
+
|
1256
|
+
@cached(
    cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now),
    # `auth` is a dict and therefore unhashable: the default cachetools key would
    # raise TypeError as soon as a caller passes it. The cache holds a single
    # entry (maxsize=1), so one constant key is sufficient.
    key=lambda *_args, **_kwargs: "gpu_shapes_index",
)
def load_gpu_shapes_index(
    auth: Optional[Dict] = None,
) -> GPUShapesIndex:
    """
    Loads the GPU shapes index from Object Storage or a local resource folder.

    When ``CONDA_BUCKET_NS`` is set, the function first attempts to load the file
    from the ``service_pack`` folder of the conda bucket using fsspec. If that
    attempt fails or yields no data (connection issues, missing file, etc.), it
    falls back to the ``gpu_shapes_index.json`` file in the package's ``resources``
    folder. The result is cached for five minutes.

    Parameters
    ----------
    auth: (Dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
        authentication signer and kwargs required to instantiate IdentityClient object.

    Returns
    -------
    GPUShapesIndex: The parsed GPU shapes index.

    Notes
    -----
    Failures while reading either source (missing file, malformed JSON, network or
    authorization errors) are logged at debug level and are NOT re-raised. If both
    sources fail, ``GPUShapesIndex`` is constructed from an empty mapping.
    NOTE(review): whether ``GPUShapesIndex`` accepts an empty payload is decided by
    its definition in ``ads.aqua.common.entities`` - confirm.
    """
    file_name = "gpu_shapes_index.json"
    data: Dict[str, Any] = {}

    # Check if the CONDA_BUCKET_NS environment variable is set.
    if CONDA_BUCKET_NS:
        try:
            auth = auth or authutil.default_signer()
            # Construct the object storage path. Adjust bucket name and path as needed.
            storage_path = (
                f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
            )
            logger.debug("Loading GPU shapes index from Object Storage")
            with fsspec.open(storage_path, mode="r", **auth) as file_obj:
                data = json.load(file_obj)
            logger.debug("Successfully loaded GPU shapes index.")
        except Exception as ex:
            # Best effort: fall through to the bundled copy below.
            logger.debug(
                f"Failed to load GPU shapes index from Object Storage. Details: {ex}"
            )

    # If loading from Object Storage failed, load from the local resource folder.
    if not data:
        # Computed outside the try block so the except handler can always
        # reference it in its log message.
        local_path = os.path.join(
            os.path.dirname(__file__), "../resources", file_name
        )
        try:
            logger.debug(f"Loading GPU shapes index from {local_path}.")
            with open(local_path) as file_obj:
                data = json.load(file_obj)
            logger.debug("Successfully loaded GPU shapes index.")
        except Exception as e:
            logger.debug(
                f"Failed to load GPU shapes index from {local_path}. Details: {e}"
            )

    return GPUShapesIndex(**data)
|
@@ -8,6 +8,14 @@ from pydantic import Field
|
|
8
8
|
|
9
9
|
from ads.aqua.common.entities import ContainerSpec
|
10
10
|
from ads.aqua.config.utils.serializer import Serializable
|
11
|
+
from ads.common.extended_enum import ExtendedEnum
|
12
|
+
|
13
|
+
|
14
|
+
class Usage(ExtendedEnum):
    """Closed set of string labels describing a usage category.

    NOTE(review): presumably these label what a container configuration item
    may be used for - confirm against the consumers of this enum.
    """

    # Each member's value is its lower-case, snake_case string form.
    INFERENCE = "inference"
    BATCH_INFERENCE = "batch_inference"
    MULTI_MODEL = "multi_model"
    OTHER = "other"
|
11
19
|
|
12
20
|
|
13
21
|
class AquaContainerConfigSpec(Serializable):
|
@@ -86,6 +94,7 @@ class AquaContainerConfigItem(Serializable):
|
|
86
94
|
|
87
95
|
class Config:
|
88
96
|
extra = "allow"
|
97
|
+
protected_namespaces = ()
|
89
98
|
|
90
99
|
|
91
100
|
class AquaContainerConfig(Serializable):
|
ads/aqua/constants.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# Copyright (c) 2024 Oracle and/or its affiliates.
|
2
|
+
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
|
3
3
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
4
|
"""This module defines constants used in ads.aqua module."""
|
5
5
|
|
@@ -29,14 +29,18 @@ READY_TO_DEPLOY_STATUS = "ACTIVE"
|
|
29
29
|
READY_TO_FINE_TUNE_STATUS = "TRUE"
|
30
30
|
PRIVATE_ENDPOINT_TYPE = "MODEL_DEPLOYMENT"
|
31
31
|
AQUA_GA_LIST = ["id19sfcrra6z"]
|
32
|
+
AQUA_MULTI_MODEL_CONFIG = "MULTI_MODEL_CONFIG"
|
32
33
|
AQUA_MODEL_TYPE_SERVICE = "service"
|
33
34
|
AQUA_MODEL_TYPE_CUSTOM = "custom"
|
35
|
+
AQUA_MODEL_TYPE_MULTI = "multi_model"
|
34
36
|
AQUA_MODEL_ARTIFACT_CONFIG = "config.json"
|
35
37
|
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME = "_name_or_path"
|
36
38
|
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE = "model_type"
|
37
39
|
AQUA_MODEL_ARTIFACT_FILE = "model_file"
|
38
40
|
HF_METADATA_FOLDER = ".cache/"
|
39
41
|
HF_LOGIN_DEFAULT_TIMEOUT = 2
|
42
|
+
MODEL_NAME_DELIMITER = ";"
|
43
|
+
AQUA_TROUBLESHOOTING_LINK = "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/troubleshooting-tips.md"
|
40
44
|
|
41
45
|
TRAINING_METRICS_FINAL = "training_metrics_final"
|
42
46
|
VALIDATION_METRICS_FINAL = "validation_metrics_final"
|
@@ -82,3 +86,27 @@ LLAMA_CPP_INFERENCE_RESTRICTED_PARAMS = {
|
|
82
86
|
"--host",
|
83
87
|
}
|
84
88
|
TEI_CONTAINER_DEFAULT_HOST = "8080"
|
89
|
+
|
90
|
+
# Maps an OCI operation name to the user-facing message shown when that
# operation fails. Every message ends with "troubleshooting: " (including the
# trailing space) so that a link can be appended directly after it.
# NOTE(review): presumably AQUA_TROUBLESHOOTING_LINK is what gets appended -
# verify against the error-construction helper that reads this mapping.
OCI_OPERATION_FAILURES = {
    "list_model_deployments": "Unable to list model deployments. See tips for troubleshooting: ",
    "list_models": "Unable to list models. See tips for troubleshooting: ",
    "get_namespace": "Unable to access specified Object Storage Bucket. See tips for troubleshooting: ",
    "list_log_groups": "Unable to access logs. See tips for troubleshooting: ",
    "list_buckets": "Unable to list Object Storage Bucket. See tips for troubleshooting: ",
    "put_object": "Unable to access or find Object Storage Bucket. See tips for troubleshooting: ",
    "list_model_version_sets": "Unable to create or fetch model version set. See tips for troubleshooting: ",
    "update_model": "Unable to update model. See tips for troubleshooting: ",
    "list_data_science_private_endpoints": "Unable to access private endpoint. See tips for troubleshooting: ",
    "create_model": "Unable to register model. See tips for troubleshooting: ",
    "create_deployment": "Unable to create deployment. See tips for troubleshooting: ",
    "create_model_version_sets": "Unable to create model version set. See tips for troubleshooting: ",
    "create_job": "Unable to create job. See tips for troubleshooting: ",
    "create_job_run": "Unable to create job run. See tips for troubleshooting: ",
}

# Maps an HTTP status code (as a string) to a generic user-facing message.
STATUS_CODE_MESSAGES = {
    "400": "Could not process your request due to invalid input.",
    "403": "We're having trouble processing your request with the information provided.",
    "404": "Authorization Failed: The resource you're looking for isn't accessible.",
    "408": "Server is taking too long to respond, please try again.",
}
|
ads/aqua/evaluation/entities.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Oracle and/or its affiliates.
|
3
3
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
4
|
|
5
5
|
"""
|
@@ -91,6 +91,11 @@ class CreateAquaEvaluationDetails(Serializable):
|
|
91
91
|
force_overwrite: Optional[bool] = False
|
92
92
|
freeform_tags: Optional[dict] = None
|
93
93
|
defined_tags: Optional[dict] = None
|
94
|
+
container_image_uri: Optional[str] = Field(
|
95
|
+
None,
|
96
|
+
description="Image URI for evaluation container runtime. "
|
97
|
+
"The service managed container will be used by default.",
|
98
|
+
)
|
94
99
|
|
95
100
|
class Config:
|
96
101
|
extra = "ignore"
|
@@ -24,6 +24,7 @@ from oci.data_science.models import (
|
|
24
24
|
from ads.aqua import logger
|
25
25
|
from ads.aqua.app import AquaApp
|
26
26
|
from ads.aqua.common import utils
|
27
|
+
from ads.aqua.common.entities import AquaMultiModelRef
|
27
28
|
from ads.aqua.common.enums import (
|
28
29
|
DataScienceResource,
|
29
30
|
Resource,
|
@@ -76,6 +77,7 @@ from ads.aqua.evaluation.entities import (
|
|
76
77
|
CreateAquaEvaluationDetails,
|
77
78
|
)
|
78
79
|
from ads.aqua.evaluation.errors import EVALUATION_JOB_EXIT_CODE_MESSAGE
|
80
|
+
from ads.aqua.model.constants import ModelCustomMetadataFields
|
79
81
|
from ads.common.auth import default_signer
|
80
82
|
from ads.common.object_storage_details import ObjectStorageDetails
|
81
83
|
from ads.common.utils import UNKNOWN, get_console_link, get_files, get_log_links
|
@@ -96,6 +98,7 @@ from ads.model.generic_model import ModelDeploymentRuntimeType
|
|
96
98
|
from ads.model.model_metadata import (
|
97
99
|
MetadataTaxonomyKeys,
|
98
100
|
ModelCustomMetadata,
|
101
|
+
ModelCustomMetadataItem,
|
99
102
|
ModelProvenanceMetadata,
|
100
103
|
ModelTaxonomyMetadata,
|
101
104
|
)
|
@@ -138,13 +141,62 @@ class AquaEvaluationApp(AquaApp):
|
|
138
141
|
create_aqua_evaluation_details: CreateAquaEvaluationDetails = None,
|
139
142
|
**kwargs,
|
140
143
|
) -> "AquaEvaluationSummary":
|
141
|
-
"""Creates Aqua evaluation for resource
|
144
|
+
"""Creates Aqua evaluation for resource.\n
|
145
|
+
For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#model-evaluation
|
142
146
|
|
143
147
|
Parameters
|
144
148
|
----------
|
145
149
|
create_aqua_evaluation_details: CreateAquaEvaluationDetails
|
146
150
|
The CreateAquaEvaluationDetails data class which contains all
|
147
151
|
required and optional fields to create the aqua evaluation.
|
152
|
+
kwargs:
|
153
|
+
evaluation_source_id: str
|
154
|
+
The evaluation source id. Must be either model or model deployment ocid.
|
155
|
+
evaluation_name: str
|
156
|
+
The name for evaluation.
|
157
|
+
dataset_path: str
|
158
|
+
The dataset path for the evaluation. Could be either a local path from notebook session
|
159
|
+
or an object storage path.
|
160
|
+
report_path: str
|
161
|
+
The report path for the evaluation. Must be an object storage path.
|
162
|
+
model_parameters: dict
|
163
|
+
The parameters for the evaluation.
|
164
|
+
shape_name: str
|
165
|
+
The shape name for the evaluation job infrastructure.
|
166
|
+
memory_in_gbs: float
|
167
|
+
The memory in gbs for the shape selected.
|
168
|
+
ocpus: float
|
169
|
+
The ocpu count for the shape selected.
|
170
|
+
block_storage_size: int
|
171
|
+
The storage for the evaluation job infrastructure.
|
172
|
+
compartment_id: (str, optional). Defaults to `None`.
|
173
|
+
The compartment id for the evaluation.
|
174
|
+
project_id: (str, optional). Defaults to `None`.
|
175
|
+
The project id for the evaluation.
|
176
|
+
evaluation_description: (str, optional). Defaults to `None`.
|
177
|
+
The description for evaluation
|
178
|
+
experiment_id: (str, optional). Defaults to `None`.
|
179
|
+
The evaluation model version set id. If provided,
|
180
|
+
evaluation model will be associated with it.
|
181
|
+
experiment_name: (str, optional). Defaults to `None`.
|
182
|
+
The evaluation model version set name. If provided,
|
183
|
+
the model version set with the same name will be used if exists,
|
184
|
+
otherwise a new model version set will be created with the name.
|
185
|
+
experiment_description: (str, optional). Defaults to `None`.
|
186
|
+
The description for the evaluation model version set.
|
187
|
+
log_group_id: (str, optional). Defaults to `None`.
|
188
|
+
The log group id for the evaluation job infrastructure.
|
189
|
+
log_id: (str, optional). Defaults to `None`.
|
190
|
+
The log id for the evaluation job infrastructure.
|
191
|
+
metrics: (list, optional). Defaults to `None`.
|
192
|
+
The metrics for the evaluation.
|
193
|
+
force_overwrite: (bool, optional). Defaults to `False`.
|
194
|
+
Whether to force overwrite the existing file in object storage.
|
195
|
+
freeform_tags: (dict, optional)
|
196
|
+
Freeform tags for the evaluation model
|
197
|
+
defined_tags: (dict, optional)
|
198
|
+
Defined tags for the evaluation model
|
199
|
+
|
148
200
|
kwargs:
|
149
201
|
The kwargs for creating CreateAquaEvaluationDetails instance if
|
150
202
|
no create_aqua_evaluation_details provided.
|
@@ -183,6 +235,23 @@ class AquaEvaluationApp(AquaApp):
|
|
183
235
|
evaluation_source = ModelDeployment.from_id(
|
184
236
|
create_aqua_evaluation_details.evaluation_source_id
|
185
237
|
)
|
238
|
+
|
239
|
+
if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags:
|
240
|
+
multi_model_id = evaluation_source.freeform_tags.get(
|
241
|
+
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
|
242
|
+
)
|
243
|
+
|
244
|
+
if not multi_model_id:
|
245
|
+
raise AquaRuntimeError(
|
246
|
+
f"Invalid multi model deployment {multi_model_id}."
|
247
|
+
f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment."
|
248
|
+
)
|
249
|
+
|
250
|
+
aqua_model = DataScienceModel.from_id(multi_model_id)
|
251
|
+
AquaEvaluationApp.validate_model_name(
|
252
|
+
aqua_model, create_aqua_evaluation_details
|
253
|
+
)
|
254
|
+
|
186
255
|
try:
|
187
256
|
if (
|
188
257
|
evaluation_source.runtime.type
|
@@ -413,8 +482,11 @@ class AquaEvaluationApp(AquaApp):
|
|
413
482
|
JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING
|
414
483
|
)
|
415
484
|
|
416
|
-
container_image =
|
417
|
-
create_aqua_evaluation_details.
|
485
|
+
container_image = (
|
486
|
+
create_aqua_evaluation_details.container_image_uri
|
487
|
+
or self._get_evaluation_container(
|
488
|
+
create_aqua_evaluation_details.evaluation_source_id
|
489
|
+
)
|
418
490
|
)
|
419
491
|
|
420
492
|
evaluation_job.with_runtime(
|
@@ -432,9 +504,7 @@ class AquaEvaluationApp(AquaApp):
|
|
432
504
|
metrics=create_aqua_evaluation_details.metrics,
|
433
505
|
inference_configuration=eval_inference_configuration or {},
|
434
506
|
)
|
435
|
-
).create(
|
436
|
-
**kwargs
|
437
|
-
) ## TODO: decide what parameters will be needed
|
507
|
+
).create(**kwargs) ## TODO: decide what parameters will be needed
|
438
508
|
logger.debug(
|
439
509
|
f"Successfully created evaluation job {evaluation_job.id} for {create_aqua_evaluation_details.evaluation_source_id}."
|
440
510
|
)
|
@@ -551,6 +621,120 @@ class AquaEvaluationApp(AquaApp):
|
|
551
621
|
parameters=AquaEvalParams(),
|
552
622
|
)
|
553
623
|
|
624
|
+
@staticmethod
def validate_model_name(
    evaluation_source: DataScienceModel,
    create_aqua_evaluation_details: CreateAquaEvaluationDetails,
) -> None:
    """
    Validates the user input for the model name when creating an Aqua evaluation.

    This function verifies, in order, that:
    - The user provided a non-empty model name in the model parameters.
    - The model group count is present in the custom metadata and is at least one.
    - The multi-model metadata entry is present in the DataScienceModel metadata.
    - The multi-model metadata artifact can be fetched and parsed as JSON.
    - The provided model name is one of the model names listed in that artifact.

    Parameters
    ----------
    evaluation_source : DataScienceModel
        The DataScienceModel object containing metadata about each model in the deployment.
    create_aqua_evaluation_details : CreateAquaEvaluationDetails
        Contains required and optional fields for creating the Aqua evaluation.

    Raises
    ------
    AquaValueError
        If the user fails to provide a model name or if the provided model name does not match
        any of the valid model names in the deployment metadata.
    AquaRuntimeError
        If the model group count is missing or invalid, if the multi-model metadata
        is missing, or if the metadata artifact cannot be fetched or parsed.
    """
    user_model_parameters = create_aqua_evaluation_details.model_parameters
    custom_metadata_list = evaluation_source.custom_metadata_list
    user_model_name = user_model_parameters.get("model")

    # Ensure that a non-empty model name was provided.
    if not user_model_name:
        error_message = (
            "No model name was provided for evaluation. For multi-model deployment, "
            "a model must be specified in the model parameters."
        )
        logger.debug(error_message)
        raise AquaValueError(error_message)

    # Retrieve and convert the model group count from metadata. The lookup sits
    # inside the try block so that a missing entry and a non-numeric value are
    # reported the same way.
    try:
        model_count = custom_metadata_list.get(
            ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT
        )
        model_group_count = int(model_count.value)
    except Exception as ex:
        error_message = (
            "Missing or invalid `MULTIMODEL_GROUP_COUNT` "
            f"in custom metadata for evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}'. "
            f"Details: {ex}"
        )
        logger.error(error_message)
        # Without this raise, execution would continue with `model_group_count`
        # unbound and fail below with an unrelated UnboundLocalError.
        raise AquaRuntimeError(error_message) from ex

    if model_group_count < 1:
        logger.error(
            f"Invalid model group count: {model_group_count} for evaluation source ID "
            f"'{create_aqua_evaluation_details.evaluation_source_id}'. A valid multi-model deployment "
            f"requires at least one model."
        )
        raise AquaRuntimeError(
            f"Unable to retrieve details for the multi-model deployment evaluation. A valid multi-model deployment "
            f"must include at least one model. However, the provided evaluation source ID "
            f"'{create_aqua_evaluation_details.evaluation_source_id}' does not contain any information about deployed models."
        )

    # An empty placeholder item is supplied as the default so that `.value`
    # can be read even when the entry is absent.
    multi_model_metadata_value = custom_metadata_list.get(
        ModelCustomMetadataFields.MULTIMODEL_METADATA,
        ModelCustomMetadataItem(key=ModelCustomMetadataFields.MULTIMODEL_METADATA),
    ).value

    if not multi_model_metadata_value:
        error_message = (
            f"Required model metadata is missing for evaluation source ID: {evaluation_source.id}. "
            f"A valid multi-model deployment requires {ModelCustomMetadataFields.MULTIMODEL_METADATA}. "
            "Please recreate the model deployment and retry the evaluation, as an issue occurred during the initialization of the model group."
        )
        logger.debug(error_message)
        raise AquaRuntimeError(error_message)

    # The metadata artifact is fetched as bytes and decoded as UTF-8 JSON.
    try:
        multi_model_metadata = json.loads(
            evaluation_source.dsc_model.get_custom_metadata_artifact(
                metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA
            ).decode("utf-8")
        )
    except Exception as ex:
        error_message = (
            f"Error fetching {ModelCustomMetadataFields.MULTIMODEL_METADATA} "
            f"from custom metadata for evaluation source ID '{evaluation_source.id}'. "
            f"Details: {ex}"
        )
        logger.error(error_message)
        raise AquaRuntimeError(error_message) from ex

    # Build the list of valid model names from custom metadata.
    model_names = [
        AquaMultiModelRef(**metadata).model_name
        for metadata in multi_model_metadata
    ]

    # Check if the provided model name is among the valid names.
    if user_model_name not in model_names:
        error_message = (
            f"Provided model name '{user_model_name}' does not match any valid model names {model_names} "
            f"for evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}'. "
            "Please provide the correct model name."
        )
        logger.debug(error_message)
        raise AquaValueError(error_message)
|
737
|
+
|
554
738
|
def _build_evaluation_runtime(
|
555
739
|
self,
|
556
740
|
evaluation_id: str,
|
@@ -1393,7 +1577,7 @@ class AquaEvaluationApp(AquaApp):
|
|
1393
1577
|
)
|
1394
1578
|
except Exception as e:
|
1395
1579
|
logger.debug(
|
1396
|
-
f"Failed to retreive job run: {jobrun_id}.
|
1580
|
+
f"Failed to retreive job run: {jobrun_id}. DEBUG INFO: {str(e)}"
|
1397
1581
|
)
|
1398
1582
|
jobrun = None
|
1399
1583
|
|
@@ -3,17 +3,10 @@
|
|
3
3
|
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
|
4
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
5
|
|
6
|
-
import traceback
|
7
|
-
import uuid
|
8
6
|
from abc import abstractmethod
|
9
|
-
from http.client import responses
|
10
7
|
from typing import List
|
11
8
|
|
12
|
-
from tornado.web import HTTPError
|
13
|
-
|
14
|
-
from ads.aqua import logger
|
15
9
|
from ads.aqua.common.decorator import handle_exceptions
|
16
|
-
from ads.aqua.extension.base_handler import AquaAPIhandler
|
17
10
|
from ads.aqua.extension.models.ws_models import (
|
18
11
|
AquaWsError,
|
19
12
|
BaseRequest,
|
@@ -21,6 +14,7 @@ from ads.aqua.extension.models.ws_models import (
|
|
21
14
|
ErrorResponse,
|
22
15
|
RequestResponseType,
|
23
16
|
)
|
17
|
+
from ads.aqua.extension.utils import construct_error
|
24
18
|
from ads.config import AQUA_TELEMETRY_BUCKET, AQUA_TELEMETRY_BUCKET_NS
|
25
19
|
from ads.telemetry.client import TelemetryClient
|
26
20
|
|
@@ -55,48 +49,24 @@ class AquaWSMsgHandler:
|
|
55
49
|
|
56
50
|
def write_error(self, status_code, **kwargs):
|
57
51
|
"""AquaWSMSGhandler errors are JSON, not human pages."""
|
58
|
-
|
52
|
+
|
59
53
|
service_payload = kwargs.get("service_payload", {})
|
60
|
-
|
61
|
-
message = AquaAPIhandler.get_default_error_messages(
|
62
|
-
service_payload, str(status_code), kwargs.get("message", default_msg)
|
63
|
-
)
|
64
|
-
reply = {
|
65
|
-
"status": status_code,
|
66
|
-
"message": message,
|
67
|
-
"service_payload": service_payload,
|
68
|
-
"reason": reason,
|
69
|
-
"request_id": str(uuid.uuid4()),
|
70
|
-
}
|
71
|
-
exc_info = kwargs.get("exc_info")
|
72
|
-
if exc_info:
|
73
|
-
logger.error(
|
74
|
-
f"Error Request ID: {reply['request_id']}\n"
|
75
|
-
f"Error: {''.join(traceback.format_exception(*exc_info))}"
|
76
|
-
)
|
77
|
-
e = exc_info[1]
|
78
|
-
if isinstance(e, HTTPError):
|
79
|
-
reply["message"] = e.log_message or message
|
80
|
-
reply["reason"] = e.reason
|
54
|
+
reply_details = construct_error(status_code, **kwargs)
|
81
55
|
|
82
|
-
logger.error(
|
83
|
-
f"Error Request ID: {reply['request_id']}\n"
|
84
|
-
f"Error: {reply['message']} {reply['reason']}"
|
85
|
-
)
|
86
56
|
# telemetry may not be present if there is an error while initializing
|
87
57
|
if hasattr(self, "telemetry"):
|
88
58
|
aqua_api_details = kwargs.get("aqua_api_details", {})
|
89
59
|
self.telemetry.record_event_async(
|
90
60
|
category="aqua/error",
|
91
61
|
action=str(status_code),
|
92
|
-
value=reason,
|
62
|
+
value=reply_details.reason,
|
93
63
|
**aqua_api_details,
|
94
64
|
)
|
95
65
|
response = AquaWsError(
|
96
66
|
status=status_code,
|
97
|
-
message=message,
|
67
|
+
message=reply_details.message,
|
98
68
|
service_payload=service_payload,
|
99
|
-
reason=reason,
|
69
|
+
reason=reply_details.reason,
|
100
70
|
)
|
101
71
|
base_message = BaseRequest.from_json(self.message, ignore_unknown=True)
|
102
72
|
return ErrorResponse(
|