oracle-ads 2.10.1__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +12 -0
- ads/aqua/base.py +324 -0
- ads/aqua/cli.py +19 -0
- ads/aqua/config/deployment_config_defaults.json +9 -0
- ads/aqua/config/resource_limit_names.json +7 -0
- ads/aqua/constants.py +45 -0
- ads/aqua/data.py +40 -0
- ads/aqua/decorator.py +101 -0
- ads/aqua/deployment.py +643 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation.py +1751 -0
- ads/aqua/exception.py +82 -0
- ads/aqua/extension/__init__.py +40 -0
- ads/aqua/extension/base_handler.py +138 -0
- ads/aqua/extension/common_handler.py +21 -0
- ads/aqua/extension/deployment_handler.py +202 -0
- ads/aqua/extension/evaluation_handler.py +135 -0
- ads/aqua/extension/finetune_handler.py +66 -0
- ads/aqua/extension/model_handler.py +59 -0
- ads/aqua/extension/ui_handler.py +201 -0
- ads/aqua/extension/utils.py +23 -0
- ads/aqua/finetune.py +579 -0
- ads/aqua/job.py +29 -0
- ads/aqua/model.py +819 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +459 -0
- ads/aqua/ui.py +453 -0
- ads/aqua/utils.py +715 -0
- ads/cli.py +37 -6
- ads/common/decorator/__init__.py +7 -3
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/object_storage_details.py +166 -7
- ads/common/oci_client.py +18 -1
- ads/common/oci_logging.py +2 -2
- ads/common/oci_mixin.py +4 -5
- ads/common/serializer.py +34 -5
- ads/common/utils.py +75 -10
- ads/config.py +40 -1
- ads/jobs/ads_job.py +43 -25
- ads/jobs/builders/infrastructure/base.py +4 -2
- ads/jobs/builders/infrastructure/dsc_job.py +49 -39
- ads/jobs/builders/runtimes/base.py +71 -1
- ads/jobs/builders/runtimes/container_runtime.py +4 -4
- ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
- ads/jobs/templates/driver_pytorch.py +27 -10
- ads/model/artifact_downloader.py +84 -14
- ads/model/artifact_uploader.py +25 -23
- ads/model/datascience_model.py +388 -38
- ads/model/deployment/model_deployment.py +10 -2
- ads/model/generic_model.py +8 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_metadata.py +1 -1
- ads/model/service/oci_datascience_model.py +34 -5
- ads/opctl/operator/lowcode/anomaly/README.md +2 -1
- ads/opctl/operator/lowcode/anomaly/__main__.py +10 -4
- ads/opctl/operator/lowcode/anomaly/environment.yaml +2 -1
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +12 -6
- ads/opctl/operator/lowcode/forecast/README.md +3 -2
- ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
- ads/opctl/operator/lowcode/forecast/model/automlx.py +12 -23
- ads/telemetry/base.py +62 -0
- ads/telemetry/client.py +105 -0
- ads/telemetry/telemetry.py +6 -3
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +37 -7
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +71 -36
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
ads/aqua/evaluation.py
ADDED
@@ -0,0 +1,1751 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import base64
import json
import os
import re
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import asdict, dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from threading import Lock
from typing import Any, Dict, List, Optional, Union

import oci
from cachetools import TTLCache
from oci.data_science.models import (
    JobRun,
    Metadata,
    UpdateModelDetails,
    UpdateModelProvenanceDetails,
)

from ads.aqua import logger, utils
from ads.aqua.base import AquaApp
from ads.aqua.data import Tags
from ads.aqua.exception import (
    AquaFileExistsError,
    AquaFileNotFoundError,
    AquaMissingKeyError,
    AquaRuntimeError,
    AquaValueError,
)
from ads.aqua.utils import (
    JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING,
    NB_SESSION_IDENTIFIER,
    UNKNOWN,
    fire_and_forget,
    get_container_image,
    is_valid_ocid,
    upload_local_to_os,
)
from ads.common.auth import default_signer
from ads.common.object_storage_details import ObjectStorageDetails
from ads.common.serializer import DataClassSerializable
from ads.common.utils import get_console_link, get_files, get_log_links, upload_to_os
from ads.config import (
    AQUA_JOB_SUBNET_ID,
    COMPARTMENT_OCID,
    CONDA_BUCKET_NS,
    PROJECT_OCID,
)
from ads.jobs.ads_job import DataScienceJobRun, Job
from ads.jobs.builders.infrastructure.dsc_job import DataScienceJob
from ads.jobs.builders.runtimes.base import Runtime
from ads.jobs.builders.runtimes.container_runtime import ContainerRuntime
from ads.model.datascience_model import DataScienceModel
from ads.model.deployment.model_deployment import ModelDeployment
from ads.model.model_metadata import (
    MetadataTaxonomyKeys,
    ModelCustomMetadata,
    ModelProvenanceMetadata,
    ModelTaxonomyMetadata,
)
from ads.model.model_version_set import ModelVersionSet
from ads.telemetry import telemetry

EVAL_TERMINATION_STATE = [
    JobRun.LIFECYCLE_STATE_SUCCEEDED,
    JobRun.LIFECYCLE_STATE_FAILED,
]


class EvaluationJobExitCode(Enum):
    SUCCESS = 0
    COMMON_ERROR = 1

    # Configuration-related issues
    INVALID_EVALUATION_CONFIG = 10
    EVALUATION_CONFIG_NOT_PROVIDED = 11
    INVALID_OUTPUT_DIR = 12
    INVALID_INPUT_DATASET_PATH = 13
    INVALID_EVALUATION_ID = 14
    INVALID_TARGET_EVALUATION_ID = 15
    INVALID_EVALUATION_CONFIG_VALIDATION = 16

    # Evaluation process issues
    OUTPUT_DIR_NOT_FOUND = 20
    INVALID_INPUT_DATASET = 21
    INPUT_DATA_NOT_FOUND = 22
    EVALUATION_ID_NOT_FOUND = 23
    EVALUATION_ALREADY_PERFORMED = 24
    EVALUATION_TARGET_NOT_FOUND = 25
    NO_SUCCESS_INFERENCE_RESULT = 26
    COMPUTE_EVALUATION_ERROR = 27
    EVALUATION_REPORT_ERROR = 28
    MODEL_INFERENCE_WRONG_RESPONSE_FORMAT = 29
    UNSUPPORTED_METRICS = 30
    METRIC_CALCULATION_FAILURE = 31


EVALUATION_JOB_EXIT_CODE_MESSAGE = {
    EvaluationJobExitCode.SUCCESS.value: "Success",
    EvaluationJobExitCode.COMMON_ERROR.value: "An error occurred during the evaluation, please check the log for more information.",
    EvaluationJobExitCode.INVALID_EVALUATION_CONFIG.value: "The provided evaluation configuration was not in the correct format, supported formats are YAML or JSON.",
    EvaluationJobExitCode.EVALUATION_CONFIG_NOT_PROVIDED.value: "The evaluation config was not provided.",
    EvaluationJobExitCode.INVALID_OUTPUT_DIR.value: "The specified output directory path is invalid.",
    EvaluationJobExitCode.INVALID_INPUT_DATASET_PATH.value: "Dataset path is invalid.",
    EvaluationJobExitCode.INVALID_EVALUATION_ID.value: "Evaluation ID was not found in the Model Catalog.",
    EvaluationJobExitCode.INVALID_TARGET_EVALUATION_ID.value: "Target evaluation ID was not found in the Model Deployment.",
    EvaluationJobExitCode.INVALID_EVALUATION_CONFIG_VALIDATION.value: "Validation errors in the evaluation config.",
    EvaluationJobExitCode.OUTPUT_DIR_NOT_FOUND.value: "Destination folder does not exist or cannot be used for writing, verify the folder's existence and permissions.",
    EvaluationJobExitCode.INVALID_INPUT_DATASET.value: "Input dataset is in an invalid format, ensure the dataset is in jsonl format and that includes the required columns: 'prompt', 'completion' (optional 'category').",
    EvaluationJobExitCode.INPUT_DATA_NOT_FOUND.value: "Input data file does not exist or cannot be use for reading, verify the file's existence and permissions.",
    EvaluationJobExitCode.EVALUATION_ID_NOT_FOUND.value: "Evaluation ID does not match any resource in the Model Catalog, or access may be blocked by policies.",
    EvaluationJobExitCode.EVALUATION_ALREADY_PERFORMED.value: "Evaluation already has an attached artifact, indicating that the evaluation has already been performed.",
    EvaluationJobExitCode.EVALUATION_TARGET_NOT_FOUND.value: "Target evaluation ID does not match any resources in Model Deployment.",
    EvaluationJobExitCode.NO_SUCCESS_INFERENCE_RESULT.value: "Inference process completed without producing expected outcome, verify the model parameters and config.",
    EvaluationJobExitCode.COMPUTE_EVALUATION_ERROR.value: "Evaluation process encountered an issue while calculating metrics.",
    EvaluationJobExitCode.EVALUATION_REPORT_ERROR.value: "Failed to save the evaluation report due to an error. Ensure the evaluation model is currently active and the specified path for the output report is valid and accessible. Verify these conditions and reinitiate the evaluation process.",
    EvaluationJobExitCode.MODEL_INFERENCE_WRONG_RESPONSE_FORMAT.value: "Evaluation encountered unsupported, or unexpected model output, verify the target evaluation model is compatible and produces the correct format.",
    EvaluationJobExitCode.UNSUPPORTED_METRICS.value: "None of the provided metrics are supported by the framework.",
    EvaluationJobExitCode.METRIC_CALCULATION_FAILURE.value: "All attempted metric calculations were unsuccessful. Please review the metric configurations and input data.",
}


class Resource(Enum):
    JOB = "jobs"
    MODEL = "models"
    MODEL_DEPLOYMENT = "modeldeployments"
    MODEL_VERSION_SET = "model-version-sets"


class DataScienceResource(Enum):
    MODEL_DEPLOYMENT = "datasciencemodeldeployment"
    MODEL = "datasciencemodel"


class EvaluationCustomMetadata(Enum):
    EVALUATION_SOURCE = "evaluation_source"
    EVALUATION_JOB_ID = "evaluation_job_id"
    EVALUATION_JOB_RUN_ID = "evaluation_job_run_id"
    EVALUATION_OUTPUT_PATH = "evaluation_output_path"
    EVALUATION_SOURCE_NAME = "evaluation_source_name"
    EVALUATION_ERROR = "aqua_evaluate_error"


class EvaluationModelTags(Enum):
    AQUA_EVALUATION = "aqua_evaluation"


class EvaluationJobTags(Enum):
    AQUA_EVALUATION = "aqua_evaluation"
    EVALUATION_MODEL_ID = "evaluation_model_id"


class EvaluationUploadStatus(Enum):
    IN_PROGRESS = "IN_PROGRESS"
    COMPLETED = "COMPLETED"


@dataclass(repr=False)
class AquaResourceIdentifier(DataClassSerializable):
    id: str = ""
    name: str = ""
    url: str = ""


@dataclass(repr=False)
class AquaEvalReport(DataClassSerializable):
    evaluation_id: str = ""
    content: str = ""


@dataclass(repr=False)
class ModelParams(DataClassSerializable):
    max_tokens: str = ""
    top_p: str = ""
    top_k: str = ""
    temperature: str = ""
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    stop: Optional[Union[str, List[str]]] = field(default_factory=list)


@dataclass(repr=False)
class AquaEvalParams(ModelParams, DataClassSerializable):
    shape: str = ""
    dataset_path: str = ""
    report_path: str = ""


@dataclass(repr=False)
class AquaEvalMetric(DataClassSerializable):
    key: str
    name: str
    description: str = ""


@dataclass(repr=False)
class AquaEvalMetricSummary(DataClassSerializable):
    metric: str = ""
    score: str = ""
    grade: str = ""


@dataclass(repr=False)
class AquaEvalMetrics(DataClassSerializable):
    id: str
    report: str
    metric_results: List[AquaEvalMetric] = field(default_factory=list)
    metric_summary_result: List[AquaEvalMetricSummary] = field(default_factory=list)


@dataclass(repr=False)
class AquaEvaluationCommands(DataClassSerializable):
    evaluation_id: str
    evaluation_target_id: str
    input_data: dict
    metrics: list
    output_dir: str
    params: dict


@dataclass(repr=False)
class AquaEvaluationSummary(DataClassSerializable):
    """Represents a summary of Aqua evalution."""

    id: str
    name: str
    console_url: str
    lifecycle_state: str
    lifecycle_details: str
    time_created: str
    tags: dict
    experiment: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    source: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    job: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    parameters: AquaEvalParams = field(default_factory=AquaEvalParams)


@dataclass(repr=False)
class AquaEvaluationDetail(AquaEvaluationSummary, DataClassSerializable):
    """Represents a details of Aqua evalution."""

    log_group: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    log: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    introspection: dict = field(default_factory=dict)


class RqsAdditionalDetails:
    METADATA = "metadata"
    CREATED_BY = "createdBy"
    DESCRIPTION = "description"
    MODEL_VERSION_SET_ID = "modelVersionSetId"
    MODEL_VERSION_SET_NAME = "modelVersionSetName"
    PROJECT_ID = "projectId"
    VERSION_LABEL = "versionLabel"


class EvaluationConfig:
    PARAMS = "model_params"


@dataclass(repr=False)
class CreateAquaEvaluationDetails(DataClassSerializable):
    """Dataclass to create aqua model evaluation.

    Fields
    ------
    evaluation_source_id: str
        The evaluation source id. Must be either model or model deployment ocid.
    evaluation_name: str
        The name for evaluation.
    dataset_path: str
        The dataset path for the evaluation. Could be either a local path from notebook session
        or an object storage path.
    report_path: str
        The report path for the evaluation. Must be an object storage path.
    model_parameters: dict
        The parameters for the evaluation.
    shape_name: str
        The shape name for the evaluation job infrastructure.
    memory_in_gbs: float
        The memory in gbs for the shape selected.
    ocpus: float
        The ocpu count for the shape selected.
    block_storage_size: int
        The storage for the evaluation job infrastructure.
    compartment_id: (str, optional). Defaults to `None`.
        The compartment id for the evaluation.
    project_id: (str, optional). Defaults to `None`.
        The project id for the evaluation.
    evaluation_description: (str, optional). Defaults to `None`.
        The description for evaluation
    experiment_id: (str, optional). Defaults to `None`.
        The evaluation model version set id. If provided,
        evaluation model will be associated with it.
    experiment_name: (str, optional). Defaults to `None`.
        The evaluation model version set name. If provided,
        the model version set with the same name will be used if exists,
        otherwise a new model version set will be created with the name.
    experiment_description: (str, optional). Defaults to `None`.
        The description for the evaluation model version set.
    log_group_id: (str, optional). Defaults to `None`.
        The log group id for the evaluation job infrastructure.
    log_id: (str, optional). Defaults to `None`.
        The log id for the evaluation job infrastructure.
    metrics: (list, optional). Defaults to `None`.
        The metrics for the evaluation.
    """

    evaluation_source_id: str
    evaluation_name: str
    dataset_path: str
    report_path: str
    model_parameters: dict
    shape_name: str
    block_storage_size: int
    compartment_id: Optional[str] = None
    project_id: Optional[str] = None
    evaluation_description: Optional[str] = None
    experiment_id: Optional[str] = None
    experiment_name: Optional[str] = None
    experiment_description: Optional[str] = None
    memory_in_gbs: Optional[float] = None
    ocpus: Optional[float] = None
    log_group_id: Optional[str] = None
    log_id: Optional[str] = None
    metrics: Optional[List] = None

class AquaEvaluationApp(AquaApp):
    """Provides a suite of APIs to interact with Aqua evaluations within the
    Oracle Cloud Infrastructure Data Science service, serving as an interface
    for managing model evalutions.


    Methods
    -------
    create(evaluation_source_id, evaluation_name, ...) -> AquaEvaluationSummary:
        Creates Aqua evaluation for resource.
    get(model_id: str) -> AquaEvaluationSummary:
        Retrieves details of an Aqua evaluation by its unique identifier.
    list(compartment_id: str = None, project_id: str = None, **kwargs) -> List[AquaEvaluationSummary]:
        Lists all Aqua evaluation within a specified compartment and/or project.

    Note:
        This class is designed to work within the Oracle Cloud Infrastructure
        and requires proper configuration and authentication set up to interact
        with OCI services.
    """

    _report_cache = TTLCache(maxsize=10, ttl=timedelta(hours=5), timer=datetime.now)
    _metrics_cache = TTLCache(maxsize=10, ttl=timedelta(hours=5), timer=datetime.now)
    _eval_cache = TTLCache(maxsize=200, ttl=timedelta(hours=10), timer=datetime.now)
    _cache_lock = Lock()

    @telemetry(entry_point="plugin=evaluation&action=create", name="aqua")
    def create(
        self,
        create_aqua_evaluation_details: CreateAquaEvaluationDetails,
        **kwargs,
    ) -> "AquaEvaluationSummary":
        """Creates Aqua evaluation for resource.

        Parameters
        ----------
        create_aqua_evaluation_details: CreateAquaEvaluationDetails
            The CreateAquaEvaluationDetails data class which contains all
            required and optional fields to create the aqua evaluation.
        kwargs:
            The kwargs for the evaluation.

        Returns
        -------
        AquaEvaluationSummary:
            The instance of AquaEvaluationSummary.
        """
        if not is_valid_ocid(create_aqua_evaluation_details.evaluation_source_id):
            raise AquaValueError(
                f"Invalid evaluation source {create_aqua_evaluation_details.evaluation_source_id}. "
                "Specify either a model or model deployment id."
            )

        evaluation_source = None
        if (
            DataScienceResource.MODEL_DEPLOYMENT.value
            in create_aqua_evaluation_details.evaluation_source_id
        ):
            evaluation_source = ModelDeployment.from_id(
                create_aqua_evaluation_details.evaluation_source_id
            )
        elif (
            DataScienceResource.MODEL.value
            in create_aqua_evaluation_details.evaluation_source_id
        ):
            evaluation_source = DataScienceModel.from_id(
                create_aqua_evaluation_details.evaluation_source_id
            )
        else:
            raise AquaValueError(
                f"Invalid evaluation source {create_aqua_evaluation_details.evaluation_source_id}. "
                "Specify either a model or model deployment id."
            )

        if not ObjectStorageDetails.is_oci_path(
            create_aqua_evaluation_details.report_path
        ):
            raise AquaValueError(
                "Evaluation report path must be an object storage path."
            )

        evaluation_dataset_path = create_aqua_evaluation_details.dataset_path
        if not ObjectStorageDetails.is_oci_path(evaluation_dataset_path):
            # format: oci://<bucket>@<namespace>/<prefix>/<dataset_file_name>
            dataset_file = os.path.basename(evaluation_dataset_path)
            dst_uri = f"{create_aqua_evaluation_details.report_path.rstrip('/')}/{dataset_file}"
            try:
                upload_local_to_os(
                    src_uri=evaluation_dataset_path,
                    dst_uri=dst_uri,
                    auth=default_signer(),
                    force_overwrite=False,
                )
            except FileExistsError:
                raise AquaFileExistsError(
                    f"Dataset {dataset_file} already exists in {create_aqua_evaluation_details.report_path}. "
                    "Please use a new dataset file name or report path."
                )
            logger.debug(
                f"Uploaded local file {evaluation_dataset_path} to object storage {dst_uri}."
            )
            # tracks the size of dataset uploaded by user to the destination.
            self.telemetry.record_event_async(
                category="aqua/evaluation/upload",
                action="size",
                detail=os.path.getsize(os.path.expanduser(evaluation_dataset_path)),
            )
            evaluation_dataset_path = dst_uri

        evaluation_model_parameters = None
        try:
            evaluation_model_parameters = AquaEvalParams(
                shape=create_aqua_evaluation_details.shape_name,
                dataset_path=evaluation_dataset_path,
                report_path=create_aqua_evaluation_details.report_path,
                **create_aqua_evaluation_details.model_parameters,
            )
        except:
            raise AquaValueError(
                "Invalid model parameters. Model parameters should "
                f"be a dictionary with keys: {', '.join(list(ModelParams.__annotations__.keys()))}."
            )

        target_compartment = (
            create_aqua_evaluation_details.compartment_id or COMPARTMENT_OCID
        )
        target_project = create_aqua_evaluation_details.project_id or PROJECT_OCID

        experiment_model_version_set_id = create_aqua_evaluation_details.experiment_id
        experiment_model_version_set_name = (
            create_aqua_evaluation_details.experiment_name
        )

        if (
            not experiment_model_version_set_id
            and not experiment_model_version_set_name
        ):
            raise AquaValueError(
                "Either experiment id or experiment name must be provided."
            )

        if not experiment_model_version_set_id:
            try:
                model_version_set = ModelVersionSet.from_name(
                    name=experiment_model_version_set_name,
                    compartment_id=target_compartment,
                )
                if not utils._is_valid_mvs(
                    model_version_set, Tags.AQUA_EVALUATION.value
                ):
                    raise AquaValueError(
                        f"Invalid experiment name. Please provide an experiment with `{Tags.AQUA_EVALUATION.value}` in tags."
                    )
            except:
                logger.debug(
                    f"Model version set {experiment_model_version_set_name} doesn't exist. "
                    "Creating new model version set."
                )

                evaluation_mvs_freeform_tags = {
                    Tags.AQUA_EVALUATION.value: Tags.AQUA_EVALUATION.value,
                }

                model_version_set = (
                    ModelVersionSet()
                    .with_compartment_id(target_compartment)
                    .with_project_id(target_project)
                    .with_name(experiment_model_version_set_name)
                    .with_description(
                        create_aqua_evaluation_details.experiment_description
                    )
                    .with_freeform_tags(**evaluation_mvs_freeform_tags)
                    # TODO: decide what parameters will be needed
                    .create(**kwargs)
                )
                logger.debug(
                    f"Successfully created model version set {experiment_model_version_set_name} with id {model_version_set.id}."
                )
                experiment_model_version_set_id = model_version_set.id
        else:
            model_version_set = ModelVersionSet.from_id(experiment_model_version_set_id)
            if not utils._is_valid_mvs(model_version_set, Tags.AQUA_EVALUATION.value):
                raise AquaValueError(
                    f"Invalid experiment id. Please provide an experiment with `{Tags.AQUA_EVALUATION.value}` in tags."
                )
            experiment_model_version_set_name = model_version_set.name

        evaluation_model_custom_metadata = ModelCustomMetadata()
        evaluation_model_custom_metadata.add(
            key=EvaluationCustomMetadata.EVALUATION_SOURCE.value,
            value=create_aqua_evaluation_details.evaluation_source_id,
        )
        evaluation_model_custom_metadata.add(
            key=EvaluationCustomMetadata.EVALUATION_OUTPUT_PATH.value,
            value=create_aqua_evaluation_details.report_path,
        )
        evaluation_model_custom_metadata.add(
            key=EvaluationCustomMetadata.EVALUATION_SOURCE_NAME.value,
            value=evaluation_source.display_name,
        )

        evaluation_model_taxonomy_metadata = ModelTaxonomyMetadata()
        evaluation_model_taxonomy_metadata[
            MetadataTaxonomyKeys.HYPERPARAMETERS
        ].value = {
            "model_params": {
                key: value for key, value in asdict(evaluation_model_parameters).items()
            }
        }

        evaluation_model = (
            DataScienceModel()
            .with_compartment_id(target_compartment)
            .with_project_id(target_project)
            .with_display_name(create_aqua_evaluation_details.evaluation_name)
            .with_description(create_aqua_evaluation_details.evaluation_description)
            .with_model_version_set_id(experiment_model_version_set_id)
            .with_custom_metadata_list(evaluation_model_custom_metadata)
            .with_defined_metadata_list(evaluation_model_taxonomy_metadata)
            .with_provenance_metadata(ModelProvenanceMetadata(training_id=UNKNOWN))
            # TODO uncomment this once the evaluation container will get the updated version of the ADS
            # .with_input_schema(create_aqua_evaluation_details.to_dict())
            # TODO: decide what parameters will be needed
            .create(
                remove_existing_artifact=False,  # TODO: added here for the purpose of demo and will revisit later
                **kwargs,
            )
        )
        logger.debug(
            f"Successfully created evaluation model {evaluation_model.id} for {create_aqua_evaluation_details.evaluation_source_id}."
        )

        # TODO: validate metrics if it's provided

        evaluation_job_freeform_tags = {
            EvaluationJobTags.AQUA_EVALUATION.value: EvaluationJobTags.AQUA_EVALUATION.value,
            EvaluationJobTags.EVALUATION_MODEL_ID.value: evaluation_model.id,
        }

        evaluation_job = Job(name=evaluation_model.display_name).with_infrastructure(
            DataScienceJob()
            .with_log_group_id(create_aqua_evaluation_details.log_group_id)
            .with_log_id(create_aqua_evaluation_details.log_id)
            .with_compartment_id(target_compartment)
            .with_project_id(target_project)
            .with_shape_name(create_aqua_evaluation_details.shape_name)
            .with_block_storage_size(create_aqua_evaluation_details.block_storage_size)
            .with_freeform_tag(**evaluation_job_freeform_tags)
        )
        if (
            create_aqua_evaluation_details.memory_in_gbs
            and create_aqua_evaluation_details.ocpus
        ):
            evaluation_job.infrastructure.with_shape_config_details(
                memory_in_gbs=create_aqua_evaluation_details.memory_in_gbs,
                ocpus=create_aqua_evaluation_details.ocpus,
            )
        if AQUA_JOB_SUBNET_ID:
            evaluation_job.infrastructure.with_subnet_id(AQUA_JOB_SUBNET_ID)
        else:
            if NB_SESSION_IDENTIFIER in os.environ:
                # apply default subnet id for job by setting ME_STANDALONE
                # so as to avoid using the notebook session's networking when running on it
                # https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/infra_and_runtime.html#networking
                evaluation_job.infrastructure.with_job_infrastructure_type(
                    JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING
                )

        container_image = self._get_evaluation_container(
            create_aqua_evaluation_details.evaluation_source_id
        )

        evaluation_job.with_runtime(
            self._build_evaluation_runtime(
                evaluation_id=evaluation_model.id,
                evaluation_source_id=(
                    create_aqua_evaluation_details.evaluation_source_id
                ),
                container_image=container_image,
                dataset_path=evaluation_dataset_path,
                report_path=create_aqua_evaluation_details.report_path,
                model_parameters=create_aqua_evaluation_details.model_parameters,
                metrics=create_aqua_evaluation_details.metrics,
            )
        ).create(
            **kwargs
        )  ## TODO: decide what parameters will be needed
        logger.debug(
            f"Successfully created evaluation job {evaluation_job.id} for {create_aqua_evaluation_details.evaluation_source_id}."
        )

        evaluation_job_run = evaluation_job.run(
            name=evaluation_model.display_name,
            freeform_tags=evaluation_job_freeform_tags,
            wait=False,
        )
        logger.debug(
            f"Successfully created evaluation job run {evaluation_job_run.id} for {create_aqua_evaluation_details.evaluation_source_id}."
        )

        evaluation_model_custom_metadata.add(
            key=EvaluationCustomMetadata.EVALUATION_JOB_ID.value,
            value=evaluation_job.id,
        )
        evaluation_model_custom_metadata.add(
            key=EvaluationCustomMetadata.EVALUATION_JOB_RUN_ID.value,
            value=evaluation_job_run.id,
        )
        updated_custom_metadata_list = [
            Metadata(**metadata)
            for metadata in evaluation_model_custom_metadata.to_dict()["data"]
        ]

        self.ds_client.update_model(
            model_id=evaluation_model.id,
            update_model_details=UpdateModelDetails(
                custom_metadata_list=updated_custom_metadata_list,
                freeform_tags={
                    EvaluationModelTags.AQUA_EVALUATION.value: EvaluationModelTags.AQUA_EVALUATION.value,
                },
            ),
        )

        self.ds_client.update_model_provenance(
            model_id=evaluation_model.id,
            update_model_provenance_details=UpdateModelProvenanceDetails(
                training_id=evaluation_job_run.id
            ),
        )

        # tracks unique evaluation that were created for the given evaluation source
        self.telemetry.record_event_async(
            category="aqua/evaluation",
            action="create",
            detail=evaluation_source.display_name,
        )

        return AquaEvaluationSummary(
            id=evaluation_model.id,
            name=evaluation_model.display_name,
            console_url=get_console_link(
                resource=Resource.MODEL.value,
                ocid=evaluation_model.id,
                region=self.region,
            ),
            time_created=str(evaluation_model.dsc_model.time_created),
            lifecycle_state=evaluation_job_run.lifecycle_state or UNKNOWN,
            lifecycle_details=evaluation_job_run.lifecycle_details or UNKNOWN,
            experiment=AquaResourceIdentifier(
                id=experiment_model_version_set_id,
                name=experiment_model_version_set_name,
                url=get_console_link(
                    resource=Resource.MODEL_VERSION_SET.value,
                    ocid=experiment_model_version_set_id,
                    region=self.region,
                ),
            ),
            source=AquaResourceIdentifier(
                id=create_aqua_evaluation_details.evaluation_source_id,
                name=evaluation_source.display_name,
                url=get_console_link(
                    resource=(
                        Resource.MODEL_DEPLOYMENT.value
                        if DataScienceResource.MODEL_DEPLOYMENT.value
                        in create_aqua_evaluation_details.evaluation_source_id
                        else Resource.MODEL.value
                    ),
                    ocid=create_aqua_evaluation_details.evaluation_source_id,
                    region=self.region,
                ),
            ),
            job=AquaResourceIdentifier(
                id=evaluation_job.id,
                name=evaluation_job.name,
                url=get_console_link(
                    resource=Resource.JOB.value,
                    ocid=evaluation_job.id,
                    region=self.region,
                ),
            ),
            tags=dict(
                aqua_evaluation=EvaluationModelTags.AQUA_EVALUATION.value,
                evaluation_job_id=evaluation_job.id,
                evaluation_source=create_aqua_evaluation_details.evaluation_source_id,
                evaluation_experiment_id=experiment_model_version_set_id,
            ),
            parameters=AquaEvalParams(),
        )

    def _build_evaluation_runtime(
        self,
        evaluation_id: str,
        evaluation_source_id: str,
        container_image: str,
        dataset_path: str,
        report_path: str,
        model_parameters: dict,
        metrics: List = None,
    ) -> Runtime:
        """Builds evaluation runtime for Job."""
        # TODO the image name needs to be extracted from the mapping index.json file.
        runtime = (
            ContainerRuntime()
            .with_image(container_image)
            .with_environment_variable(
                **{
                    "AIP_SMC_EVALUATION_ARGUMENTS": json.dumps(
                        asdict(
                            self._build_launch_cmd(
                                evaluation_id=evaluation_id,
                                evaluation_source_id=evaluation_source_id,
                                dataset_path=dataset_path,
                                report_path=report_path,
                                model_parameters=model_parameters,
                                metrics=metrics,
                            )
                        )
                    ),
                    "CONDA_BUCKET_NS": CONDA_BUCKET_NS,
                },
            )
        )

        return runtime

    @staticmethod
    def _get_evaluation_container(source_id: str) -> str:
        # todo: use the source, identify if it is a model or a deployment. If latter, then fetch the base model id
        # from the deployment object, and call ds_client.get_model() to get model details. Use custom metadata to
        # get the container_type_key. Pass this key as container_type to get_container_image method.

        # fetch image name from config
        container_image = get_container_image(
            container_type="odsc-llm-evaluate",
        )
        logger.info(f"Aqua Image used for evaluating {source_id} :{container_image}")
        return container_image

    def _build_launch_cmd(
        self,
        evaluation_id: str,
        evaluation_source_id: str,
        dataset_path: str,
        report_path: str,
        model_parameters: dict,
        metrics: List = None,
    ):
        return AquaEvaluationCommands(
            evaluation_id=evaluation_id,
            evaluation_target_id=evaluation_source_id,
            input_data={
                "columns": {
                    "prompt": "prompt",
                    "completion": "completion",
                    "category": "category",
                },
                "format": Path(dataset_path).suffix,
                "url": dataset_path,
            },
            metrics=metrics,
            output_dir=report_path,
            params=model_parameters,
        )

@telemetry(entry_point="plugin=evaluation&action=get", name="aqua")
|
801
|
+
def get(self, eval_id) -> AquaEvaluationDetail:
|
802
|
+
"""Gets the information of an Aqua evalution.
|
803
|
+
|
804
|
+
Parameters
|
805
|
+
----------
|
806
|
+
eval_id: str
|
807
|
+
The model OCID.
|
808
|
+
|
809
|
+
Returns
|
810
|
+
-------
|
811
|
+
AquaEvaluationDetail:
|
812
|
+
The instance of AquaEvaluationDetail.
|
813
|
+
"""
|
814
|
+
logger.info(f"Fetching evaluation: {eval_id} details ...")
|
815
|
+
|
816
|
+
resource = utils.query_resource(eval_id)
|
817
|
+
model_provenance = self.ds_client.get_model_provenance(eval_id).data
|
818
|
+
|
819
|
+
if not resource:
|
820
|
+
raise AquaRuntimeError(
|
821
|
+
f"Failed to retrieve evalution {eval_id}."
|
822
|
+
"Please check if the OCID is correct."
|
823
|
+
)
|
824
|
+
jobrun_id = model_provenance.training_id
|
825
|
+
job_run_details = self._fetch_jobrun(
|
826
|
+
resource, use_rqs=False, jobrun_id=jobrun_id
|
827
|
+
)
|
828
|
+
|
829
|
+
try:
|
830
|
+
log_id = job_run_details.log_details.log_id
|
831
|
+
except Exception as e:
|
832
|
+
logger.debug(f"Failed to get associated log. {str(e)}")
|
833
|
+
log_id = ""
|
834
|
+
|
835
|
+
try:
|
836
|
+
loggroup_id = job_run_details.log_details.log_group_id
|
837
|
+
except Exception as e:
|
838
|
+
logger.debug(f"Failed to get associated loggroup. {str(e)}")
|
839
|
+
loggroup_id = ""
|
840
|
+
|
841
|
+
loggroup_url = get_log_links(region=self.region, log_group_id=loggroup_id)
|
842
|
+
log_url = get_log_links(
|
843
|
+
region=self.region,
|
844
|
+
log_group_id=loggroup_id,
|
845
|
+
log_id=log_id,
|
846
|
+
compartment_id=job_run_details.compartment_id,
|
847
|
+
source_id=jobrun_id
|
848
|
+
) if job_run_details else ""
|
849
|
+
|
850
|
+
log_name = None
|
851
|
+
loggroup_name = None
|
852
|
+
|
853
|
+
if log_id:
|
854
|
+
try:
|
855
|
+
log = utils.query_resource(log_id, return_all=False)
|
856
|
+
log_name = log.display_name if log else ""
|
857
|
+
except:
|
858
|
+
pass
|
859
|
+
|
860
|
+
if loggroup_id:
|
861
|
+
try:
|
862
|
+
loggroup = utils.query_resource(loggroup_id, return_all=False)
|
863
|
+
loggroup_name = loggroup.display_name if loggroup else ""
|
864
|
+
except:
|
865
|
+
pass
|
866
|
+
|
867
|
+
try:
|
868
|
+
introspection = json.loads(
|
869
|
+
self._get_attribute_from_model_metadata(resource, "ArtifactTestResults")
|
870
|
+
)
|
871
|
+
except:
|
872
|
+
introspection = {}
|
873
|
+
|
874
|
+
summary = AquaEvaluationDetail(
|
875
|
+
**self._process(resource),
|
876
|
+
**self._get_status(model=resource, jobrun=job_run_details),
|
877
|
+
job=self._build_job_identifier(
|
878
|
+
job_run_details=job_run_details,
|
879
|
+
),
|
880
|
+
log_group=AquaResourceIdentifier(loggroup_id, loggroup_name, loggroup_url),
|
881
|
+
log=AquaResourceIdentifier(log_id, log_name, log_url),
|
882
|
+
introspection=introspection,
|
883
|
+
)
|
884
|
+
summary.parameters.shape = (
|
885
|
+
job_run_details.job_infrastructure_configuration_details.shape_name
|
886
|
+
)
|
887
|
+
return summary
|
888
|
+
|
889
|
+
@telemetry(entry_point="plugin=evaluation&action=list", name="aqua")
|
890
|
+
def list(
|
891
|
+
self, compartment_id: str = None, project_id: str = None, **kwargs
|
892
|
+
) -> List[AquaEvaluationSummary]:
|
893
|
+
"""List Aqua evaluations in a given compartment and under certain project.
|
894
|
+
|
895
|
+
Parameters
|
896
|
+
----------
|
897
|
+
compartment_id: (str, optional). Defaults to `None`.
|
898
|
+
The compartment OCID.
|
899
|
+
project_id: (str, optional). Defaults to `None`.
|
900
|
+
The project OCID.
|
901
|
+
kwargs
|
902
|
+
Additional keyword arguments.
|
903
|
+
|
904
|
+
Returns
|
905
|
+
-------
|
906
|
+
List[AquaEvaluationSummary]:
|
907
|
+
The list of the `ads.aqua.evalution.AquaEvaluationSummary`.
|
908
|
+
"""
|
909
|
+
logger.info(f"Fetching evaluations from compartment {compartment_id}.")
|
910
|
+
models = utils.query_resources(
|
911
|
+
compartment_id=compartment_id,
|
912
|
+
resource_type="datasciencemodel",
|
913
|
+
tag_list=[EvaluationModelTags.AQUA_EVALUATION.value],
|
914
|
+
)
|
915
|
+
logger.info(f"Fetched {len(models)} evaluations.")
|
916
|
+
|
917
|
+
# TODO: add filter based on project_id if needed.
|
918
|
+
|
919
|
+
mapping = self._prefetch_resources(compartment_id)
|
920
|
+
|
921
|
+
evaluations = []
|
922
|
+
async_tasks = []
|
923
|
+
for model in models:
|
924
|
+
|
925
|
+
if model.identifier in self._eval_cache.keys():
|
926
|
+
logger.debug(f"Retrieving evaluation {model.identifier} from cache.")
|
927
|
+
evaluations.append(self._eval_cache.get(model.identifier))
|
928
|
+
|
929
|
+
else:
|
930
|
+
jobrun_id = self._get_attribute_from_model_metadata(
|
931
|
+
model, EvaluationCustomMetadata.EVALUATION_JOB_RUN_ID.value
|
932
|
+
)
|
933
|
+
job_run = mapping.get(jobrun_id)
|
934
|
+
|
935
|
+
if not job_run:
|
936
|
+
async_tasks.append((model, jobrun_id))
|
937
|
+
else:
|
938
|
+
evaluations.append(self._process_evaluation_summary(model, job_run))
|
939
|
+
|
940
|
+
with ThreadPoolExecutor(max_workers=10) as executor:
|
941
|
+
future_to_model = {
|
942
|
+
executor.submit(
|
943
|
+
self._fetch_jobrun, model, use_rqs=True, jobrun_id=jobrun_id
|
944
|
+
): model
|
945
|
+
for model, jobrun_id in async_tasks
|
946
|
+
}
|
947
|
+
for future in as_completed(future_to_model):
|
948
|
+
model = future_to_model[future]
|
949
|
+
try:
|
950
|
+
jobrun = future.result()
|
951
|
+
evaluations.append(
|
952
|
+
self._process_evaluation_summary(model=model, jobrun=jobrun)
|
953
|
+
)
|
954
|
+
except Exception as exc:
|
955
|
+
logger.error(
|
956
|
+
f"Processing evaluation: {model.identifier} generated an exception: {exc}"
|
957
|
+
)
|
958
|
+
evaluations.append(
|
959
|
+
self._process_evaluation_summary(model=model, jobrun=None)
|
960
|
+
)
|
961
|
+
|
962
|
+
# tracks number of times deployment listing was called
|
963
|
+
self.telemetry.record_event_async(category="aqua/evaluation", action="list")
|
964
|
+
|
965
|
+
return evaluations
|
966
|
+
|
967
|
+
def _process_evaluation_summary(
|
968
|
+
self,
|
969
|
+
model: oci.resource_search.models.ResourceSummary,
|
970
|
+
jobrun: oci.resource_search.models.ResourceSummary = None,
|
971
|
+
) -> AquaEvaluationSummary:
|
972
|
+
"""Builds AquaEvaluationSummary from model and jobrun."""
|
973
|
+
|
974
|
+
evaluation_summary = AquaEvaluationSummary(
|
975
|
+
**self._process(model),
|
976
|
+
**self._get_status(
|
977
|
+
model=model,
|
978
|
+
jobrun=jobrun,
|
979
|
+
),
|
980
|
+
job=self._build_job_identifier(
|
981
|
+
job_run_details=jobrun,
|
982
|
+
),
|
983
|
+
)
|
984
|
+
|
985
|
+
# Add evaluation in terminal state into cache
|
986
|
+
if evaluation_summary.lifecycle_state in EVAL_TERMINATION_STATE:
|
987
|
+
self._eval_cache.__setitem__(key=model.identifier, value=evaluation_summary)
|
988
|
+
|
989
|
+
return evaluation_summary
|
990
|
+
|
991
|
+
def _if_eval_artifact_exist(
|
992
|
+
self, model: oci.resource_search.models.ResourceSummary
|
993
|
+
) -> bool:
|
994
|
+
"""Checks if the evaluation artifact exists."""
|
995
|
+
try:
|
996
|
+
response = self.ds_client.head_model_artifact(model_id=model.identifier)
|
997
|
+
return True if response.status == 200 else False
|
998
|
+
except oci.exceptions.ServiceError as ex:
|
999
|
+
if ex.status == 404:
|
1000
|
+
logger.info("Evaluation artifact not found.")
|
1001
|
+
return False
|
1002
|
+
|
1003
|
+
@telemetry(entry_point="plugin=evaluation&action=get_status", name="aqua")
|
1004
|
+
def get_status(self, eval_id: str) -> dict:
|
1005
|
+
"""Gets evaluation's current status.
|
1006
|
+
|
1007
|
+
Parameters
|
1008
|
+
----------
|
1009
|
+
eval_id: str
|
1010
|
+
The evaluation ocid.
|
1011
|
+
|
1012
|
+
Returns
|
1013
|
+
-------
|
1014
|
+
dict
|
1015
|
+
"""
|
1016
|
+
eval = utils.query_resource(eval_id)
|
1017
|
+
|
1018
|
+
# TODO: add job_run_id as input param to skip the query below
|
1019
|
+
model_provenance = self.ds_client.get_model_provenance(eval_id).data
|
1020
|
+
|
1021
|
+
if not eval:
|
1022
|
+
raise AquaRuntimeError(
|
1023
|
+
f"Failed to retrieve evalution {eval_id}."
|
1024
|
+
"Please check if the OCID is correct."
|
1025
|
+
)
|
1026
|
+
jobrun_id = model_provenance.training_id
|
1027
|
+
job_run_details = self._fetch_jobrun(eval, use_rqs=False, jobrun_id=jobrun_id)
|
1028
|
+
|
1029
|
+
try:
|
1030
|
+
log_id = job_run_details.log_details.log_id
|
1031
|
+
except Exception as e:
|
1032
|
+
logger.debug(f"Failed to get associated log. {str(e)}")
|
1033
|
+
log_id = ""
|
1034
|
+
|
1035
|
+
try:
|
1036
|
+
loggroup_id = job_run_details.log_details.log_group_id
|
1037
|
+
except Exception as e:
|
1038
|
+
logger.debug(f"Failed to get associated log. {str(e)}")
|
1039
|
+
loggroup_id = ""
|
1040
|
+
|
1041
|
+
loggroup_url = get_log_links(region=self.region, log_group_id=loggroup_id)
|
1042
|
+
log_url = get_log_links(
|
1043
|
+
region=self.region,
|
1044
|
+
log_group_id=loggroup_id,
|
1045
|
+
log_id=log_id,
|
1046
|
+
compartment_id=job_run_details.compartment_id,
|
1047
|
+
source_id=jobrun_id
|
1048
|
+
) if job_run_details else ""
|
1049
|
+
|
1050
|
+
return dict(
|
1051
|
+
id=eval_id,
|
1052
|
+
**self._get_status(
|
1053
|
+
model=eval,
|
1054
|
+
jobrun=job_run_details,
|
1055
|
+
),
|
1056
|
+
log_id=log_id,
|
1057
|
+
log_url=log_url,
|
1058
|
+
loggroup_id=loggroup_id,
|
1059
|
+
loggroup_url=loggroup_url,
|
1060
|
+
)
|
1061
|
+
|
1062
|
+
def get_supported_metrics(self) -> dict:
|
1063
|
+
"""Gets a list of supported metrics for evaluation."""
|
1064
|
+
# TODO: implement it when starting to support more metrics.
|
1065
|
+
return [
|
1066
|
+
{
|
1067
|
+
"use_case": ["text_generation"],
|
1068
|
+
"key": "bertscore",
|
1069
|
+
"name": "BERT Score",
|
1070
|
+
"description": (
|
1071
|
+
"BERT Score is a metric for evaluating the quality of text "
|
1072
|
+
"generation models, such as machine translation or summarization. "
|
1073
|
+
"It utilizes pre-trained BERT contextual embeddings for both the "
|
1074
|
+
"generated and reference texts, and then calculates the cosine "
|
1075
|
+
"similarity between these embeddings."
|
1076
|
+
),
|
1077
|
+
"args": {},
|
1078
|
+
},
|
1079
|
+
{
|
1080
|
+
"use_case": ["text_generation"],
|
1081
|
+
"key": "rouge",
|
1082
|
+
"name": "ROUGE Score",
|
1083
|
+
"description": (
|
1084
|
+
"ROUGE scores compare a candidate document to a collection of "
|
1085
|
+
"reference documents to evaluate the similarity between them. "
|
1086
|
+
"The metrics range from 0 to 1, with higher scores indicating "
|
1087
|
+
"greater similarity. ROUGE is more suitable for models that don't "
|
1088
|
+
"include paraphrasing and do not generate new text units that don't "
|
1089
|
+
"appear in the references."
|
1090
|
+
),
|
1091
|
+
"args": {},
|
1092
|
+
},
|
1093
|
+
]
|
1094
|
+
|
1095
|
+
@telemetry(entry_point="plugin=evaluation&action=load_metrics", name="aqua")
|
1096
|
+
def load_metrics(self, eval_id: str) -> AquaEvalMetrics:
|
1097
|
+
"""Loads evalution metrics markdown from artifacts.
|
1098
|
+
|
1099
|
+
Parameters
|
1100
|
+
----------
|
1101
|
+
eval_id: str
|
1102
|
+
The evaluation ocid.
|
1103
|
+
|
1104
|
+
Returns
|
1105
|
+
-------
|
1106
|
+
AquaEvalMetrics:
|
1107
|
+
An instance of AquaEvalMetrics.
|
1108
|
+
"""
|
1109
|
+
if eval_id in self._metrics_cache.keys():
|
1110
|
+
logger.info(f"Returning metrics from cache.")
|
1111
|
+
eval_metrics = self._metrics_cache.get(eval_id)
|
1112
|
+
if len(eval_metrics.report) > 0:
|
1113
|
+
return eval_metrics
|
1114
|
+
|
1115
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
1116
|
+
logger.info(f"Downloading evaluation artifact: {eval_id}.")
|
1117
|
+
DataScienceModel.from_id(eval_id).download_artifact(
|
1118
|
+
temp_dir,
|
1119
|
+
auth=self._auth,
|
1120
|
+
)
|
1121
|
+
|
1122
|
+
files_in_artifact = get_files(temp_dir)
|
1123
|
+
report_content = self._read_from_artifact(
|
1124
|
+
temp_dir, files_in_artifact, utils.EVALUATION_REPORT_MD
|
1125
|
+
)
|
1126
|
+
try:
|
1127
|
+
report = json.loads(
|
1128
|
+
self._read_from_artifact(
|
1129
|
+
temp_dir, files_in_artifact, utils.EVALUATION_REPORT_JSON
|
1130
|
+
)
|
1131
|
+
)
|
1132
|
+
except Exception as e:
|
1133
|
+
logger.debug(
|
1134
|
+
"Failed to load `report.json` from evaluation artifact" f"{str(e)}"
|
1135
|
+
)
|
1136
|
+
report = {}
|
1137
|
+
|
1138
|
+
# TODO: after finalizing the format of report.json, move the constant to class
|
1139
|
+
eval_metrics = AquaEvalMetrics(
|
1140
|
+
id=eval_id,
|
1141
|
+
report=base64.b64encode(report_content).decode(),
|
1142
|
+
metric_results=[
|
1143
|
+
AquaEvalMetric(
|
1144
|
+
key=metric_key,
|
1145
|
+
name=metadata.get("name", utils.UNKNOWN),
|
1146
|
+
description=metadata.get("description", utils.UNKNOWN),
|
1147
|
+
)
|
1148
|
+
for metric_key, metadata in report.get("metric_results", {}).items()
|
1149
|
+
],
|
1150
|
+
metric_summary_result=[
|
1151
|
+
AquaEvalMetricSummary(**m)
|
1152
|
+
for m in report.get("metric_summary_result", [{}])
|
1153
|
+
],
|
1154
|
+
)
|
1155
|
+
|
1156
|
+
if report_content:
|
1157
|
+
self._metrics_cache.__setitem__(key=eval_id, value=eval_metrics)
|
1158
|
+
|
1159
|
+
return eval_metrics
|
1160
|
+
|
1161
|
+
def _read_from_artifact(self, artifact_dir, files, target):
|
1162
|
+
"""Reads target file from artifacts.
|
1163
|
+
|
1164
|
+
Parameters
|
1165
|
+
----------
|
1166
|
+
artifact_dir: str
|
1167
|
+
Path of the artifact.
|
1168
|
+
files: list
|
1169
|
+
List of files name in artifacts.
|
1170
|
+
target: str
|
1171
|
+
Target file name.
|
1172
|
+
|
1173
|
+
Return
|
1174
|
+
------
|
1175
|
+
bytes
|
1176
|
+
"""
|
1177
|
+
content = None
|
1178
|
+
for f in files:
|
1179
|
+
if os.path.basename(f) == target:
|
1180
|
+
logger.info(f"Reading {f}...")
|
1181
|
+
with open(os.path.join(artifact_dir, f), "rb") as f:
|
1182
|
+
content = f.read()
|
1183
|
+
break
|
1184
|
+
|
1185
|
+
if not content:
|
1186
|
+
raise AquaFileNotFoundError(
|
1187
|
+
"Related Resource Not Authorized Or Not Found:"
|
1188
|
+
f"Missing `{target}` in evaluation artifact."
|
1189
|
+
)
|
1190
|
+
return content
|
1191
|
+
|
1192
|
+
@telemetry(entry_point="plugin=evaluation&action=download_report", name="aqua")
|
1193
|
+
def download_report(self, eval_id) -> AquaEvalReport:
|
1194
|
+
"""Downloads HTML report from model artifact.
|
1195
|
+
|
1196
|
+
Parameters
|
1197
|
+
----------
|
1198
|
+
eval_id: str
|
1199
|
+
The evaluation ocid.
|
1200
|
+
|
1201
|
+
Returns
|
1202
|
+
-------
|
1203
|
+
AquaEvalReport:
|
1204
|
+
An instance of AquaEvalReport.
|
1205
|
+
|
1206
|
+
Raises
|
1207
|
+
------
|
1208
|
+
AquaFileNotFoundError:
|
1209
|
+
When missing `report.html` in evaluation artifact.
|
1210
|
+
"""
|
1211
|
+
if eval_id in self._report_cache.keys():
|
1212
|
+
logger.info(f"Returning report from cache.")
|
1213
|
+
report = self._report_cache.get(eval_id)
|
1214
|
+
if report.content:
|
1215
|
+
return report
|
1216
|
+
|
1217
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
1218
|
+
DataScienceModel.from_id(eval_id).download_artifact(
|
1219
|
+
temp_dir,
|
1220
|
+
auth=self._auth,
|
1221
|
+
)
|
1222
|
+
content = self._read_from_artifact(
|
1223
|
+
temp_dir, get_files(temp_dir), utils.EVALUATION_REPORT
|
1224
|
+
)
|
1225
|
+
|
1226
|
+
report = AquaEvalReport(
|
1227
|
+
evaluation_id=eval_id, content=base64.b64encode(content).decode()
|
1228
|
+
)
|
1229
|
+
|
1230
|
+
self._report_cache.__setitem__(key=eval_id, value=report)
|
1231
|
+
|
1232
|
+
return report
|
1233
|
+
|
1234
|
+
@telemetry(entry_point="plugin=evaluation&action=cancel", name="aqua")
|
1235
|
+
def cancel(self, eval_id) -> dict:
|
1236
|
+
"""Cancels the job run for the given evaluation id.
|
1237
|
+
Parameters
|
1238
|
+
----------
|
1239
|
+
eval_id: str
|
1240
|
+
The evaluation ocid.
|
1241
|
+
|
1242
|
+
Returns
|
1243
|
+
-------
|
1244
|
+
dict containing id, status and time_accepted
|
1245
|
+
|
1246
|
+
Raises
|
1247
|
+
------
|
1248
|
+
AquaRuntimeError:
|
1249
|
+
if a model doesn't exist for the given eval_id
|
1250
|
+
AquaMissingKeyError:
|
1251
|
+
if training_id is missing the job run id
|
1252
|
+
"""
|
1253
|
+
model = DataScienceModel.from_id(eval_id)
|
1254
|
+
if not model:
|
1255
|
+
raise AquaRuntimeError(
|
1256
|
+
f"Failed to get evaluation details for model {eval_id}"
|
1257
|
+
)
|
1258
|
+
job_run_id = model.provenance_metadata.training_id
|
1259
|
+
if not job_run_id:
|
1260
|
+
raise AquaMissingKeyError(
|
1261
|
+
"Model provenance is missing job run training_id key"
|
1262
|
+
)
|
1263
|
+
|
1264
|
+
status = dict(id=eval_id, status=UNKNOWN, time_accepted="")
|
1265
|
+
run = DataScienceJobRun.from_ocid(job_run_id)
|
1266
|
+
if run.lifecycle_state in [
|
1267
|
+
DataScienceJobRun.LIFECYCLE_STATE_ACCEPTED,
|
1268
|
+
DataScienceJobRun.LIFECYCLE_STATE_IN_PROGRESS,
|
1269
|
+
DataScienceJobRun.LIFECYCLE_STATE_NEEDS_ATTENTION,
|
1270
|
+
]:
|
1271
|
+
self._cancel_job_run(run, model)
|
1272
|
+
status = dict(
|
1273
|
+
id=eval_id,
|
1274
|
+
lifecycle_state="CANCELING",
|
1275
|
+
time_accepted=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f%z"),
|
1276
|
+
)
|
1277
|
+
return status
|
1278
|
+
|
1279
|
+
@staticmethod
|
1280
|
+
@fire_and_forget
|
1281
|
+
def _cancel_job_run(run, model):
|
1282
|
+
try:
|
1283
|
+
run.cancel()
|
1284
|
+
logger.info(f"Canceling Job Run: {run.id} for evaluation {model.id}")
|
1285
|
+
except oci.exceptions.ServiceError as ex:
|
1286
|
+
logger.error(
|
1287
|
+
f"Exception occurred while canceling job run: {run.id} for evaluation {model.id}. "
|
1288
|
+
f"Exception message: {ex}"
|
1289
|
+
)
|
1290
|
+
|
1291
|
+
@telemetry(entry_point="plugin=evaluation&action=delete", name="aqua")
|
1292
|
+
def delete(self, eval_id):
|
1293
|
+
"""Deletes the job and the associated model for the given evaluation id.
|
1294
|
+
Parameters
|
1295
|
+
----------
|
1296
|
+
eval_id: str
|
1297
|
+
The evaluation ocid.
|
1298
|
+
|
1299
|
+
Returns
|
1300
|
+
-------
|
1301
|
+
dict containing id, status and time_accepted
|
1302
|
+
|
1303
|
+
Raises
|
1304
|
+
------
|
1305
|
+
AquaRuntimeError:
|
1306
|
+
if a model doesn't exist for the given eval_id
|
1307
|
+
AquaMissingKeyError:
|
1308
|
+
if training_id is missing the job run id
|
1309
|
+
"""
|
1310
|
+
|
1311
|
+
model = DataScienceModel.from_id(eval_id)
|
1312
|
+
if not model:
|
1313
|
+
raise AquaRuntimeError(
|
1314
|
+
f"Failed to get evaluation details for model {eval_id}"
|
1315
|
+
)
|
1316
|
+
|
1317
|
+
try:
|
1318
|
+
job_id = model.custom_metadata_list.get(
|
1319
|
+
EvaluationCustomMetadata.EVALUATION_JOB_ID.value
|
1320
|
+
).value
|
1321
|
+
except ValueError:
|
1322
|
+
raise AquaMissingKeyError(
|
1323
|
+
f"Custom metadata is missing {EvaluationCustomMetadata.EVALUATION_JOB_ID.value} key"
|
1324
|
+
)
|
1325
|
+
|
1326
|
+
job = DataScienceJob.from_id(job_id)
|
1327
|
+
|
1328
|
+
self._delete_job_and_model(job, model)
|
1329
|
+
|
1330
|
+
status = dict(
|
1331
|
+
id=eval_id,
|
1332
|
+
lifecycle_state="DELETING",
|
1333
|
+
time_accepted=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f%z"),
|
1334
|
+
)
|
1335
|
+
return status
|
1336
|
+
|
1337
|
+
@staticmethod
|
1338
|
+
@fire_and_forget
|
1339
|
+
def _delete_job_and_model(job, model):
|
1340
|
+
try:
|
1341
|
+
job.dsc_job.delete(force_delete=True)
|
1342
|
+
logger.info(f"Deleting Job: {job.job_id} for evaluation {model.id}")
|
1343
|
+
|
1344
|
+
model.delete()
|
1345
|
+
logger.info(f"Deleting evaluation: {model.id}")
|
1346
|
+
except oci.exceptions.ServiceError as ex:
|
1347
|
+
logger.error(
|
1348
|
+
f"Exception occurred while deleting job: {job.job_id} for evaluation {model.id}. "
|
1349
|
+
f"Exception message: {ex}"
|
1350
|
+
)
|
1351
|
+
|
1352
|
+
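Both helpers above are decorated with `fire_and_forget`, so `cancel()` and `delete()` return a transitional state immediately while the OCI calls complete in the background. A rough sketch of what such a decorator could look like; the real implementation lives elsewhere in the aqua utilities and may differ:

import concurrent.futures
import functools

_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)

def fire_and_forget(func):
    """Run the wrapped callable on a background thread and return immediately."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return _executor.submit(func, *args, **kwargs)
    return wrapper
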
    def load_evaluation_config(self, eval_id):
        # TODO
        return {
            "model_params": {
                "max_tokens": 500,
                "temperature": 0.7,
                "top_p": 1.0,
                "top_k": 50,
                "presence_penalty": 0.0,
                "frequency_penalty": 0.0,
                "stop": [],
            },
            "shape": {
                "VM.Standard.E3.Flex": {
                    "ocpu": 2,
                    "memory_in_gbs": 64,
                    "block_storage_size": 100,
                },
                "VM.Standard.E4.Flex": {
                    "ocpu": 2,
                    "memory_in_gbs": 64,
                    "block_storage_size": 100,
                },
                "VM.Standard3.Flex": {
                    "ocpu": 2,
                    "memory_in_gbs": 64,
                    "block_storage_size": 100,
                },
                "VM.Optimized3.Flex": {
                    "ocpu": 2,
                    "memory_in_gbs": 64,
                    "block_storage_size": 100,
                },
                "VM.Standard.A1.Flex": {
                    "ocpu": 2,
                    "memory_in_gbs": 64,
                    "block_storage_size": 100,
                },
            },
            "default": {
                "ocpu": 2,
                "memory_in_gbs": 64,
                "block_storage_size": 100,
            },
        }

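The static configuration returned above is a plain nested dict; a small usage sketch (the `app` instance is assumed, and the eval_id argument is currently unused by the method):

config = app.load_evaluation_config(eval_id="ocid1.datasciencemodel.oc1..<unique_id>")
model_params = config["model_params"]                        # max_tokens, temperature, top_p, ...
shape = config["shape"].get("VM.Standard.E4.Flex", config["default"])
print(model_params["temperature"], shape["memory_in_gbs"])   # 0.7 64
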
    def _get_attribute_from_model_metadata(
        self,
        model: oci.resource_search.models.ResourceSummary,
        target_attribute: str,
    ) -> str:
        try:
            return self._extract_metadata(
                model.additional_details.get(RqsAdditionalDetails.METADATA),
                target_attribute,
            )
        except:
            logger.debug(
                f"Missing `{target_attribute}` in custom metadata of the evaluation. "
                f"Evaluation id: {model.identifier} "
            )
            return ""

    def _extract_metadata(self, metadata_list: List[Dict], key: str) -> Any:
        for metadata in metadata_list:
            if metadata.get("key") == key:
                return metadata.get("value")
        raise AquaMissingKeyError(
            f"Missing `{key}` in custom metadata of the evaluation."
        )

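`_extract_metadata` walks the RQS metadata payload, which is a list of key/value dicts. A minimal illustration with placeholder key names and OCIDs (the real keys come from `EvaluationCustomMetadata`):

metadata_list = [
    {"key": "evaluation_source", "value": "ocid1.datasciencemodeldeployment.oc1..<unique_id>"},
    {"key": "evaluation_job_id", "value": "ocid1.datasciencejob.oc1..<unique_id>"},
]
# _extract_metadata returns the value for a matching key, or raises AquaMissingKeyError.
# The same lookup, expressed inline:
job_id = next(m["value"] for m in metadata_list if m["key"] == "evaluation_job_id")
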
    def _get_source(
        self,
        evaluation: oci.resource_search.models.ResourceSummary,
        resources_mapping: dict = {},
    ) -> tuple:
        """Returns ocid and name of the model that has been evaluated."""
        source_id = self._get_attribute_from_model_metadata(
            evaluation,
            EvaluationCustomMetadata.EVALUATION_SOURCE.value,
        )

        try:
            source = resources_mapping.get(source_id)
            source_name = (
                source.display_name
                if source
                else self._get_attribute_from_model_metadata(
                    evaluation, EvaluationCustomMetadata.EVALUATION_SOURCE_NAME.value
                )
            )

            if not source_name:
                resource_type = utils.get_resource_type(source_id)

                # TODO: adjust resource principal mapping
                if resource_type == "datasciencemodel":
                    source_name = self.ds_client.get_model(source_id).data.display_name
                elif resource_type == "datasciencemodeldeployment":
                    source_name = self.ds_client.get_model_deployment(
                        source_id
                    ).data.display_name
                else:
                    raise AquaRuntimeError(
                        f"Not supported source type: {resource_type}"
                    )
        except Exception as e:
            logger.debug(
                f"Failed to retrieve source information for evaluation {evaluation.identifier}."
            )
            source_name = ""

        return (source_id, source_name)

    def _get_experiment_info(
        self, model: oci.resource_search.models.ResourceSummary
    ) -> tuple:
        """Returns ocid and name of the experiment."""
        return (
            model.additional_details.get(RqsAdditionalDetails.MODEL_VERSION_SET_ID),
            model.additional_details.get(RqsAdditionalDetails.MODEL_VERSION_SET_NAME),
        )

    def _process(
        self,
        model: oci.resource_search.models.ResourceSummary,
        resources_mapping: dict = {},
    ) -> dict:
        """Constructs AquaEvaluationSummary from `oci.resource_search.models.ResourceSummary`."""

        tags = {}
        tags.update(model.defined_tags or {})
        tags.update(model.freeform_tags or {})

        model_id = model.identifier
        console_url = get_console_link(
            resource="models",
            ocid=model_id,
            region=self.region,
        )
        source_model_id, source_model_name = self._get_source(model, resources_mapping)
        experiment_id, experiment_name = self._get_experiment_info(model)
        parameters = self._fetch_runtime_params(model)

        return dict(
            id=model_id,
            name=model.display_name,
            console_url=console_url,
            time_created=model.time_created,
            tags=tags,
            experiment=self._build_resource_identifier(
                id=experiment_id,
                name=experiment_name,
            ),
            source=self._build_resource_identifier(
                id=source_model_id, name=source_model_name
            ),
            parameters=parameters,
        )

    def _build_resource_identifier(
        self, id: str = None, name: str = None
    ) -> AquaResourceIdentifier:
        """Constructs AquaResourceIdentifier based on the given ocid and display name."""
        try:
            resource_type = utils.CONSOLE_LINK_RESOURCE_TYPE_MAPPING.get(
                utils.get_resource_type(id)
            )

            return AquaResourceIdentifier(
                id=id,
                name=name,
                url=get_console_link(
                    resource=resource_type,
                    ocid=id,
                    region=self.region,
                ),
            )
        except Exception as e:
            logger.error(
                f"Failed to construct AquaResourceIdentifier from given id=`{id}`, and name=`{name}`, {str(e)}"
            )
            return AquaResourceIdentifier()

    def _get_jobrun(
        self, model: oci.resource_search.models.ResourceSummary, mapping: dict = {}
    ) -> Union[
        oci.resource_search.models.ResourceSummary, oci.data_science.models.JobRun
    ]:
        jobrun_id = self._get_attribute_from_model_metadata(
            model, EvaluationCustomMetadata.EVALUATION_JOB_RUN_ID.value
        )
        job_run = mapping.get(jobrun_id)

        if not job_run:
            job_run = self._fetch_jobrun(model, use_rqs=True, jobrun_id=jobrun_id)
        return job_run

    def _fetch_jobrun(
        self,
        resource: oci.resource_search.models.ResourceSummary,
        use_rqs: bool = True,
        jobrun_id: str = None,
    ) -> Union[
        oci.resource_search.models.ResourceSummary, oci.data_science.models.JobRun
    ]:
        """Extracts job run id from metadata, and gets related job run information."""

        jobrun_id = jobrun_id or self._get_attribute_from_model_metadata(
            resource, EvaluationCustomMetadata.EVALUATION_JOB_RUN_ID.value
        )

        logger.info(f"Fetching associated job run: {jobrun_id}")

        try:
            jobrun = (
                utils.query_resource(jobrun_id, return_all=False)
                if use_rqs
                else self.ds_client.get_job_run(jobrun_id).data
            )
        except Exception as e:
            logger.debug(
                f"Failed to retrieve job run: {jobrun_id}. " f"DEBUG INFO: {str(e)}"
            )
            jobrun = None

        return jobrun

    def _fetch_runtime_params(
        self, resource: oci.resource_search.models.ResourceSummary
    ) -> AquaEvalParams:
        """Extracts model parameters from metadata. Shape is the shape used in job run."""
        try:
            params = json.loads(
                self._get_attribute_from_model_metadata(
                    resource, MetadataTaxonomyKeys.HYPERPARAMETERS
                )
            )
            if not params.get(EvaluationConfig.PARAMS):
                raise AquaMissingKeyError(
                    "model parameters have not been saved in correct format in model taxonomy.",
                    service_payload={"params": params},
                )
            # TODO: validate the format of parameters.
            # self._validate_params(params)

            return AquaEvalParams(**params[EvaluationConfig.PARAMS])
        except Exception as e:
            logger.debug(
                f"Failed to retrieve model parameters for the model: {str(resource)}."
                f"DEBUG INFO: {str(e)}."
            )
            return AquaEvalParams()

    def _build_job_identifier(
        self,
        job_run_details: Union[
            oci.data_science.models.JobRun, oci.resource_search.models.ResourceSummary
        ] = None,
    ) -> AquaResourceIdentifier:
        try:
            job_id = (
                job_run_details.id
                if isinstance(job_run_details, oci.data_science.models.JobRun)
                else job_run_details.identifier
            )
            return self._build_resource_identifier(
                id=job_id, name=job_run_details.display_name
            )

        except Exception as e:
            logger.debug(
                f"Failed to get job details from job_run_details: {job_run_details}"
                f"DEBUG INFO:{str(e)}"
            )
            return AquaResourceIdentifier()

    # TODO: fix the logic for determining termination state
    def _get_status(
        self,
        model: oci.resource_search.models.ResourceSummary,
        jobrun: Union[
            oci.resource_search.models.ResourceSummary, oci.data_science.models.JobRun
        ] = None,
    ) -> dict:
        """Builds evaluation status based on the model status and job run status.
        When `aqua_evaluation_error` is detected in custom metadata, the job run is treated as failed.
        However, if the job run failed before saving this metadata, we need to check the existence
        of the evaluation artifact.

        """
        # TODO: revisit for CANCELED evaluation
        job_run_status = (
            JobRun.LIFECYCLE_STATE_FAILED
            if self._get_attribute_from_model_metadata(
                model, EvaluationCustomMetadata.EVALUATION_ERROR.value
            )
            else None
        )

        model_status = model.lifecycle_state
        job_run_status = job_run_status or (
            jobrun.lifecycle_state
            if jobrun and not jobrun.lifecycle_state == JobRun.LIFECYCLE_STATE_DELETED
            else (
                JobRun.LIFECYCLE_STATE_SUCCEEDED
                if self._if_eval_artifact_exist(model)
                else JobRun.LIFECYCLE_STATE_FAILED
            )
        )

        lifecycle_state = utils.LifecycleStatus.get_status(
            evaluation_status=model_status, job_run_status=job_run_status
        )

        try:
            lifecycle_details = (
                utils.LIFECYCLE_DETAILS_MISSING_JOBRUN
                if not jobrun
                else self._extract_job_lifecycle_details(jobrun.lifecycle_details)
            )
        except:
            # ResourceSummary does not have a lifecycle_details attr
            lifecycle_details = ""

        return dict(
            lifecycle_state=(
                lifecycle_state
                if isinstance(lifecycle_state, str)
                else lifecycle_state.value
            ),
            lifecycle_details=lifecycle_details,
        )

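The fallback chain in `_get_status` reads: an explicit evaluation error wins, then the live job run state (unless the run was deleted), then the presence of the evaluation artifact. A condensed, standalone sketch of that precedence, with plain strings standing in for the OCI lifecycle constants:

from typing import Optional

def resolve_job_run_status(
    has_error_metadata: bool, jobrun_state: Optional[str], artifact_exists: bool
) -> str:
    # Mirrors the precedence used by _get_status above.
    if has_error_metadata:
        return "FAILED"
    if jobrun_state and jobrun_state != "DELETED":
        return jobrun_state
    return "SUCCEEDED" if artifact_exists else "FAILED"

assert resolve_job_run_status(False, None, True) == "SUCCEEDED"
assert resolve_job_run_status(False, "IN_PROGRESS", False) == "IN_PROGRESS"
assert resolve_job_run_status(True, "SUCCEEDED", True) == "FAILED"
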
    def _prefetch_resources(self, compartment_id) -> dict:
        """Fetches all AQUA resources."""
        # TODO: handle cross compartment/tenancy resources
        # TODO: add cache
        resources = utils.query_resources(
            compartment_id=compartment_id,
            resource_type="all",
            tag_list=[EvaluationModelTags.AQUA_EVALUATION.value, "OCI_AQUA"],
            connect_by_ampersands=False,
            return_all=False,
        )
        logger.debug(f"Fetched {len(resources)} AQUA resources.")
        return {item.identifier: item for item in resources}

    def _extract_job_lifecycle_details(self, lifecycle_details: str) -> str:
        """
        Extracts the exit code from a job lifecycle detail string and associates it
        with a corresponding message from the EVALUATION_JOB_EXIT_CODE_MESSAGE dictionary.

        This method searches the provided lifecycle detail string for an exit code pattern.
        Upon finding an exit code, it retrieves the related human-readable message
        from a predefined dictionary of exit codes and their meanings. If the exit code
        is not found within the string, or if it does not exist in the dictionary,
        the original `lifecycle_details` message will be returned.

        Parameters
        ----------
        lifecycle_details : str
            A string containing the details of the job's lifecycle, typically including an exit code.

        Returns
        -------
        str
            A message that combines the extracted exit code with its corresponding descriptive text.
            If no exit code is found, or if the exit code is not in the dictionary,
            the original `lifecycle_details` message will be returned.

        Examples
        --------
        >>> _extract_job_lifecycle_details("Job run artifact execution failed with exit code 16")
        'The evaluation configuration is invalid due to content validation errors. Exit code: 16.'

        >>> _extract_job_lifecycle_details("Job completed successfully.")
        'Job completed successfully.'
        """
        if not lifecycle_details:
            return lifecycle_details

        message = lifecycle_details
        try:
            # Extract exit code
            match = re.search(r"exit code (\d+)", lifecycle_details)
            if match:
                exit_code = int(match.group(1))
                exit_code_message = EVALUATION_JOB_EXIT_CODE_MESSAGE.get(exit_code)
                if exit_code_message:
                    message = f"{exit_code_message} Exit code: {exit_code}."
        except:
            pass

        return message
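A self-contained sketch of the exit-code mapping this method consults; only the code-16 message is taken from the docstring above, and the rest of the real table lives elsewhere in the aqua package:

import re

EVALUATION_JOB_EXIT_CODE_MESSAGE = {
    16: "The evaluation configuration is invalid due to content validation errors.",
}

def summarize(lifecycle_details: str) -> str:
    # Same extraction pattern as _extract_job_lifecycle_details above.
    match = re.search(r"exit code (\d+)", lifecycle_details)
    if not match:
        return lifecycle_details
    code = int(match.group(1))
    message = EVALUATION_JOB_EXIT_CODE_MESSAGE.get(code)
    return f"{message} Exit code: {code}." if message else lifecycle_details

print(summarize("Job run artifact execution failed with exit code 16"))
# -> The evaluation configuration is invalid due to content validation errors. Exit code: 16.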