oracle-ads 2.10.1__py3-none-any.whl → 2.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +12 -0
- ads/aqua/base.py +324 -0
- ads/aqua/cli.py +19 -0
- ads/aqua/config/deployment_config_defaults.json +9 -0
- ads/aqua/config/resource_limit_names.json +7 -0
- ads/aqua/constants.py +45 -0
- ads/aqua/data.py +40 -0
- ads/aqua/decorator.py +101 -0
- ads/aqua/deployment.py +643 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation.py +1751 -0
- ads/aqua/exception.py +82 -0
- ads/aqua/extension/__init__.py +40 -0
- ads/aqua/extension/base_handler.py +138 -0
- ads/aqua/extension/common_handler.py +21 -0
- ads/aqua/extension/deployment_handler.py +202 -0
- ads/aqua/extension/evaluation_handler.py +135 -0
- ads/aqua/extension/finetune_handler.py +66 -0
- ads/aqua/extension/model_handler.py +59 -0
- ads/aqua/extension/ui_handler.py +201 -0
- ads/aqua/extension/utils.py +23 -0
- ads/aqua/finetune.py +579 -0
- ads/aqua/job.py +29 -0
- ads/aqua/model.py +819 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +459 -0
- ads/aqua/ui.py +453 -0
- ads/aqua/utils.py +715 -0
- ads/cli.py +37 -6
- ads/common/decorator/__init__.py +7 -3
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/object_storage_details.py +166 -7
- ads/common/oci_client.py +18 -1
- ads/common/oci_logging.py +2 -2
- ads/common/oci_mixin.py +4 -5
- ads/common/serializer.py +34 -5
- ads/common/utils.py +75 -10
- ads/config.py +40 -1
- ads/jobs/ads_job.py +43 -25
- ads/jobs/builders/infrastructure/base.py +4 -2
- ads/jobs/builders/infrastructure/dsc_job.py +49 -39
- ads/jobs/builders/runtimes/base.py +71 -1
- ads/jobs/builders/runtimes/container_runtime.py +4 -4
- ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
- ads/jobs/templates/driver_pytorch.py +27 -10
- ads/model/artifact_downloader.py +84 -14
- ads/model/artifact_uploader.py +25 -23
- ads/model/datascience_model.py +388 -38
- ads/model/deployment/model_deployment.py +10 -2
- ads/model/generic_model.py +8 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_metadata.py +1 -1
- ads/model/service/oci_datascience_model.py +34 -5
- ads/opctl/operator/lowcode/anomaly/README.md +2 -1
- ads/opctl/operator/lowcode/anomaly/__main__.py +10 -4
- ads/opctl/operator/lowcode/anomaly/environment.yaml +2 -1
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +12 -6
- ads/opctl/operator/lowcode/forecast/README.md +3 -2
- ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
- ads/opctl/operator/lowcode/forecast/model/automlx.py +12 -23
- ads/telemetry/base.py +62 -0
- ads/telemetry/client.py +105 -0
- ads/telemetry/telemetry.py +6 -3
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.1.dist-info}/METADATA +37 -7
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.1.dist-info}/RECORD +71 -36
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.1.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.1.dist-info}/WHEEL +0 -0
- {oracle_ads-2.10.1.dist-info → oracle_ads-2.11.1.dist-info}/entry_points.txt +0 -0
ads/aqua/finetune.py
ADDED
@@ -0,0 +1,579 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# Copyright (c) 2024 Oracle and/or its affiliates.
|
4
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
|
6
|
+
import json
|
7
|
+
import os
|
8
|
+
from dataclasses import asdict, dataclass, field
|
9
|
+
from enum import Enum
|
10
|
+
from typing import Dict, Optional
|
11
|
+
|
12
|
+
from oci.data_science.models import (
|
13
|
+
Metadata,
|
14
|
+
UpdateModelDetails,
|
15
|
+
UpdateModelProvenanceDetails,
|
16
|
+
)
|
17
|
+
|
18
|
+
from ads.aqua.base import AquaApp
|
19
|
+
from ads.aqua.data import AquaResourceIdentifier, Resource, Tags
|
20
|
+
from ads.aqua.exception import AquaFileExistsError, AquaValueError
|
21
|
+
from ads.aqua.job import AquaJobSummary
|
22
|
+
from ads.aqua.utils import (
|
23
|
+
DEFAULT_FT_BATCH_SIZE,
|
24
|
+
DEFAULT_FT_BLOCK_STORAGE_SIZE,
|
25
|
+
DEFAULT_FT_REPLICA,
|
26
|
+
DEFAULT_FT_VALIDATION_SET_SIZE,
|
27
|
+
FINE_TUNING_RUNTIME_CONTAINER,
|
28
|
+
JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING,
|
29
|
+
UNKNOWN,
|
30
|
+
UNKNOWN_DICT,
|
31
|
+
get_container_image,
|
32
|
+
load_config,
|
33
|
+
logger,
|
34
|
+
upload_local_to_os,
|
35
|
+
)
|
36
|
+
from ads.common.auth import default_signer
|
37
|
+
from ads.common.object_storage_details import ObjectStorageDetails
|
38
|
+
from ads.common.serializer import DataClassSerializable
|
39
|
+
from ads.common.utils import get_console_link
|
40
|
+
from ads.config import (
|
41
|
+
AQUA_JOB_SUBNET_ID,
|
42
|
+
AQUA_MODEL_FINETUNING_CONFIG,
|
43
|
+
COMPARTMENT_OCID,
|
44
|
+
CONDA_BUCKET_NS,
|
45
|
+
PROJECT_OCID,
|
46
|
+
)
|
47
|
+
from ads.jobs.ads_job import Job
|
48
|
+
from ads.jobs.builders.infrastructure.dsc_job import DataScienceJob
|
49
|
+
from ads.jobs.builders.runtimes.base import Runtime
|
50
|
+
from ads.jobs.builders.runtimes.container_runtime import ContainerRuntime
|
51
|
+
from ads.model.model_metadata import (
|
52
|
+
MetadataTaxonomyKeys,
|
53
|
+
ModelCustomMetadata,
|
54
|
+
ModelTaxonomyMetadata,
|
55
|
+
)
|
56
|
+
from ads.telemetry import telemetry
|
57
|
+
|
58
|
+
|
59
|
+
class FineTuneCustomMetadata(Enum):
    """Custom metadata keys read from or written to models during fine tuning."""

    # Keys added to the fine-tuned model's custom metadata.
    FINE_TUNE_SOURCE = "fine_tune_source"
    FINE_TUNE_SOURCE_NAME = "fine_tune_source_name"
    FINE_TUNE_OUTPUT_PATH = "fine_tune_output_path"
    FINE_TUNE_JOB_ID = "fine_tune_job_id"
    FINE_TUNE_JOB_RUN_ID = "fine_tune_job_run_id"

    # Keys looked up on the source (service) model's custom metadata.
    SERVICE_MODEL_ARTIFACT_LOCATION = "artifact_location"
    SERVICE_MODEL_DEPLOYMENT_CONTAINER = "deployment-container"
    SERVICE_MODEL_FINE_TUNE_CONTAINER = "finetune-container"
|
68
|
+
|
69
|
+
|
70
|
+
@dataclass(repr=False)
class AquaFineTuningParams(DataClassSerializable):
    """User supplied hyperparameters for an Aqua fine tuning run.

    Both fields default to `None`, i.e. "not provided".
    """

    # Number of training epochs.
    epochs: int = None
    # Learning rate used for training.
    learning_rate: float = None
|
74
|
+
|
75
|
+
|
76
|
+
@dataclass(repr=False)
class AquaFineTuningSummary(AquaJobSummary, DataClassSerializable):
    """Aqua job summary extended with the fine tuning parameters that were used."""

    # A fresh, empty AquaFineTuningParams is created when none is supplied.
    parameters: AquaFineTuningParams = field(default_factory=AquaFineTuningParams)
|
79
|
+
|
80
|
+
|
81
|
+
@dataclass(repr=False)
class CreateFineTuningDetails(DataClassSerializable):
    """Input payload describing an Aqua model fine tuning request.

    Fields
    ------
    ft_source_id: str
        OCID of the model that is going to be fine tuned.
    ft_name: str
        Display name of the fine tuning.
    dataset_path: str
        Location of the training dataset. Either a local path (for example from
        a notebook session) or an object storage path.
    report_path: str
        Location of the fine tuning report. Must be an object storage path.
    ft_parameters: dict
        Fine tuning parameters.
    shape_name: str
        Shape name of the fine tuning job infrastructure.
    replica: int
        Replica count of the fine tuning job runtime.
    validation_set_size: float
        Validation set size of the fine tuning job. Must be a float within [0, 1).
    ft_description: (str, optional). Defaults to `None`.
        Description of the fine tuning.
    compartment_id: (str, optional). Defaults to `None`.
        Compartment id of the fine tuning.
    project_id: (str, optional). Defaults to `None`.
        Project id of the fine tuning.
    experiment_id: (str, optional). Defaults to `None`.
        Id of the fine tuning model version set. When given, the fine tuned
        model is associated with it.
    experiment_name: (str, optional). Defaults to `None`.
        Name of the fine tuning model version set. When given, an existing
        version set with that name is reused, otherwise a new model version
        set with that name is created.
    experiment_description: (str, optional). Defaults to `None`.
        Description of the fine tuning model version set.
    block_storage_size: (int, optional). Defaults to `None`.
        Storage size of the fine tuning job infrastructure. When not set,
        `AquaFineTuningApp.create` applies `DEFAULT_FT_BLOCK_STORAGE_SIZE`.
    subnet_id: (str, optional). Defaults to `None`.
        Custom egress (subnet) of the fine tuning job.
    log_id: (str, optional). Defaults to `None`.
        Log id of the fine tuning job infrastructure.
    log_group_id: (str, optional). Defaults to `None`.
        Log group id of the fine tuning job infrastructure.
    """

    # Required fields.
    ft_source_id: str
    ft_name: str
    dataset_path: str
    report_path: str
    ft_parameters: dict
    shape_name: str
    replica: int
    validation_set_size: float

    # Optional fields.
    ft_description: Optional[str] = None
    compartment_id: Optional[str] = None
    project_id: Optional[str] = None
    experiment_id: Optional[str] = None
    experiment_name: Optional[str] = None
    experiment_description: Optional[str] = None
    block_storage_size: Optional[int] = None
    subnet_id: Optional[str] = None
    log_id: Optional[str] = None
    log_group_id: Optional[str] = None
|
147
|
+
|
148
|
+
|
149
|
+
class AquaFineTuningApp(AquaApp):
    """Provides a suite of APIs to interact with Aqua fine-tuned models within the Oracle
    Cloud Infrastructure Data Science service, serving as an interface for creating fine-tuned models.

    Methods
    -------
    create(...) -> AquaFineTuningSummary
        Creates a fine-tuned Aqua model.
    get_finetuning_config(self, model_id: str) -> Dict:
        Gets the finetuning config for given Aqua model.

    Note:
        Use `ads aqua finetuning <method_name> --help` to get more details on the parameters available.
        This class is designed to work within the Oracle Cloud Infrastructure
        and requires proper configuration and authentication set up to interact
        with OCI services.
    """

    @staticmethod
    def _field_names(dataclass_type) -> str:
        """Returns the comma separated field names of a dataclass type.

        `dataclasses.asdict` only accepts dataclass *instances*, so it cannot
        be used to list the fields of the class itself; `dataclasses.fields`
        accepts both classes and instances.
        """
        # Function-scope import: `fields` is not among the names imported from
        # `dataclasses` at the top of this module.
        from dataclasses import fields

        return ", ".join(item.name for item in fields(dataclass_type))

    @telemetry(entry_point="plugin=finetuning&action=create", name="aqua")
    def create(
        self, create_fine_tuning_details: CreateFineTuningDetails = None, **kwargs
    ) -> "AquaFineTuningSummary":
        """Creates Aqua fine tuning for model.

        Parameters
        ----------
        create_fine_tuning_details: CreateFineTuningDetails
            The CreateFineTuningDetails data class which contains all
            required and optional fields to create the aqua fine tuning.
        kwargs:
            The kwargs for creating CreateFineTuningDetails instance if
            no create_fine_tuning_details provided.

        Returns
        -------
        AquaFineTuningSummary:
            The instance of AquaFineTuningSummary.

        Raises
        ------
        AquaValueError
            If the details cannot be built from `kwargs`, the report path is
            not an object storage path, the validation set size is outside
            [0, 1), the replica is smaller than `DEFAULT_FT_REPLICA`, subnet
            or logging is missing for a replica larger than
            `DEFAULT_FT_REPLICA`, the fine tuning parameters are invalid, or
            neither experiment id nor experiment name is given.
        AquaFileExistsError
            If a local dataset has to be uploaded and a file with the same
            name already exists under the report path.
        """
        if not create_fine_tuning_details:
            try:
                create_fine_tuning_details = CreateFineTuningDetails(**kwargs)
            except Exception as ex:
                raise AquaValueError(
                    "Invalid create fine tuning parameters. Allowable parameters are: "
                    f"{self._field_names(CreateFineTuningDetails)}."
                ) from ex

        details = create_fine_tuning_details

        source = self.get_source(details.ft_source_id)
        # TODO: add the following validation for fine tuning aqua service model. Revisit it when all service models are available
        # if source.compartment_id != ODSC_MODEL_COMPARTMENT_OCID:
        #     raise AquaValueError(
        #         f"Fine tuning is only supported for Aqua service models in {ODSC_MODEL_COMPARTMENT_OCID}. "
        #         "Use a valid Aqua service model id instead."
        #     )

        # Fall back to the configured compartment/project when not specified.
        target_compartment = details.compartment_id or COMPARTMENT_OCID
        target_project = details.project_id or PROJECT_OCID

        if not ObjectStorageDetails.is_oci_path(details.report_path):
            raise AquaValueError(
                "Fine tuning report path must be an object storage path."
            )

        if details.validation_set_size < 0 or details.validation_set_size >= 1:
            raise AquaValueError(
                "Fine tuning validation set size should be a float number in between [0, 1)."
            )

        if details.replica < DEFAULT_FT_REPLICA:
            raise AquaValueError(
                f"Fine tuning replica must be equal to or larger than {DEFAULT_FT_REPLICA}."
            )

        subnet_id = details.subnet_id or AQUA_JOB_SUBNET_ID
        if not subnet_id and details.replica > DEFAULT_FT_REPLICA:
            raise AquaValueError(
                f"Custom egress must be provided if replica is larger than {DEFAULT_FT_REPLICA}. "
                "Specify the subnet id via API or environment variable AQUA_JOB_SUBNET_ID."
            )

        if details.replica > DEFAULT_FT_REPLICA:
            if not (details.log_id and details.log_group_id):
                raise AquaValueError(
                    f"Logging is required for fine tuning if replica is larger than {DEFAULT_FT_REPLICA}."
                )

        try:
            ft_parameters = AquaFineTuningParams(**details.ft_parameters)
        except Exception as ex:
            raise AquaValueError(
                "Invalid fine tuning parameters. Fine tuning parameters should "
                f"be a dictionary with keys: {self._field_names(AquaFineTuningParams)}."
            ) from ex

        experiment_model_version_set_id = details.experiment_id
        experiment_model_version_set_name = details.experiment_name

        if (
            not experiment_model_version_set_id
            and not experiment_model_version_set_name
        ):
            raise AquaValueError(
                "Either experiment id or experiment name must be provided for fine tuning."
            )

        # upload dataset if it's local path
        ft_dataset_path = details.dataset_path
        if not ObjectStorageDetails.is_oci_path(ft_dataset_path):
            # format: oci://<bucket>@<namespace>/<dataset_file_name>
            dataset_file = os.path.basename(ft_dataset_path)
            dst_uri = f"{details.report_path.rstrip('/')}/{dataset_file}"
            try:
                upload_local_to_os(
                    src_uri=ft_dataset_path,
                    dst_uri=dst_uri,
                    auth=default_signer(),
                    force_overwrite=False,
                )
            except FileExistsError as ex:
                raise AquaFileExistsError(
                    f"Dataset {dataset_file} already exists in {details.report_path}. "
                    "Please use a new dataset file name or report path."
                ) from ex
            logger.debug(
                f"Uploaded local file {ft_dataset_path} to object storage {dst_uri}."
            )
            # tracks the size of dataset uploaded by user to the destination.
            self.telemetry.record_event_async(
                category="aqua/finetune/upload",
                action="size",
                detail=os.path.getsize(os.path.expanduser(ft_dataset_path)),
            )
            # From here on the job reads the dataset from object storage.
            ft_dataset_path = dst_uri

        (
            experiment_model_version_set_id,
            experiment_model_version_set_name,
        ) = self.create_model_version_set(
            model_version_set_id=experiment_model_version_set_id,
            model_version_set_name=experiment_model_version_set_name,
            description=details.experiment_description,
            compartment_id=target_compartment,
            project_id=target_project,
        )

        ft_model_custom_metadata = ModelCustomMetadata()
        ft_model_custom_metadata.add(
            key=FineTuneCustomMetadata.FINE_TUNE_SOURCE.value,
            value=details.ft_source_id,
        )
        ft_model_custom_metadata.add(
            key=FineTuneCustomMetadata.FINE_TUNE_SOURCE_NAME.value,
            value=source.display_name,
        )
        # Carry the artifact location and deployment container entries of the
        # source model over to the fine-tuned model.
        service_model_artifact_location = source.custom_metadata_list.get(
            FineTuneCustomMetadata.SERVICE_MODEL_ARTIFACT_LOCATION.value
        )
        service_model_deployment_container = source.custom_metadata_list.get(
            FineTuneCustomMetadata.SERVICE_MODEL_DEPLOYMENT_CONTAINER.value
        )
        ft_model_custom_metadata.add(
            key=service_model_artifact_location.key,
            value=service_model_artifact_location.value,
            description=service_model_artifact_location.description,
        )
        ft_model_custom_metadata.add(
            key=service_model_deployment_container.key,
            value=service_model_deployment_container.value,
            description=service_model_deployment_container.description,
        )

        ft_model_taxonomy_metadata = ModelTaxonomyMetadata()
        ft_model_taxonomy_metadata[MetadataTaxonomyKeys.HYPERPARAMETERS].value = {
            **details.ft_parameters,
            "val_set_size": details.validation_set_size,
            "training_data": ft_dataset_path,
        }

        ft_model = self.create_model_catalog(
            display_name=details.ft_name,
            description=details.ft_description,
            model_version_set_id=experiment_model_version_set_id,
            model_custom_metadata=ft_model_custom_metadata,
            model_taxonomy_metadata=ft_model_taxonomy_metadata,
            compartment_id=target_compartment,
            project_id=target_project,
            model_by_reference=True,
        )

        # Tag value linking the job and the fine-tuned model to the source model.
        source_tag_value = f"{source.id}#{source.display_name}"
        ft_job_freeform_tags = {
            Tags.AQUA_TAG.value: UNKNOWN,
            Tags.AQUA_FINE_TUNED_MODEL_TAG.value: source_tag_value,
        }

        ft_job = Job(name=ft_model.display_name).with_infrastructure(
            DataScienceJob()
            .with_log_group_id(details.log_group_id)
            .with_log_id(details.log_id)
            .with_compartment_id(target_compartment)
            .with_project_id(target_project)
            .with_shape_name(details.shape_name)
            .with_block_storage_size(
                details.block_storage_size or DEFAULT_FT_BLOCK_STORAGE_SIZE
            )
            .with_freeform_tag(**ft_job_freeform_tags)
        )

        if not subnet_id:
            # apply default subnet id for job by setting ME_STANDALONE
            # so as to avoid using the notebook session's networking when running on it
            # https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/infra_and_runtime.html#networking
            ft_job.infrastructure.with_job_infrastructure_type(
                JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING
            )
        else:
            ft_job.infrastructure.with_subnet_id(subnet_id)

        ft_config = self.get_finetuning_config(source.id)

        ft_container = source.custom_metadata_list.get(
            FineTuneCustomMetadata.SERVICE_MODEL_FINE_TUNE_CONTAINER.value
        ).value

        # Shape specific batch size from the config, else the default one.
        batch_size = (
            ft_config.get("shape", UNKNOWN_DICT)
            .get(details.shape_name, UNKNOWN_DICT)
            .get("batch_size", DEFAULT_FT_BATCH_SIZE)
        )
        finetuning_params = ft_config.get("finetuning_params")

        # NOTE(review): a validation_set_size of 0 passes the [0, 1) check
        # above but is falsy, so it is replaced by
        # DEFAULT_FT_VALIDATION_SET_SIZE here while the taxonomy metadata
        # records the user supplied 0 - confirm this is intended.
        ft_runtime = self._build_fine_tuning_runtime(
            source_id=source.id,
            ft_model_id=ft_model.id,
            dataset_path=ft_dataset_path,
            report_path=details.report_path,
            replica=details.replica,
            batch_size=batch_size,
            finetuning_params=finetuning_params,
            val_set_size=(
                details.validation_set_size or DEFAULT_FT_VALIDATION_SET_SIZE
            ),
            parameters=ft_parameters,
            ft_container=ft_container,
        )
        ft_job.with_runtime(ft_runtime).create()
        logger.debug(
            f"Successfully created fine tuning job {ft_job.id} for {details.ft_source_id}."
        )

        ft_job_run = ft_job.run(
            name=ft_model.display_name,
            freeform_tags=ft_job_freeform_tags,
            wait=False,
        )
        logger.debug(
            f"Successfully created fine tuning job run {ft_job_run.id} for {details.ft_source_id}."
        )

        # The job and job run ids only exist now, so the model created above
        # is updated with them afterwards.
        ft_model_custom_metadata.add(
            key=FineTuneCustomMetadata.FINE_TUNE_JOB_ID.value,
            value=ft_job.id,
        )
        ft_model_custom_metadata.add(
            key=FineTuneCustomMetadata.FINE_TUNE_JOB_RUN_ID.value,
            value=ft_job_run.id,
        )
        updated_custom_metadata_list = [
            Metadata(**metadata)
            for metadata in ft_model_custom_metadata.to_dict()["data"]
        ]

        self.update_model(
            model_id=ft_model.id,
            update_model_details=UpdateModelDetails(
                custom_metadata_list=updated_custom_metadata_list,
                freeform_tags={
                    Tags.AQUA_FINE_TUNED_MODEL_TAG.value: source_tag_value,
                    **source.freeform_tags,
                },
            ),
        )

        self.update_model_provenance(
            model_id=ft_model.id,
            update_model_provenance_details=UpdateModelProvenanceDetails(
                training_id=ft_job_run.id
            ),
        )

        # tracks the shape and replica used for fine-tuning the service models
        telemetry_kwargs = (
            {"ocid": ft_job.id[-6:]} if ft_job and len(ft_job.id) > 6 else {}
        )
        self.telemetry.record_event_async(
            category=f"aqua/service/{source.display_name}/finetune/create/shape/",
            action=f"{details.shape_name}x{details.replica}",
            **telemetry_kwargs,
        )
        # tracks unique fine-tuned models that were created in the user compartment
        self.telemetry.record_event_async(
            category="aqua/service/finetune",
            action="create",
            detail=source.display_name,
        )

        return AquaFineTuningSummary(
            id=ft_model.id,
            name=ft_model.display_name,
            console_url=get_console_link(
                resource=Resource.MODEL.value,
                ocid=ft_model.id,
                region=self.region,
            ),
            time_created=str(ft_model.time_created),
            lifecycle_state=ft_job_run.lifecycle_state or UNKNOWN,
            lifecycle_details=ft_job_run.lifecycle_details or UNKNOWN,
            experiment=AquaResourceIdentifier(
                id=experiment_model_version_set_id,
                name=experiment_model_version_set_name,
                url=get_console_link(
                    resource=Resource.MODEL_VERSION_SET.value,
                    ocid=experiment_model_version_set_id,
                    region=self.region,
                ),
            ),
            source=AquaResourceIdentifier(
                id=source.id,
                name=source.display_name,
                url=get_console_link(
                    resource=Resource.MODEL.value,
                    ocid=source.id,
                    region=self.region,
                ),
            ),
            job=AquaResourceIdentifier(
                id=ft_job.id,
                name=ft_job.name,
                url=get_console_link(
                    resource=Resource.JOB.value,
                    ocid=ft_job.id,
                    region=self.region,
                ),
            ),
            tags=dict(
                aqua_finetuning=Tags.AQUA_FINE_TUNING.value,
                finetuning_job_id=ft_job.id,
                finetuning_source=source.id,
                finetuning_experiment_id=experiment_model_version_set_id,
            ),
            parameters=ft_parameters,
        )

    def _build_fine_tuning_runtime(
        self,
        source_id: str,
        ft_model_id: str,
        dataset_path: str,
        report_path: str,
        replica: int,
        batch_size: int,
        val_set_size: float,
        parameters: AquaFineTuningParams,
        ft_container: str = None,
        finetuning_params: str = None,
    ) -> Runtime:
        """Builds fine tuning runtime for Job.

        Parameters
        ----------
        source_id: str
            The id of the base model, passed as `baseModel.modelId`.
        ft_model_id: str
            The id of the fine-tuned model, passed as `outputModel.modelId`.
        dataset_path: str
            The value of the `--training_data` launch argument.
        report_path: str
            The value of the `--output_dir` launch argument.
        replica: int
            The replica set on the runtime.
        batch_size: int
            The value of the `--micro_batch_size` launch argument.
        val_set_size: float
            The value of the `--val_set_size` launch argument.
        parameters: AquaFineTuningParams
            Provides `epochs` and `learning_rate` for the launch command.
        ft_container: (str, optional). Defaults to `None`.
            The container type used to look up the container image.
        finetuning_params: (str, optional). Defaults to `None`.
            Extra arguments appended to the launch command when truthy.

        Returns
        -------
        Runtime:
            The container runtime for the fine tuning job.
        """
        container = get_container_image(
            container_type=ft_container,
        )

        ft_arguments = json.dumps(
            {
                "baseModel": {"type": "modelCatalog", "modelId": source_id},
                "outputModel": {
                    "type": "modelCatalog",
                    "modelId": ft_model_id,
                },
            }
        )
        # The trailing space separates the fixed arguments from the optional
        # extra ones and is kept even when there are none.
        launch_cmd = (
            f"--micro_batch_size {batch_size} --num_epochs {parameters.epochs} --learning_rate {parameters.learning_rate} --training_data {dataset_path} --output_dir {report_path} --val_set_size {val_set_size} "
            + (f"{finetuning_params}" if finetuning_params else "")
        )

        runtime = (
            ContainerRuntime()
            .with_environment_variable(
                **{
                    "AIP_SMC_FT_ARGUMENTS": ft_arguments,
                    "OCI__LAUNCH_CMD": launch_cmd,
                    "CONDA_BUCKET_NS": CONDA_BUCKET_NS,
                }
            )
            .with_image(image=container)
            .with_replica(replica)
        )

        return runtime

    @telemetry(
        entry_point="plugin=finetuning&action=get_finetuning_config", name="aqua"
    )
    def get_finetuning_config(self, model_id: str) -> Dict:
        """Gets the finetuning config for given Aqua model.

        Parameters
        ----------
        model_id: str
            The OCID of the Aqua model.

        Returns
        -------
        Dict:
            A dict of allowed finetuning configs.
        """

        return self.get_config(model_id, AQUA_MODEL_FINETUNING_CONFIG)
|
ads/aqua/job.py
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2024 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from dataclasses import dataclass, field
|
10
|
+
from ads.common.serializer import DataClassSerializable
|
11
|
+
from ads.aqua.data import AquaResourceIdentifier
|
12
|
+
|
13
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
@dataclass(repr=False)
class AquaJobSummary(DataClassSerializable):
    """Summary of an Aqua job and the resources it is linked to."""

    # Required descriptive fields.
    id: str
    name: str
    console_url: str
    lifecycle_state: str
    lifecycle_details: str
    time_created: str
    tags: dict

    # Linked resources; each defaults to a fresh AquaResourceIdentifier.
    experiment: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    source: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
    job: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier)
|