oracle-ads 2.13.4__py3-none-any.whl → 2.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/app.py +6 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/entities.py +224 -2
- ads/aqua/common/enums.py +3 -0
- ads/aqua/common/utils.py +105 -3
- ads/aqua/config/container_config.py +9 -0
- ads/aqua/constants.py +29 -1
- ads/aqua/evaluation/entities.py +6 -1
- ads/aqua/evaluation/evaluation.py +191 -7
- ads/aqua/extension/aqua_ws_msg_handler.py +6 -36
- ads/aqua/extension/base_handler.py +13 -71
- ads/aqua/extension/deployment_handler.py +67 -76
- ads/aqua/extension/errors.py +19 -0
- ads/aqua/extension/utils.py +114 -2
- ads/aqua/finetuning/finetuning.py +50 -1
- ads/aqua/model/constants.py +3 -0
- ads/aqua/model/enums.py +5 -0
- ads/aqua/model/model.py +236 -24
- ads/aqua/modeldeployment/deployment.py +671 -152
- ads/aqua/modeldeployment/entities.py +551 -42
- ads/aqua/modeldeployment/inference.py +4 -5
- ads/aqua/modeldeployment/utils.py +525 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/RECORD +28 -25
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.4.dist-info → oracle_ads-2.13.5.dist-info}/licenses/LICENSE.txt +0 -0
@@ -2,14 +2,27 @@
|
|
2
2
|
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
|
3
3
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
4
4
|
|
5
|
+
import json
|
5
6
|
import shlex
|
7
|
+
from datetime import datetime, timedelta
|
6
8
|
from typing import Dict, List, Optional, Union
|
7
9
|
|
10
|
+
from cachetools import TTLCache, cached
|
11
|
+
from oci.data_science.models import ModelDeploymentShapeSummary
|
12
|
+
from pydantic import ValidationError
|
13
|
+
|
8
14
|
from ads.aqua.app import AquaApp, logger
|
9
|
-
from ads.aqua.common.entities import
|
15
|
+
from ads.aqua.common.entities import (
|
16
|
+
AquaMultiModelRef,
|
17
|
+
ComputeShapeSummary,
|
18
|
+
ContainerPath,
|
19
|
+
ContainerSpec,
|
20
|
+
)
|
10
21
|
from ads.aqua.common.enums import InferenceContainerTypeFamily, ModelFormat, Tags
|
11
22
|
from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
|
12
23
|
from ads.aqua.common.utils import (
|
24
|
+
build_params_string,
|
25
|
+
build_pydantic_error_message,
|
13
26
|
get_combined_params,
|
14
27
|
get_container_config,
|
15
28
|
get_container_image,
|
@@ -20,19 +33,34 @@ from ads.aqua.common.utils import (
|
|
20
33
|
get_params_list,
|
21
34
|
get_resource_name,
|
22
35
|
get_restricted_params_by_container,
|
36
|
+
load_gpu_shapes_index,
|
23
37
|
validate_cmd_var,
|
24
38
|
)
|
39
|
+
from ads.aqua.config.container_config import AquaContainerConfig, Usage
|
25
40
|
from ads.aqua.constants import (
|
26
41
|
AQUA_MODEL_ARTIFACT_FILE,
|
27
42
|
AQUA_MODEL_TYPE_CUSTOM,
|
43
|
+
AQUA_MODEL_TYPE_MULTI,
|
28
44
|
AQUA_MODEL_TYPE_SERVICE,
|
45
|
+
AQUA_MULTI_MODEL_CONFIG,
|
29
46
|
MODEL_BY_REFERENCE_OSS_PATH_KEY,
|
47
|
+
MODEL_NAME_DELIMITER,
|
30
48
|
UNKNOWN_DICT,
|
31
49
|
)
|
32
50
|
from ads.aqua.data import AquaResourceIdentifier
|
33
51
|
from ads.aqua.finetuning.finetuning import FineTuneCustomMetadata
|
34
52
|
from ads.aqua.model import AquaModelApp
|
35
|
-
from ads.aqua.
|
53
|
+
from ads.aqua.model.constants import ModelCustomMetadataFields
|
54
|
+
from ads.aqua.modeldeployment.entities import (
|
55
|
+
AquaDeployment,
|
56
|
+
AquaDeploymentConfig,
|
57
|
+
AquaDeploymentDetail,
|
58
|
+
ConfigurationItem,
|
59
|
+
ConfigValidationError,
|
60
|
+
CreateModelDeploymentDetails,
|
61
|
+
ModelDeploymentConfigSummary,
|
62
|
+
)
|
63
|
+
from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
|
36
64
|
from ads.common.object_storage_details import ObjectStorageDetails
|
37
65
|
from ads.common.utils import UNKNOWN, get_log_links
|
38
66
|
from ads.config import (
|
@@ -41,6 +69,7 @@ from ads.config import (
|
|
41
69
|
AQUA_DEPLOYMENT_CONTAINER_URI_METADATA_NAME,
|
42
70
|
AQUA_MODEL_DEPLOYMENT_CONFIG,
|
43
71
|
COMPARTMENT_OCID,
|
72
|
+
PROJECT_OCID,
|
44
73
|
)
|
45
74
|
from ads.model.datascience_model import DataScienceModel
|
46
75
|
from ads.model.deployment import (
|
@@ -49,6 +78,7 @@ from ads.model.deployment import (
|
|
49
78
|
ModelDeploymentInfrastructure,
|
50
79
|
ModelDeploymentMode,
|
51
80
|
)
|
81
|
+
from ads.model.model_metadata import ModelCustomMetadataItem
|
52
82
|
from ads.telemetry import telemetry
|
53
83
|
|
54
84
|
|
@@ -66,8 +96,13 @@ class AquaDeploymentApp(AquaApp):
|
|
66
96
|
Retrieves details of an Aqua model deployment by its unique identifier.
|
67
97
|
list(**kwargs) -> List[AquaModelSummary]:
|
68
98
|
Lists all Aqua deployments within a specified compartment and/or project.
|
69
|
-
get_deployment_config(self, model_id: str) ->
|
99
|
+
get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
|
70
100
|
Gets the deployment config of given Aqua model.
|
101
|
+
get_multimodel_deployment_config(self, model_ids: List[str],...) -> ModelDeploymentConfigSummary:
|
102
|
+
Retrieves the deployment configuration for multiple Aqua models and calculates
|
103
|
+
the GPU allocations for all compatible shapes.
|
104
|
+
list_shapes(self, **kwargs) -> List[Dict]:
|
105
|
+
Lists the valid model deployment shapes.
|
71
106
|
|
72
107
|
Note:
|
73
108
|
Use `ads aqua deployment <method_name> --help` to get more details on the parameters available.
|
@@ -79,103 +114,228 @@ class AquaDeploymentApp(AquaApp):
|
|
79
114
|
@telemetry(entry_point="plugin=deployment&action=create", name="aqua")
|
80
115
|
def create(
|
81
116
|
self,
|
82
|
-
|
83
|
-
|
84
|
-
display_name: str,
|
85
|
-
instance_count: int = None,
|
86
|
-
log_group_id: str = None,
|
87
|
-
access_log_id: str = None,
|
88
|
-
predict_log_id: str = None,
|
89
|
-
compartment_id: str = None,
|
90
|
-
project_id: str = None,
|
91
|
-
description: str = None,
|
92
|
-
bandwidth_mbps: int = None,
|
93
|
-
web_concurrency: int = None,
|
94
|
-
server_port: int = None,
|
95
|
-
health_check_port: int = None,
|
96
|
-
env_var: Dict = None,
|
97
|
-
container_family: str = None,
|
98
|
-
memory_in_gbs: Optional[float] = None,
|
99
|
-
ocpus: Optional[float] = None,
|
100
|
-
model_file: Optional[str] = None,
|
101
|
-
private_endpoint_id: Optional[str] = None,
|
102
|
-
container_image_uri: Optional[None] = None,
|
103
|
-
cmd_var: List[str] = None,
|
104
|
-
freeform_tags: Optional[dict] = None,
|
105
|
-
defined_tags: Optional[dict] = None,
|
117
|
+
create_deployment_details: Optional[CreateModelDeploymentDetails] = None,
|
118
|
+
**kwargs,
|
106
119
|
) -> "AquaDeployment":
|
107
120
|
"""
|
108
|
-
Creates a new Aqua deployment
|
121
|
+
Creates a new Aqua model deployment.\n
|
122
|
+
For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#create-model-deployment
|
123
|
+
|
124
|
+
Args:
|
125
|
+
create_deployment_details : CreateModelDeploymentDetails, optional
|
126
|
+
An instance of CreateModelDeploymentDetails containing all required and optional
|
127
|
+
fields for creating a model deployment via Aqua.
|
128
|
+
kwargs:
|
129
|
+
instance_shape (str): The instance shape used for deployment.
|
130
|
+
display_name (str): The name of the model deployment.
|
131
|
+
compartment_id (Optional[str]): The compartment OCID.
|
132
|
+
project_id (Optional[str]): The project OCID.
|
133
|
+
description (Optional[str]): The description of the deployment.
|
134
|
+
model_id (Optional[str]): The model OCID to deploy.
|
135
|
+
models (Optional[List[AquaMultiModelRef]]): List of models for multimodel deployment.
|
136
|
+
instance_count (int): Number of instances used for deployment.
|
137
|
+
log_group_id (Optional[str]): OCI logging group ID for logs.
|
138
|
+
access_log_id (Optional[str]): OCID for access logs.
|
139
|
+
predict_log_id (Optional[str]): OCID for prediction logs.
|
140
|
+
bandwidth_mbps (Optional[int]): Bandwidth limit on the load balancer in Mbps.
|
141
|
+
web_concurrency (Optional[int]): Number of worker processes/threads for handling requests.
|
142
|
+
server_port (Optional[int]): Server port for the Docker container image.
|
143
|
+
health_check_port (Optional[int]): Health check port for the Docker container image.
|
144
|
+
env_var (Optional[Dict[str, str]]): Environment variables for deployment.
|
145
|
+
container_family (Optional[str]): Image family of the model deployment container runtime.
|
146
|
+
memory_in_gbs (Optional[float]): Memory (in GB) for the selected shape.
|
147
|
+
ocpus (Optional[float]): OCPU count for the selected shape.
|
148
|
+
model_file (Optional[str]): File used for model deployment.
|
149
|
+
private_endpoint_id (Optional[str]): Private endpoint ID for model deployment.
|
150
|
+
container_image_uri (Optional[str]): Image URI for model deployment container runtime.
|
151
|
+
cmd_var (Optional[List[str]]): Command variables for the container runtime.
|
152
|
+
freeform_tags (Optional[Dict]): Freeform tags for model deployment.
|
153
|
+
defined_tags (Optional[Dict]): Defined tags for model deployment.
|
154
|
+
|
155
|
+
Returns
|
156
|
+
-------
|
157
|
+
AquaDeployment
|
158
|
+
An Aqua deployment instance.
|
159
|
+
"""
|
160
|
+
# Build deployment details from kwargs if not explicitly provided.
|
161
|
+
if create_deployment_details is None:
|
162
|
+
try:
|
163
|
+
create_deployment_details = CreateModelDeploymentDetails(**kwargs)
|
164
|
+
except ValidationError as ex:
|
165
|
+
custom_errors = build_pydantic_error_message(ex)
|
166
|
+
raise AquaValueError(
|
167
|
+
f"Invalid parameters for creating a model deployment. Error details: {custom_errors}."
|
168
|
+
) from ex
|
169
|
+
|
170
|
+
if not (create_deployment_details.model_id or create_deployment_details.models):
|
171
|
+
raise AquaValueError(
|
172
|
+
"Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided."
|
173
|
+
)
|
174
|
+
|
175
|
+
# Set defaults for compartment and project if not provided.
|
176
|
+
compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID
|
177
|
+
project_id = create_deployment_details.project_id or PROJECT_OCID
|
178
|
+
freeform_tags = create_deployment_details.freeform_tags
|
179
|
+
defined_tags = create_deployment_details.defined_tags
|
180
|
+
|
181
|
+
# validate instance shape availability in compartment
|
182
|
+
available_shapes = [
|
183
|
+
shape.name.lower()
|
184
|
+
for shape in self.list_shapes(
|
185
|
+
compartment_id=create_deployment_details.compartment_id
|
186
|
+
)
|
187
|
+
]
|
188
|
+
|
189
|
+
if create_deployment_details.instance_shape.lower() not in available_shapes:
|
190
|
+
raise AquaValueError(
|
191
|
+
f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' "
|
192
|
+
f"is not available in the {self.region} region. Please choose another shape to deploy the model."
|
193
|
+
)
|
194
|
+
|
195
|
+
# Get container config
|
196
|
+
container_config = get_container_config()
|
197
|
+
|
198
|
+
# Create an AquaModelApp instance once to perform the deployment creation.
|
199
|
+
model_app = AquaModelApp()
|
200
|
+
if create_deployment_details.model_id:
|
201
|
+
logger.debug(
|
202
|
+
f"Single model ({create_deployment_details.model_id}) provided. "
|
203
|
+
"Delegating to single model creation method."
|
204
|
+
)
|
205
|
+
aqua_model = model_app.create(
|
206
|
+
model_id=create_deployment_details.model_id,
|
207
|
+
compartment_id=compartment_id,
|
208
|
+
project_id=project_id,
|
209
|
+
freeform_tags=freeform_tags,
|
210
|
+
defined_tags=defined_tags,
|
211
|
+
)
|
212
|
+
return self._create(
|
213
|
+
aqua_model=aqua_model,
|
214
|
+
create_deployment_details=create_deployment_details,
|
215
|
+
container_config=container_config,
|
216
|
+
)
|
217
|
+
else:
|
218
|
+
model_ids = [model.model_id for model in create_deployment_details.models]
|
219
|
+
try:
|
220
|
+
model_config_summary = self.get_multimodel_deployment_config(
|
221
|
+
model_ids=model_ids, compartment_id=compartment_id
|
222
|
+
)
|
223
|
+
if not model_config_summary.gpu_allocation:
|
224
|
+
raise AquaValueError(model_config_summary.error_message)
|
225
|
+
create_deployment_details.validate_multimodel_deployment_feasibility(
|
226
|
+
models_config_summary=model_config_summary
|
227
|
+
)
|
228
|
+
except ConfigValidationError as err:
|
229
|
+
raise AquaValueError(f"{err}") from err
|
230
|
+
|
231
|
+
service_inference_containers = (
|
232
|
+
AquaContainerConfig.from_container_index_json(
|
233
|
+
config=container_config
|
234
|
+
).inference.values()
|
235
|
+
)
|
236
|
+
|
237
|
+
supported_container_families = [
|
238
|
+
container_config_item.family
|
239
|
+
for container_config_item in service_inference_containers
|
240
|
+
if any(
|
241
|
+
usage in container_config_item.usages
|
242
|
+
for usage in [Usage.MULTI_MODEL, Usage.OTHER]
|
243
|
+
)
|
244
|
+
]
|
245
|
+
|
246
|
+
if not supported_container_families:
|
247
|
+
raise AquaValueError(
|
248
|
+
"Currently, there are no containers that support multi-model deployment."
|
249
|
+
)
|
250
|
+
|
251
|
+
# Check if provided container family supports multi-model deployment
|
252
|
+
if (
|
253
|
+
create_deployment_details.container_family
|
254
|
+
and create_deployment_details.container_family
|
255
|
+
not in supported_container_families
|
256
|
+
):
|
257
|
+
raise AquaValueError(
|
258
|
+
f"Unsupported deployment container '{create_deployment_details.container_family}'. "
|
259
|
+
f"Only {supported_container_families} families are supported for multi-model deployments."
|
260
|
+
)
|
261
|
+
|
262
|
+
# Verify if it matches one of the registered containers and attempt to
|
263
|
+
# extract the container family from there.
|
264
|
+
# If the container is not recognized, we can only issue a warning that
|
265
|
+
# the provided container may not support multi-model deployment.
|
266
|
+
if create_deployment_details.container_image_uri:
|
267
|
+
selected_container_name = ContainerPath(
|
268
|
+
full_path=create_deployment_details.container_image_uri
|
269
|
+
).name
|
270
|
+
|
271
|
+
container_config_item = next(
|
272
|
+
(
|
273
|
+
container_config_item
|
274
|
+
for container_config_item in service_inference_containers
|
275
|
+
if ContainerPath(
|
276
|
+
full_path=f"{container_config_item.name}:{container_config_item.version}"
|
277
|
+
).name.upper()
|
278
|
+
== selected_container_name.upper()
|
279
|
+
),
|
280
|
+
None,
|
281
|
+
)
|
282
|
+
|
283
|
+
if (
|
284
|
+
container_config_item
|
285
|
+
and container_config_item.family not in supported_container_families
|
286
|
+
):
|
287
|
+
raise AquaValueError(
|
288
|
+
f"Unsupported deployment container '{create_deployment_details.container_image_uri}'. "
|
289
|
+
f"Only {supported_container_families} families are supported for multi-model deployments."
|
290
|
+
)
|
291
|
+
|
292
|
+
if not container_config_item:
|
293
|
+
logger.warning(
|
294
|
+
f"The provided container `{create_deployment_details.container_image_uri}` may not support multi-model deployment. "
|
295
|
+
f"Only the following container families are supported: {supported_container_families}."
|
296
|
+
)
|
297
|
+
|
298
|
+
logger.debug(
|
299
|
+
f"Multi models ({model_ids}) provided. Delegating to multi model creation method."
|
300
|
+
)
|
301
|
+
|
302
|
+
aqua_model = model_app.create_multi(
|
303
|
+
models=create_deployment_details.models,
|
304
|
+
compartment_id=compartment_id,
|
305
|
+
project_id=project_id,
|
306
|
+
freeform_tags=freeform_tags,
|
307
|
+
defined_tags=defined_tags,
|
308
|
+
)
|
309
|
+
return self._create_multi(
|
310
|
+
aqua_model=aqua_model,
|
311
|
+
model_config_summary=model_config_summary,
|
312
|
+
create_deployment_details=create_deployment_details,
|
313
|
+
container_config=container_config,
|
314
|
+
)
|
315
|
+
|
316
|
+
def _create(
|
317
|
+
self,
|
318
|
+
aqua_model: DataScienceModel,
|
319
|
+
create_deployment_details: CreateModelDeploymentDetails,
|
320
|
+
container_config: Dict,
|
321
|
+
) -> AquaDeployment:
|
322
|
+
"""Builds the configurations required by single model deployment and creates the deployment.
|
109
323
|
|
110
324
|
Parameters
|
111
325
|
----------
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
description: str
|
121
|
-
The description of the deployment.
|
122
|
-
instance_count: (int, optional). Defaults to 1.
|
123
|
-
The number of instance used for deployment.
|
124
|
-
instance_shape: (str).
|
125
|
-
The shape of the instance used for deployment.
|
126
|
-
log_group_id: (str)
|
127
|
-
The oci logging group id. The access log and predict log share the same log group.
|
128
|
-
access_log_id: (str).
|
129
|
-
The access log OCID for the access logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm
|
130
|
-
predict_log_id: (str).
|
131
|
-
The predict log OCID for the predict logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm
|
132
|
-
bandwidth_mbps: (int). Defaults to 10.
|
133
|
-
The bandwidth limit on the load balancer in Mbps.
|
134
|
-
web_concurrency: str
|
135
|
-
The number of worker processes/threads to handle incoming requests
|
136
|
-
with_bucket_uri(bucket_uri)
|
137
|
-
Sets the bucket uri when uploading large size model.
|
138
|
-
server_port: (int).
|
139
|
-
The server port for docker container image.
|
140
|
-
health_check_port: (int).
|
141
|
-
The health check port for docker container image.
|
142
|
-
env_var : dict, optional
|
143
|
-
Environment variable for the deployment, by default None.
|
144
|
-
container_family: str
|
145
|
-
The image family of model deployment container runtime.
|
146
|
-
memory_in_gbs: float
|
147
|
-
The memory in gbs for the shape selected.
|
148
|
-
ocpus: float
|
149
|
-
The ocpu count for the shape selected.
|
150
|
-
model_file: str
|
151
|
-
The file used for model deployment.
|
152
|
-
private_endpoint_id: str
|
153
|
-
The private endpoint id of model deployment.
|
154
|
-
container_image_uri: str
|
155
|
-
The image of model deployment container runtime, ignored for service managed containers.
|
156
|
-
Required parameter for BYOC based deployments if this parameter was not set during model registration.
|
157
|
-
cmd_var: List[str]
|
158
|
-
The cmd of model deployment container runtime.
|
159
|
-
freeform_tags: dict
|
160
|
-
Freeform tags for the model deployment
|
161
|
-
defined_tags: dict
|
162
|
-
Defined tags for the model deployment
|
326
|
+
aqua_model : DataScienceModel
|
327
|
+
An instance of Aqua data science model.
|
328
|
+
create_deployment_details : CreateModelDeploymentDetails
|
329
|
+
An instance of CreateModelDeploymentDetails containing all required and optional
|
330
|
+
fields for creating a model deployment via Aqua.
|
331
|
+
container_config: Dict
|
332
|
+
Container config dictionary.
|
333
|
+
|
163
334
|
Returns
|
164
335
|
-------
|
165
336
|
AquaDeployment
|
166
|
-
An Aqua deployment instance
|
167
|
-
|
337
|
+
An Aqua deployment instance.
|
168
338
|
"""
|
169
|
-
# TODO validate if the service model has no artifact and if it requires import step before deployment.
|
170
|
-
# Create a model catalog entry in the user compartment
|
171
|
-
aqua_model = AquaModelApp().create(
|
172
|
-
model_id=model_id,
|
173
|
-
compartment_id=compartment_id,
|
174
|
-
project_id=project_id,
|
175
|
-
freeform_tags=freeform_tags,
|
176
|
-
defined_tags=defined_tags,
|
177
|
-
)
|
178
|
-
|
179
339
|
tags = {}
|
180
340
|
for tag in [
|
181
341
|
Tags.AQUA_SERVICE_MODEL_TAG,
|
@@ -189,7 +349,7 @@ class AquaDeploymentApp(AquaApp):
|
|
189
349
|
tags.update({Tags.TASK: aqua_model.freeform_tags.get(Tags.TASK, UNKNOWN)})
|
190
350
|
|
191
351
|
# Set up info to get deployment config
|
192
|
-
config_source_id = model_id
|
352
|
+
config_source_id = create_deployment_details.model_id
|
193
353
|
model_name = aqua_model.display_name
|
194
354
|
|
195
355
|
is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in aqua_model.freeform_tags
|
@@ -209,10 +369,8 @@ class AquaDeploymentApp(AquaApp):
|
|
209
369
|
) from err
|
210
370
|
|
211
371
|
# set up env and cmd var
|
212
|
-
|
213
|
-
|
214
|
-
if not cmd_var:
|
215
|
-
cmd_var = []
|
372
|
+
env_var = create_deployment_details.env_var or {}
|
373
|
+
cmd_var = create_deployment_details.cmd_var or []
|
216
374
|
|
217
375
|
try:
|
218
376
|
model_path_prefix = aqua_model.custom_metadata_list.get(
|
@@ -245,11 +403,13 @@ class AquaDeploymentApp(AquaApp):
|
|
245
403
|
env_var.update({"FT_MODEL": f"{fine_tune_output_path}"})
|
246
404
|
|
247
405
|
container_type_key = self._get_container_type_key(
|
248
|
-
model=aqua_model,
|
406
|
+
model=aqua_model,
|
407
|
+
container_family=create_deployment_details.container_family,
|
249
408
|
)
|
250
409
|
|
251
|
-
container_image_uri =
|
252
|
-
|
410
|
+
container_image_uri = (
|
411
|
+
create_deployment_details.container_image_uri
|
412
|
+
or get_container_image(container_type=container_type_key)
|
253
413
|
)
|
254
414
|
if not container_image_uri:
|
255
415
|
try:
|
@@ -295,6 +455,7 @@ class AquaDeploymentApp(AquaApp):
|
|
295
455
|
and container_type_key.lower()
|
296
456
|
== InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY
|
297
457
|
):
|
458
|
+
model_file = create_deployment_details.model_file
|
298
459
|
if model_file is not None:
|
299
460
|
logger.info(
|
300
461
|
f"Overriding {model_file} as model_file for model {aqua_model.id}."
|
@@ -318,27 +479,24 @@ class AquaDeploymentApp(AquaApp):
|
|
318
479
|
# Fetch the startup cli command for the container
|
319
480
|
# container_index.json will have "containerSpec" section which will provide the cli params for
|
320
481
|
# a given container family
|
321
|
-
container_config = get_container_config()
|
322
482
|
container_spec = container_config.get(ContainerSpec.CONTAINER_SPEC, {}).get(
|
323
483
|
container_type_key, {}
|
324
484
|
)
|
325
485
|
# these params cannot be overridden for Aqua deployments
|
326
486
|
params = container_spec.get(ContainerSpec.CLI_PARM, "")
|
327
|
-
server_port = server_port or container_spec.get(
|
487
|
+
server_port = create_deployment_details.server_port or container_spec.get(
|
328
488
|
ContainerSpec.SERVER_PORT
|
329
|
-
) # Give
|
330
|
-
health_check_port =
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
.
|
339
|
-
|
340
|
-
.get(get_container_params_type(container_type_key), UNKNOWN)
|
341
|
-
)
|
489
|
+
) # Give precedence to the input parameter
|
490
|
+
health_check_port = (
|
491
|
+
create_deployment_details.health_check_port
|
492
|
+
or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT)
|
493
|
+
) # Give precedence to the input parameter
|
494
|
+
|
495
|
+
deployment_config = self.get_deployment_config(model_id=config_source_id)
|
496
|
+
|
497
|
+
config_params = deployment_config.configuration.get(
|
498
|
+
create_deployment_details.instance_shape, ConfigurationItem()
|
499
|
+
).parameters.get(get_container_params_type(container_type_key), UNKNOWN)
|
342
500
|
|
343
501
|
# validate user provided params
|
344
502
|
user_params = env_var.get("PARAMS", UNKNOWN)
|
@@ -372,36 +530,250 @@ class AquaDeploymentApp(AquaApp):
|
|
372
530
|
|
373
531
|
for env in container_spec.get(ContainerSpec.ENV_VARS, []):
|
374
532
|
if isinstance(env, dict):
|
375
|
-
for key,
|
533
|
+
for key, _ in env.items():
|
376
534
|
if key not in env_var:
|
377
535
|
env_var.update(env)
|
378
536
|
|
379
537
|
logger.info(f"Env vars used for deploying {aqua_model.id} :{env_var}")
|
380
538
|
|
539
|
+
tags = {**tags, **(create_deployment_details.freeform_tags or {})}
|
540
|
+
model_type = (
|
541
|
+
AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE
|
542
|
+
)
|
543
|
+
|
544
|
+
return self._create_deployment(
|
545
|
+
create_deployment_details=create_deployment_details,
|
546
|
+
aqua_model_id=aqua_model.id,
|
547
|
+
model_name=model_name,
|
548
|
+
model_type=model_type,
|
549
|
+
container_image_uri=container_image_uri,
|
550
|
+
server_port=server_port,
|
551
|
+
health_check_port=health_check_port,
|
552
|
+
env_var=env_var,
|
553
|
+
tags=tags,
|
554
|
+
cmd_var=cmd_var,
|
555
|
+
)
|
556
|
+
|
557
|
+
def _create_multi(
|
558
|
+
self,
|
559
|
+
aqua_model: DataScienceModel,
|
560
|
+
model_config_summary: ModelDeploymentConfigSummary,
|
561
|
+
create_deployment_details: CreateModelDeploymentDetails,
|
562
|
+
container_config: Dict,
|
563
|
+
) -> AquaDeployment:
|
564
|
+
"""Builds the environment variables required by multi deployment container and creates the deployment.
|
565
|
+
|
566
|
+
Parameters
|
567
|
+
----------
|
568
|
+
model_config_summary : ModelDeploymentConfigSummary
|
569
|
+
Summary Model Deployment configuration for the group of models.
|
570
|
+
aqua_model : DataScienceModel
|
571
|
+
An instance of Aqua data science model.
|
572
|
+
create_deployment_details : CreateModelDeploymentDetails
|
573
|
+
An instance of CreateModelDeploymentDetails containing all required and optional
|
574
|
+
fields for creating a model deployment via Aqua.
|
575
|
+
container_config: Dict
|
576
|
+
Container config dictionary.
|
577
|
+
Returns
|
578
|
+
-------
|
579
|
+
AquaDeployment
|
580
|
+
An Aqua deployment instance.
|
581
|
+
"""
|
582
|
+
model_config = []
|
583
|
+
model_name_list = []
|
584
|
+
env_var = {**(create_deployment_details.env_var or UNKNOWN_DICT)}
|
585
|
+
|
586
|
+
container_type_key = self._get_container_type_key(
|
587
|
+
model=aqua_model,
|
588
|
+
container_family=create_deployment_details.container_family,
|
589
|
+
)
|
590
|
+
container_spec = container_config.get(
|
591
|
+
ContainerSpec.CONTAINER_SPEC, UNKNOWN_DICT
|
592
|
+
).get(container_type_key, UNKNOWN_DICT)
|
593
|
+
|
594
|
+
container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip()
|
595
|
+
|
596
|
+
for model in create_deployment_details.models:
|
597
|
+
user_params = build_params_string(model.env_var)
|
598
|
+
if user_params:
|
599
|
+
restricted_params = self._find_restricted_params(
|
600
|
+
container_params, user_params, container_type_key
|
601
|
+
)
|
602
|
+
if restricted_params:
|
603
|
+
selected_model = model.model_name or model.model_id
|
604
|
+
raise AquaValueError(
|
605
|
+
f"Parameters {restricted_params} are set by Aqua "
|
606
|
+
f"and cannot be overridden or are invalid."
|
607
|
+
f"Select other parameters for model {selected_model}."
|
608
|
+
)
|
609
|
+
|
610
|
+
# replaces `--served-model-name` with user's model name
|
611
|
+
container_params_dict = get_params_dict(container_params)
|
612
|
+
container_params_dict.update({"--served-model-name": model.model_name})
|
613
|
+
# replaces `--tensor-parallel-size` with model gpu count
|
614
|
+
container_params_dict.update({"--tensor-parallel-size": model.gpu_count})
|
615
|
+
params = build_params_string(container_params_dict)
|
616
|
+
|
617
|
+
deployment_config = model_config_summary.deployment_config.get(
|
618
|
+
model.model_id, AquaDeploymentConfig()
|
619
|
+
).configuration.get(
|
620
|
+
create_deployment_details.instance_shape, ConfigurationItem()
|
621
|
+
)
|
622
|
+
|
623
|
+
# finds the corresponding deployment parameters based on the gpu count
|
624
|
+
# and combines them with user's parameters. Existing deployment parameters
|
625
|
+
# will be overridden by user's parameters.
|
626
|
+
params_found = False
|
627
|
+
for item in deployment_config.multi_model_deployment:
|
628
|
+
if (
|
629
|
+
model.gpu_count
|
630
|
+
and item.gpu_count
|
631
|
+
and item.gpu_count == model.gpu_count
|
632
|
+
):
|
633
|
+
config_parameters = item.parameters.get(
|
634
|
+
get_container_params_type(container_type_key), UNKNOWN
|
635
|
+
)
|
636
|
+
params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
|
637
|
+
params_found = True
|
638
|
+
break
|
639
|
+
|
640
|
+
if not params_found and deployment_config.parameters:
|
641
|
+
config_parameters = deployment_config.parameters.get(
|
642
|
+
get_container_params_type(container_type_key), UNKNOWN
|
643
|
+
)
|
644
|
+
params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
|
645
|
+
params_found = True
|
646
|
+
|
647
|
+
# if no config parameters found, append user parameters directly.
|
648
|
+
if not params_found:
|
649
|
+
params = f"{params} {user_params}".strip()
|
650
|
+
|
651
|
+
artifact_path_prefix = model.artifact_location.rstrip("/")
|
652
|
+
if ObjectStorageDetails.is_oci_path(artifact_path_prefix):
|
653
|
+
os_path = ObjectStorageDetails.from_path(artifact_path_prefix)
|
654
|
+
artifact_path_prefix = os_path.filepath.rstrip("/")
|
655
|
+
|
656
|
+
model_config.append({"params": params, "model_path": artifact_path_prefix})
|
657
|
+
model_name_list.append(model.model_name)
|
658
|
+
|
659
|
+
env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})})
|
660
|
+
|
661
|
+
for env in container_spec.get(ContainerSpec.ENV_VARS, []):
|
662
|
+
if isinstance(env, dict):
|
663
|
+
for key, _ in env.items():
|
664
|
+
if key not in env_var:
|
665
|
+
env_var.update(env)
|
666
|
+
|
667
|
+
logger.info(f"Env vars used for deploying {aqua_model.id} : {env_var}.")
|
668
|
+
|
669
|
+
container_image_uri = (
|
670
|
+
create_deployment_details.container_image_uri
|
671
|
+
or get_container_image(container_type=container_type_key)
|
672
|
+
)
|
673
|
+
server_port = create_deployment_details.server_port or container_spec.get(
|
674
|
+
ContainerSpec.SERVER_PORT
|
675
|
+
)
|
676
|
+
health_check_port = (
|
677
|
+
create_deployment_details.health_check_port
|
678
|
+
or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT)
|
679
|
+
)
|
680
|
+
tags = {
|
681
|
+
Tags.AQUA_MODEL_ID_TAG: aqua_model.id,
|
682
|
+
Tags.MULTIMODEL_TYPE_TAG: "true",
|
683
|
+
Tags.AQUA_TAG: "active",
|
684
|
+
**(create_deployment_details.freeform_tags or UNKNOWN_DICT),
|
685
|
+
}
|
686
|
+
|
687
|
+
model_name = f"{MODEL_NAME_DELIMITER} ".join(model_name_list)
|
688
|
+
|
689
|
+
aqua_deployment = self._create_deployment(
|
690
|
+
create_deployment_details=create_deployment_details,
|
691
|
+
aqua_model_id=aqua_model.id,
|
692
|
+
model_name=model_name,
|
693
|
+
model_type=AQUA_MODEL_TYPE_MULTI,
|
694
|
+
container_image_uri=container_image_uri,
|
695
|
+
server_port=server_port,
|
696
|
+
health_check_port=health_check_port,
|
697
|
+
env_var=env_var,
|
698
|
+
tags=tags,
|
699
|
+
)
|
700
|
+
aqua_deployment.models = create_deployment_details.models
|
701
|
+
return aqua_deployment
|
702
|
+
|
703
|
+
def _create_deployment(
|
704
|
+
self,
|
705
|
+
create_deployment_details: CreateModelDeploymentDetails,
|
706
|
+
aqua_model_id: str,
|
707
|
+
model_name: str,
|
708
|
+
model_type: str,
|
709
|
+
container_image_uri: str,
|
710
|
+
server_port: str,
|
711
|
+
health_check_port: str,
|
712
|
+
env_var: dict,
|
713
|
+
tags: dict,
|
714
|
+
cmd_var: Optional[dict] = None,
|
715
|
+
):
|
716
|
+
"""Creates data science model deployment.
|
717
|
+
|
718
|
+
Parameters
|
719
|
+
----------
|
720
|
+
create_deployment_details : CreateModelDeploymentDetails
|
721
|
+
An instance of CreateModelDeploymentDetails containing all required and optional
|
722
|
+
fields for creating a model deployment via Aqua.
|
723
|
+
aqua_model_id: str
|
724
|
+
The id of the aqua model to be deployed.
|
725
|
+
model_name: str
|
726
|
+
The name of the aqua model to be deployed. For a multi-model deployment, it is a single string containing all model names joined by `MODEL_NAME_DELIMITER` followed by a space.
|
727
|
+
model_type: str
|
728
|
+
The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`.
|
729
|
+
container_image_uri: str
|
730
|
+
The container image uri to deploy the model.
|
731
|
+
server_port: str
|
732
|
+
The service port of the container image.
|
733
|
+
health_check_port: str
|
734
|
+
The health check port of the container image.
|
735
|
+
env_var: dict
|
736
|
+
The environment variables input for the deployment.
|
737
|
+
tags: dict
|
738
|
+
The tags input for the deployment.
|
739
|
+
cmd_var: dict, optional
|
740
|
+
The cmd arguments input for the deployment.
|
741
|
+
|
742
|
+
Returns
|
743
|
+
-------
|
744
|
+
AquaDeployment
|
745
|
+
An Aqua deployment instance.
|
746
|
+
"""
|
381
747
|
# Start model deployment
|
382
748
|
# configure model deployment infrastructure
|
383
749
|
infrastructure = (
|
384
750
|
ModelDeploymentInfrastructure()
|
385
|
-
.with_project_id(project_id)
|
386
|
-
.with_compartment_id(
|
387
|
-
|
388
|
-
|
389
|
-
.
|
390
|
-
.
|
391
|
-
.
|
751
|
+
.with_project_id(create_deployment_details.project_id or PROJECT_OCID)
|
752
|
+
.with_compartment_id(
|
753
|
+
create_deployment_details.compartment_id or COMPARTMENT_OCID
|
754
|
+
)
|
755
|
+
.with_shape_name(create_deployment_details.instance_shape)
|
756
|
+
.with_bandwidth_mbps(create_deployment_details.bandwidth_mbps)
|
757
|
+
.with_replica(create_deployment_details.instance_count)
|
758
|
+
.with_web_concurrency(create_deployment_details.web_concurrency)
|
759
|
+
.with_private_endpoint_id(create_deployment_details.private_endpoint_id)
|
392
760
|
.with_access_log(
|
393
|
-
log_group_id=log_group_id,
|
394
|
-
log_id=access_log_id,
|
761
|
+
log_group_id=create_deployment_details.log_group_id,
|
762
|
+
log_id=create_deployment_details.access_log_id,
|
395
763
|
)
|
396
764
|
.with_predict_log(
|
397
|
-
log_group_id=log_group_id,
|
398
|
-
log_id=predict_log_id,
|
765
|
+
log_group_id=create_deployment_details.log_group_id,
|
766
|
+
log_id=create_deployment_details.predict_log_id,
|
399
767
|
)
|
400
768
|
)
|
401
|
-
if
|
769
|
+
if (
|
770
|
+
create_deployment_details.memory_in_gbs
|
771
|
+
and create_deployment_details.ocpus
|
772
|
+
and infrastructure.shape_name.endswith("Flex")
|
773
|
+
):
|
402
774
|
infrastructure.with_shape_config_details(
|
403
|
-
ocpus=ocpus,
|
404
|
-
memory_in_gbs=memory_in_gbs,
|
775
|
+
ocpus=create_deployment_details.ocpus,
|
776
|
+
memory_in_gbs=create_deployment_details.memory_in_gbs,
|
405
777
|
)
|
406
778
|
# configure model deployment runtime
|
407
779
|
container_runtime = (
|
@@ -411,7 +783,7 @@ class AquaDeploymentApp(AquaApp):
|
|
411
783
|
.with_health_check_port(health_check_port)
|
412
784
|
.with_env(env_var)
|
413
785
|
.with_deployment_mode(ModelDeploymentMode.HTTPS)
|
414
|
-
.with_model_uri(
|
786
|
+
.with_model_uri(aqua_model_id)
|
415
787
|
.with_region(self.region)
|
416
788
|
.with_overwrite_existing_artifact(True)
|
417
789
|
.with_remove_existing_artifact(True)
|
@@ -419,24 +791,20 @@ class AquaDeploymentApp(AquaApp):
|
|
419
791
|
if cmd_var:
|
420
792
|
container_runtime.with_cmd(cmd_var)
|
421
793
|
|
422
|
-
tags = {**tags, **(freeform_tags or {})}
|
423
794
|
# configure model deployment and deploy model on container runtime
|
424
795
|
deployment = (
|
425
796
|
ModelDeployment()
|
426
|
-
.with_display_name(display_name)
|
427
|
-
.with_description(description)
|
797
|
+
.with_display_name(create_deployment_details.display_name)
|
798
|
+
.with_description(create_deployment_details.description)
|
428
799
|
.with_freeform_tags(**tags)
|
429
|
-
.with_defined_tags(**(defined_tags or {}))
|
800
|
+
.with_defined_tags(**(create_deployment_details.defined_tags or {}))
|
430
801
|
.with_infrastructure(infrastructure)
|
431
802
|
.with_runtime(container_runtime)
|
432
803
|
).deploy(wait_for_completion=False)
|
433
804
|
|
434
|
-
deployment_id = deployment.
|
805
|
+
deployment_id = deployment.id
|
435
806
|
logger.info(
|
436
|
-
f"Aqua model deployment {deployment_id} created for model {
|
437
|
-
)
|
438
|
-
model_type = (
|
439
|
-
AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE
|
807
|
+
f"Aqua model deployment {deployment_id} created for model {aqua_model_id}."
|
440
808
|
)
|
441
809
|
|
442
810
|
# we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
|
@@ -453,7 +821,7 @@ class AquaDeploymentApp(AquaApp):
|
|
453
821
|
self.telemetry.record_event_async(
|
454
822
|
category=f"aqua/{model_type}/deployment/create",
|
455
823
|
action="shape",
|
456
|
-
detail=instance_shape,
|
824
|
+
detail=create_deployment_details.instance_shape,
|
457
825
|
value=model_name,
|
458
826
|
)
|
459
827
|
|
@@ -627,10 +995,43 @@ class AquaDeploymentApp(AquaApp):
|
|
627
995
|
source_id=model_deployment.id,
|
628
996
|
)
|
629
997
|
|
998
|
+
aqua_deployment = AquaDeployment.from_oci_model_deployment(
|
999
|
+
model_deployment, self.region
|
1000
|
+
)
|
1001
|
+
|
1002
|
+
if Tags.MULTIMODEL_TYPE_TAG in model_deployment.freeform_tags:
|
1003
|
+
aqua_model_id = model_deployment.freeform_tags.get(
|
1004
|
+
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
|
1005
|
+
)
|
1006
|
+
if not aqua_model_id:
|
1007
|
+
raise AquaRuntimeError(
|
1008
|
+
f"Invalid multi model deployment {model_deployment_id}."
|
1009
|
+
f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment."
|
1010
|
+
)
|
1011
|
+
aqua_model = DataScienceModel.from_id(aqua_model_id)
|
1012
|
+
custom_metadata_list = aqua_model.custom_metadata_list
|
1013
|
+
multi_model_metadata_value = custom_metadata_list.get(
|
1014
|
+
ModelCustomMetadataFields.MULTIMODEL_METADATA,
|
1015
|
+
ModelCustomMetadataItem(
|
1016
|
+
key=ModelCustomMetadataFields.MULTIMODEL_METADATA
|
1017
|
+
),
|
1018
|
+
).value
|
1019
|
+
if not multi_model_metadata_value:
|
1020
|
+
raise AquaRuntimeError(
|
1021
|
+
f"Invalid multi-model deployment: {model_deployment_id}. "
|
1022
|
+
f"Ensure that the required custom metadata `{ModelCustomMetadataFields.MULTIMODEL_METADATA}` is added to the AQUA multi-model `{aqua_model.display_name}` ({aqua_model.id})."
|
1023
|
+
)
|
1024
|
+
multi_model_metadata = json.loads(
|
1025
|
+
aqua_model.dsc_model.get_custom_metadata_artifact(
|
1026
|
+
metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA
|
1027
|
+
).decode("utf-8")
|
1028
|
+
)
|
1029
|
+
aqua_deployment.models = [
|
1030
|
+
AquaMultiModelRef(**metadata) for metadata in multi_model_metadata
|
1031
|
+
]
|
1032
|
+
|
630
1033
|
return AquaDeploymentDetail(
|
631
|
-
**vars(
|
632
|
-
AquaDeployment.from_oci_model_deployment(model_deployment, self.region)
|
633
|
-
),
|
1034
|
+
**vars(aqua_deployment),
|
634
1035
|
log_group=AquaResourceIdentifier(
|
635
1036
|
log_group_id, log_group_name, log_group_url
|
636
1037
|
),
|
@@ -640,7 +1041,7 @@ class AquaDeploymentApp(AquaApp):
|
|
640
1041
|
@telemetry(
|
641
1042
|
entry_point="plugin=deployment&action=get_deployment_config", name="aqua"
|
642
1043
|
)
|
643
|
-
def get_deployment_config(self, model_id: str) ->
|
1044
|
+
def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
|
644
1045
|
"""Gets the deployment config of given Aqua model.
|
645
1046
|
|
646
1047
|
Parameters
|
@@ -650,20 +1051,83 @@ class AquaDeploymentApp(AquaApp):
|
|
650
1051
|
|
651
1052
|
Returns
|
652
1053
|
-------
|
653
|
-
|
654
|
-
|
1054
|
+
AquaDeploymentConfig:
|
1055
|
+
An instance of AquaDeploymentConfig.
|
655
1056
|
"""
|
656
1057
|
config = self.get_config(model_id, AQUA_MODEL_DEPLOYMENT_CONFIG).config
|
657
1058
|
if not config:
|
658
1059
|
logger.debug(
|
659
1060
|
f"Deployment config for custom model: {model_id} is not available. Use defaults."
|
660
1061
|
)
|
661
|
-
return config
|
1062
|
+
return AquaDeploymentConfig(**(config or UNKNOWN_DICT))
|
1063
|
+
|
1064
|
+
@telemetry(
    entry_point="plugin=deployment&action=get_multimodel_deployment_config",
    name="aqua",
)
def get_multimodel_deployment_config(
    self,
    model_ids: List[str],
    primary_model_id: Optional[str] = None,
    **kwargs: Dict,
) -> ModelDeploymentConfigSummary:
    """
    Retrieves the deployment configuration for multiple models and calculates
    GPU allocations across all compatible shapes.

    More details:
    https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#get_multimodel_deployment_config

    CLI example:
    ads aqua deployment get_multimodel_deployment_config --model_ids '["ocid1.datasciencemodel.oc1.iad.OCID"]'

    If a primary model ID is provided, GPU allocation will prioritize that model
    when selecting compatible shapes.

    Example:
    Assume three models, A, B, and C, all support the same shape "BM.GPU.H100.8",
    and each supports the GPU counts 1, 2, 4, 8 for that shape.
    If no primary model is specified, valid allocations could be:
    [2, 4, 2], [2, 2, 4], or [4, 2, 2].
    If B is set as the primary model, the allocation will be [2, 4, 2], where B
    receives the maximum available GPU count.

    Parameters
    ----------
    model_ids : List[str]
        A list of OCIDs for the Aqua models.
    primary_model_id : Optional[str]
        The OCID of the primary Aqua model. If provided, GPU allocation will prioritize
        this model. Otherwise, GPUs will be evenly allocated.
    **kwargs: Dict
        - compartment_id: str
            The compartment OCID to retrieve the model deployment shapes.
            Falls back to `COMPARTMENT_OCID` when it is missing or empty.
        Any other keyword argument is not used by this method.

    Returns
    -------
    ModelDeploymentConfigSummary
        A summary of the model deployment configurations and GPU allocations.

    Raises
    ------
    AquaValueError
        If `model_ids` is empty or not provided.
    """
    if not model_ids:
        raise AquaValueError(
            "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration."
        )

    # Use `or` rather than a pop default so that an explicit
    # `compartment_id=None` (e.g. an optional argument forwarded by a caller)
    # also resolves to the default compartment instead of reaching the
    # shape listing call as None.
    compartment_id = kwargs.pop("compartment_id", None) or COMPARTMENT_OCID

    # Get all model deployment shapes available in the given compartment.
    available_shapes = self.list_shapes(compartment_id=compartment_id)

    return MultiModelDeploymentConfigLoader(
        deployment_app=self,
    ).load(
        shapes=available_shapes,
        model_ids=model_ids,
        primary_model_id=primary_model_id,
    )
|
662
1125
|
|
663
1126
|
def get_deployment_default_params(
|
664
1127
|
self,
|
665
1128
|
model_id: str,
|
666
1129
|
instance_shape: str,
|
1130
|
+
gpu_count: int = None,
|
667
1131
|
) -> List[str]:
|
668
1132
|
"""Gets the default params set in the deployment configs for the given model and instance shape.
|
669
1133
|
|
@@ -675,6 +1139,9 @@ class AquaDeploymentApp(AquaApp):
|
|
675
1139
|
instance_shape: (str).
|
676
1140
|
The shape of the instance used for deployment.
|
677
1141
|
|
1142
|
+
gpu_count: (int, optional).
|
1143
|
+
The number of GPUs used by the Aqua model. Defaults to None.
|
1144
|
+
|
678
1145
|
Returns
|
679
1146
|
-------
|
680
1147
|
List[str]:
|
@@ -683,6 +1150,7 @@ class AquaDeploymentApp(AquaApp):
|
|
683
1150
|
|
684
1151
|
"""
|
685
1152
|
default_params = []
|
1153
|
+
config_params = {}
|
686
1154
|
model = DataScienceModel.from_id(model_id)
|
687
1155
|
try:
|
688
1156
|
container_type_key = model.custom_metadata_list.get(
|
@@ -699,12 +1167,26 @@ class AquaDeploymentApp(AquaApp):
|
|
699
1167
|
and container_type_key in InferenceContainerTypeFamily.values()
|
700
1168
|
):
|
701
1169
|
deployment_config = self.get_deployment_config(model_id)
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
.get("parameters", UNKNOWN_DICT)
|
706
|
-
.get(get_container_params_type(container_type_key), UNKNOWN)
|
1170
|
+
|
1171
|
+
instance_shape_config = deployment_config.configuration.get(
|
1172
|
+
instance_shape, ConfigurationItem()
|
707
1173
|
)
|
1174
|
+
|
1175
|
+
if instance_shape_config.multi_model_deployment and gpu_count:
|
1176
|
+
gpu_params = instance_shape_config.multi_model_deployment
|
1177
|
+
|
1178
|
+
for gpu_config in gpu_params:
|
1179
|
+
if gpu_config.gpu_count == gpu_count:
|
1180
|
+
config_params = gpu_config.parameters.get(
|
1181
|
+
get_container_params_type(container_type_key), UNKNOWN
|
1182
|
+
)
|
1183
|
+
break
|
1184
|
+
|
1185
|
+
else:
|
1186
|
+
config_params = instance_shape_config.parameters.get(
|
1187
|
+
get_container_params_type(container_type_key), UNKNOWN
|
1188
|
+
)
|
1189
|
+
|
708
1190
|
if config_params:
|
709
1191
|
params_list = get_params_list(config_params)
|
710
1192
|
restricted_params_set = get_restricted_params_by_container(
|
@@ -799,3 +1281,40 @@ class AquaDeploymentApp(AquaApp):
|
|
799
1281
|
restricted_params.append(key.lstrip("-"))
|
800
1282
|
|
801
1283
|
return restricted_params
|
1284
|
+
|
1285
|
+
@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
    """Lists the valid model deployment shapes.

    The result is memoized by the `cached` decorator in a single-entry
    `TTLCache` whose entries expire after five minutes.

    Parameters
    ----------
    kwargs
        Keyword arguments, such as compartment_id
        for `list_call_get_all_results <https://docs.oracle.com/en-us/iaas/tools/python/2.118.1/api/pagination.html#oci.pagination.list_call_get_all_results>`_.
        When `compartment_id` is missing or empty, `COMPARTMENT_OCID` is used.

    Returns
    -------
    List[ComputeShapeSummary]:
        The list of the model deployment shapes. Each entry carries the GPU
        specs found in the GPU shapes index for that shape name, if any.
    """
    # Use `or` rather than a pop default so that an explicit
    # `compartment_id=None` also resolves to the default compartment instead
    # of being sent to the service as None.
    compartment_id = kwargs.pop("compartment_id", None) or COMPARTMENT_OCID
    oci_shapes: list[ModelDeploymentShapeSummary] = self.list_resource(
        self.ds_client.list_model_deployment_shapes,
        compartment_id=compartment_id,
        **kwargs,
    )

    gpu_specs = load_gpu_shapes_index()

    return [
        ComputeShapeSummary(
            core_count=oci_shape.core_count,
            memory_in_gbs=oci_shape.memory_in_gbs,
            shape_series=oci_shape.shape_series,
            name=oci_shape.name,
            # Look the shape up by its exact name first, then by its
            # upper-cased name as a fallback.
            gpu_specs=gpu_specs.shapes.get(oci_shape.name)
            or gpu_specs.shapes.get(oci_shape.name.upper()),
        )
        for oci_shape in oci_shapes
    ]
|