oracle-ads 2.13.4__py3-none-any.whl → 2.13.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,14 +2,27 @@
2
2
  # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
3
3
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
4
 
5
+ import json
5
6
  import shlex
7
+ from datetime import datetime, timedelta
6
8
  from typing import Dict, List, Optional, Union
7
9
 
10
+ from cachetools import TTLCache, cached
11
+ from oci.data_science.models import ModelDeploymentShapeSummary
12
+ from pydantic import ValidationError
13
+
8
14
  from ads.aqua.app import AquaApp, logger
9
- from ads.aqua.common.entities import ContainerSpec
15
+ from ads.aqua.common.entities import (
16
+ AquaMultiModelRef,
17
+ ComputeShapeSummary,
18
+ ContainerPath,
19
+ ContainerSpec,
20
+ )
10
21
  from ads.aqua.common.enums import InferenceContainerTypeFamily, ModelFormat, Tags
11
22
  from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
12
23
  from ads.aqua.common.utils import (
24
+ build_params_string,
25
+ build_pydantic_error_message,
13
26
  get_combined_params,
14
27
  get_container_config,
15
28
  get_container_image,
@@ -20,19 +33,34 @@ from ads.aqua.common.utils import (
20
33
  get_params_list,
21
34
  get_resource_name,
22
35
  get_restricted_params_by_container,
36
+ load_gpu_shapes_index,
23
37
  validate_cmd_var,
24
38
  )
39
+ from ads.aqua.config.container_config import AquaContainerConfig, Usage
25
40
  from ads.aqua.constants import (
26
41
  AQUA_MODEL_ARTIFACT_FILE,
27
42
  AQUA_MODEL_TYPE_CUSTOM,
43
+ AQUA_MODEL_TYPE_MULTI,
28
44
  AQUA_MODEL_TYPE_SERVICE,
45
+ AQUA_MULTI_MODEL_CONFIG,
29
46
  MODEL_BY_REFERENCE_OSS_PATH_KEY,
47
+ MODEL_NAME_DELIMITER,
30
48
  UNKNOWN_DICT,
31
49
  )
32
50
  from ads.aqua.data import AquaResourceIdentifier
33
51
  from ads.aqua.finetuning.finetuning import FineTuneCustomMetadata
34
52
  from ads.aqua.model import AquaModelApp
35
- from ads.aqua.modeldeployment.entities import AquaDeployment, AquaDeploymentDetail
53
+ from ads.aqua.model.constants import ModelCustomMetadataFields
54
+ from ads.aqua.modeldeployment.entities import (
55
+ AquaDeployment,
56
+ AquaDeploymentConfig,
57
+ AquaDeploymentDetail,
58
+ ConfigurationItem,
59
+ ConfigValidationError,
60
+ CreateModelDeploymentDetails,
61
+ ModelDeploymentConfigSummary,
62
+ )
63
+ from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
36
64
  from ads.common.object_storage_details import ObjectStorageDetails
37
65
  from ads.common.utils import UNKNOWN, get_log_links
38
66
  from ads.config import (
@@ -41,6 +69,7 @@ from ads.config import (
41
69
  AQUA_DEPLOYMENT_CONTAINER_URI_METADATA_NAME,
42
70
  AQUA_MODEL_DEPLOYMENT_CONFIG,
43
71
  COMPARTMENT_OCID,
72
+ PROJECT_OCID,
44
73
  )
45
74
  from ads.model.datascience_model import DataScienceModel
46
75
  from ads.model.deployment import (
@@ -49,6 +78,7 @@ from ads.model.deployment import (
49
78
  ModelDeploymentInfrastructure,
50
79
  ModelDeploymentMode,
51
80
  )
81
+ from ads.model.model_metadata import ModelCustomMetadataItem
52
82
  from ads.telemetry import telemetry
53
83
 
54
84
 
@@ -66,8 +96,13 @@ class AquaDeploymentApp(AquaApp):
66
96
  Retrieves details of an Aqua model deployment by its unique identifier.
67
97
  list(**kwargs) -> List[AquaModelSummary]:
68
98
  Lists all Aqua deployments within a specified compartment and/or project.
69
- get_deployment_config(self, model_id: str) -> Dict:
99
+ get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
70
100
  Gets the deployment config of given Aqua model.
101
+ get_multimodel_deployment_config(self, model_ids: List[str],...) -> ModelDeploymentConfigSummary:
102
+ Retrieves the deployment configuration for multiple Aqua models and calculates
103
+ the GPU allocations for all compatible shapes.
104
+ list_shapes(self, **kwargs) -> List[Dict]:
105
+ Lists the valid model deployment shapes.
71
106
 
72
107
  Note:
73
108
  Use `ads aqua deployment <method_name> --help` to get more details on the parameters available.
@@ -79,103 +114,228 @@ class AquaDeploymentApp(AquaApp):
79
114
  @telemetry(entry_point="plugin=deployment&action=create", name="aqua")
80
115
  def create(
81
116
  self,
82
- model_id: str,
83
- instance_shape: str,
84
- display_name: str,
85
- instance_count: int = None,
86
- log_group_id: str = None,
87
- access_log_id: str = None,
88
- predict_log_id: str = None,
89
- compartment_id: str = None,
90
- project_id: str = None,
91
- description: str = None,
92
- bandwidth_mbps: int = None,
93
- web_concurrency: int = None,
94
- server_port: int = None,
95
- health_check_port: int = None,
96
- env_var: Dict = None,
97
- container_family: str = None,
98
- memory_in_gbs: Optional[float] = None,
99
- ocpus: Optional[float] = None,
100
- model_file: Optional[str] = None,
101
- private_endpoint_id: Optional[str] = None,
102
- container_image_uri: Optional[None] = None,
103
- cmd_var: List[str] = None,
104
- freeform_tags: Optional[dict] = None,
105
- defined_tags: Optional[dict] = None,
117
+ create_deployment_details: Optional[CreateModelDeploymentDetails] = None,
118
+ **kwargs,
106
119
  ) -> "AquaDeployment":
107
120
  """
108
- Creates a new Aqua deployment
121
+ Creates a new Aqua model deployment.\n
122
+ For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#create-model-deployment
123
+
124
+ Args:
125
+ create_deployment_details : CreateModelDeploymentDetails, optional
126
+ An instance of CreateModelDeploymentDetails containing all required and optional
127
+ fields for creating a model deployment via Aqua.
128
+ kwargs:
129
+ instance_shape (str): The instance shape used for deployment.
130
+ display_name (str): The name of the model deployment.
131
+ compartment_id (Optional[str]): The compartment OCID.
132
+ project_id (Optional[str]): The project OCID.
133
+ description (Optional[str]): The description of the deployment.
134
+ model_id (Optional[str]): The model OCID to deploy.
135
+ models (Optional[List[AquaMultiModelRef]]): List of models for multimodel deployment.
136
+ instance_count (int): Number of instances used for deployment.
137
+ log_group_id (Optional[str]): OCI logging group ID for logs.
138
+ access_log_id (Optional[str]): OCID for access logs.
139
+ predict_log_id (Optional[str]): OCID for prediction logs.
140
+ bandwidth_mbps (Optional[int]): Bandwidth limit on the load balancer in Mbps.
141
+ web_concurrency (Optional[int]): Number of worker processes/threads for handling requests.
142
+ server_port (Optional[int]): Server port for the Docker container image.
143
+ health_check_port (Optional[int]): Health check port for the Docker container image.
144
+ env_var (Optional[Dict[str, str]]): Environment variables for deployment.
145
+ container_family (Optional[str]): Image family of the model deployment container runtime.
146
+ memory_in_gbs (Optional[float]): Memory (in GB) for the selected shape.
147
+ ocpus (Optional[float]): OCPU count for the selected shape.
148
+ model_file (Optional[str]): File used for model deployment.
149
+ private_endpoint_id (Optional[str]): Private endpoint ID for model deployment.
150
+ container_image_uri (Optional[str]): Image URI for model deployment container runtime.
151
+ cmd_var (Optional[List[str]]): Command variables for the container runtime.
152
+ freeform_tags (Optional[Dict]): Freeform tags for model deployment.
153
+ defined_tags (Optional[Dict]): Defined tags for model deployment.
154
+
155
+ Returns
156
+ -------
157
+ AquaDeployment
158
+ An Aqua deployment instance.
159
+ """
160
+ # Build deployment details from kwargs if not explicitly provided.
161
+ if create_deployment_details is None:
162
+ try:
163
+ create_deployment_details = CreateModelDeploymentDetails(**kwargs)
164
+ except ValidationError as ex:
165
+ custom_errors = build_pydantic_error_message(ex)
166
+ raise AquaValueError(
167
+ f"Invalid parameters for creating a model deployment. Error details: {custom_errors}."
168
+ ) from ex
169
+
170
+ if not (create_deployment_details.model_id or create_deployment_details.models):
171
+ raise AquaValueError(
172
+ "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided."
173
+ )
174
+
175
+ # Set defaults for compartment and project if not provided.
176
+ compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID
177
+ project_id = create_deployment_details.project_id or PROJECT_OCID
178
+ freeform_tags = create_deployment_details.freeform_tags
179
+ defined_tags = create_deployment_details.defined_tags
180
+
181
+ # validate instance shape availability in compartment
182
+ available_shapes = [
183
+ shape.name.lower()
184
+ for shape in self.list_shapes(
185
+ compartment_id=create_deployment_details.compartment_id
186
+ )
187
+ ]
188
+
189
+ if create_deployment_details.instance_shape.lower() not in available_shapes:
190
+ raise AquaValueError(
191
+ f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' "
192
+ f"is not available in the {self.region} region. Please choose another shape to deploy the model."
193
+ )
194
+
195
+ # Get container config
196
+ container_config = get_container_config()
197
+
198
+ # Create an AquaModelApp instance once to perform the deployment creation.
199
+ model_app = AquaModelApp()
200
+ if create_deployment_details.model_id:
201
+ logger.debug(
202
+ f"Single model ({create_deployment_details.model_id}) provided. "
203
+ "Delegating to single model creation method."
204
+ )
205
+ aqua_model = model_app.create(
206
+ model_id=create_deployment_details.model_id,
207
+ compartment_id=compartment_id,
208
+ project_id=project_id,
209
+ freeform_tags=freeform_tags,
210
+ defined_tags=defined_tags,
211
+ )
212
+ return self._create(
213
+ aqua_model=aqua_model,
214
+ create_deployment_details=create_deployment_details,
215
+ container_config=container_config,
216
+ )
217
+ else:
218
+ model_ids = [model.model_id for model in create_deployment_details.models]
219
+ try:
220
+ model_config_summary = self.get_multimodel_deployment_config(
221
+ model_ids=model_ids, compartment_id=compartment_id
222
+ )
223
+ if not model_config_summary.gpu_allocation:
224
+ raise AquaValueError(model_config_summary.error_message)
225
+ create_deployment_details.validate_multimodel_deployment_feasibility(
226
+ models_config_summary=model_config_summary
227
+ )
228
+ except ConfigValidationError as err:
229
+ raise AquaValueError(f"{err}") from err
230
+
231
+ service_inference_containers = (
232
+ AquaContainerConfig.from_container_index_json(
233
+ config=container_config
234
+ ).inference.values()
235
+ )
236
+
237
+ supported_container_families = [
238
+ container_config_item.family
239
+ for container_config_item in service_inference_containers
240
+ if any(
241
+ usage in container_config_item.usages
242
+ for usage in [Usage.MULTI_MODEL, Usage.OTHER]
243
+ )
244
+ ]
245
+
246
+ if not supported_container_families:
247
+ raise AquaValueError(
248
+ "Currently, there are no containers that support multi-model deployment."
249
+ )
250
+
251
+ # Check if provided container family supports multi-model deployment
252
+ if (
253
+ create_deployment_details.container_family
254
+ and create_deployment_details.container_family
255
+ not in supported_container_families
256
+ ):
257
+ raise AquaValueError(
258
+ f"Unsupported deployment container '{create_deployment_details.container_family}'. "
259
+ f"Only {supported_container_families} families are supported for multi-model deployments."
260
+ )
261
+
262
+ # Verify if it matches one of the registered containers and attempt to
263
+ # extract the container family from there.
264
+ # If the container is not recognized, we can only issue a warning that
265
+ # the provided container may not support multi-model deployment.
266
+ if create_deployment_details.container_image_uri:
267
+ selected_container_name = ContainerPath(
268
+ full_path=create_deployment_details.container_image_uri
269
+ ).name
270
+
271
+ container_config_item = next(
272
+ (
273
+ container_config_item
274
+ for container_config_item in service_inference_containers
275
+ if ContainerPath(
276
+ full_path=f"{container_config_item.name}:{container_config_item.version}"
277
+ ).name.upper()
278
+ == selected_container_name.upper()
279
+ ),
280
+ None,
281
+ )
282
+
283
+ if (
284
+ container_config_item
285
+ and container_config_item.family not in supported_container_families
286
+ ):
287
+ raise AquaValueError(
288
+ f"Unsupported deployment container '{create_deployment_details.container_image_uri}'. "
289
+ f"Only {supported_container_families} families are supported for multi-model deployments."
290
+ )
291
+
292
+ if not container_config_item:
293
+ logger.warning(
294
+ f"The provided container `{create_deployment_details.container_image_uri}` may not support multi-model deployment. "
295
+ f"Only the following container families are supported: {supported_container_families}."
296
+ )
297
+
298
+ logger.debug(
299
+ f"Multi models ({model_ids}) provided. Delegating to multi model creation method."
300
+ )
301
+
302
+ aqua_model = model_app.create_multi(
303
+ models=create_deployment_details.models,
304
+ compartment_id=compartment_id,
305
+ project_id=project_id,
306
+ freeform_tags=freeform_tags,
307
+ defined_tags=defined_tags,
308
+ )
309
+ return self._create_multi(
310
+ aqua_model=aqua_model,
311
+ model_config_summary=model_config_summary,
312
+ create_deployment_details=create_deployment_details,
313
+ container_config=container_config,
314
+ )
315
+
316
+ def _create(
317
+ self,
318
+ aqua_model: DataScienceModel,
319
+ create_deployment_details: CreateModelDeploymentDetails,
320
+ container_config: Dict,
321
+ ) -> AquaDeployment:
322
+ """Builds the configurations required by single model deployment and creates the deployment.
109
323
 
110
324
  Parameters
111
325
  ----------
112
- model_id: str
113
- The model OCID to deploy.
114
- compartment_id: str
115
- The compartment OCID
116
- project_id: str
117
- Target project to list deployments from.
118
- display_name: str
119
- The name of model deployment.
120
- description: str
121
- The description of the deployment.
122
- instance_count: (int, optional). Defaults to 1.
123
- The number of instance used for deployment.
124
- instance_shape: (str).
125
- The shape of the instance used for deployment.
126
- log_group_id: (str)
127
- The oci logging group id. The access log and predict log share the same log group.
128
- access_log_id: (str).
129
- The access log OCID for the access logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm
130
- predict_log_id: (str).
131
- The predict log OCID for the predict logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm
132
- bandwidth_mbps: (int). Defaults to 10.
133
- The bandwidth limit on the load balancer in Mbps.
134
- web_concurrency: str
135
- The number of worker processes/threads to handle incoming requests
136
- with_bucket_uri(bucket_uri)
137
- Sets the bucket uri when uploading large size model.
138
- server_port: (int).
139
- The server port for docker container image.
140
- health_check_port: (int).
141
- The health check port for docker container image.
142
- env_var : dict, optional
143
- Environment variable for the deployment, by default None.
144
- container_family: str
145
- The image family of model deployment container runtime.
146
- memory_in_gbs: float
147
- The memory in gbs for the shape selected.
148
- ocpus: float
149
- The ocpu count for the shape selected.
150
- model_file: str
151
- The file used for model deployment.
152
- private_endpoint_id: str
153
- The private endpoint id of model deployment.
154
- container_image_uri: str
155
- The image of model deployment container runtime, ignored for service managed containers.
156
- Required parameter for BYOC based deployments if this parameter was not set during model registration.
157
- cmd_var: List[str]
158
- The cmd of model deployment container runtime.
159
- freeform_tags: dict
160
- Freeform tags for the model deployment
161
- defined_tags: dict
162
- Defined tags for the model deployment
326
+ aqua_model : DataScienceModel
327
+ An instance of Aqua data science model.
328
+ create_deployment_details : CreateModelDeploymentDetails
329
+ An instance of CreateModelDeploymentDetails containing all required and optional
330
+ fields for creating a model deployment via Aqua.
331
+ container_config: Dict
332
+ Container config dictionary.
333
+
163
334
  Returns
164
335
  -------
165
336
  AquaDeployment
166
- An Aqua deployment instance
167
-
337
+ An Aqua deployment instance.
168
338
  """
169
- # TODO validate if the service model has no artifact and if it requires import step before deployment.
170
- # Create a model catalog entry in the user compartment
171
- aqua_model = AquaModelApp().create(
172
- model_id=model_id,
173
- compartment_id=compartment_id,
174
- project_id=project_id,
175
- freeform_tags=freeform_tags,
176
- defined_tags=defined_tags,
177
- )
178
-
179
339
  tags = {}
180
340
  for tag in [
181
341
  Tags.AQUA_SERVICE_MODEL_TAG,
@@ -189,7 +349,7 @@ class AquaDeploymentApp(AquaApp):
189
349
  tags.update({Tags.TASK: aqua_model.freeform_tags.get(Tags.TASK, UNKNOWN)})
190
350
 
191
351
  # Set up info to get deployment config
192
- config_source_id = model_id
352
+ config_source_id = create_deployment_details.model_id
193
353
  model_name = aqua_model.display_name
194
354
 
195
355
  is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in aqua_model.freeform_tags
@@ -209,10 +369,8 @@ class AquaDeploymentApp(AquaApp):
209
369
  ) from err
210
370
 
211
371
  # set up env and cmd var
212
- if not env_var:
213
- env_var = {}
214
- if not cmd_var:
215
- cmd_var = []
372
+ env_var = create_deployment_details.env_var or {}
373
+ cmd_var = create_deployment_details.cmd_var or []
216
374
 
217
375
  try:
218
376
  model_path_prefix = aqua_model.custom_metadata_list.get(
@@ -245,11 +403,13 @@ class AquaDeploymentApp(AquaApp):
245
403
  env_var.update({"FT_MODEL": f"{fine_tune_output_path}"})
246
404
 
247
405
  container_type_key = self._get_container_type_key(
248
- model=aqua_model, container_family=container_family
406
+ model=aqua_model,
407
+ container_family=create_deployment_details.container_family,
249
408
  )
250
409
 
251
- container_image_uri = container_image_uri or get_container_image(
252
- container_type=container_type_key
410
+ container_image_uri = (
411
+ create_deployment_details.container_image_uri
412
+ or get_container_image(container_type=container_type_key)
253
413
  )
254
414
  if not container_image_uri:
255
415
  try:
@@ -295,6 +455,7 @@ class AquaDeploymentApp(AquaApp):
295
455
  and container_type_key.lower()
296
456
  == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY
297
457
  ):
458
+ model_file = create_deployment_details.model_file
298
459
  if model_file is not None:
299
460
  logger.info(
300
461
  f"Overriding {model_file} as model_file for model {aqua_model.id}."
@@ -318,27 +479,24 @@ class AquaDeploymentApp(AquaApp):
318
479
  # Fetch the startup cli command for the container
319
480
  # container_index.json will have "containerSpec" section which will provide the cli params for
320
481
  # a given container family
321
- container_config = get_container_config()
322
482
  container_spec = container_config.get(ContainerSpec.CONTAINER_SPEC, {}).get(
323
483
  container_type_key, {}
324
484
  )
325
485
  # these params cannot be overridden for Aqua deployments
326
486
  params = container_spec.get(ContainerSpec.CLI_PARM, "")
327
- server_port = server_port or container_spec.get(
487
+ server_port = create_deployment_details.server_port or container_spec.get(
328
488
  ContainerSpec.SERVER_PORT
329
- ) # Give precendece to the input parameter
330
- health_check_port = health_check_port or container_spec.get(
331
- ContainerSpec.HEALTH_CHECK_PORT
332
- ) # Give precendece to the input parameter
333
-
334
- deployment_config = self.get_deployment_config(config_source_id)
335
-
336
- config_params = (
337
- deployment_config.get("configuration", UNKNOWN_DICT)
338
- .get(instance_shape, UNKNOWN_DICT)
339
- .get("parameters", UNKNOWN_DICT)
340
- .get(get_container_params_type(container_type_key), UNKNOWN)
341
- )
489
+ ) # Give precedence to the input parameter
490
+ health_check_port = (
491
+ create_deployment_details.health_check_port
492
+ or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT)
493
+ ) # Give precedence to the input parameter
494
+
495
+ deployment_config = self.get_deployment_config(model_id=config_source_id)
496
+
497
+ config_params = deployment_config.configuration.get(
498
+ create_deployment_details.instance_shape, ConfigurationItem()
499
+ ).parameters.get(get_container_params_type(container_type_key), UNKNOWN)
342
500
 
343
501
  # validate user provided params
344
502
  user_params = env_var.get("PARAMS", UNKNOWN)
@@ -372,36 +530,250 @@ class AquaDeploymentApp(AquaApp):
372
530
 
373
531
  for env in container_spec.get(ContainerSpec.ENV_VARS, []):
374
532
  if isinstance(env, dict):
375
- for key, _items in env.items():
533
+ for key, _ in env.items():
376
534
  if key not in env_var:
377
535
  env_var.update(env)
378
536
 
379
537
  logger.info(f"Env vars used for deploying {aqua_model.id} :{env_var}")
380
538
 
539
+ tags = {**tags, **(create_deployment_details.freeform_tags or {})}
540
+ model_type = (
541
+ AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE
542
+ )
543
+
544
+ return self._create_deployment(
545
+ create_deployment_details=create_deployment_details,
546
+ aqua_model_id=aqua_model.id,
547
+ model_name=model_name,
548
+ model_type=model_type,
549
+ container_image_uri=container_image_uri,
550
+ server_port=server_port,
551
+ health_check_port=health_check_port,
552
+ env_var=env_var,
553
+ tags=tags,
554
+ cmd_var=cmd_var,
555
+ )
556
+
557
+ def _create_multi(
558
+ self,
559
+ aqua_model: DataScienceModel,
560
+ model_config_summary: ModelDeploymentConfigSummary,
561
+ create_deployment_details: CreateModelDeploymentDetails,
562
+ container_config: Dict,
563
+ ) -> AquaDeployment:
564
+ """Builds the environment variables required by multi deployment container and creates the deployment.
565
+
566
+ Parameters
567
+ ----------
568
+ model_config_summary : ModelDeploymentConfigSummary
569
+ Summary Model Deployment configuration for the group of models.
570
+ aqua_model : DataScienceModel
571
+ An instance of Aqua data science model.
572
+ create_deployment_details : CreateModelDeploymentDetails
573
+ An instance of CreateModelDeploymentDetails containing all required and optional
574
+ fields for creating a model deployment via Aqua.
575
+ container_config: Dict
576
+ Container config dictionary.
577
+ Returns
578
+ -------
579
+ AquaDeployment
580
+ An Aqua deployment instance.
581
+ """
582
+ model_config = []
583
+ model_name_list = []
584
+ env_var = {**(create_deployment_details.env_var or UNKNOWN_DICT)}
585
+
586
+ container_type_key = self._get_container_type_key(
587
+ model=aqua_model,
588
+ container_family=create_deployment_details.container_family,
589
+ )
590
+ container_spec = container_config.get(
591
+ ContainerSpec.CONTAINER_SPEC, UNKNOWN_DICT
592
+ ).get(container_type_key, UNKNOWN_DICT)
593
+
594
+ container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip()
595
+
596
+ for model in create_deployment_details.models:
597
+ user_params = build_params_string(model.env_var)
598
+ if user_params:
599
+ restricted_params = self._find_restricted_params(
600
+ container_params, user_params, container_type_key
601
+ )
602
+ if restricted_params:
603
+ selected_model = model.model_name or model.model_id
604
+ raise AquaValueError(
605
+ f"Parameters {restricted_params} are set by Aqua "
606
+ f"and cannot be overridden or are invalid."
607
+ f"Select other parameters for model {selected_model}."
608
+ )
609
+
610
+ # replaces `--served-model-name` with user's model name
611
+ container_params_dict = get_params_dict(container_params)
612
+ container_params_dict.update({"--served-model-name": model.model_name})
613
+ # replaces `--tensor-parallel-size` with model gpu count
614
+ container_params_dict.update({"--tensor-parallel-size": model.gpu_count})
615
+ params = build_params_string(container_params_dict)
616
+
617
+ deployment_config = model_config_summary.deployment_config.get(
618
+ model.model_id, AquaDeploymentConfig()
619
+ ).configuration.get(
620
+ create_deployment_details.instance_shape, ConfigurationItem()
621
+ )
622
+
623
+ # finds the corresponding deployment parameters based on the gpu count
624
+ # and combines them with user's parameters. Existing deployment parameters
625
+ # will be overridden by user's parameters.
626
+ params_found = False
627
+ for item in deployment_config.multi_model_deployment:
628
+ if (
629
+ model.gpu_count
630
+ and item.gpu_count
631
+ and item.gpu_count == model.gpu_count
632
+ ):
633
+ config_parameters = item.parameters.get(
634
+ get_container_params_type(container_type_key), UNKNOWN
635
+ )
636
+ params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
637
+ params_found = True
638
+ break
639
+
640
+ if not params_found and deployment_config.parameters:
641
+ config_parameters = deployment_config.parameters.get(
642
+ get_container_params_type(container_type_key), UNKNOWN
643
+ )
644
+ params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
645
+ params_found = True
646
+
647
+ # if no config parameters found, append user parameters directly.
648
+ if not params_found:
649
+ params = f"{params} {user_params}".strip()
650
+
651
+ artifact_path_prefix = model.artifact_location.rstrip("/")
652
+ if ObjectStorageDetails.is_oci_path(artifact_path_prefix):
653
+ os_path = ObjectStorageDetails.from_path(artifact_path_prefix)
654
+ artifact_path_prefix = os_path.filepath.rstrip("/")
655
+
656
+ model_config.append({"params": params, "model_path": artifact_path_prefix})
657
+ model_name_list.append(model.model_name)
658
+
659
+ env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})})
660
+
661
+ for env in container_spec.get(ContainerSpec.ENV_VARS, []):
662
+ if isinstance(env, dict):
663
+ for key, _ in env.items():
664
+ if key not in env_var:
665
+ env_var.update(env)
666
+
667
+ logger.info(f"Env vars used for deploying {aqua_model.id} : {env_var}.")
668
+
669
+ container_image_uri = (
670
+ create_deployment_details.container_image_uri
671
+ or get_container_image(container_type=container_type_key)
672
+ )
673
+ server_port = create_deployment_details.server_port or container_spec.get(
674
+ ContainerSpec.SERVER_PORT
675
+ )
676
+ health_check_port = (
677
+ create_deployment_details.health_check_port
678
+ or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT)
679
+ )
680
+ tags = {
681
+ Tags.AQUA_MODEL_ID_TAG: aqua_model.id,
682
+ Tags.MULTIMODEL_TYPE_TAG: "true",
683
+ Tags.AQUA_TAG: "active",
684
+ **(create_deployment_details.freeform_tags or UNKNOWN_DICT),
685
+ }
686
+
687
+ model_name = f"{MODEL_NAME_DELIMITER} ".join(model_name_list)
688
+
689
+ aqua_deployment = self._create_deployment(
690
+ create_deployment_details=create_deployment_details,
691
+ aqua_model_id=aqua_model.id,
692
+ model_name=model_name,
693
+ model_type=AQUA_MODEL_TYPE_MULTI,
694
+ container_image_uri=container_image_uri,
695
+ server_port=server_port,
696
+ health_check_port=health_check_port,
697
+ env_var=env_var,
698
+ tags=tags,
699
+ )
700
+ aqua_deployment.models = create_deployment_details.models
701
+ return aqua_deployment
702
+
703
+ def _create_deployment(
704
+ self,
705
+ create_deployment_details: CreateModelDeploymentDetails,
706
+ aqua_model_id: str,
707
+ model_name: str,
708
+ model_type: str,
709
+ container_image_uri: str,
710
+ server_port: str,
711
+ health_check_port: str,
712
+ env_var: dict,
713
+ tags: dict,
714
+ cmd_var: Optional[dict] = None,
715
+ ):
716
+ """Creates data science model deployment.
717
+
718
+ Parameters
719
+ ----------
720
+ create_deployment_details : CreateModelDeploymentDetails
721
+ An instance of CreateModelDeploymentDetails containing all required and optional
722
+ fields for creating a model deployment via Aqua.
723
+ aqua_model_id: str
724
+ The id of the aqua model to be deployed.
725
+ model_name: str
726
+ The name of the aqua model to be deployed. For a multi model deployment, it is a single string of the model names joined by the model name delimiter.
727
+ model_type: str
728
+ The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`.
729
+ container_image_uri: str
730
+ The container image uri to deploy the model.
731
+ server_port: str
732
+ The service port of the container image.
733
+ health_check_port: str
734
+ The health check port of the container image.
735
+ env_var: dict
736
+ The environment variables input for the deployment.
737
+ tags: dict
738
+ The tags input for the deployment.
739
+ cmd_var: dict, optional
740
+ The cmd arguments input for the deployment.
741
+
742
+ Returns
743
+ -------
744
+ AquaDeployment
745
+ An Aqua deployment instance.
746
+ """
381
747
  # Start model deployment
382
748
  # configure model deployment infrastructure
383
749
  infrastructure = (
384
750
  ModelDeploymentInfrastructure()
385
- .with_project_id(project_id)
386
- .with_compartment_id(compartment_id)
387
- .with_shape_name(instance_shape)
388
- .with_bandwidth_mbps(bandwidth_mbps)
389
- .with_replica(instance_count)
390
- .with_web_concurrency(web_concurrency)
391
- .with_private_endpoint_id(private_endpoint_id)
751
+ .with_project_id(create_deployment_details.project_id or PROJECT_OCID)
752
+ .with_compartment_id(
753
+ create_deployment_details.compartment_id or COMPARTMENT_OCID
754
+ )
755
+ .with_shape_name(create_deployment_details.instance_shape)
756
+ .with_bandwidth_mbps(create_deployment_details.bandwidth_mbps)
757
+ .with_replica(create_deployment_details.instance_count)
758
+ .with_web_concurrency(create_deployment_details.web_concurrency)
759
+ .with_private_endpoint_id(create_deployment_details.private_endpoint_id)
392
760
  .with_access_log(
393
- log_group_id=log_group_id,
394
- log_id=access_log_id,
761
+ log_group_id=create_deployment_details.log_group_id,
762
+ log_id=create_deployment_details.access_log_id,
395
763
  )
396
764
  .with_predict_log(
397
- log_group_id=log_group_id,
398
- log_id=predict_log_id,
765
+ log_group_id=create_deployment_details.log_group_id,
766
+ log_id=create_deployment_details.predict_log_id,
399
767
  )
400
768
  )
401
- if memory_in_gbs and ocpus and infrastructure.shape_name.endswith("Flex"):
769
+ if (
770
+ create_deployment_details.memory_in_gbs
771
+ and create_deployment_details.ocpus
772
+ and infrastructure.shape_name.endswith("Flex")
773
+ ):
402
774
  infrastructure.with_shape_config_details(
403
- ocpus=ocpus,
404
- memory_in_gbs=memory_in_gbs,
775
+ ocpus=create_deployment_details.ocpus,
776
+ memory_in_gbs=create_deployment_details.memory_in_gbs,
405
777
  )
406
778
  # configure model deployment runtime
407
779
  container_runtime = (
@@ -411,7 +783,7 @@ class AquaDeploymentApp(AquaApp):
411
783
  .with_health_check_port(health_check_port)
412
784
  .with_env(env_var)
413
785
  .with_deployment_mode(ModelDeploymentMode.HTTPS)
414
- .with_model_uri(aqua_model.id)
786
+ .with_model_uri(aqua_model_id)
415
787
  .with_region(self.region)
416
788
  .with_overwrite_existing_artifact(True)
417
789
  .with_remove_existing_artifact(True)
@@ -419,24 +791,20 @@ class AquaDeploymentApp(AquaApp):
419
791
  if cmd_var:
420
792
  container_runtime.with_cmd(cmd_var)
421
793
 
422
- tags = {**tags, **(freeform_tags or {})}
423
794
  # configure model deployment and deploy model on container runtime
424
795
  deployment = (
425
796
  ModelDeployment()
426
- .with_display_name(display_name)
427
- .with_description(description)
797
+ .with_display_name(create_deployment_details.display_name)
798
+ .with_description(create_deployment_details.description)
428
799
  .with_freeform_tags(**tags)
429
- .with_defined_tags(**(defined_tags or {}))
800
+ .with_defined_tags(**(create_deployment_details.defined_tags or {}))
430
801
  .with_infrastructure(infrastructure)
431
802
  .with_runtime(container_runtime)
432
803
  ).deploy(wait_for_completion=False)
433
804
 
434
- deployment_id = deployment.dsc_model_deployment.id
805
+ deployment_id = deployment.id
435
806
  logger.info(
436
- f"Aqua model deployment {deployment_id} created for model {aqua_model.id}."
437
- )
438
- model_type = (
439
- AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE
807
+ f"Aqua model deployment {deployment_id} created for model {aqua_model_id}."
440
808
  )
441
809
 
442
810
  # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
@@ -453,7 +821,7 @@ class AquaDeploymentApp(AquaApp):
453
821
  self.telemetry.record_event_async(
454
822
  category=f"aqua/{model_type}/deployment/create",
455
823
  action="shape",
456
- detail=instance_shape,
824
+ detail=create_deployment_details.instance_shape,
457
825
  value=model_name,
458
826
  )
459
827
 
@@ -627,10 +995,43 @@ class AquaDeploymentApp(AquaApp):
627
995
  source_id=model_deployment.id,
628
996
  )
629
997
 
998
+ aqua_deployment = AquaDeployment.from_oci_model_deployment(
999
+ model_deployment, self.region
1000
+ )
1001
+
1002
+ if Tags.MULTIMODEL_TYPE_TAG in model_deployment.freeform_tags:
1003
+ aqua_model_id = model_deployment.freeform_tags.get(
1004
+ Tags.AQUA_MODEL_ID_TAG, UNKNOWN
1005
+ )
1006
+ if not aqua_model_id:
1007
+ raise AquaRuntimeError(
1008
+ f"Invalid multi model deployment {model_deployment_id}."
1009
+ f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment."
1010
+ )
1011
+ aqua_model = DataScienceModel.from_id(aqua_model_id)
1012
+ custom_metadata_list = aqua_model.custom_metadata_list
1013
+ multi_model_metadata_value = custom_metadata_list.get(
1014
+ ModelCustomMetadataFields.MULTIMODEL_METADATA,
1015
+ ModelCustomMetadataItem(
1016
+ key=ModelCustomMetadataFields.MULTIMODEL_METADATA
1017
+ ),
1018
+ ).value
1019
+ if not multi_model_metadata_value:
1020
+ raise AquaRuntimeError(
1021
+ f"Invalid multi-model deployment: {model_deployment_id}. "
1022
+ f"Ensure that the required custom metadata `{ModelCustomMetadataFields.MULTIMODEL_METADATA}` is added to the AQUA multi-model `{aqua_model.display_name}` ({aqua_model.id})."
1023
+ )
1024
+ multi_model_metadata = json.loads(
1025
+ aqua_model.dsc_model.get_custom_metadata_artifact(
1026
+ metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA
1027
+ ).decode("utf-8")
1028
+ )
1029
+ aqua_deployment.models = [
1030
+ AquaMultiModelRef(**metadata) for metadata in multi_model_metadata
1031
+ ]
1032
+
630
1033
  return AquaDeploymentDetail(
631
- **vars(
632
- AquaDeployment.from_oci_model_deployment(model_deployment, self.region)
633
- ),
1034
+ **vars(aqua_deployment),
634
1035
  log_group=AquaResourceIdentifier(
635
1036
  log_group_id, log_group_name, log_group_url
636
1037
  ),
@@ -640,7 +1041,7 @@ class AquaDeploymentApp(AquaApp):
640
1041
  @telemetry(
641
1042
  entry_point="plugin=deployment&action=get_deployment_config", name="aqua"
642
1043
  )
643
- def get_deployment_config(self, model_id: str) -> Dict:
1044
+ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
644
1045
  """Gets the deployment config of given Aqua model.
645
1046
 
646
1047
  Parameters
@@ -650,20 +1051,83 @@ class AquaDeploymentApp(AquaApp):
650
1051
 
651
1052
  Returns
652
1053
  -------
653
- Dict:
654
- A dict of allowed deployment configs.
1054
+ AquaDeploymentConfig:
1055
+ An instance of AquaDeploymentConfig.
655
1056
  """
656
1057
  config = self.get_config(model_id, AQUA_MODEL_DEPLOYMENT_CONFIG).config
657
1058
  if not config:
658
1059
  logger.debug(
659
1060
  f"Deployment config for custom model: {model_id} is not available. Use defaults."
660
1061
  )
661
- return config
1062
+ return AquaDeploymentConfig(**(config or UNKNOWN_DICT))
1063
+
1064
+ @telemetry(
1065
+ entry_point="plugin=deployment&action=get_multimodel_deployment_config",
1066
+ name="aqua",
1067
+ )
1068
+ def get_multimodel_deployment_config(
1069
+ self,
1070
+ model_ids: List[str],
1071
+ primary_model_id: Optional[str] = None,
1072
+ **kwargs: Dict,
1073
+ ) -> ModelDeploymentConfigSummary:
1074
+ """
1075
+ Retrieves the deployment configuration for multiple models and calculates
1076
+ GPU allocations across all compatible shapes.
1077
+
1078
+ More details:
1079
+ https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#get_multimodel_deployment_config
1080
+
1081
+ CLI example:
1082
+ ads aqua deployment get_multimodel_deployment_config --model_ids '["ocid1.datasciencemodel.oc1.iad.OCID"]'
1083
+
1084
+ If a primary model ID is provided, GPU allocation will prioritize that model
1085
+ when selecting compatible shapes.
1086
+
1087
+ Example:
1088
+ Assume all three models: A, B, and C, support the same shape: "BM.GPU.H100.8" and each supports the following GPU counts for that shape: 1, 2, 4, 8.
1089
+ If no primary model is specified, valid allocations could be: [2, 4, 2], [2, 2, 4], or [4, 2, 2].
1090
+ If `B` is set as the primary model, the allocation will be: [2, 4, 2], where B receives the maximum available GPU count.
1091
+
1092
+ Parameters
1093
+ ----------
1094
+ model_ids : List[str]
1095
+ A list of OCIDs for the Aqua models.
1096
+ primary_model_id : Optional[str]
1097
+ The OCID of the primary Aqua model. If provided, GPU allocation will prioritize
1098
+ this model. Otherwise, GPUs will be evenly allocated.
1099
+ **kwargs: Dict
1100
+ - compartment_id: str
1101
+ The compartment OCID to retrieve the model deployment shapes.
1102
+
1103
+ Returns
1104
+ -------
1105
+ ModelDeploymentConfigSummary
1106
+ A summary of the model deployment configurations and GPU allocations.
1107
+ """
1108
+ if not model_ids:
1109
+ raise AquaValueError(
1110
+ "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration."
1111
+ )
1112
+
1113
+ compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID)
1114
+
1115
+ # Get all model deployment shapes available in the given compartment
1116
+ available_shapes = self.list_shapes(compartment_id=compartment_id)
1117
+
1118
+ return MultiModelDeploymentConfigLoader(
1119
+ deployment_app=self,
1120
+ ).load(
1121
+ shapes=available_shapes,
1122
+ model_ids=model_ids,
1123
+ primary_model_id=primary_model_id,
1124
+ )
662
1125
 
663
1126
  def get_deployment_default_params(
664
1127
  self,
665
1128
  model_id: str,
666
1129
  instance_shape: str,
1130
+ gpu_count: int = None,
667
1131
  ) -> List[str]:
668
1132
  """Gets the default params set in the deployment configs for the given model and instance shape.
669
1133
 
@@ -675,6 +1139,9 @@ class AquaDeploymentApp(AquaApp):
675
1139
  instance_shape: (str).
676
1140
  The shape of the instance used for deployment.
677
1141
 
1142
+ gpu_count: (int, optional).
1143
+ The number of GPUs used by the Aqua model. Defaults to None.
1144
+
678
1145
  Returns
679
1146
  -------
680
1147
  List[str]:
@@ -683,6 +1150,7 @@ class AquaDeploymentApp(AquaApp):
683
1150
 
684
1151
  """
685
1152
  default_params = []
1153
+ config_params = {}
686
1154
  model = DataScienceModel.from_id(model_id)
687
1155
  try:
688
1156
  container_type_key = model.custom_metadata_list.get(
@@ -699,12 +1167,26 @@ class AquaDeploymentApp(AquaApp):
699
1167
  and container_type_key in InferenceContainerTypeFamily.values()
700
1168
  ):
701
1169
  deployment_config = self.get_deployment_config(model_id)
702
- config_params = (
703
- deployment_config.get("configuration", UNKNOWN_DICT)
704
- .get(instance_shape, UNKNOWN_DICT)
705
- .get("parameters", UNKNOWN_DICT)
706
- .get(get_container_params_type(container_type_key), UNKNOWN)
1170
+
1171
+ instance_shape_config = deployment_config.configuration.get(
1172
+ instance_shape, ConfigurationItem()
707
1173
  )
1174
+
1175
+ if instance_shape_config.multi_model_deployment and gpu_count:
1176
+ gpu_params = instance_shape_config.multi_model_deployment
1177
+
1178
+ for gpu_config in gpu_params:
1179
+ if gpu_config.gpu_count == gpu_count:
1180
+ config_params = gpu_config.parameters.get(
1181
+ get_container_params_type(container_type_key), UNKNOWN
1182
+ )
1183
+ break
1184
+
1185
+ else:
1186
+ config_params = instance_shape_config.parameters.get(
1187
+ get_container_params_type(container_type_key), UNKNOWN
1188
+ )
1189
+
708
1190
  if config_params:
709
1191
  params_list = get_params_list(config_params)
710
1192
  restricted_params_set = get_restricted_params_by_container(
@@ -799,3 +1281,40 @@ class AquaDeploymentApp(AquaApp):
799
1281
  restricted_params.append(key.lstrip("-"))
800
1282
 
801
1283
  return restricted_params
1284
+
1285
+ @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
1286
+ @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
1287
+ def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
1288
+ """Lists the valid model deployment shapes.
1289
+
1290
+ Parameters
1291
+ ----------
1292
+ kwargs
1293
+ Keyword arguments, such as compartment_id
1294
+ for `list_call_get_all_results <https://docs.oracle.com/en-us/iaas/tools/python/2.118.1/api/pagination.html#oci.pagination.list_call_get_all_results>`_
1295
+
1296
+ Returns
1297
+ -------
1298
+ List[ComputeShapeSummary]:
1299
+ The list of the model deployment shapes.
1300
+ """
1301
+ compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID)
1302
+ oci_shapes: list[ModelDeploymentShapeSummary] = self.list_resource(
1303
+ self.ds_client.list_model_deployment_shapes,
1304
+ compartment_id=compartment_id,
1305
+ **kwargs,
1306
+ )
1307
+
1308
+ gpu_specs = load_gpu_shapes_index()
1309
+
1310
+ return [
1311
+ ComputeShapeSummary(
1312
+ core_count=oci_shape.core_count,
1313
+ memory_in_gbs=oci_shape.memory_in_gbs,
1314
+ shape_series=oci_shape.shape_series,
1315
+ name=oci_shape.name,
1316
+ gpu_specs=gpu_specs.shapes.get(oci_shape.name)
1317
+ or gpu_specs.shapes.get(oci_shape.name.upper()),
1318
+ )
1319
+ for oci_shape in oci_shapes
1320
+ ]