oracle-ads 2.13.11__py3-none-any.whl → 2.13.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ads/aqua/app.py +73 -15
  2. ads/aqua/cli.py +17 -0
  3. ads/aqua/client/client.py +38 -21
  4. ads/aqua/client/openai_client.py +20 -10
  5. ads/aqua/common/entities.py +78 -12
  6. ads/aqua/common/utils.py +35 -0
  7. ads/aqua/constants.py +2 -0
  8. ads/aqua/evaluation/evaluation.py +5 -4
  9. ads/aqua/extension/common_handler.py +47 -2
  10. ads/aqua/extension/model_handler.py +51 -9
  11. ads/aqua/model/constants.py +1 -0
  12. ads/aqua/model/enums.py +19 -1
  13. ads/aqua/model/model.py +119 -51
  14. ads/aqua/model/utils.py +1 -2
  15. ads/aqua/modeldeployment/config_loader.py +815 -0
  16. ads/aqua/modeldeployment/constants.py +4 -1
  17. ads/aqua/modeldeployment/deployment.py +178 -129
  18. ads/aqua/modeldeployment/entities.py +150 -178
  19. ads/aqua/modeldeployment/model_group_config.py +233 -0
  20. ads/aqua/modeldeployment/utils.py +0 -539
  21. ads/aqua/verify_policies/__init__.py +8 -0
  22. ads/aqua/verify_policies/constants.py +13 -0
  23. ads/aqua/verify_policies/entities.py +29 -0
  24. ads/aqua/verify_policies/messages.py +101 -0
  25. ads/aqua/verify_policies/utils.py +432 -0
  26. ads/aqua/verify_policies/verify.py +345 -0
  27. ads/aqua/version.json +3 -0
  28. ads/common/oci_logging.py +4 -7
  29. ads/common/work_request.py +39 -38
  30. ads/jobs/builders/infrastructure/dsc_job.py +121 -24
  31. ads/jobs/builders/infrastructure/dsc_job_runtime.py +71 -24
  32. ads/jobs/builders/runtimes/base.py +7 -5
  33. ads/jobs/builders/runtimes/pytorch_runtime.py +6 -8
  34. ads/jobs/templates/driver_pytorch.py +486 -172
  35. ads/jobs/templates/driver_utils.py +27 -11
  36. ads/model/deployment/model_deployment.py +51 -38
  37. ads/model/service/oci_datascience_model_deployment.py +6 -11
  38. ads/telemetry/client.py +4 -4
  39. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/METADATA +2 -1
  40. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/RECORD +43 -34
  41. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/WHEEL +0 -0
  42. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/entry_points.txt +0 -0
  43. {oracle_ads-2.13.11.dist-info → oracle_ads-2.13.13.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python
2
- # Copyright (c) 2024 Oracle and/or its affiliates.
2
+ # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
3
3
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
4
 
5
5
  """
@@ -8,3 +8,6 @@ aqua.modeldeployment.constants
8
8
 
9
9
  This module contains constants used in Aqua Model Deployment.
10
10
  """
11
+
12
+ DEFAULT_WAIT_TIME = 12000
13
+ DEFAULT_POLL_INTERVAL = 10
@@ -2,10 +2,13 @@
2
2
  # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
3
3
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
4
 
5
+
5
6
  import json
7
+ import re
6
8
  import shlex
9
+ import threading
7
10
  from datetime import datetime, timedelta
8
- from typing import Dict, List, Optional, Union
11
+ from typing import Dict, List, Optional
9
12
 
10
13
  from cachetools import TTLCache, cached
11
14
  from oci.data_science.models import ModelDeploymentShapeSummary
@@ -17,20 +20,15 @@ from ads.aqua.common.entities import (
17
20
  ComputeShapeSummary,
18
21
  ContainerPath,
19
22
  )
20
- from ads.aqua.common.enums import (
21
- InferenceContainerTypeFamily,
22
- ModelFormat,
23
- Tags,
24
- )
23
+ from ads.aqua.common.enums import InferenceContainerTypeFamily, ModelFormat, Tags
25
24
  from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
26
25
  from ads.aqua.common.utils import (
27
26
  DEFINED_METADATA_TO_FILE_MAP,
28
- build_params_string,
29
27
  build_pydantic_error_message,
28
+ find_restricted_params,
30
29
  get_combined_params,
31
30
  get_container_params_type,
32
31
  get_ocid_substring,
33
- get_params_dict,
34
32
  get_params_list,
35
33
  get_resource_name,
36
34
  get_restricted_params_by_container,
@@ -50,27 +48,38 @@ from ads.aqua.constants import (
50
48
  )
51
49
  from ads.aqua.data import AquaResourceIdentifier
52
50
  from ads.aqua.model import AquaModelApp
53
- from ads.aqua.model.constants import AquaModelMetadataKeys, ModelCustomMetadataFields
51
+ from ads.aqua.model.constants import (
52
+ AquaModelMetadataKeys,
53
+ ModelCustomMetadataFields,
54
+ ModelTask,
55
+ )
54
56
  from ads.aqua.model.utils import (
55
57
  extract_base_model_from_ft,
56
58
  extract_fine_tune_artifacts_path,
57
59
  )
60
+ from ads.aqua.modeldeployment.config_loader import (
61
+ AquaDeploymentConfig,
62
+ ConfigurationItem,
63
+ ModelDeploymentConfigSummary,
64
+ MultiModelDeploymentConfigLoader,
65
+ )
66
+ from ads.aqua.modeldeployment.constants import DEFAULT_POLL_INTERVAL, DEFAULT_WAIT_TIME
58
67
  from ads.aqua.modeldeployment.entities import (
59
68
  AquaDeployment,
60
- AquaDeploymentConfig,
61
69
  AquaDeploymentDetail,
62
- ConfigurationItem,
63
70
  ConfigValidationError,
64
71
  CreateModelDeploymentDetails,
65
- ModelDeploymentConfigSummary,
66
72
  )
67
- from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
73
+ from ads.aqua.modeldeployment.model_group_config import ModelGroupConfig
68
74
  from ads.common.object_storage_details import ObjectStorageDetails
69
75
  from ads.common.utils import UNKNOWN, get_log_links
76
+ from ads.common.work_request import DataScienceWorkRequest
70
77
  from ads.config import (
71
78
  AQUA_DEPLOYMENT_CONTAINER_CMD_VAR_METADATA_NAME,
72
79
  AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME,
73
80
  AQUA_DEPLOYMENT_CONTAINER_URI_METADATA_NAME,
81
+ AQUA_TELEMETRY_BUCKET,
82
+ AQUA_TELEMETRY_BUCKET_NS,
74
83
  COMPARTMENT_OCID,
75
84
  PROJECT_OCID,
76
85
  )
@@ -210,20 +219,52 @@ class AquaDeploymentApp(AquaApp):
210
219
  freeform_tags=freeform_tags,
211
220
  defined_tags=defined_tags,
212
221
  )
222
+ task_tag = aqua_model.freeform_tags.get(Tags.TASK, UNKNOWN)
223
+ if (
224
+ task_tag == ModelTask.TIME_SERIES_FORECASTING
225
+ or task_tag == ModelTask.TIME_SERIES_FORECASTING.replace("-", "_")
226
+ ):
227
+ create_deployment_details.env_var.update(
228
+ {Tags.TASK.upper(): ModelTask.TIME_SERIES_FORECASTING}
229
+ )
213
230
  return self._create(
214
231
  aqua_model=aqua_model,
215
232
  create_deployment_details=create_deployment_details,
216
233
  container_config=container_config,
217
234
  )
218
235
  else:
219
- model_ids = [model.model_id for model in create_deployment_details.models]
236
+ # Collect all unique model IDs (including fine-tuned models)
237
+ source_model_ids = list(
238
+ {
239
+ model_id
240
+ for model in create_deployment_details.models
241
+ for model_id in model.all_model_ids()
242
+ }
243
+ )
244
+ logger.debug(
245
+ "Fetching source model metadata for model IDs: %s", source_model_ids
246
+ )
247
+ # Fetch source model metadata
248
+ source_models = self.get_multi_source(source_model_ids) or {}
249
+
250
+ try:
251
+ create_deployment_details.validate_input_models(
252
+ model_details=source_models
253
+ )
254
+ except ConfigValidationError as err:
255
+ raise AquaValueError(f"{err}") from err
256
+
257
+ base_model_ids = [
258
+ model.model_id for model in create_deployment_details.models
259
+ ]
220
260
 
221
261
  try:
222
262
  model_config_summary = self.get_multimodel_deployment_config(
223
- model_ids=model_ids, compartment_id=compartment_id
263
+ model_ids=base_model_ids, compartment_id=compartment_id
224
264
  )
225
265
  if not model_config_summary.gpu_allocation:
226
266
  raise AquaValueError(model_config_summary.error_message)
267
+
227
268
  create_deployment_details.validate_multimodel_deployment_feasibility(
228
269
  models_config_summary=model_config_summary
229
270
  )
@@ -294,7 +335,7 @@ class AquaDeploymentApp(AquaApp):
294
335
  )
295
336
 
296
337
  logger.debug(
297
- f"Multi models ({model_ids}) provided. Delegating to multi model creation method."
338
+ f"Multi models ({source_model_ids}) provided. Delegating to multi model creation method."
298
339
  )
299
340
 
300
341
  aqua_model = model_app.create_multi(
@@ -303,6 +344,7 @@ class AquaDeploymentApp(AquaApp):
303
344
  project_id=project_id,
304
345
  freeform_tags=freeform_tags,
305
346
  defined_tags=defined_tags,
347
+ source_models=source_models,
306
348
  )
307
349
  return self._create_multi(
308
350
  aqua_model=aqua_model,
@@ -486,7 +528,7 @@ class AquaDeploymentApp(AquaApp):
486
528
  f"with deployment without parameter overrides."
487
529
  )
488
530
 
489
- restricted_params = self._find_restricted_params(
531
+ restricted_params = find_restricted_params(
490
532
  params, user_params, container_type_key
491
533
  )
492
534
  if restricted_params:
@@ -508,6 +550,9 @@ class AquaDeploymentApp(AquaApp):
508
550
  if key not in env_var:
509
551
  env_var.update(env)
510
552
 
553
+ env_var.update({"AQUA_TELEMETRY_BUCKET_NS": AQUA_TELEMETRY_BUCKET_NS})
554
+ env_var.update({"AQUA_TELEMETRY_BUCKET": AQUA_TELEMETRY_BUCKET})
555
+
511
556
  logger.info(f"Env vars used for deploying {aqua_model.id} :{env_var}")
512
557
 
513
558
  tags = {**tags, **(create_deployment_details.freeform_tags or {})}
@@ -553,7 +598,6 @@ class AquaDeploymentApp(AquaApp):
553
598
  AquaDeployment
554
599
  An Aqua deployment instance.
555
600
  """
556
- model_config = []
557
601
  model_name_list = []
558
602
  env_var = {**(create_deployment_details.env_var or UNKNOWN_DICT)}
559
603
 
@@ -566,80 +610,14 @@ class AquaDeploymentApp(AquaApp):
566
610
 
567
611
  container_params = container_spec.cli_param if container_spec else UNKNOWN
568
612
 
569
- for model in create_deployment_details.models:
570
- user_params = build_params_string(model.env_var)
571
- if user_params:
572
- restricted_params = self._find_restricted_params(
573
- container_params, user_params, container_type_key
574
- )
575
- if restricted_params:
576
- selected_model = model.model_name or model.model_id
577
- raise AquaValueError(
578
- f"Parameters {restricted_params} are set by Aqua "
579
- f"and cannot be overridden or are invalid."
580
- f"Select other parameters for model {selected_model}."
581
- )
582
-
583
- # replaces `--served-model-name`` with user's model name
584
- container_params_dict = get_params_dict(container_params)
585
- container_params_dict.update({"--served-model-name": model.model_name})
586
- # replaces `--tensor-parallel-size` with model gpu count
587
- container_params_dict.update({"--tensor-parallel-size": model.gpu_count})
588
- params = build_params_string(container_params_dict)
589
-
590
- deployment_config = model_config_summary.deployment_config.get(
591
- model.model_id, AquaDeploymentConfig()
592
- ).configuration.get(
593
- create_deployment_details.instance_shape, ConfigurationItem()
594
- )
595
-
596
- # finds the corresponding deployment parameters based on the gpu count
597
- # and combines them with user's parameters. Existing deployment parameters
598
- # will be overriden by user's parameters.
599
- params_found = False
600
- for item in deployment_config.multi_model_deployment:
601
- if (
602
- model.gpu_count
603
- and item.gpu_count
604
- and item.gpu_count == model.gpu_count
605
- ):
606
- config_parameters = item.parameters.get(
607
- get_container_params_type(container_type_key), UNKNOWN
608
- )
609
- params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
610
- params_found = True
611
- break
612
-
613
- if not params_found and deployment_config.parameters:
614
- config_parameters = deployment_config.parameters.get(
615
- get_container_params_type(container_type_key), UNKNOWN
616
- )
617
- params = f"{params} {get_combined_params(config_parameters, user_params)}".strip()
618
- params_found = True
619
-
620
- # if no config parameters found, append user parameters directly.
621
- if not params_found:
622
- params = f"{params} {user_params}".strip()
623
-
624
- artifact_path_prefix = model.artifact_location.rstrip("/")
625
- if ObjectStorageDetails.is_oci_path(artifact_path_prefix):
626
- os_path = ObjectStorageDetails.from_path(artifact_path_prefix)
627
- artifact_path_prefix = os_path.filepath.rstrip("/")
628
-
629
- # override by-default completion/ chat endpoint with other endpoint (embedding)
630
- config_data = {"params": params, "model_path": artifact_path_prefix}
631
- if model.model_task:
632
- config_data["model_task"] = model.model_task
633
-
634
- if model.fine_tune_weights_location:
635
- config_data["fine_tune_weights_location"] = (
636
- model.fine_tune_weights_location
637
- )
638
-
639
- model_config.append(config_data)
640
- model_name_list.append(model.model_name)
613
+ multi_model_config = ModelGroupConfig.from_create_model_deployment_details(
614
+ create_deployment_details,
615
+ model_config_summary,
616
+ container_type_key,
617
+ container_params,
618
+ )
641
619
 
642
- env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})})
620
+ env_var.update({AQUA_MULTI_MODEL_CONFIG: multi_model_config.model_dump_json()})
643
621
 
644
622
  env_vars = container_spec.env_vars if container_spec else []
645
623
  for env in env_vars:
@@ -787,9 +765,23 @@ class AquaDeploymentApp(AquaApp):
787
765
  ).deploy(wait_for_completion=False)
788
766
 
789
767
  deployment_id = deployment.id
768
+
790
769
  logger.info(
791
- f"Aqua model deployment {deployment_id} created for model {aqua_model_id}."
770
+ f"Aqua model deployment {deployment_id} created for model {aqua_model_id}. Work request Id is {deployment.dsc_model_deployment.workflow_req_id}"
792
771
  )
772
+ status_list = []
773
+
774
+ progress_thread = threading.Thread(
775
+ target=self.get_deployment_status,
776
+ args=(
777
+ deployment,
778
+ deployment.dsc_model_deployment.workflow_req_id,
779
+ model_type,
780
+ model_name,
781
+ ),
782
+ daemon=True,
783
+ )
784
+ progress_thread.start()
793
785
 
794
786
  # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
795
787
  telemetry_kwargs = {"ocid": get_ocid_substring(deployment_id, key_len=8)}
@@ -870,12 +862,22 @@ class AquaDeploymentApp(AquaApp):
870
862
  )
871
863
 
872
864
  if oci_aqua:
865
+ # skipping the AQUA model deployments that are created from model group
866
+ # TODO: remove this checker after AQUA deployment is integrated with model group
867
+ aqua_model_id = model_deployment.freeform_tags.get(
868
+ Tags.AQUA_MODEL_ID_TAG, UNKNOWN
869
+ )
870
+ if (
871
+ "datasciencemodelgroup" in aqua_model_id
872
+ or model_deployment.model_deployment_configuration_details.deployment_type
873
+ == "UNKNOWN_ENUM_VALUE"
874
+ ):
875
+ continue
873
876
  results.append(
874
877
  AquaDeployment.from_oci_model_deployment(
875
878
  model_deployment, self.region
876
879
  )
877
880
  )
878
-
879
881
  # log telemetry if MD is in active or failed state
880
882
  deployment_id = model_deployment.id
881
883
  state = model_deployment.lifecycle_state.upper()
@@ -1231,7 +1233,7 @@ class AquaDeploymentApp(AquaApp):
1231
1233
  container_spec = container_config.spec if container_config else UNKNOWN
1232
1234
  cli_params = container_spec.cli_param if container_spec else UNKNOWN
1233
1235
 
1234
- restricted_params = self._find_restricted_params(
1236
+ restricted_params = find_restricted_params(
1235
1237
  cli_params, params, container_type_key
1236
1238
  )
1237
1239
 
@@ -1242,41 +1244,6 @@ class AquaDeploymentApp(AquaApp):
1242
1244
  )
1243
1245
  return {"valid": True}
1244
1246
 
1245
- @staticmethod
1246
- def _find_restricted_params(
1247
- default_params: Union[str, List[str]],
1248
- user_params: Union[str, List[str]],
1249
- container_family: str,
1250
- ) -> List[str]:
1251
- """Returns a list of restricted params that user chooses to override when creating an Aqua deployment.
1252
- The default parameters coming from the container index json file cannot be overridden.
1253
-
1254
- Parameters
1255
- ----------
1256
- default_params:
1257
- Inference container parameter string with default values.
1258
- user_params:
1259
- Inference container parameter string with user provided values.
1260
- container_family: str
1261
- The image family of model deployment container runtime.
1262
-
1263
- Returns
1264
- -------
1265
- A list with params keys common between params1 and params2.
1266
-
1267
- """
1268
- restricted_params = []
1269
- if default_params and user_params:
1270
- default_params_dict = get_params_dict(default_params)
1271
- user_params_dict = get_params_dict(user_params)
1272
-
1273
- restricted_params_set = get_restricted_params_by_container(container_family)
1274
- for key, _items in user_params_dict.items():
1275
- if key in default_params_dict or key in restricted_params_set:
1276
- restricted_params.append(key.lstrip("-"))
1277
-
1278
- return restricted_params
1279
-
1280
1247
  @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
1281
1248
  @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))
1282
1249
  def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
@@ -1313,3 +1280,85 @@ class AquaDeploymentApp(AquaApp):
1313
1280
  )
1314
1281
  for oci_shape in oci_shapes
1315
1282
  ]
1283
+
1284
+ def get_deployment_status(
1285
+ self,
1286
+ deployment: ModelDeployment,
1287
+ work_request_id: str,
1288
+ model_type: str,
1289
+ model_name: str,
1290
+ ) -> None:
1291
+ """Waits for the data science model deployment to be completed and log its status in telemetry.
1292
+
1293
+ Parameters
1294
+ ----------
1295
+
1296
+ model_deployment_id: str
1297
+ The id of the deployed aqua model.
1298
+ work_request_id: str
1299
+ The work request Id of the model deployment.
1300
+ model_type: str
1301
+ The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`.
1302
+
1303
+ Returns
1304
+ -------
1305
+ AquaDeployment
1306
+ An Aqua deployment instance.
1307
+ """
1308
+ ocid = get_ocid_substring(deployment.id, key_len=8)
1309
+ data_science_work_request: DataScienceWorkRequest = DataScienceWorkRequest(
1310
+ work_request_id
1311
+ )
1312
+ try:
1313
+ data_science_work_request.wait_work_request(
1314
+ progress_bar_description="Creating model deployment",
1315
+ max_wait_time=DEFAULT_WAIT_TIME,
1316
+ poll_interval=DEFAULT_POLL_INTERVAL,
1317
+ )
1318
+ except Exception:
1319
+ status = ""
1320
+ logs = deployment.show_logs().sort_values(by="time", ascending=False)
1321
+
1322
+ if logs and len(logs) > 0:
1323
+ status = logs.iloc[0]["message"]
1324
+
1325
+ status = re.sub(r"[^a-zA-Z0-9]", " ", status)
1326
+
1327
+ if data_science_work_request._error_message:
1328
+ error_str = ""
1329
+ for error in data_science_work_request._error_message:
1330
+ error_str = error_str + " " + error.message
1331
+
1332
+ error_str = re.sub(r"[^a-zA-Z0-9]", " ", error_str)
1333
+ telemetry_kwargs = {
1334
+ "ocid": ocid,
1335
+ "model_name": model_name,
1336
+ "work_request_error": error_str,
1337
+ "status": status,
1338
+ }
1339
+
1340
+ self.telemetry.record_event(
1341
+ category=f"aqua/{model_type}/deployment/status",
1342
+ action="FAILED",
1343
+ **telemetry_kwargs,
1344
+ )
1345
+ else:
1346
+ telemetry_kwargs = {
1347
+ "ocid": ocid,
1348
+ "model_name": model_name,
1349
+ "status": status,
1350
+ }
1351
+
1352
+ self.telemetry.record_event(
1353
+ category=f"aqua/{model_type}/deployment/status",
1354
+ action="FAILED",
1355
+ **telemetry_kwargs,
1356
+ )
1357
+
1358
+ else:
1359
+ telemetry_kwargs = {"ocid": ocid, "model_name": model_name}
1360
+ self.telemetry.record_event(
1361
+ category=f"aqua/{model_type}/deployment/status",
1362
+ action="SUCCEEDED",
1363
+ **telemetry_kwargs,
1364
+ )