oracle-ads 2.11.15__py3-none-any.whl → 2.11.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/app.py +5 -6
- ads/aqua/common/entities.py +17 -0
- ads/aqua/common/enums.py +14 -1
- ads/aqua/common/utils.py +160 -3
- ads/aqua/config/config.py +1 -1
- ads/aqua/config/deployment_config_defaults.json +29 -1
- ads/aqua/config/resource_limit_names.json +1 -0
- ads/aqua/constants.py +6 -1
- ads/aqua/evaluation/entities.py +0 -1
- ads/aqua/evaluation/evaluation.py +47 -14
- ads/aqua/extension/common_handler.py +75 -5
- ads/aqua/extension/common_ws_msg_handler.py +57 -0
- ads/aqua/extension/deployment_handler.py +16 -13
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +1 -1
- ads/aqua/extension/evaluation_ws_msg_handler.py +28 -6
- ads/aqua/extension/model_handler.py +134 -8
- ads/aqua/extension/models/ws_models.py +78 -3
- ads/aqua/extension/models_ws_msg_handler.py +49 -0
- ads/aqua/extension/ui_websocket_handler.py +7 -1
- ads/aqua/model/entities.py +28 -0
- ads/aqua/model/model.py +544 -129
- ads/aqua/modeldeployment/deployment.py +102 -43
- ads/aqua/modeldeployment/entities.py +9 -20
- ads/aqua/ui.py +152 -28
- ads/common/object_storage_details.py +2 -5
- ads/common/serializer.py +2 -3
- ads/jobs/builders/infrastructure/dsc_job.py +41 -12
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +74 -27
- ads/jobs/builders/runtimes/container_runtime.py +83 -4
- ads/opctl/operator/lowcode/anomaly/const.py +1 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +23 -7
- ads/opctl/operator/lowcode/anomaly/operator_config.py +1 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +4 -0
- ads/opctl/operator/lowcode/common/errors.py +6 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +3 -1
- ads/opctl/operator/lowcode/forecast/model/base_model.py +21 -13
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +11 -2
- ads/pipeline/ads_pipeline_run.py +13 -2
- {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/METADATA +2 -1
- {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/RECORD +44 -40
- {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
3
|
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
@@ -7,16 +6,15 @@
|
|
7
6
|
import json
|
8
7
|
import os
|
9
8
|
import re
|
9
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
10
10
|
from dataclasses import dataclass
|
11
11
|
from typing import Dict, List
|
12
12
|
from urllib.parse import urlparse
|
13
13
|
|
14
|
-
|
15
14
|
import oci
|
16
15
|
from ads.common import auth as authutil
|
17
16
|
from ads.common import oci_client
|
18
17
|
from ads.dataset.progress import TqdmProgressBar
|
19
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
20
18
|
|
21
19
|
THREAD_POOL_MAX_WORKERS = 10
|
22
20
|
|
@@ -169,8 +167,7 @@ class ObjectStorageDetails:
|
|
169
167
|
|
170
168
|
def list_objects(self, **kwargs):
|
171
169
|
"""Lists objects in a given oss path
|
172
|
-
|
173
|
-
Parameters
|
170
|
+
Parameters
|
174
171
|
-------
|
175
172
|
**kwargs:
|
176
173
|
namespace, bucket, filepath are set by the class. By default, fields gets all values. For other supported
|
ads/common/serializer.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
2
|
|
4
3
|
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
@@ -25,10 +24,8 @@ from ads.common import logger
|
|
25
24
|
from ads.common.auth import default_signer
|
26
25
|
|
27
26
|
try:
|
28
|
-
from yaml import CSafeDumper as dumper
|
29
27
|
from yaml import CSafeLoader as loader
|
30
28
|
except:
|
31
|
-
from yaml import SafeDumper as dumper
|
32
29
|
from yaml import SafeLoader as loader
|
33
30
|
|
34
31
|
|
@@ -99,6 +96,8 @@ class Serializable(ABC):
|
|
99
96
|
"""JSON serializer for objects not serializable by default json code."""
|
100
97
|
if isinstance(obj, datetime):
|
101
98
|
return obj.isoformat()
|
99
|
+
if hasattr(obj, "to_dict"):
|
100
|
+
return obj.to_dict()
|
102
101
|
raise TypeError(f"Type {type(obj)} not serializable.")
|
103
102
|
|
104
103
|
@staticmethod
|
@@ -30,6 +30,7 @@ from ads.common.oci_logging import OCILog
|
|
30
30
|
from ads.common.oci_resource import ResourceNotFoundError
|
31
31
|
from ads.jobs.builders.infrastructure.base import Infrastructure, RunInstance
|
32
32
|
from ads.jobs.builders.infrastructure.dsc_job_runtime import (
|
33
|
+
ContainerRuntimeHandler,
|
33
34
|
DataScienceJobRuntimeManager,
|
34
35
|
)
|
35
36
|
from ads.jobs.builders.infrastructure.utils import get_value
|
@@ -376,13 +377,12 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
|
|
376
377
|
"""
|
377
378
|
runs = self.run_list()
|
378
379
|
for run in runs:
|
379
|
-
if force_delete
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
run.cancel(wait_for_completion=True)
|
380
|
+
if force_delete and run.lifecycle_state in [
|
381
|
+
DataScienceJobRun.LIFECYCLE_STATE_ACCEPTED,
|
382
|
+
DataScienceJobRun.LIFECYCLE_STATE_IN_PROGRESS,
|
383
|
+
DataScienceJobRun.LIFECYCLE_STATE_NEEDS_ATTENTION,
|
384
|
+
]:
|
385
|
+
run.cancel(wait_for_completion=True)
|
386
386
|
run.delete()
|
387
387
|
self.client.delete_job(self.id)
|
388
388
|
return self
|
@@ -458,7 +458,7 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
|
|
458
458
|
----------
|
459
459
|
**kwargs :
|
460
460
|
Keyword arguments for initializing a Data Science Job Run.
|
461
|
-
The keys can be any keys in supported by OCI JobConfigurationDetails and JobRun, including:
|
461
|
+
The keys can be any keys supported by OCI JobConfigurationDetails, OcirContainerJobEnvironmentConfigurationDetails and JobRun, including:
|
462
462
|
* hyperparameter_values: dict(str, str)
|
463
463
|
* environment_variables: dict(str, str)
|
464
464
|
* command_line_arguments: str
|
@@ -466,6 +466,11 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
|
|
466
466
|
* display_name: str
|
467
467
|
* freeform_tags: dict(str, str)
|
468
468
|
* defined_tags: dict(str, dict(str, object))
|
469
|
+
* image: str
|
470
|
+
* cmd: list[str]
|
471
|
+
* entrypoint: list[str]
|
472
|
+
* image_digest: str
|
473
|
+
* image_signature_id: str
|
469
474
|
|
470
475
|
If display_name is not specified, it will be generated as "<JOB_NAME>-run-<TIMESTAMP>".
|
471
476
|
|
@@ -478,14 +483,28 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
|
|
478
483
|
if not self.id:
|
479
484
|
self.create()
|
480
485
|
|
481
|
-
|
486
|
+
config_swagger_types = (
|
482
487
|
oci.data_science.models.DefaultJobConfigurationDetails().swagger_types.keys()
|
483
488
|
)
|
489
|
+
env_config_swagger_types = {}
|
490
|
+
if hasattr(oci.data_science.models, "OcirContainerJobEnvironmentConfigurationDetails"):
|
491
|
+
env_config_swagger_types = (
|
492
|
+
oci.data_science.models.OcirContainerJobEnvironmentConfigurationDetails().swagger_types.keys()
|
493
|
+
)
|
484
494
|
config_kwargs = {}
|
495
|
+
env_config_kwargs = {}
|
485
496
|
keys = list(kwargs.keys())
|
486
497
|
for key in keys:
|
487
|
-
if key in
|
498
|
+
if key in config_swagger_types:
|
488
499
|
config_kwargs[key] = kwargs.pop(key)
|
500
|
+
elif key in env_config_swagger_types:
|
501
|
+
value = kwargs.pop(key)
|
502
|
+
if key in [
|
503
|
+
ContainerRuntime.CONST_CMD,
|
504
|
+
ContainerRuntime.CONST_ENTRYPOINT
|
505
|
+
] and isinstance(value, str):
|
506
|
+
value = ContainerRuntimeHandler.split_args(value)
|
507
|
+
env_config_kwargs[key] = value
|
489
508
|
|
490
509
|
# remove timestamp from the job name (added in default names, when display_name not specified by user)
|
491
510
|
if self.display_name:
|
@@ -514,6 +533,12 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
|
|
514
533
|
config_override.update(config_kwargs)
|
515
534
|
kwargs["job_configuration_override_details"] = config_override
|
516
535
|
|
536
|
+
if env_config_kwargs:
|
537
|
+
env_config_kwargs["jobEnvironmentType"] = "OCIR_CONTAINER"
|
538
|
+
env_config_override = kwargs.get("job_environment_configuration_override_details", {})
|
539
|
+
env_config_override.update(env_config_kwargs)
|
540
|
+
kwargs["job_environment_configuration_override_details"] = env_config_override
|
541
|
+
|
517
542
|
wait = kwargs.pop("wait", False)
|
518
543
|
run = DataScienceJobRun(**kwargs, **self.auth).create()
|
519
544
|
if wait:
|
@@ -868,10 +893,14 @@ class DataScienceJobRun(
|
|
868
893
|
return self
|
869
894
|
|
870
895
|
def delete(self, force_delete: bool = False):
|
871
|
-
if force_delete
|
896
|
+
if force_delete and self.status in [
|
897
|
+
DataScienceJobRun.LIFECYCLE_STATE_ACCEPTED,
|
898
|
+
DataScienceJobRun.LIFECYCLE_STATE_IN_PROGRESS,
|
899
|
+
DataScienceJobRun.LIFECYCLE_STATE_NEEDS_ATTENTION,
|
900
|
+
]:
|
872
901
|
self.cancel(wait_for_completion=True)
|
873
902
|
super().delete()
|
874
|
-
return
|
903
|
+
return self
|
875
904
|
|
876
905
|
|
877
906
|
# This is for backward compatibility
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8; -*-
|
3
3
|
|
4
|
-
# Copyright (c) 2021,
|
4
|
+
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
6
|
"""Contains classes for conversion between ADS runtime and OCI Data Science Job implementation.
|
7
7
|
This module is for ADS developers only.
|
@@ -305,10 +305,29 @@ class RuntimeHandler:
|
|
305
305
|
self._extract_envs,
|
306
306
|
self._extract_artifact,
|
307
307
|
self._extract_runtime_minutes,
|
308
|
+
self._extract_properties,
|
308
309
|
]
|
309
310
|
for extraction in extractions:
|
310
311
|
runtime_spec.update(extraction(dsc_job))
|
311
312
|
return self.RUNTIME_CLASS(self._format_env_var(runtime_spec))
|
313
|
+
|
314
|
+
def _extract_properties(self, dsc_job) -> dict:
|
315
|
+
"""Extract the job runtime properties from data science job.
|
316
|
+
|
317
|
+
This is the base method which does not extract the job runtime properties.
|
318
|
+
Subclasses should implement the extraction if needed.
|
319
|
+
|
320
|
+
Parameters
|
321
|
+
----------
|
322
|
+
dsc_job : DSCJob or oci.datascience.models.Job
|
323
|
+
The data science job containing runtime information.
|
324
|
+
|
325
|
+
Returns
|
326
|
+
-------
|
327
|
+
dict
|
328
|
+
A runtime specification dictionary for initializing a runtime.
|
329
|
+
"""
|
330
|
+
return {}
|
312
331
|
|
313
332
|
def _extract_args(self, dsc_job) -> dict:
|
314
333
|
"""Extracts the command line arguments from data science job.
|
@@ -942,9 +961,12 @@ class GitPythonRuntimeHandler(CondaRuntimeHandler):
|
|
942
961
|
class ContainerRuntimeHandler(RuntimeHandler):
|
943
962
|
RUNTIME_CLASS = ContainerRuntime
|
944
963
|
CMD_DELIMITER = ","
|
945
|
-
|
946
|
-
|
947
|
-
|
964
|
+
|
965
|
+
def translate(self, runtime: Runtime) -> dict:
|
966
|
+
payload = super().translate(runtime)
|
967
|
+
job_env_config = self._translate_env_config(runtime)
|
968
|
+
payload["job_environment_configuration_details"] = job_env_config
|
969
|
+
return payload
|
948
970
|
|
949
971
|
def _translate_artifact(self, runtime: Runtime):
|
950
972
|
"""Specifies a dummy script as the job artifact.
|
@@ -964,29 +986,34 @@ class ContainerRuntimeHandler(RuntimeHandler):
|
|
964
986
|
os.path.dirname(__file__), "../../templates", "container.py"
|
965
987
|
)
|
966
988
|
|
967
|
-
def
|
968
|
-
"""
|
989
|
+
def _translate_env_config(self, runtime: Runtime) -> dict:
|
990
|
+
"""Converts runtime properties to ``OcirContainerJobEnvironmentConfigurationDetails`` payload required by OCI Data Science job.
|
969
991
|
|
970
992
|
Parameters
|
971
993
|
----------
|
972
|
-
runtime :
|
973
|
-
|
994
|
+
runtime : Runtime
|
995
|
+
The runtime containing the properties to be converted.
|
974
996
|
|
975
997
|
Returns
|
976
998
|
-------
|
977
999
|
dict
|
978
|
-
A dictionary
|
1000
|
+
A dictionary storing the ``OcirContainerJobEnvironmentConfigurationDetails`` payload for OCI data science job.
|
979
1001
|
"""
|
980
|
-
|
981
|
-
|
982
|
-
envs = super()._translate_env(runtime)
|
983
|
-
spec_mappings = {
|
984
|
-
ContainerRuntime.CONST_IMAGE: self.CONST_CONTAINER_IMAGE,
|
985
|
-
ContainerRuntime.CONST_ENTRYPOINT: self.CONST_CONTAINER_ENTRYPOINT,
|
986
|
-
ContainerRuntime.CONST_CMD: self.CONST_CONTAINER_CMD,
|
1002
|
+
job_environment_configuration_details = {
|
1003
|
+
"job_environment_type": runtime.job_env_type
|
987
1004
|
}
|
988
|
-
|
989
|
-
|
1005
|
+
|
1006
|
+
for key, value in ContainerRuntime.attribute_map.items():
|
1007
|
+
property = runtime.get_spec(key, None)
|
1008
|
+
if key in [
|
1009
|
+
ContainerRuntime.CONST_CMD,
|
1010
|
+
ContainerRuntime.CONST_ENTRYPOINT
|
1011
|
+
] and isinstance(property, str):
|
1012
|
+
property = self.split_args(property)
|
1013
|
+
if property is not None:
|
1014
|
+
job_environment_configuration_details[value] = property
|
1015
|
+
|
1016
|
+
return job_environment_configuration_details
|
990
1017
|
|
991
1018
|
@staticmethod
|
992
1019
|
def split_args(args: str) -> list:
|
@@ -1031,17 +1058,37 @@ class ContainerRuntimeHandler(RuntimeHandler):
|
|
1031
1058
|
"""
|
1032
1059
|
spec = super()._extract_envs(dsc_job)
|
1033
1060
|
envs = spec.pop(ContainerRuntime.CONST_ENV_VAR, {})
|
1034
|
-
|
1035
|
-
raise IncompatibleRuntime()
|
1036
|
-
spec[ContainerRuntime.CONST_IMAGE] = envs.pop(self.CONST_CONTAINER_IMAGE)
|
1037
|
-
cmd = self.split_args(envs.pop(self.CONST_CONTAINER_CMD, ""))
|
1038
|
-
if cmd:
|
1039
|
-
spec[ContainerRuntime.CONST_CMD] = cmd
|
1040
|
-
entrypoint = self.split_args(envs.pop(self.CONST_CONTAINER_ENTRYPOINT, ""))
|
1041
|
-
if entrypoint:
|
1042
|
-
spec[ContainerRuntime.CONST_ENTRYPOINT] = entrypoint
|
1061
|
+
|
1043
1062
|
if envs:
|
1044
1063
|
spec[ContainerRuntime.CONST_ENV_VAR] = envs
|
1064
|
+
|
1065
|
+
return spec
|
1066
|
+
|
1067
|
+
def _extract_properties(self, dsc_job) -> dict:
|
1068
|
+
"""Extract the runtime properties from data science job.
|
1069
|
+
|
1070
|
+
Parameters
|
1071
|
+
----------
|
1072
|
+
dsc_job : DSCJob or oci.datascience.models.Job
|
1073
|
+
The data science job containing runtime information.
|
1074
|
+
|
1075
|
+
Returns
|
1076
|
+
-------
|
1077
|
+
dict
|
1078
|
+
A runtime specification dictionary for initializing a runtime.
|
1079
|
+
"""
|
1080
|
+
spec = super()._extract_envs(dsc_job)
|
1081
|
+
|
1082
|
+
job_env_config = getattr(dsc_job, "job_environment_configuration_details", None)
|
1083
|
+
job_env_type = getattr(job_env_config, "job_environment_type", None)
|
1084
|
+
|
1085
|
+
if not (job_env_config and job_env_type == "OCIR_CONTAINER"):
|
1086
|
+
raise IncompatibleRuntime()
|
1087
|
+
|
1088
|
+
for key, value in ContainerRuntime.attribute_map.items():
|
1089
|
+
property = getattr(job_env_config, value, None)
|
1090
|
+
if property is not None:
|
1091
|
+
spec[key] = property
|
1045
1092
|
return spec
|
1046
1093
|
|
1047
1094
|
|
@@ -3,9 +3,12 @@
|
|
3
3
|
|
4
4
|
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
import logging
|
6
7
|
from typing import Union
|
7
8
|
from ads.jobs.builders.runtimes.base import MultiNodeRuntime
|
8
9
|
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
9
12
|
|
10
13
|
class ContainerRuntime(MultiNodeRuntime):
|
11
14
|
"""Represents a container job runtime
|
@@ -13,18 +16,23 @@ class ContainerRuntime(MultiNodeRuntime):
|
|
13
16
|
To define container runtime:
|
14
17
|
|
15
18
|
>>> ContainerRuntime()
|
16
|
-
>>> .with_image("iad.ocir.io/<your_tenancy>/<your_image>")
|
19
|
+
>>> .with_image("iad.ocir.io/<your_tenancy>/<your_image>:<tag>")
|
17
20
|
>>> .with_cmd("sleep 5 && echo Hello World")
|
18
21
|
>>> .with_entrypoint(["/bin/sh", "-c"])
|
22
|
+
>>> .with_image_digest("<image_digest>")
|
23
|
+
>>> .with_image_signature_id("<image_signature_id>")
|
19
24
|
>>> .with_environment_variable(MY_ENV="MY_VALUE")
|
20
25
|
|
21
|
-
Alternatively, you can define the ``entrypoint
|
26
|
+
Alternatively, you can define the ``entrypoint``, ``cmd``,
|
27
|
+
``image_digest`` and ``image_signature_id`` along with the image.
|
22
28
|
|
23
29
|
>>> ContainerRuntime()
|
24
30
|
>>> .with_image(
|
25
|
-
>>> "iad.ocir.io/<your_tenancy>/<your_image>",
|
31
|
+
>>> "iad.ocir.io/<your_tenancy>/<your_image>:<tag>",
|
26
32
|
>>> entrypoint=["/bin/sh", "-c"],
|
27
33
|
>>> cmd="sleep 5 && echo Hello World",
|
34
|
+
>>> image_digest="<image_digest>",
|
35
|
+
>>> image_signature_id="<image_signature_id>",
|
28
36
|
>>> )
|
29
37
|
>>> .with_environment_variable(MY_ENV="MY_VALUE")
|
30
38
|
|
@@ -46,20 +54,34 @@ class ContainerRuntime(MultiNodeRuntime):
|
|
46
54
|
CONST_IMAGE = "image"
|
47
55
|
CONST_ENTRYPOINT = "entrypoint"
|
48
56
|
CONST_CMD = "cmd"
|
57
|
+
CONST_IMAGE_DIGEST = "imageDigest"
|
58
|
+
CONST_IMAGE_SIGNATURE_ID = "imageSignatureId"
|
49
59
|
attribute_map = {
|
50
60
|
CONST_IMAGE: CONST_IMAGE,
|
51
61
|
CONST_ENTRYPOINT: CONST_ENTRYPOINT,
|
52
62
|
CONST_CMD: CONST_CMD,
|
63
|
+
CONST_IMAGE_DIGEST: "image_digest",
|
64
|
+
CONST_IMAGE_SIGNATURE_ID: "image_signature_id",
|
53
65
|
}
|
54
66
|
attribute_map.update(MultiNodeRuntime.attribute_map)
|
55
67
|
|
68
|
+
@property
|
69
|
+
def job_env_type(self) -> str:
|
70
|
+
"""The container type"""
|
71
|
+
return "OCIR_CONTAINER"
|
72
|
+
|
56
73
|
@property
|
57
74
|
def image(self) -> str:
|
58
75
|
"""The container image"""
|
59
76
|
return self.get_spec(self.CONST_IMAGE)
|
60
77
|
|
61
78
|
def with_image(
|
62
|
-
self,
|
79
|
+
self,
|
80
|
+
image: str,
|
81
|
+
entrypoint: Union[str, list, None] = None,
|
82
|
+
cmd: str = None,
|
83
|
+
image_digest: str = None,
|
84
|
+
image_signature_id: str = None,
|
63
85
|
) -> "ContainerRuntime":
|
64
86
|
"""Specify the image for the container job.
|
65
87
|
|
@@ -71,16 +93,73 @@ class ContainerRuntime(MultiNodeRuntime):
|
|
71
93
|
Entrypoint for the job, by default None (the entrypoint defined in the image will be used).
|
72
94
|
cmd : str, optional
|
73
95
|
Command for the job, by default None.
|
96
|
+
image_digest: str, optional
|
97
|
+
The image digest, by default None.
|
98
|
+
image_signature_id: str, optional
|
99
|
+
The image signature id, by default None.
|
74
100
|
|
75
101
|
Returns
|
76
102
|
-------
|
77
103
|
ContainerRuntime
|
78
104
|
The runtime instance.
|
79
105
|
"""
|
106
|
+
if not isinstance(image, str):
|
107
|
+
raise ValueError(
|
108
|
+
"Custom image must be provided as a string."
|
109
|
+
)
|
110
|
+
if image.find(":") < 0:
|
111
|
+
logger.warning(
|
112
|
+
"Tag is required for custom image. Accepted format: iad.ocir.io/<tenancy>/<image>:<tag>."
|
113
|
+
)
|
80
114
|
self.with_entrypoint(entrypoint)
|
81
115
|
self.set_spec(self.CONST_CMD, cmd)
|
116
|
+
self.with_image_digest(image_digest)
|
117
|
+
self.with_image_signature_id(image_signature_id)
|
82
118
|
return self.set_spec(self.CONST_IMAGE, image)
|
83
119
|
|
120
|
+
@property
|
121
|
+
def image_digest(self) -> str:
|
122
|
+
"""The container image digest."""
|
123
|
+
return self.get_spec(self.CONST_IMAGE_DIGEST)
|
124
|
+
|
125
|
+
def with_image_digest(self, image_digest: str) -> "ContainerRuntime":
|
126
|
+
"""Sets the digest of custom image.
|
127
|
+
|
128
|
+
Parameters
|
129
|
+
----------
|
130
|
+
image_digest: str
|
131
|
+
The image digest.
|
132
|
+
|
133
|
+
Returns
|
134
|
+
-------
|
135
|
+
ContainerRuntime
|
136
|
+
The runtime instance.
|
137
|
+
"""
|
138
|
+
return self.set_spec(self.CONST_IMAGE_DIGEST, image_digest)
|
139
|
+
|
140
|
+
@property
|
141
|
+
def image_signature_id(self) -> str:
|
142
|
+
"""The container image signature id."""
|
143
|
+
return self.get_spec(self.CONST_IMAGE_SIGNATURE_ID)
|
144
|
+
|
145
|
+
def with_image_signature_id(self, image_signature_id: str) -> "ContainerRuntime":
|
146
|
+
"""Sets the signature id of custom image.
|
147
|
+
|
148
|
+
Parameters
|
149
|
+
----------
|
150
|
+
image_signature_id: str
|
151
|
+
The image signature id.
|
152
|
+
|
153
|
+
Returns
|
154
|
+
-------
|
155
|
+
ContainerRuntime
|
156
|
+
The runtime instance.
|
157
|
+
"""
|
158
|
+
return self.set_spec(
|
159
|
+
self.CONST_IMAGE_SIGNATURE_ID,
|
160
|
+
image_signature_id
|
161
|
+
)
|
162
|
+
|
84
163
|
@property
|
85
164
|
def entrypoint(self) -> str:
|
86
165
|
"""Entrypoint of the container job"""
|
@@ -16,7 +16,7 @@ from sklearn import linear_model
|
|
16
16
|
|
17
17
|
from ads.common.object_storage_details import ObjectStorageDetails
|
18
18
|
from ads.opctl import logger
|
19
|
-
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
|
19
|
+
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD
|
20
20
|
from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
|
21
21
|
from ads.opctl.operator.lowcode.common.utils import (
|
22
22
|
disable_print,
|
@@ -79,7 +79,7 @@ class AnomalyOperatorBaseModel(ABC):
|
|
79
79
|
anomaly_output, test_data, elapsed_time
|
80
80
|
)
|
81
81
|
table_blocks = [
|
82
|
-
rc.DataTable(df, label=col, index=True)
|
82
|
+
rc.DataTable(df.head(SUBSAMPLE_THRESHOLD) if self.spec.subsample_report_data and len(df) > SUBSAMPLE_THRESHOLD else df, label=col, index=True)
|
83
83
|
for col, df in self.datasets.full_data_dict.items()
|
84
84
|
]
|
85
85
|
data_table = rc.Select(blocks=table_blocks)
|
@@ -94,20 +94,36 @@ class AnomalyOperatorBaseModel(ABC):
|
|
94
94
|
anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
|
95
95
|
OutputColumns.ANOMALY_COL
|
96
96
|
]
|
97
|
+
anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
|
98
|
+
downsampled_time_col = time_col
|
99
|
+
selected_indices = list(range(len(time_col)))
|
100
|
+
if self.spec.subsample_report_data:
|
101
|
+
non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
|
102
|
+
# Downsample non-anomalous data if it exceeds the threshold (1000)
|
103
|
+
if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
|
104
|
+
downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD]
|
105
|
+
selected_indices = anomaly_indices + downsampled_non_anomaly_indices
|
106
|
+
selected_indices.sort()
|
107
|
+
downsampled_time_col = time_col[selected_indices]
|
108
|
+
|
97
109
|
columns = set(df.columns).difference({date_column})
|
98
110
|
for col in columns:
|
99
111
|
y = df[col].reset_index(drop=True)
|
112
|
+
|
113
|
+
downsampled_y = y[selected_indices]
|
114
|
+
|
100
115
|
fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
|
101
116
|
ax.grid()
|
102
|
-
ax.plot(
|
103
|
-
|
104
|
-
|
105
|
-
|
117
|
+
ax.plot(downsampled_time_col, downsampled_y, color="black")
|
118
|
+
# Plot anomalies
|
119
|
+
for i in anomaly_indices:
|
120
|
+
ax.scatter(time_col[i], y[i], color="red", marker="o")
|
106
121
|
plt.xlabel(date_column)
|
107
122
|
plt.ylabel(col)
|
108
123
|
plt.title(f"`{col}` with reference to anomalies")
|
109
124
|
figure_blocks.append(rc.Widget(ax))
|
110
|
-
|
125
|
+
|
126
|
+
blocks.append(rc.Group(*figure_blocks, label=target))
|
111
127
|
plots = rc.Select(blocks)
|
112
128
|
|
113
129
|
report_sections = []
|
@@ -77,6 +77,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
|
|
77
77
|
model: str = None
|
78
78
|
model_kwargs: Dict = field(default_factory=dict)
|
79
79
|
contamination: float = None
|
80
|
+
subsample_report_data: bool = None
|
80
81
|
|
81
82
|
def __post_init__(self):
|
82
83
|
"""Adjusts the specification details."""
|
@@ -67,7 +67,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
|
|
67
67
|
self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
|
68
68
|
# If trend is constant, remove constant columns
|
69
69
|
if "trend" not in model_kwargs or model_kwargs["trend"] == "c":
|
70
|
-
self.constant_cols[s_id] = df.columns[df.nunique() == 1]
|
70
|
+
self.constant_cols[s_id] = list(df.columns[df.nunique() == 1])
|
71
|
+
if target in self.constant_cols[s_id]:
|
72
|
+
self.constant_cols[s_id].remove(target)
|
71
73
|
df = df.drop(columns=self.constant_cols[s_id])
|
72
74
|
|
73
75
|
# format the dataframe for this target. Dropping NA on target[df] will remove all future data
|
@@ -249,20 +249,28 @@ class ForecastOperatorBaseModel(ABC):
|
|
249
249
|
train_metrics_sections = [sec9_text, sec9]
|
250
250
|
|
251
251
|
backtest_sections = []
|
252
|
+
output_dir = self.spec.output_directory.url
|
253
|
+
backtest_report_name = "backtest_stats.csv"
|
254
|
+
file_path = f"{output_dir}/{backtest_report_name}"
|
252
255
|
if self.spec.model == AUTO_SELECT:
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
256
|
+
backtest_sections.append(rc.Heading("Auto-select statistics", level=2))
|
257
|
+
if not os.path.exists(file_path):
|
258
|
+
failure_msg = rc.Text("auto-select could not be executed. Please check the "
|
259
|
+
"logs for more details.")
|
260
|
+
backtest_sections.append(failure_msg)
|
261
|
+
else:
|
262
|
+
backtest_stats = pd.read_csv(file_path)
|
263
|
+
average_dict = backtest_stats.mean().to_dict()
|
264
|
+
del average_dict['backtest']
|
265
|
+
best_model = min(average_dict, key=average_dict.get)
|
266
|
+
backtest_text = rc.Heading("Back Testing Metrics", level=3)
|
267
|
+
summary_text = rc.Text(
|
268
|
+
f"Overall, the average scores for the models are {average_dict}, with {best_model}"
|
269
|
+
f" being identified as the top-performing model during backtesting.")
|
270
|
+
backtest_table = rc.DataTable(backtest_stats, index=True)
|
271
|
+
liner_plot = get_auto_select_plot(backtest_stats)
|
272
|
+
backtest_sections.extend([backtest_text, backtest_table, summary_text,
|
273
|
+
liner_plot])
|
266
274
|
|
267
275
|
|
268
276
|
forecast_plots = []
|
@@ -12,7 +12,8 @@ from ads.opctl import logger
|
|
12
12
|
from ads.opctl.operator.lowcode.common.const import DataColumns
|
13
13
|
from .model.forecast_datasets import ForecastDatasets
|
14
14
|
from .operator_config import ForecastOperatorConfig
|
15
|
-
|
15
|
+
from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
|
16
|
+
from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
|
16
17
|
|
17
18
|
class ModelEvaluator:
|
18
19
|
"""
|
@@ -61,6 +62,9 @@ class ModelEvaluator:
|
|
61
62
|
unique_dates = min_series_data[date_col].unique()
|
62
63
|
|
63
64
|
cut_offs = self.generate_cutoffs(unique_dates, horizon)
|
65
|
+
if not len(cut_offs):
|
66
|
+
raise InsufficientDataError("Insufficient data to evaluate multiple models. Please specify a model "
|
67
|
+
"instead of using auto-select.")
|
64
68
|
training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date
|
65
69
|
in cut_offs]
|
66
70
|
test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
|
@@ -137,7 +141,12 @@ class ModelEvaluator:
|
|
137
141
|
return metrics
|
138
142
|
|
139
143
|
def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
|
140
|
-
|
144
|
+
try:
|
145
|
+
metrics = self.run_all_models(datasets, operator_config)
|
146
|
+
except InsufficientDataError as e:
|
147
|
+
model = SupportedModels.Prophet
|
148
|
+
logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
|
149
|
+
return model
|
141
150
|
avg_backtests_metrics = {key: sum(value.values()) / len(value.values()) for key, value in metrics.items()}
|
142
151
|
best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
|
143
152
|
logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
|