oracle-ads 2.11.15__py3-none-any.whl → 2.11.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. ads/aqua/app.py +5 -6
  2. ads/aqua/common/entities.py +17 -0
  3. ads/aqua/common/enums.py +14 -1
  4. ads/aqua/common/utils.py +160 -3
  5. ads/aqua/config/config.py +1 -1
  6. ads/aqua/config/deployment_config_defaults.json +29 -1
  7. ads/aqua/config/resource_limit_names.json +1 -0
  8. ads/aqua/constants.py +6 -1
  9. ads/aqua/evaluation/entities.py +0 -1
  10. ads/aqua/evaluation/evaluation.py +47 -14
  11. ads/aqua/extension/common_handler.py +75 -5
  12. ads/aqua/extension/common_ws_msg_handler.py +57 -0
  13. ads/aqua/extension/deployment_handler.py +16 -13
  14. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  15. ads/aqua/extension/errors.py +1 -1
  16. ads/aqua/extension/evaluation_ws_msg_handler.py +28 -6
  17. ads/aqua/extension/model_handler.py +134 -8
  18. ads/aqua/extension/models/ws_models.py +78 -3
  19. ads/aqua/extension/models_ws_msg_handler.py +49 -0
  20. ads/aqua/extension/ui_websocket_handler.py +7 -1
  21. ads/aqua/model/entities.py +28 -0
  22. ads/aqua/model/model.py +544 -129
  23. ads/aqua/modeldeployment/deployment.py +102 -43
  24. ads/aqua/modeldeployment/entities.py +9 -20
  25. ads/aqua/ui.py +152 -28
  26. ads/common/object_storage_details.py +2 -5
  27. ads/common/serializer.py +2 -3
  28. ads/jobs/builders/infrastructure/dsc_job.py +41 -12
  29. ads/jobs/builders/infrastructure/dsc_job_runtime.py +74 -27
  30. ads/jobs/builders/runtimes/container_runtime.py +83 -4
  31. ads/opctl/operator/lowcode/anomaly/const.py +1 -0
  32. ads/opctl/operator/lowcode/anomaly/model/base_model.py +23 -7
  33. ads/opctl/operator/lowcode/anomaly/operator_config.py +1 -0
  34. ads/opctl/operator/lowcode/anomaly/schema.yaml +4 -0
  35. ads/opctl/operator/lowcode/common/errors.py +6 -0
  36. ads/opctl/operator/lowcode/forecast/model/arima.py +3 -1
  37. ads/opctl/operator/lowcode/forecast/model/base_model.py +21 -13
  38. ads/opctl/operator/lowcode/forecast/model_evaluator.py +11 -2
  39. ads/pipeline/ads_pipeline_run.py +13 -2
  40. {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/METADATA +2 -1
  41. {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/RECORD +44 -40
  42. {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/LICENSE.txt +0 -0
  43. {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/WHEEL +0 -0
  44. {oracle_ads-2.11.15.dist-info → oracle_ads-2.11.17.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python
2
- # -*- coding: utf-8 -*--
3
2
 
4
3
  # Copyright (c) 2021, 2024 Oracle and/or its affiliates.
5
4
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
@@ -7,16 +6,15 @@
7
6
  import json
8
7
  import os
9
8
  import re
9
+ from concurrent.futures import ThreadPoolExecutor, as_completed
10
10
  from dataclasses import dataclass
11
11
  from typing import Dict, List
12
12
  from urllib.parse import urlparse
13
13
 
14
-
15
14
  import oci
16
15
  from ads.common import auth as authutil
17
16
  from ads.common import oci_client
18
17
  from ads.dataset.progress import TqdmProgressBar
19
- from concurrent.futures import ThreadPoolExecutor, as_completed
20
18
 
21
19
  THREAD_POOL_MAX_WORKERS = 10
22
20
 
@@ -169,8 +167,7 @@ class ObjectStorageDetails:
169
167
 
170
168
  def list_objects(self, **kwargs):
171
169
  """Lists objects in a given oss path
172
-
173
- Parameters
170
+ Parameters
174
171
  -------
175
172
  **kwargs:
176
173
  namespace, bucket, filepath are set by the class. By default, fields gets all values. For other supported
ads/common/serializer.py CHANGED
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env python
2
- # -*- coding: utf-8; -*-
3
2
 
4
3
  # Copyright (c) 2021, 2024 Oracle and/or its affiliates.
5
4
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
@@ -25,10 +24,8 @@ from ads.common import logger
25
24
  from ads.common.auth import default_signer
26
25
 
27
26
  try:
28
- from yaml import CSafeDumper as dumper
29
27
  from yaml import CSafeLoader as loader
30
28
  except:
31
- from yaml import SafeDumper as dumper
32
29
  from yaml import SafeLoader as loader
33
30
 
34
31
 
@@ -99,6 +96,8 @@ class Serializable(ABC):
99
96
  """JSON serializer for objects not serializable by default json code."""
100
97
  if isinstance(obj, datetime):
101
98
  return obj.isoformat()
99
+ if hasattr(obj, "to_dict"):
100
+ return obj.to_dict()
102
101
  raise TypeError(f"Type {type(obj)} not serializable.")
103
102
 
104
103
  @staticmethod
@@ -30,6 +30,7 @@ from ads.common.oci_logging import OCILog
30
30
  from ads.common.oci_resource import ResourceNotFoundError
31
31
  from ads.jobs.builders.infrastructure.base import Infrastructure, RunInstance
32
32
  from ads.jobs.builders.infrastructure.dsc_job_runtime import (
33
+ ContainerRuntimeHandler,
33
34
  DataScienceJobRuntimeManager,
34
35
  )
35
36
  from ads.jobs.builders.infrastructure.utils import get_value
@@ -376,13 +377,12 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
376
377
  """
377
378
  runs = self.run_list()
378
379
  for run in runs:
379
- if force_delete:
380
- if run.lifecycle_state in [
381
- DataScienceJobRun.LIFECYCLE_STATE_ACCEPTED,
382
- DataScienceJobRun.LIFECYCLE_STATE_IN_PROGRESS,
383
- DataScienceJobRun.LIFECYCLE_STATE_NEEDS_ATTENTION,
384
- ]:
385
- run.cancel(wait_for_completion=True)
380
+ if force_delete and run.lifecycle_state in [
381
+ DataScienceJobRun.LIFECYCLE_STATE_ACCEPTED,
382
+ DataScienceJobRun.LIFECYCLE_STATE_IN_PROGRESS,
383
+ DataScienceJobRun.LIFECYCLE_STATE_NEEDS_ATTENTION,
384
+ ]:
385
+ run.cancel(wait_for_completion=True)
386
386
  run.delete()
387
387
  self.client.delete_job(self.id)
388
388
  return self
@@ -458,7 +458,7 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
458
458
  ----------
459
459
  **kwargs :
460
460
  Keyword arguments for initializing a Data Science Job Run.
461
- The keys can be any keys in supported by OCI JobConfigurationDetails and JobRun, including:
461
+ The keys can be any keys in supported by OCI JobConfigurationDetails, OcirContainerJobEnvironmentConfigurationDetails and JobRun, including:
462
462
  * hyperparameter_values: dict(str, str)
463
463
  * environment_variables: dict(str, str)
464
464
  * command_line_arguments: str
@@ -466,6 +466,11 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
466
466
  * display_name: str
467
467
  * freeform_tags: dict(str, str)
468
468
  * defined_tags: dict(str, dict(str, object))
469
+ * image: str
470
+ * cmd: list[str]
471
+ * entrypoint: list[str]
472
+ * image_digest: str
473
+ * image_signature_id: str
469
474
 
470
475
  If display_name is not specified, it will be generated as "<JOB_NAME>-run-<TIMESTAMP>".
471
476
 
@@ -478,14 +483,28 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
478
483
  if not self.id:
479
484
  self.create()
480
485
 
481
- swagger_types = (
486
+ config_swagger_types = (
482
487
  oci.data_science.models.DefaultJobConfigurationDetails().swagger_types.keys()
483
488
  )
489
+ env_config_swagger_types = {}
490
+ if hasattr(oci.data_science.models, "OcirContainerJobEnvironmentConfigurationDetails"):
491
+ env_config_swagger_types = (
492
+ oci.data_science.models.OcirContainerJobEnvironmentConfigurationDetails().swagger_types.keys()
493
+ )
484
494
  config_kwargs = {}
495
+ env_config_kwargs = {}
485
496
  keys = list(kwargs.keys())
486
497
  for key in keys:
487
- if key in swagger_types:
498
+ if key in config_swagger_types:
488
499
  config_kwargs[key] = kwargs.pop(key)
500
+ elif key in env_config_swagger_types:
501
+ value = kwargs.pop(key)
502
+ if key in [
503
+ ContainerRuntime.CONST_CMD,
504
+ ContainerRuntime.CONST_ENTRYPOINT
505
+ ] and isinstance(value, str):
506
+ value = ContainerRuntimeHandler.split_args(value)
507
+ env_config_kwargs[key] = value
489
508
 
490
509
  # remove timestamp from the job name (added in default names, when display_name not specified by user)
491
510
  if self.display_name:
@@ -514,6 +533,12 @@ class DSCJob(OCIDataScienceMixin, oci.data_science.models.Job):
514
533
  config_override.update(config_kwargs)
515
534
  kwargs["job_configuration_override_details"] = config_override
516
535
 
536
+ if env_config_kwargs:
537
+ env_config_kwargs["jobEnvironmentType"] = "OCIR_CONTAINER"
538
+ env_config_override = kwargs.get("job_environment_configuration_override_details", {})
539
+ env_config_override.update(env_config_kwargs)
540
+ kwargs["job_environment_configuration_override_details"] = env_config_override
541
+
517
542
  wait = kwargs.pop("wait", False)
518
543
  run = DataScienceJobRun(**kwargs, **self.auth).create()
519
544
  if wait:
@@ -868,10 +893,14 @@ class DataScienceJobRun(
868
893
  return self
869
894
 
870
895
  def delete(self, force_delete: bool = False):
871
- if force_delete:
896
+ if force_delete and self.status in [
897
+ DataScienceJobRun.LIFECYCLE_STATE_ACCEPTED,
898
+ DataScienceJobRun.LIFECYCLE_STATE_IN_PROGRESS,
899
+ DataScienceJobRun.LIFECYCLE_STATE_NEEDS_ATTENTION,
900
+ ]:
872
901
  self.cancel(wait_for_completion=True)
873
902
  super().delete()
874
- return
903
+ return self
875
904
 
876
905
 
877
906
  # This is for backward compatibility
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8; -*-
3
3
 
4
- # Copyright (c) 2021, 2023 Oracle and/or its affiliates.
4
+ # Copyright (c) 2021, 2024 Oracle and/or its affiliates.
5
5
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
6
  """Contains classes for conversion between ADS runtime and OCI Data Science Job implementation.
7
7
  This module is for ADS developers only.
@@ -305,10 +305,29 @@ class RuntimeHandler:
305
305
  self._extract_envs,
306
306
  self._extract_artifact,
307
307
  self._extract_runtime_minutes,
308
+ self._extract_properties,
308
309
  ]
309
310
  for extraction in extractions:
310
311
  runtime_spec.update(extraction(dsc_job))
311
312
  return self.RUNTIME_CLASS(self._format_env_var(runtime_spec))
313
+
314
+ def _extract_properties(self, dsc_job) -> dict:
315
+ """Extract the job runtime properties from data science job.
316
+
317
+ This is the base method which does not extract the job runtime properties.
318
+ Sub-class should implement the extraction if needed.
319
+
320
+ Parameters
321
+ ----------
322
+ dsc_job : DSCJob or oci.datascience.models.Job
323
+ The data science job containing runtime information.
324
+
325
+ Returns
326
+ -------
327
+ dict
328
+ A runtime specification dictionary for initializing a runtime.
329
+ """
330
+ return {}
312
331
 
313
332
  def _extract_args(self, dsc_job) -> dict:
314
333
  """Extracts the command line arguments from data science job.
@@ -942,9 +961,12 @@ class GitPythonRuntimeHandler(CondaRuntimeHandler):
942
961
  class ContainerRuntimeHandler(RuntimeHandler):
943
962
  RUNTIME_CLASS = ContainerRuntime
944
963
  CMD_DELIMITER = ","
945
- CONST_CONTAINER_IMAGE = "CONTAINER_CUSTOM_IMAGE"
946
- CONST_CONTAINER_ENTRYPOINT = "CONTAINER_ENTRYPOINT"
947
- CONST_CONTAINER_CMD = "CONTAINER_CMD"
964
+
965
+ def translate(self, runtime: Runtime) -> dict:
966
+ payload = super().translate(runtime)
967
+ job_env_config = self._translate_env_config(runtime)
968
+ payload["job_environment_configuration_details"] = job_env_config
969
+ return payload
948
970
 
949
971
  def _translate_artifact(self, runtime: Runtime):
950
972
  """Specifies a dummy script as the job artifact.
@@ -964,29 +986,34 @@ class ContainerRuntimeHandler(RuntimeHandler):
964
986
  os.path.dirname(__file__), "../../templates", "container.py"
965
987
  )
966
988
 
967
- def _translate_env(self, runtime: ContainerRuntime) -> dict:
968
- """Translate the environment variable.
989
+ def _translate_env_config(self, runtime: Runtime) -> dict:
990
+ """Converts runtime properties to ``OcirContainerJobEnvironmentConfigurationDetails`` payload required by OCI Data Science job.
969
991
 
970
992
  Parameters
971
993
  ----------
972
- runtime : GitPythonRuntime
973
- An instance of GitPythonRuntime
994
+ runtime : Runtime
995
+ The runtime containing the properties to be converted.
974
996
 
975
997
  Returns
976
998
  -------
977
999
  dict
978
- A dictionary containing environment variables for OCI data science job.
1000
+ A dictionary storing the ``OcirContainerJobEnvironmentConfigurationDetails`` payload for OCI data science job.
979
1001
  """
980
- if not runtime.image:
981
- raise ValueError("Specify container image for ContainerRuntime.")
982
- envs = super()._translate_env(runtime)
983
- spec_mappings = {
984
- ContainerRuntime.CONST_IMAGE: self.CONST_CONTAINER_IMAGE,
985
- ContainerRuntime.CONST_ENTRYPOINT: self.CONST_CONTAINER_ENTRYPOINT,
986
- ContainerRuntime.CONST_CMD: self.CONST_CONTAINER_CMD,
1002
+ job_environment_configuration_details = {
1003
+ "job_environment_type": runtime.job_env_type
987
1004
  }
988
- envs.update(self._translate_specs(runtime, spec_mappings, self.CMD_DELIMITER))
989
- return envs
1005
+
1006
+ for key, value in ContainerRuntime.attribute_map.items():
1007
+ property = runtime.get_spec(key, None)
1008
+ if key in [
1009
+ ContainerRuntime.CONST_CMD,
1010
+ ContainerRuntime.CONST_ENTRYPOINT
1011
+ ] and isinstance(property, str):
1012
+ property = self.split_args(property)
1013
+ if property is not None:
1014
+ job_environment_configuration_details[value] = property
1015
+
1016
+ return job_environment_configuration_details
990
1017
 
991
1018
  @staticmethod
992
1019
  def split_args(args: str) -> list:
@@ -1031,17 +1058,37 @@ class ContainerRuntimeHandler(RuntimeHandler):
1031
1058
  """
1032
1059
  spec = super()._extract_envs(dsc_job)
1033
1060
  envs = spec.pop(ContainerRuntime.CONST_ENV_VAR, {})
1034
- if self.CONST_CONTAINER_IMAGE not in envs:
1035
- raise IncompatibleRuntime()
1036
- spec[ContainerRuntime.CONST_IMAGE] = envs.pop(self.CONST_CONTAINER_IMAGE)
1037
- cmd = self.split_args(envs.pop(self.CONST_CONTAINER_CMD, ""))
1038
- if cmd:
1039
- spec[ContainerRuntime.CONST_CMD] = cmd
1040
- entrypoint = self.split_args(envs.pop(self.CONST_CONTAINER_ENTRYPOINT, ""))
1041
- if entrypoint:
1042
- spec[ContainerRuntime.CONST_ENTRYPOINT] = entrypoint
1061
+
1043
1062
  if envs:
1044
1063
  spec[ContainerRuntime.CONST_ENV_VAR] = envs
1064
+
1065
+ return spec
1066
+
1067
+ def _extract_properties(self, dsc_job) -> dict:
1068
+ """Extract the runtime properties from data science job.
1069
+
1070
+ Parameters
1071
+ ----------
1072
+ dsc_job : DSCJob or oci.datascience.models.Job
1073
+ The data science job containing runtime information.
1074
+
1075
+ Returns
1076
+ -------
1077
+ dict
1078
+ A runtime specification dictionary for initializing a runtime.
1079
+ """
1080
+ spec = super()._extract_envs(dsc_job)
1081
+
1082
+ job_env_config = getattr(dsc_job, "job_environment_configuration_details", None)
1083
+ job_env_type = getattr(job_env_config, "job_environment_type", None)
1084
+
1085
+ if not (job_env_config and job_env_type == "OCIR_CONTAINER"):
1086
+ raise IncompatibleRuntime()
1087
+
1088
+ for key, value in ContainerRuntime.attribute_map.items():
1089
+ property = getattr(job_env_config, value, None)
1090
+ if property is not None:
1091
+ spec[key] = property
1045
1092
  return spec
1046
1093
 
1047
1094
 
@@ -3,9 +3,12 @@
3
3
 
4
4
  # Copyright (c) 2021, 2024 Oracle and/or its affiliates.
5
5
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+ import logging
6
7
  from typing import Union
7
8
  from ads.jobs.builders.runtimes.base import MultiNodeRuntime
8
9
 
10
+ logger = logging.getLogger(__name__)
11
+
9
12
 
10
13
  class ContainerRuntime(MultiNodeRuntime):
11
14
  """Represents a container job runtime
@@ -13,18 +16,23 @@ class ContainerRuntime(MultiNodeRuntime):
13
16
  To define container runtime:
14
17
 
15
18
  >>> ContainerRuntime()
16
- >>> .with_image("iad.ocir.io/<your_tenancy>/<your_image>")
19
+ >>> .with_image("iad.ocir.io/<your_tenancy>/<your_image>:<tag>")
17
20
  >>> .with_cmd("sleep 5 && echo Hello World")
18
21
  >>> .with_entrypoint(["/bin/sh", "-c"])
22
+ >>> .with_image_digest("<image_digest>")
23
+ >>> .with_image_signature_id("<image_signature_id>")
19
24
  >>> .with_environment_variable(MY_ENV="MY_VALUE")
20
25
 
21
- Alternatively, you can define the ``entrypoint`` and ``cmd`` along with the image.
26
+ Alternatively, you can define the ``entrypoint``, ``cmd``,
27
+ ``image_digest``and ``image_signature_id`` along with the image.
22
28
 
23
29
  >>> ContainerRuntime()
24
30
  >>> .with_image(
25
- >>> "iad.ocir.io/<your_tenancy>/<your_image>",
31
+ >>> "iad.ocir.io/<your_tenancy>/<your_image>:<tag>",
26
32
  >>> entrypoint=["/bin/sh", "-c"],
27
33
  >>> cmd="sleep 5 && echo Hello World",
34
+ >>> image_digest="<image_digest>",
35
+ >>> image_signature_id="<image_signature_id>",
28
36
  >>> )
29
37
  >>> .with_environment_variable(MY_ENV="MY_VALUE")
30
38
 
@@ -46,20 +54,34 @@ class ContainerRuntime(MultiNodeRuntime):
46
54
  CONST_IMAGE = "image"
47
55
  CONST_ENTRYPOINT = "entrypoint"
48
56
  CONST_CMD = "cmd"
57
+ CONST_IMAGE_DIGEST = "imageDigest"
58
+ CONST_IMAGE_SIGNATURE_ID = "imageSignatureId"
49
59
  attribute_map = {
50
60
  CONST_IMAGE: CONST_IMAGE,
51
61
  CONST_ENTRYPOINT: CONST_ENTRYPOINT,
52
62
  CONST_CMD: CONST_CMD,
63
+ CONST_IMAGE_DIGEST: "image_digest",
64
+ CONST_IMAGE_SIGNATURE_ID: "image_signature_id",
53
65
  }
54
66
  attribute_map.update(MultiNodeRuntime.attribute_map)
55
67
 
68
+ @property
69
+ def job_env_type(self) -> str:
70
+ """The container type"""
71
+ return "OCIR_CONTAINER"
72
+
56
73
  @property
57
74
  def image(self) -> str:
58
75
  """The container image"""
59
76
  return self.get_spec(self.CONST_IMAGE)
60
77
 
61
78
  def with_image(
62
- self, image: str, entrypoint: Union[str, list, None] = None, cmd: str = None
79
+ self,
80
+ image: str,
81
+ entrypoint: Union[str, list, None] = None,
82
+ cmd: str = None,
83
+ image_digest: str = None,
84
+ image_signature_id: str = None,
63
85
  ) -> "ContainerRuntime":
64
86
  """Specify the image for the container job.
65
87
 
@@ -71,16 +93,73 @@ class ContainerRuntime(MultiNodeRuntime):
71
93
  Entrypoint for the job, by default None (the entrypoint defined in the image will be used).
72
94
  cmd : str, optional
73
95
  Command for the job, by default None.
96
+ image_digest: str, optional
97
+ The image digest, by default None.
98
+ image_signature_id: str, optional
99
+ The image signature id, by default None.
74
100
 
75
101
  Returns
76
102
  -------
77
103
  ContainerRuntime
78
104
  The runtime instance.
79
105
  """
106
+ if not isinstance(image, str):
107
+ raise ValueError(
108
+ "Custom image must be provided as a string."
109
+ )
110
+ if image.find(":") < 0:
111
+ logger.warning(
112
+ "Tag is required for custom image. Accepted format: iad.ocir.io/<tenancy>/<image>:<tag>."
113
+ )
80
114
  self.with_entrypoint(entrypoint)
81
115
  self.set_spec(self.CONST_CMD, cmd)
116
+ self.with_image_digest(image_digest)
117
+ self.with_image_signature_id(image_signature_id)
82
118
  return self.set_spec(self.CONST_IMAGE, image)
83
119
 
120
+ @property
121
+ def image_digest(self) -> str:
122
+ """The container image digest."""
123
+ return self.get_spec(self.CONST_IMAGE_DIGEST)
124
+
125
+ def with_image_digest(self, image_digest: str) -> "ContainerRuntime":
126
+ """Sets the digest of custom image.
127
+
128
+ Parameters
129
+ ----------
130
+ image_digest: str
131
+ The image digest.
132
+
133
+ Returns
134
+ -------
135
+ ContainerRuntime
136
+ The runtime instance.
137
+ """
138
+ return self.set_spec(self.CONST_IMAGE_DIGEST, image_digest)
139
+
140
+ @property
141
+ def image_signature_id(self) -> str:
142
+ """The container image signature id."""
143
+ return self.get_spec(self.CONST_IMAGE_SIGNATURE_ID)
144
+
145
+ def with_image_signature_id(self, image_signature_id: str) -> "ContainerRuntime":
146
+ """Sets the signature id of custom image.
147
+
148
+ Parameters
149
+ ----------
150
+ image_signature_id: str
151
+ The image signature id.
152
+
153
+ Returns
154
+ -------
155
+ ContainerRuntime
156
+ The runtime instance.
157
+ """
158
+ return self.set_spec(
159
+ self.CONST_IMAGE_SIGNATURE_ID,
160
+ image_signature_id
161
+ )
162
+
84
163
  @property
85
164
  def entrypoint(self) -> str:
86
165
  """Entrypoint of the container job"""
@@ -94,3 +94,4 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta):
94
94
 
95
95
 
96
96
  TODS_DEFAULT_MODEL = "ocsvm"
97
+ SUBSAMPLE_THRESHOLD = 1000
@@ -16,7 +16,7 @@ from sklearn import linear_model
16
16
 
17
17
  from ads.common.object_storage_details import ObjectStorageDetails
18
18
  from ads.opctl import logger
19
- from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
19
+ from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD
20
20
  from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
21
21
  from ads.opctl.operator.lowcode.common.utils import (
22
22
  disable_print,
@@ -79,7 +79,7 @@ class AnomalyOperatorBaseModel(ABC):
79
79
  anomaly_output, test_data, elapsed_time
80
80
  )
81
81
  table_blocks = [
82
- rc.DataTable(df, label=col, index=True)
82
+ rc.DataTable(df.head(SUBSAMPLE_THRESHOLD) if self.spec.subsample_report_data and len(df) > SUBSAMPLE_THRESHOLD else df, label=col, index=True)
83
83
  for col, df in self.datasets.full_data_dict.items()
84
84
  ]
85
85
  data_table = rc.Select(blocks=table_blocks)
@@ -94,20 +94,36 @@ class AnomalyOperatorBaseModel(ABC):
94
94
  anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
95
95
  OutputColumns.ANOMALY_COL
96
96
  ]
97
+ anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
98
+ downsampled_time_col = time_col
99
+ selected_indices = list(range(len(time_col)))
100
+ if self.spec.subsample_report_data:
101
+ non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
102
+ # Downsample non-anomalous data if it exceeds the threshold (1000)
103
+ if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
104
+ downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD]
105
+ selected_indices = anomaly_indices + downsampled_non_anomaly_indices
106
+ selected_indices.sort()
107
+ downsampled_time_col = time_col[selected_indices]
108
+
97
109
  columns = set(df.columns).difference({date_column})
98
110
  for col in columns:
99
111
  y = df[col].reset_index(drop=True)
112
+
113
+ downsampled_y = y[selected_indices]
114
+
100
115
  fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
101
116
  ax.grid()
102
- ax.plot(time_col, y, color="black")
103
- for i, index in enumerate(anomaly_col):
104
- if index == 1:
105
- ax.scatter(time_col[i], y[i], color="red", marker="o")
117
+ ax.plot(downsampled_time_col, downsampled_y, color="black")
118
+ # Plot anomalies
119
+ for i in anomaly_indices:
120
+ ax.scatter(time_col[i], y[i], color="red", marker="o")
106
121
  plt.xlabel(date_column)
107
122
  plt.ylabel(col)
108
123
  plt.title(f"`{col}` with reference to anomalies")
109
124
  figure_blocks.append(rc.Widget(ax))
110
- blocks.append(rc.Group(*figure_blocks, label=target))
125
+
126
+ blocks.append(rc.Group(*figure_blocks, label=target))
111
127
  plots = rc.Select(blocks)
112
128
 
113
129
  report_sections = []
@@ -77,6 +77,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
77
77
  model: str = None
78
78
  model_kwargs: Dict = field(default_factory=dict)
79
79
  contamination: float = None
80
+ subsample_report_data: bool = None
80
81
 
81
82
  def __post_init__(self):
82
83
  """Adjusts the specification details."""
@@ -377,4 +377,8 @@ spec:
377
377
  type: dict
378
378
  required: false
379
379
 
380
+ subsample_report_data:
381
+ type: boolean
382
+ required: false
383
+
380
384
  type: dict
@@ -39,3 +39,9 @@ class PermissionsError(Exception):
39
39
  "complies with the required schema for the operator. \n"
40
40
  f"{error}"
41
41
  )
42
+
43
+
44
+ class InsufficientDataError(Exception):
45
+ def __init__(self, message: str):
46
+ self.message = message
47
+ super().__init__(message)
@@ -67,7 +67,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
67
67
  self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
68
68
  # If trend is constant, remove constant columns
69
69
  if "trend" not in model_kwargs or model_kwargs["trend"] == "c":
70
- self.constant_cols[s_id] = df.columns[df.nunique() == 1]
70
+ self.constant_cols[s_id] = list(df.columns[df.nunique() == 1])
71
+ if target in self.constant_cols[s_id]:
72
+ self.constant_cols[s_id].remove(target)
71
73
  df = df.drop(columns=self.constant_cols[s_id])
72
74
 
73
75
  # format the dataframe for this target. Dropping NA on target[df] will remove all future data
@@ -249,20 +249,28 @@ class ForecastOperatorBaseModel(ABC):
249
249
  train_metrics_sections = [sec9_text, sec9]
250
250
 
251
251
  backtest_sections = []
252
+ output_dir = self.spec.output_directory.url
253
+ backtest_report_name = "backtest_stats.csv"
254
+ file_path = f"{output_dir}/{backtest_report_name}"
252
255
  if self.spec.model == AUTO_SELECT:
253
- output_dir = self.spec.output_directory.url
254
- backtest_report_name = "backtest_stats.csv"
255
- backtest_stats = pd.read_csv(f"{output_dir}/{backtest_report_name}")
256
- average_dict = backtest_stats.mean().to_dict()
257
- del average_dict['backtest']
258
- best_model = min(average_dict, key=average_dict.get)
259
- backtest_text = rc.Heading("Back Testing Metrics", level=2)
260
- summary_text = rc.Text(
261
- f"Overall, the average scores for the models are {average_dict}, with {best_model}"
262
- f" being identified as the top-performing model during backtesting.")
263
- backtest_table = rc.DataTable(backtest_stats, index=True)
264
- liner_plot = get_auto_select_plot(backtest_stats)
265
- backtest_sections = [backtest_text, backtest_table, summary_text, liner_plot]
256
+ backtest_sections.append(rc.Heading("Auto-select statistics", level=2))
257
+ if not os.path.exists(file_path):
258
+ failure_msg = rc.Text("auto-select could not be executed. Please check the "
259
+ "logs for more details.")
260
+ backtest_sections.append(failure_msg)
261
+ else:
262
+ backtest_stats = pd.read_csv(file_path)
263
+ average_dict = backtest_stats.mean().to_dict()
264
+ del average_dict['backtest']
265
+ best_model = min(average_dict, key=average_dict.get)
266
+ backtest_text = rc.Heading("Back Testing Metrics", level=3)
267
+ summary_text = rc.Text(
268
+ f"Overall, the average scores for the models are {average_dict}, with {best_model}"
269
+ f" being identified as the top-performing model during backtesting.")
270
+ backtest_table = rc.DataTable(backtest_stats, index=True)
271
+ liner_plot = get_auto_select_plot(backtest_stats)
272
+ backtest_sections.extend([backtest_text, backtest_table, summary_text,
273
+ liner_plot])
266
274
 
267
275
 
268
276
  forecast_plots = []
@@ -12,7 +12,8 @@ from ads.opctl import logger
12
12
  from ads.opctl.operator.lowcode.common.const import DataColumns
13
13
  from .model.forecast_datasets import ForecastDatasets
14
14
  from .operator_config import ForecastOperatorConfig
15
-
15
+ from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
16
+ from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
16
17
 
17
18
  class ModelEvaluator:
18
19
  """
@@ -61,6 +62,9 @@ class ModelEvaluator:
61
62
  unique_dates = min_series_data[date_col].unique()
62
63
 
63
64
  cut_offs = self.generate_cutoffs(unique_dates, horizon)
65
+ if not len(cut_offs):
66
+ raise InsufficientDataError("Insufficient data to evaluate multiple models. Please specify a model "
67
+ "instead of using auto-select.")
64
68
  training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date
65
69
  in cut_offs]
66
70
  test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
@@ -137,7 +141,12 @@ class ModelEvaluator:
137
141
  return metrics
138
142
 
139
143
  def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
140
- metrics = self.run_all_models(datasets, operator_config)
144
+ try:
145
+ metrics = self.run_all_models(datasets, operator_config)
146
+ except InsufficientDataError as e:
147
+ model = SupportedModels.Prophet
148
+ logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
149
+ return model
141
150
  avg_backtests_metrics = {key: sum(value.values()) / len(value.values()) for key, value in metrics.items()}
142
151
  best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
143
152
  logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")