oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. ads/aqua/__init__.py +12 -0
  2. ads/aqua/base.py +324 -0
  3. ads/aqua/cli.py +19 -0
  4. ads/aqua/config/deployment_config_defaults.json +9 -0
  5. ads/aqua/config/resource_limit_names.json +7 -0
  6. ads/aqua/constants.py +45 -0
  7. ads/aqua/data.py +40 -0
  8. ads/aqua/decorator.py +101 -0
  9. ads/aqua/deployment.py +643 -0
  10. ads/aqua/dummy_data/icon.txt +1 -0
  11. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  12. ads/aqua/dummy_data/oci_models.json +1 -0
  13. ads/aqua/dummy_data/readme.md +26 -0
  14. ads/aqua/evaluation.py +1751 -0
  15. ads/aqua/exception.py +82 -0
  16. ads/aqua/extension/__init__.py +40 -0
  17. ads/aqua/extension/base_handler.py +138 -0
  18. ads/aqua/extension/common_handler.py +21 -0
  19. ads/aqua/extension/deployment_handler.py +202 -0
  20. ads/aqua/extension/evaluation_handler.py +135 -0
  21. ads/aqua/extension/finetune_handler.py +66 -0
  22. ads/aqua/extension/model_handler.py +59 -0
  23. ads/aqua/extension/ui_handler.py +201 -0
  24. ads/aqua/extension/utils.py +23 -0
  25. ads/aqua/finetune.py +579 -0
  26. ads/aqua/job.py +29 -0
  27. ads/aqua/model.py +819 -0
  28. ads/aqua/training/__init__.py +4 -0
  29. ads/aqua/training/exceptions.py +459 -0
  30. ads/aqua/ui.py +453 -0
  31. ads/aqua/utils.py +715 -0
  32. ads/cli.py +37 -6
  33. ads/common/auth.py +7 -0
  34. ads/common/decorator/__init__.py +7 -3
  35. ads/common/decorator/require_nonempty_arg.py +65 -0
  36. ads/common/object_storage_details.py +166 -7
  37. ads/common/oci_client.py +18 -1
  38. ads/common/oci_logging.py +2 -2
  39. ads/common/oci_mixin.py +4 -5
  40. ads/common/serializer.py +34 -5
  41. ads/common/utils.py +75 -10
  42. ads/config.py +40 -1
  43. ads/dataset/correlation_plot.py +10 -12
  44. ads/jobs/ads_job.py +43 -25
  45. ads/jobs/builders/infrastructure/base.py +4 -2
  46. ads/jobs/builders/infrastructure/dsc_job.py +49 -39
  47. ads/jobs/builders/runtimes/base.py +71 -1
  48. ads/jobs/builders/runtimes/container_runtime.py +4 -4
  49. ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
  50. ads/jobs/templates/driver_pytorch.py +27 -10
  51. ads/model/artifact_downloader.py +84 -14
  52. ads/model/artifact_uploader.py +25 -23
  53. ads/model/datascience_model.py +388 -38
  54. ads/model/deployment/model_deployment.py +10 -2
  55. ads/model/generic_model.py +8 -0
  56. ads/model/model_file_description_schema.json +68 -0
  57. ads/model/model_metadata.py +1 -1
  58. ads/model/service/oci_datascience_model.py +34 -5
  59. ads/opctl/config/merger.py +2 -2
  60. ads/opctl/operator/__init__.py +3 -1
  61. ads/opctl/operator/cli.py +7 -1
  62. ads/opctl/operator/cmd.py +3 -3
  63. ads/opctl/operator/common/errors.py +2 -1
  64. ads/opctl/operator/common/operator_config.py +22 -3
  65. ads/opctl/operator/common/utils.py +16 -0
  66. ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
  67. ads/opctl/operator/lowcode/anomaly/README.md +209 -0
  68. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  69. ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
  70. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  71. ads/opctl/operator/lowcode/anomaly/const.py +88 -0
  72. ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
  73. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  74. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
  75. ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
  76. ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
  77. ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
  78. ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
  79. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  80. ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
  81. ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
  82. ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
  83. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  84. ads/opctl/operator/lowcode/common/const.py +10 -0
  85. ads/opctl/operator/lowcode/common/data.py +96 -0
  86. ads/opctl/operator/lowcode/common/errors.py +41 -0
  87. ads/opctl/operator/lowcode/common/transformations.py +191 -0
  88. ads/opctl/operator/lowcode/common/utils.py +250 -0
  89. ads/opctl/operator/lowcode/forecast/README.md +3 -2
  90. ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
  91. ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
  92. ads/opctl/operator/lowcode/forecast/const.py +17 -1
  93. ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
  94. ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
  95. ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
  96. ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
  97. ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
  98. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
  99. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
  100. ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
  101. ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
  102. ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
  103. ads/opctl/operator/lowcode/forecast/utils.py +186 -356
  104. ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
  105. ads/opctl/operator/lowcode/pii/model/report.py +7 -7
  106. ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
  107. ads/opctl/operator/lowcode/pii/utils.py +0 -82
  108. ads/opctl/operator/runtime/runtime.py +3 -2
  109. ads/telemetry/base.py +62 -0
  110. ads/telemetry/client.py +105 -0
  111. ads/telemetry/telemetry.py +6 -3
  112. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
  113. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
  114. ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
  115. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
  116. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
  117. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
ads/model/deployment/model_deployment.py CHANGED
@@ -1761,7 +1761,11 @@ class ModelDeployment(Builder):
         }

         logs = {}
-        if self.infrastructure.access_log:
+        if (
+            self.infrastructure.access_log and
+            self.infrastructure.access_log.get(self.infrastructure.CONST_LOG_GROUP_ID, None)
+            and self.infrastructure.access_log.get(self.infrastructure.CONST_LOG_ID, None)
+        ):
             logs[self.infrastructure.CONST_ACCESS] = {
                 self.infrastructure.CONST_LOG_GROUP_ID: self.infrastructure.access_log.get(
                     "logGroupId", None
@@ -1770,7 +1774,11 @@ class ModelDeployment(Builder):
                     "logId", None
                 ),
             }
-        if self.infrastructure.predict_log:
+        if (
+            self.infrastructure.predict_log and
+            self.infrastructure.predict_log.get(self.infrastructure.CONST_LOG_GROUP_ID, None)
+            and self.infrastructure.predict_log.get(self.infrastructure.CONST_LOG_ID, None)
+        ):
             logs[self.infrastructure.CONST_PREDICT] = {
                 self.infrastructure.CONST_LOG_GROUP_ID: self.infrastructure.predict_log.get(
                     "logGroupId", None
ads/model/generic_model.py CHANGED
@@ -2054,6 +2054,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
         remove_existing_artifact: Optional[bool] = True,
         reload: Optional[bool] = True,
         version_label: Optional[str] = None,
+        model_by_reference: Optional[bool] = False,
         **kwargs,
     ) -> str:
         """Saves model artifacts to the model catalog.
@@ -2091,6 +2092,8 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
            The number of worker processes to use in parallel for uploading individual parts of a multipart upload.
        reload: (bool, optional)
            Whether to reload to check if `load_model()` works in `score.py`. Default to `True`.
+       model_by_reference: (bool, optional)
+           Whether model artifact is made available to Model Store by reference.
        kwargs:
            project_id: (str, optional).
                Project OCID. If not specified, the value will be taken either
@@ -2220,6 +2223,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
             overwrite_existing_artifact=overwrite_existing_artifact,
             remove_existing_artifact=remove_existing_artifact,
             parallel_process_count=parallel_process_count,
+            model_by_reference=model_by_reference,
             **kwargs,
         )

@@ -2620,6 +2624,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
         remove_existing_artifact: Optional[bool] = True,
         model_version_set: Optional[Union[str, ModelVersionSet]] = None,
         version_label: Optional[str] = None,
+        model_by_reference: Optional[bool] = False,
         **kwargs: Dict,
     ) -> "ModelDeployment":
         """Shortcut for prepare, save and deploy steps.
@@ -2724,6 +2729,8 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
            The Model version set OCID, or name, or `ModelVersionSet` instance.
        version_label: (str, optional). Defaults to None.
            The model version lebel.
+       model_by_reference: (bool, optional)
+           Whether model artifact is made available to Model Store by reference.
        kwargs:
            impute_values: (dict, optional).
                The dictionary where the key is the column index(or names is accepted
@@ -2827,6 +2834,7 @@ class GenericModel(MetadataMixin, Introspectable, EvaluatorMixin):
             model_version_set=model_version_set,
             version_label=version_label,
             region=kwargs.pop("region", None),
+            model_by_reference=model_by_reference,
         )
         # Set default deployment_display_name if not specified - randomly generated easy to remember name generated
         if not deployment_display_name:
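
Both `save()` and `prepare_save_deploy()` now forward the new flag to the model catalog client. A hedged usage sketch follows; the framework class, conda environment, and training code are illustrative and not part of this diff — only the `model_by_reference` keyword comes from it:

```python
# Illustrative sketch of opting into "model by reference" when saving to the catalog.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from ads.model.framework.sklearn_model import SklearnModel

X, y = load_iris(return_X_y=True)
estimator = LogisticRegression(max_iter=200).fit(X, y)

model = SklearnModel(estimator=estimator, artifact_dir="./artifacts")
model.prepare(inference_conda_env="generalml_p38_cpu_v1", force_overwrite=True)

# New in 2.11.0: make the artifact available to the Model Store by reference
# instead of uploading a copy of it into the catalog.
model_id = model.save(display_name="iris-by-reference", model_by_reference=True)
```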
ads/model/model_file_description_schema.json ADDED
@@ -0,0 +1,68 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "properties": {
+    "models": {
+      "items": {
+        "properties": {
+          "bucketName": {
+            "type": "string"
+          },
+          "namespace": {
+            "type": "string"
+          },
+          "objects": {
+            "items": {
+              "properties": {
+                "name": {
+                  "type": "string"
+                },
+                "sizeInBytes": {
+                  "minimum": 0,
+                  "type": "integer"
+                },
+                "version": {
+                  "type": "string"
+                }
+              },
+              "required": [
+                "name",
+                "version",
+                "sizeInBytes"
+              ],
+              "type": "object"
+            },
+            "minItems": 1,
+            "type": "array"
+          },
+          "prefix": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "namespace",
+          "bucketName",
+          "prefix",
+          "objects"
+        ],
+        "type": "object"
+      },
+      "minItems": 1,
+      "type": "array"
+    },
+    "type": {
+      "enum": [
+        "modelOSSReferenceDescription"
+      ],
+      "type": "string"
+    },
+    "version": {
+      "type": "string"
+    }
+  },
+  "required": [
+    "version",
+    "type",
+    "models"
+  ],
+  "type": "object"
+}
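
The new schema validates a "model by reference" description: a list of Object Storage locations (namespace, bucket, prefix) and per-object name/version/size entries in place of the artifact bytes. An illustrative document and a quick validation check (bucket and object values are placeholders; the third-party `jsonschema` package is an assumption, not an ADS dependency):

```python
# Illustrative only: a document shaped to satisfy the schema above, validated
# with the jsonschema package.
import json
from pathlib import Path

import jsonschema

description = {
    "version": "1.0",
    "type": "modelOSSReferenceDescription",
    "models": [
        {
            "namespace": "mynamespace",        # placeholder Object Storage namespace
            "bucketName": "my-models-bucket",  # placeholder bucket
            "prefix": "llm/weights",
            "objects": [
                {
                    "name": "llm/weights/model-00001.safetensors",
                    "version": "abc123",
                    "sizeInBytes": 4927342,
                }
            ],
        }
    ],
}

schema = json.loads(Path("ads/model/model_file_description_schema.json").read_text())
jsonschema.validate(instance=description, schema=schema)  # raises ValidationError on mismatch
```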
ads/model/model_metadata.py CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*--

-# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

 import json
ads/model/service/oci_datascience_model.py CHANGED
@@ -38,6 +38,8 @@ MODEL_NEEDS_TO_BE_SAVED = (
     "Model needs to be saved to the Model Catalog before it can be accessed."
 )

+MODEL_BY_REFERENCE_DESC = "modelDescription"
+

 class ModelProvenanceNotFoundError(Exception):  # pragma: no cover
     pass
@@ -304,18 +306,25 @@ class OCIDataScienceModel(
     @check_for_model_id(
         msg="Model needs to be saved to the Model Catalog before the artifact can be created."
     )
-    def create_model_artifact(self, bytes_content: BytesIO) -> None:
+    def create_model_artifact(
+        self,
+        bytes_content: BytesIO,
+        extension: str = None,
+    ) -> None:
         """Creates model artifact for specified model.

         Parameters
         ----------
         bytes_content: BytesIO
             Model artifacts to upload.
+        extension: str
+            File extension, defaults to zip
         """
+        ext = ".json" if extension and extension.lower() == ".json" else ".zip"
         self.client.create_model_artifact(
             self.id,
             bytes_content,
-            content_disposition=f'attachment; filename="{self.id}.zip"',
+            content_disposition=f'attachment; filename="{self.id}{ext}"',
         )

     @check_for_model_id(
@@ -423,10 +432,14 @@ class OCIDataScienceModel(
         OCIDataScienceModel
             The `OCIDataScienceModel` instance (self).
         """
+
+        model_details = self.to_oci_model(UpdateModelDetails)
+
+        # Clean up the model version set, otherwise it throws an error that model is already
+        # associated with the model version set.
+        model_details.model_version_set_id = None
         return self.update_from_oci_model(
-            self.client.update_model(
-                self.id, self.to_oci_model(UpdateModelDetails)
-            ).data
+            self.client.update_model(self.id, model_details).data
         )

     @check_for_model_id(
@@ -539,3 +552,19 @@ class OCIDataScienceModel(
         if not ocid:
             raise ValueError("Model OCID not provided.")
         return super().from_ocid(ocid)
+
+    def is_model_by_reference(self):
+        """Checks if model is created by reference
+        Returns
+        -------
+        bool flag denoting whether model was created by reference.
+
+        """
+        if self.custom_metadata_list:
+            for metadata in self.custom_metadata_list:
+                if (
+                    metadata.key == MODEL_BY_REFERENCE_DESC
+                    and metadata.value.lower() == "true"
+                ):
+                    return True
+        return False
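
A brief usage sketch for the new helper (the model OCID is a placeholder; `from_ocid` appears in the hunk above):

```python
# Hedged usage sketch; the OCID below is a placeholder.
from ads.model.service.oci_datascience_model import OCIDataScienceModel

oci_model = OCIDataScienceModel.from_ocid("ocid1.datasciencemodel.oc1..<unique_id>")
if oci_model.is_model_by_reference():
    # Custom metadata key "modelDescription" is set to "true", so the artifact
    # is registered by reference rather than stored in the catalog.
    print("model artifact is stored by reference")
```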
ads/opctl/config/merger.py CHANGED
@@ -11,7 +11,7 @@ import json

 import yaml

-from ads.common.auth import AuthType
+from ads.common.auth import AuthType, ResourcePrincipal
 from ads.opctl import logger
 from ads.opctl.config.base import ConfigProcessor
 from ads.opctl.config.utils import read_from_ini, _DefaultNoneDict
@@ -115,7 +115,7 @@ class ConfigMerger(ConfigProcessor):
         )
         # set default auth
         if not self.config["execution"].get("auth", None):
-            if is_in_notebook_session():
+            if ResourcePrincipal.supported():
                 self.config["execution"]["auth"] = (
                     exec_config.get("auth") or AuthType.RESOURCE_PRINCIPAL
                 )
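
The net effect is that `opctl` now defaults to resource principal auth wherever a resource principal is actually available (for example in Data Science jobs or pipelines), not only inside a notebook session. A rough equivalent of the new default selection, for illustration only (the API-key fallback is an assumption, not the literal merger code):

```python
# Rough illustration of the new default-auth selection.
from ads.common.auth import AuthType, ResourcePrincipal

default_auth = (
    AuthType.RESOURCE_PRINCIPAL if ResourcePrincipal.supported() else AuthType.API_KEY
)
print(f"opctl would default to: {default_auth}")
```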
ads/opctl/operator/__init__.py CHANGED
@@ -14,7 +14,9 @@ def __registered_operators():
     return [
         f
         for f in os.listdir(target_dir)
-        if os.path.isdir(os.path.join(target_dir, f)) and not f.startswith("__")
+        if os.path.isdir(os.path.join(target_dir, f))
+        and not f.startswith("__")
+        and f != "common"
     ]


ads/opctl/operator/cli.py CHANGED
@@ -9,12 +9,14 @@ from typing import Any, Dict
 import click
 import fsspec
 import yaml
+import logging
 from ads.opctl.operator.common.utils import default_signer
 from ads.common.auth import AuthType
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.opctl.constants import BACKEND_NAME, RUNTIME_TYPE
 from ads.opctl.decorator.common import click_options, with_auth, with_click_unknown_args
 from ads.opctl.utils import suppress_traceback
+from ads.opctl import logger

 from .__init__ import __operators__
 from .cmd import run as cmd_run
@@ -311,10 +313,14 @@ def publish_conda(debug: bool, **kwargs: Dict[str, Any]) -> None:
 @click.pass_context
 @with_click_unknown_args
 @with_auth
-def run(ctx: click.core.Context, debug: bool, **kwargs: Dict[str, Any]) -> None:
+def run(ctx: click.core.Context, debug: bool = False, **kwargs: Dict[str, Any]) -> None:
     """
     Runs the operator with the given specification on the targeted backend.
     """
+    if debug:
+        logger.setLevel(logging.DEBUG)
+    else:
+        logger.setLevel(logging.CRITICAL)
     operator_spec = {}
     backend = kwargs.pop("backend")

ads/opctl/operator/cmd.py CHANGED
@@ -48,7 +48,7 @@ from .common.backend_factory import BackendFactory
 from .common.errors import (
     OperatorCondaNotFoundError,
     OperatorImageNotFoundError,
-    OperatorSchemaYamlError,
+    InvalidParameterError,
 )
 from .common.operator_loader import _operator_info_list

@@ -167,7 +167,7 @@ def init(
         )
     else:
         overwrite = True
-        output = os.path.join(tempfile.TemporaryDirectory().name, "")
+        output = operator_utils.create_output_folder(name=type + "/")

     # generating operator specification
     operator_config = {}
@@ -422,7 +422,7 @@ def verify(
             run_name="verify",
         )
         operator_module.get("verify")(config, **kwargs)
-    except OperatorSchemaYamlError as ex:
+    except InvalidParameterError as ex:
         logger.debug(ex)
         raise ValueError(
             f"The operator's specification is not valid for the `{operator_info.type}` operator. "
ads/opctl/operator/common/errors.py CHANGED
@@ -7,8 +7,9 @@
 from ads.opctl.operator import __operators__


-class OperatorSchemaYamlError(Exception):
+class InvalidParameterError(Exception):
     """Exception raised when there is an issue with the schema."""
+
     def __init__(self, error: str):
         super().__init__(
             "Invalid operator specification. Check the YAML structure and ensure it "
ads/opctl/operator/common/operator_config.py CHANGED
@@ -8,12 +8,31 @@
 import json
 from abc import abstractmethod
 from dataclasses import dataclass
-from typing import Any, Dict
+from typing import Any, Dict, List

 from ads.common.serializer import DataClassSerializable

 from ads.opctl.operator.common.utils import OperatorValidator
-from ads.opctl.operator.common.errors import OperatorSchemaYamlError
+from ads.opctl.operator.common.errors import InvalidParameterError
+
+@dataclass(repr=True)
+class InputData(DataClassSerializable):
+    """Class representing operator specification input data details."""
+
+    connect_args: Dict = None
+    format: str = None
+    columns: List[str] = None
+    url: str = None
+    filters: List[str] = None
+    options: Dict = None
+    limit: int = None
+    sql: str = None
+    table_name: str = None
+
+
+@dataclass(repr=True)
+class OutputDirectory(InputData):
+    """Class representing operator specification output directory details."""


 @dataclass(repr=True)
@@ -65,7 +84,7 @@ class OperatorConfig(DataClassSerializable):
         result = validator.validate(obj_dict)

         if not result:
-            raise OperatorSchemaYamlError(json.dumps(validator.errors, indent=2))
+            raise InvalidParameterError(json.dumps(validator.errors, indent=2))
         return True

     @classmethod
@@ -29,6 +29,22 @@ class OperatorValidator(Validator):
29
29
  pass
30
30
 
31
31
 
32
+ def create_output_folder(name):
33
+ output_folder = name
34
+ protocol = fsspec.utils.get_protocol(output_folder)
35
+ storage_options = {}
36
+ if protocol != "file":
37
+ storage_options = auth or default_signer()
38
+
39
+ fs = fsspec.filesystem(protocol, **storage_options)
40
+ name_suffix = 1
41
+ while fs.exists(output_folder):
42
+ name_suffix = name_suffix + 1
43
+ output_folder = f"{name}_{name_suffix}"
44
+ fs.mkdirs(output_folder)
45
+ return output_folder
46
+
47
+
32
48
  def _build_image(
33
49
  dockerfile: str,
34
50
  image_name: str,
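
The helper above leans on names defined elsewhere in `ads/opctl/operator/common/utils.py` (`fsspec`, `default_signer`, and an `auth` value from the surrounding scope). A self-contained sketch of the same pick-a-free-name-then-create logic, with the credential handling made an explicit parameter as an assumption:

```python
# Self-contained sketch of the folder-creation logic added above; the explicit
# storage_options parameter is an assumption standing in for the module's auth/signer.
from typing import Optional

import fsspec


def create_output_folder(name: str, storage_options: Optional[dict] = None) -> str:
    """Create `name`, or `name_2`, `name_3`, ... if the path already exists."""
    output_folder = name
    protocol = fsspec.utils.get_protocol(output_folder)
    # Remote protocols (e.g. oci://) may need credentials; local paths do not.
    options = (storage_options or {}) if protocol != "file" else {}
    fs = fsspec.filesystem(protocol, **options)

    suffix = 1
    while fs.exists(output_folder):
        suffix += 1
        output_folder = f"{name}_{suffix}"
    fs.mkdirs(output_folder)
    return output_folder


print(create_output_folder("results"))  # "results", or "results_2" if it already exists
```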
ads/opctl/operator/lowcode/anomaly/MLoperator ADDED
@@ -0,0 +1,15 @@
+type: anomaly
+version: v1
+conda_type: service
+name: Anomaly Detection Operator
+gpu: no
+keywords:
+  - Anomaly Detection
+backends:
+  - job
+  - operator.local
+description: |
+  Anomaly Detection is the identification of rare items, events, or observations in data that
+  differ significantly from the expectation. This can be used for several scenarios like asset
+  monitoring, maintenance and prognostic surveillance in industries such as utility,
+  aviation and manufacturing.
ads/opctl/operator/lowcode/anomaly/README.md ADDED
@@ -0,0 +1,209 @@
+# Anomaly Detection Operator
+
+Anomaly Detection is the identification of rare items, events, or observations in data that differ significantly from the expectation. This can be used for several scenarios like asset monitoring, maintenance and prognostic surveillance in industries such as utility, aviation and manufacturing.
+
+Below are the steps to configure and run the Anomaly Detection Operator on different resources.
+
+## 1. Prerequisites
+
+Follow the [CLI Configuration](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/configure.html) steps from the ADS documentation. This step is mandatory as it sets up default values for different options while running the Anomaly Detection Operator on OCI Data Science jobs or OCI Data Flow applications. If you have previously done this and used a flexible shape, make sure to adjust `ml_job_config.ini` with shape config details and `docker_registry` information.
+
+- ocpus = 1
+- memory_in_gbs = 16
+- docker_registry = `<iad.ocir.io/namespace/>`
+
+## 2. Generating configs
+
+To generate starter configs, run the command below. This will create a list of YAML configs and place them in the `output` folder.
+
+```bash
+ads operator init -t anomaly --overwrite --output ~/anomaly/
+```
+
+The most important files expected to be generated are:
+
+- `anomaly.yaml`: Contains anomaly detection related configuration.
+- `backend_operator_local_python_config.yaml`: This includes a local backend configuration for running anomaly detection in a local environment. The environment should be set up manually before running the operator.
+- `backend_operator_local_container_config.yaml`: This includes a local backend configuration for running anomaly detection within a local container. The container should be built before running the operator. Please refer to the instructions below for details on how to accomplish this.
+- `backend_job_container_config.yaml`: Contains Data Science job-related config to run anomaly detection in a Data Science job within a container (BYOC) runtime. The container should be built and published before running the operator. Please refer to the instructions below for details on how to accomplish this.
+- `backend_job_python_config.yaml`: Contains Data Science job-related config to run anomaly detection in a Data Science job within a conda runtime. The conda should be built and published before running the operator.
+
+All generated configurations should be ready to use without the need for any additional adjustments. However, they are provided as starter kit configurations that can be customized as needed.
+
+## 3. Running anomaly detection on the local conda environment
+
+To run anomaly detection locally, create and activate a new conda environment (`ads-anomaly`). Install all the required libraries listed in the `environment.yaml` file.
+
+```yaml
+- datapane
+- cerberus
+- oracle-automlx==23.4.1
+- oracle-automlx[classic]==23.4.1
+- "git+https://github.com/oracle/accelerated-data-science.git@feature/anomaly#egg=oracle-ads"
+```
+
+Please review the previously generated `anomaly.yaml` file using the `init` command, and make any necessary adjustments to the input and output file locations. By default, it assumes that the files should be located in the same folder from which the `init` command was executed.
+
+Use the command below to verify the anomaly detection config.
+
+```bash
+ads operator verify -f ~/anomaly/anomaly.yaml
+```
+
+Use the following command to run the anomaly detection within the `ads-anomaly` conda environment.
+
+```bash
+ads operator run -f ~/anomaly/anomaly.yaml -b local
+```
+
+The operator will run in your local environment without requiring any additional modifications.
+
+## 4. Running anomaly detection on the local container
+
+To run the anomaly detection detection operator within a local container, follow these steps:
+
+Use the command below to build the anomaly detection container.
+
+```bash
+ads operator build-image -t anomaly
+```
+
+This will create a new `anomaly:v1` image, with `/etc/operator` as the designated working directory within the container.
+
+
+Check the `backend_operator_local_container_config.yaml` config file. By default, it should have a `volume` section with the `.oci` configs folder mounted.
+
+```yaml
+volume:
+  - "/Users/<user>/.oci:/root/.oci"
+```
+
+Mounting the OCI configs folder is only required if an OCI Object Storage bucket will be used to store the input anomaly detection data or output anomaly detection result. The input/output folders can also be mounted to the container.
+
+```yaml
+volume:
+  - /Users/<user>/.oci:/root/.oci
+  - /Users/<user>/anomaly/data:/etc/operator/data
+  - /Users/<user>/anomaly/result:/etc/operator/result
+```
+
+The full config can look like:
+```yaml
+kind: operator.local
+spec:
+  image: anomaly:v1
+  volume:
+  - /Users/<user>/.oci:/root/.oci
+  - /Users/<user>/anomaly/data:/etc/operator/data
+  - /Users/<user>/anomaly/result:/etc/operator/result
+type: container
+version: v1
+```
+
+Run the anomaly detection within a container using the command below:
+
+```bash
+ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_operator_local_container_config.yaml
+```
+
+## 5. Running anomaly detection in the Data Science job within container runtime
+
+To execute the anomaly detection detection operator within a Data Science job using container runtime, please follow the steps outlined below:
+
+You can use the following command to build the anomaly detection container. This step can be skipped if you have already done this for running the operator within a local container.
+
+```bash
+ads operator build-image -t anomaly
+```
+
+This will create a new `anomaly:v1` image, with `/etc/operator` as the designated working directory within the container.
+
+Publish the `anomaly:v1` container to the [Oracle Container Registry](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/home.htm). To become familiar with OCI, read the documentation links posted below.
+
+- [Access Container Registry](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/Concepts/registryoverview.htm#access)
+- [Create repositories](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/Tasks/registrycreatingarepository.htm#top)
+- [Push images](https://docs.public.oneportal.content.oci.oraclecloud.com/en-us/iaas/Content/Registry/Tasks/registrypushingimagesusingthedockercli.htm#Pushing_Images_Using_the_Docker_CLI)
+
+To publish `anomaly:v1` to OCR, use the command posted below:
+
+```bash
+ads operator publish-image anomaly:v1 --registry <iad.ocir.io/tenancy/>
+```
+
+After the container is published to OCR, it can be used within Data Science jobs service. Check the `backend_job_container_config.yaml` config file. It should contain pre-populated infrastructure and runtime sections. The runtime section should contain an image property, something like `image: iad.ocir.io/<tenancy>/anomaly:v1`. More details about supported options can be found in the ADS Jobs documentation - [Run a Container](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/run_container.html).
+
+Adjust the `anomaly.yaml` config with proper input/output folders. When the anomaly detection is run in the Data Science job, it will not have access to local folders. Therefore, input data and output folders should be placed in the Object Storage bucket. Open the `anomaly.yaml` and adjust the following fields:
+
+```yaml
+input_data:
+  url: oci://bucket@namespace/anomaly/input_data/data.csv
+output_directory:
+  url: oci://bucket@namespace/anomaly/result/
+test_data:
+  url: oci://bucket@namespace/anomaly/input_data/test.csv
+```
+
+Run the anomaly detection on the Data Science jobs using the command posted below:
+
+```bash
+ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_job_container_config.yaml
+```
+
+The logs can be monitored using the `ads opctl watch` command.
+
+```bash
+ads opctl watch <OCID>
+```
+
+## 6. Running anomaly detection in the Data Science job within conda runtime
+
+To execute the anomaly detection detection operator within a Data Science job using conda runtime, please follow the steps outlined below:
+
+You can use the following command to build the anomaly detection conda environment.
+
+```bash
+ads operator build-conda -t anomaly
+```
+
+This will create a new `anomaly_v1` conda environment and place it in the folder specified within `ads opctl configure` command.
+
+Use the command below to Publish the `anomaly_v1` conda environment to the Object Storage bucket.
+
+```bash
+ads operator publish-conda -t anomaly
+```
+More details about configuring CLI can be found here - [Configuring CLI](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/configure.html)
+
+
+After the conda environment is published to Object Storage, it can be used within Data Science jobs service. Check the `backend_job_python_config.yaml` config file. It should contain pre-populated infrastructure and runtime sections. The runtime section should contain a `conda` section.
+
+```yaml
+conda:
+  type: published
+  uri: oci://bucket@namespace/conda_environments/cpu/anomaly/1/anomaly_v1
+```
+
+More details about supported options can be found in the ADS Jobs documentation - [Run a Python Workload](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/jobs/run_python.html).
+
+Adjust the `anomaly.yaml` config with proper input/output folders. When the anomaly detection is run in the Data Science job, it will not have access to local folders. Therefore, input data and output folders should be placed in the Object Storage bucket. Open the `anomaly.yaml` and adjust the following fields:
+
+```yaml
+input_data:
+  url: oci://bucket@namespace/anomaly/input_data/data.csv
+output_directory:
+  url: oci://bucket@namespace/anomaly/result/
+test_data:
+  url: oci://bucket@namespace/anomaly/input_data/test.csv
+```
+
+Run the anomaly detection on the Data Science jobs using the command posted below:
+
+```bash
+ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_job_python_config.yaml
+```
+
+The logs can be monitored using the `ads opctl watch` command.
+
+```bash
+ads opctl watch <OCID>
+```
ads/opctl/operator/lowcode/anomaly/__init__.py ADDED
@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/