oracle-ads 2.11.14__py3-none-any.whl → 2.11.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/common/entities.py +17 -0
- ads/aqua/common/enums.py +5 -1
- ads/aqua/common/utils.py +109 -22
- ads/aqua/config/config.py +1 -1
- ads/aqua/config/deployment_config_defaults.json +29 -1
- ads/aqua/config/resource_limit_names.json +1 -0
- ads/aqua/constants.py +35 -18
- ads/aqua/evaluation/entities.py +0 -1
- ads/aqua/evaluation/evaluation.py +165 -121
- ads/aqua/extension/common_ws_msg_handler.py +57 -0
- ads/aqua/extension/deployment_handler.py +14 -13
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +1 -1
- ads/aqua/extension/evaluation_handler.py +4 -7
- ads/aqua/extension/evaluation_ws_msg_handler.py +28 -10
- ads/aqua/extension/model_handler.py +31 -6
- ads/aqua/extension/models/ws_models.py +78 -3
- ads/aqua/extension/models_ws_msg_handler.py +49 -0
- ads/aqua/extension/ui_websocket_handler.py +7 -1
- ads/aqua/model/entities.py +17 -9
- ads/aqua/model/model.py +260 -90
- ads/aqua/modeldeployment/constants.py +0 -16
- ads/aqua/modeldeployment/deployment.py +97 -74
- ads/aqua/modeldeployment/entities.py +9 -20
- ads/aqua/ui.py +152 -28
- ads/common/object_storage_details.py +2 -5
- ads/common/serializer.py +2 -3
- ads/jobs/builders/infrastructure/dsc_job.py +29 -3
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +74 -27
- ads/jobs/builders/runtimes/container_runtime.py +83 -4
- ads/opctl/operator/common/operator_config.py +1 -0
- ads/opctl/operator/lowcode/anomaly/README.md +3 -3
- ads/opctl/operator/lowcode/anomaly/__main__.py +5 -6
- ads/opctl/operator/lowcode/anomaly/const.py +9 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +6 -2
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +51 -26
- ads/opctl/operator/lowcode/anomaly/model/factory.py +41 -13
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +79 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +79 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +1 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -2
- ads/opctl/operator/lowcode/anomaly/utils.py +16 -13
- ads/opctl/operator/lowcode/common/data.py +2 -1
- ads/opctl/operator/lowcode/common/errors.py +6 -0
- ads/opctl/operator/lowcode/common/transformations.py +37 -9
- ads/opctl/operator/lowcode/common/utils.py +32 -10
- ads/opctl/operator/lowcode/forecast/model/base_model.py +21 -13
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +14 -18
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +15 -4
- ads/opctl/operator/lowcode/forecast/schema.yaml +9 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +25 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +198 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +58 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +88 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/pipeline/ads_pipeline_run.py +13 -2
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/METADATA +6 -1
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/RECORD +70 -50
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/entry_points.txt +0 -0
@@ -3,9 +3,12 @@
|
|
3
3
|
|
4
4
|
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
5
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
import logging
|
6
7
|
from typing import Union
|
7
8
|
from ads.jobs.builders.runtimes.base import MultiNodeRuntime
|
8
9
|
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
9
12
|
|
10
13
|
class ContainerRuntime(MultiNodeRuntime):
|
11
14
|
"""Represents a container job runtime
|
@@ -13,18 +16,23 @@ class ContainerRuntime(MultiNodeRuntime):
|
|
13
16
|
To define container runtime:
|
14
17
|
|
15
18
|
>>> ContainerRuntime()
|
16
|
-
>>> .with_image("iad.ocir.io/<your_tenancy>/<your_image>")
|
19
|
+
>>> .with_image("iad.ocir.io/<your_tenancy>/<your_image>:<tag>")
|
17
20
|
>>> .with_cmd("sleep 5 && echo Hello World")
|
18
21
|
>>> .with_entrypoint(["/bin/sh", "-c"])
|
22
|
+
>>> .with_image_digest("<image_digest>")
|
23
|
+
>>> .with_image_signature_id("<image_signature_id>")
|
19
24
|
>>> .with_environment_variable(MY_ENV="MY_VALUE")
|
20
25
|
|
21
|
-
Alternatively, you can define the ``entrypoint
|
26
|
+
Alternatively, you can define the ``entrypoint``, ``cmd``,
|
27
|
+
``image_digest`` and ``image_signature_id`` along with the image.
|
22
28
|
|
23
29
|
>>> ContainerRuntime()
|
24
30
|
>>> .with_image(
|
25
|
-
>>> "iad.ocir.io/<your_tenancy>/<your_image>",
|
31
|
+
>>> "iad.ocir.io/<your_tenancy>/<your_image>:<tag>",
|
26
32
|
>>> entrypoint=["/bin/sh", "-c"],
|
27
33
|
>>> cmd="sleep 5 && echo Hello World",
|
34
|
+
>>> image_digest="<image_digest>",
|
35
|
+
>>> image_signature_id="<image_signature_id>",
|
28
36
|
>>> )
|
29
37
|
>>> .with_environment_variable(MY_ENV="MY_VALUE")
|
30
38
|
|
@@ -46,20 +54,34 @@ class ContainerRuntime(MultiNodeRuntime):
|
|
46
54
|
CONST_IMAGE = "image"
|
47
55
|
CONST_ENTRYPOINT = "entrypoint"
|
48
56
|
CONST_CMD = "cmd"
|
57
|
+
CONST_IMAGE_DIGEST = "imageDigest"
|
58
|
+
CONST_IMAGE_SIGNATURE_ID = "imageSignatureId"
|
49
59
|
attribute_map = {
|
50
60
|
CONST_IMAGE: CONST_IMAGE,
|
51
61
|
CONST_ENTRYPOINT: CONST_ENTRYPOINT,
|
52
62
|
CONST_CMD: CONST_CMD,
|
63
|
+
CONST_IMAGE_DIGEST: "image_digest",
|
64
|
+
CONST_IMAGE_SIGNATURE_ID: "image_signature_id",
|
53
65
|
}
|
54
66
|
attribute_map.update(MultiNodeRuntime.attribute_map)
|
55
67
|
|
68
|
+
@property
def job_env_type(self) -> str:
    """Return the job environment type reported for this runtime.

    Returns
    -------
    str
        Always the literal ``"OCIR_CONTAINER"``.
    """
    return "OCIR_CONTAINER"
|
72
|
+
|
56
73
|
@property
def image(self) -> str:
    """Return the container image stored in the runtime spec.

    Returns
    -------
    str
        Whatever ``get_spec`` holds under ``CONST_IMAGE``.
    """
    return self.get_spec(self.CONST_IMAGE)
|
60
77
|
|
61
78
|
def with_image(
|
62
|
-
self,
|
79
|
+
self,
|
80
|
+
image: str,
|
81
|
+
entrypoint: Union[str, list, None] = None,
|
82
|
+
cmd: str = None,
|
83
|
+
image_digest: str = None,
|
84
|
+
image_signature_id: str = None,
|
63
85
|
) -> "ContainerRuntime":
|
64
86
|
"""Specify the image for the container job.
|
65
87
|
|
@@ -71,16 +93,73 @@ class ContainerRuntime(MultiNodeRuntime):
|
|
71
93
|
Entrypoint for the job, by default None (the entrypoint defined in the image will be used).
|
72
94
|
cmd : str, optional
|
73
95
|
Command for the job, by default None.
|
96
|
+
image_digest: str, optional
|
97
|
+
The image digest, by default None.
|
98
|
+
image_signature_id: str, optional
|
99
|
+
The image signature id, by default None.
|
74
100
|
|
75
101
|
Returns
|
76
102
|
-------
|
77
103
|
ContainerRuntime
|
78
104
|
The runtime instance.
|
79
105
|
"""
|
106
|
+
if not isinstance(image, str):
|
107
|
+
raise ValueError(
|
108
|
+
"Custom image must be provided as a string."
|
109
|
+
)
|
110
|
+
if image.find(":") < 0:
|
111
|
+
logger.warning(
|
112
|
+
"Tag is required for custom image. Accepted format: iad.ocir.io/<tenancy>/<image>:<tag>."
|
113
|
+
)
|
80
114
|
self.with_entrypoint(entrypoint)
|
81
115
|
self.set_spec(self.CONST_CMD, cmd)
|
116
|
+
self.with_image_digest(image_digest)
|
117
|
+
self.with_image_signature_id(image_signature_id)
|
82
118
|
return self.set_spec(self.CONST_IMAGE, image)
|
83
119
|
|
120
|
+
@property
def image_digest(self) -> str:
    """Return the container image digest stored in the runtime spec.

    Returns
    -------
    str
        Whatever ``get_spec`` holds under ``CONST_IMAGE_DIGEST``.
    """
    return self.get_spec(self.CONST_IMAGE_DIGEST)
|
124
|
+
|
125
|
+
def with_image_digest(self, image_digest: str) -> "ContainerRuntime":
    """Sets the digest of custom image.

    Parameters
    ----------
    image_digest: str
        The image digest.

    Returns
    -------
    ContainerRuntime
        The runtime instance.
    """
    # The value is stored as-is under CONST_IMAGE_DIGEST; set_spec's return
    # value is passed straight back to the caller.
    return self.set_spec(self.CONST_IMAGE_DIGEST, image_digest)
|
139
|
+
|
140
|
+
@property
def image_signature_id(self) -> str:
    """Return the container image signature id stored in the runtime spec.

    Returns
    -------
    str
        Whatever ``get_spec`` holds under ``CONST_IMAGE_SIGNATURE_ID``.
    """
    return self.get_spec(self.CONST_IMAGE_SIGNATURE_ID)
|
144
|
+
|
145
|
+
def with_image_signature_id(self, image_signature_id: str) -> "ContainerRuntime":
    """Sets the signature id of custom image.

    Parameters
    ----------
    image_signature_id: str
        The image signature id.

    Returns
    -------
    ContainerRuntime
        The runtime instance.
    """
    # The value is stored as-is under CONST_IMAGE_SIGNATURE_ID; set_spec's
    # return value is passed straight back to the caller.
    return self.set_spec(self.CONST_IMAGE_SIGNATURE_ID, image_signature_id)
|
162
|
+
|
84
163
|
@property
|
85
164
|
def entrypoint(self) -> str:
|
86
165
|
"""Entrypoint of the container job"""
|
@@ -58,7 +58,7 @@ The operator will run in your local environment without requiring any additional
|
|
58
58
|
|
59
59
|
## 4. Running anomaly detection on the local container
|
60
60
|
|
61
|
-
To run the anomaly detection
|
61
|
+
To run the anomaly detection operator within a local container, follow these steps:
|
62
62
|
|
63
63
|
Use the command below to build the anomaly detection container.
|
64
64
|
|
@@ -106,7 +106,7 @@ ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_op
|
|
106
106
|
|
107
107
|
## 5. Running anomaly detection in the Data Science job within container runtime
|
108
108
|
|
109
|
-
To execute the anomaly detection
|
109
|
+
To execute the anomaly detection operator within a Data Science job using container runtime, please follow the steps outlined below:
|
110
110
|
|
111
111
|
You can use the following command to build the anomaly detection container. This step can be skipped if you have already done this for running the operator within a local container.
|
112
112
|
|
@@ -155,7 +155,7 @@ ads opctl watch <OCID>
|
|
155
155
|
|
156
156
|
## 6. Running anomaly detection in the Data Science job within conda runtime
|
157
157
|
|
158
|
-
To execute the anomaly detection
|
158
|
+
To execute the anomaly detection operator within a Data Science job using conda runtime, please follow the steps outlined below:
|
159
159
|
|
160
160
|
You can use the following command to build the anomaly detection conda environment.
|
161
161
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
3
|
# Copyright (c) 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
@@ -15,7 +14,7 @@ from ads.opctl import logger
|
|
15
14
|
from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
|
16
15
|
from ads.opctl.operator.common.utils import _parse_input_args
|
17
16
|
|
18
|
-
from .model.anomaly_dataset import AnomalyDatasets
|
17
|
+
from .model.anomaly_dataset import AnomalyDatasets
|
19
18
|
from .operator_config import AnomalyOperatorConfig
|
20
19
|
|
21
20
|
|
@@ -34,7 +33,7 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
|
|
34
33
|
f"Failed to forecast with error {e.args}. Trying again with model `autots`."
|
35
34
|
)
|
36
35
|
operator_config.spec.model = "autots"
|
37
|
-
operator_config.spec.model_kwargs =
|
36
|
+
operator_config.spec.model_kwargs = {}
|
38
37
|
datasets = AnomalyDatasets(operator_config.spec)
|
39
38
|
try:
|
40
39
|
AnomalyOperatorModelFactory.get_model(
|
@@ -44,12 +43,12 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
|
|
44
43
|
logger.debug(
|
45
44
|
f"Failed to backup forecast with error {ee.args}. Raising original error."
|
46
45
|
)
|
47
|
-
|
46
|
+
raise ee
|
48
47
|
else:
|
49
48
|
raise e
|
50
49
|
|
51
50
|
|
52
|
-
def verify(spec: Dict
|
51
|
+
def verify(spec: Dict) -> bool:
|
53
52
|
"""Verifies the anomaly detection operator config."""
|
54
53
|
operator = AnomalyOperatorConfig.from_dict(spec)
|
55
54
|
msg_header = (
|
@@ -83,7 +82,7 @@ def main(raw_args: List[str]):
|
|
83
82
|
yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
|
84
83
|
except json.JSONDecodeError:
|
85
84
|
yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
|
86
|
-
except:
|
85
|
+
except Exception:
|
87
86
|
yaml_string = operator_spec_str
|
88
87
|
|
89
88
|
operator_config = AnomalyOperatorConfig.from_yaml(
|
@@ -16,6 +16,14 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
|
|
16
16
|
Auto = "auto"
|
17
17
|
# TODS = "tods"
|
18
18
|
|
19
|
+
class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
    """Supported non time-based anomaly detection models.

    Each attribute maps a model's symbolic name to the lowercase string used
    to refer to it.
    """

    OneClassSVM = "oneclasssvm"
    IsolationForest = "isolationforest"
    # TODO : Add DBScan
    # DBScan = "dbscan"
|
26
|
+
|
19
27
|
|
20
28
|
class TODSSubModels(str, metaclass=ExtendedEnumMeta):
|
21
29
|
"""Supported TODS sub models."""
|
@@ -86,3 +94,4 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta):
|
|
86
94
|
|
87
95
|
|
88
96
|
TODS_DEFAULT_MODEL = "ocsvm"
|
97
|
+
SUBSAMPLE_THRESHOLD = 1000
|
@@ -84,8 +84,10 @@ class AnomalyOutput:
|
|
84
84
|
scores = self.get_scores_by_cat(category)
|
85
85
|
inlier_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 0]
|
86
86
|
inliers = data.iloc[inlier_indices]
|
87
|
-
if scores is not None and not scores.empty:
|
87
|
+
if scores is not None and not scores.empty and self.date_column != "index":
|
88
88
|
inliers = pd.merge(inliers, scores, on=self.date_column, how="inner")
|
89
|
+
else:
|
90
|
+
inliers = pd.merge(inliers, anomaly, left_index=True, right_index=True, how="inner")
|
89
91
|
return inliers
|
90
92
|
|
91
93
|
def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
|
@@ -93,8 +95,10 @@ class AnomalyOutput:
|
|
93
95
|
scores = self.get_scores_by_cat(category)
|
94
96
|
outliers_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 1]
|
95
97
|
outliers = data.iloc[outliers_indices]
|
96
|
-
if scores is not None and not scores.empty:
|
98
|
+
if scores is not None and not scores.empty and self.date_column != "index":
|
97
99
|
outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
|
100
|
+
else:
|
101
|
+
outliers = pd.merge(outliers, anomaly, left_index=True, right_index=True, how="inner")
|
98
102
|
return outliers
|
99
103
|
|
100
104
|
def get_inliers(self, datasets):
|
@@ -1,32 +1,33 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
3
|
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
5
|
|
7
|
-
import fsspec
|
8
|
-
import numpy as np
|
9
6
|
import os
|
10
|
-
import pandas as pd
|
11
7
|
import tempfile
|
12
8
|
import time
|
13
9
|
from abc import ABC, abstractmethod
|
14
|
-
from sklearn import linear_model
|
15
10
|
from typing import Tuple
|
16
11
|
|
12
|
+
import fsspec
|
13
|
+
import numpy as np
|
14
|
+
import pandas as pd
|
15
|
+
from sklearn import linear_model
|
16
|
+
|
17
17
|
from ads.common.object_storage_details import ObjectStorageDetails
|
18
18
|
from ads.opctl import logger
|
19
|
-
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
|
19
|
+
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD
|
20
20
|
from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
|
21
21
|
from ads.opctl.operator.lowcode.common.utils import (
|
22
|
-
human_time_friendly,
|
23
|
-
enable_print,
|
24
22
|
disable_print,
|
23
|
+
enable_print,
|
24
|
+
human_time_friendly,
|
25
25
|
write_data,
|
26
26
|
)
|
27
|
-
|
28
|
-
from ..const import SupportedModels
|
27
|
+
|
28
|
+
from ..const import NonTimeADSupportedModels, SupportedModels
|
29
29
|
from ..operator_config import AnomalyOperatorConfig, AnomalyOperatorSpec
|
30
|
+
from .anomaly_dataset import AnomalyDatasets, AnomalyOutput, TestData
|
30
31
|
|
31
32
|
|
32
33
|
class AnomalyOperatorBaseModel(ABC):
|
@@ -53,15 +54,18 @@ class AnomalyOperatorBaseModel(ABC):
|
|
53
54
|
|
54
55
|
def generate_report(self):
|
55
56
|
"""Generates the report."""
|
56
|
-
import report_creator as rc
|
57
57
|
import matplotlib.pyplot as plt
|
58
|
+
import report_creator as rc
|
58
59
|
|
59
60
|
start_time = time.time()
|
60
61
|
# fallback using sklearn oneclasssvm when the sub model _build_model fails
|
61
62
|
try:
|
62
63
|
anomaly_output = self._build_model()
|
63
64
|
except Exception as e:
|
64
|
-
|
65
|
+
logger.warn(f"Found exception: {e}")
|
66
|
+
if self.spec.datetime_column:
|
67
|
+
anomaly_output = self._fallback_build_model()
|
68
|
+
raise e
|
65
69
|
|
66
70
|
elapsed_time = time.time() - start_time
|
67
71
|
|
@@ -75,11 +79,13 @@ class AnomalyOperatorBaseModel(ABC):
|
|
75
79
|
anomaly_output, test_data, elapsed_time
|
76
80
|
)
|
77
81
|
table_blocks = [
|
78
|
-
rc.DataTable(df, label=col, index=True)
|
82
|
+
rc.DataTable(df.head(SUBSAMPLE_THRESHOLD) if self.spec.subsample_report_data and len(df) > SUBSAMPLE_THRESHOLD else df, label=col, index=True)
|
79
83
|
for col, df in self.datasets.full_data_dict.items()
|
80
84
|
]
|
81
85
|
data_table = rc.Select(blocks=table_blocks)
|
82
|
-
date_column =
|
86
|
+
date_column = (
|
87
|
+
self.spec.datetime_column.name if self.spec.datetime_column else "index"
|
88
|
+
)
|
83
89
|
|
84
90
|
blocks = []
|
85
91
|
for target, df in self.datasets.full_data_dict.items():
|
@@ -88,20 +94,36 @@ class AnomalyOperatorBaseModel(ABC):
|
|
88
94
|
anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
|
89
95
|
OutputColumns.ANOMALY_COL
|
90
96
|
]
|
97
|
+
anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
|
98
|
+
downsampled_time_col = time_col
|
99
|
+
selected_indices = list(range(len(time_col)))
|
100
|
+
if self.spec.subsample_report_data:
|
101
|
+
non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
|
102
|
+
# Downsample non-anomalous data if it exceeds the threshold (1000)
|
103
|
+
if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
|
104
|
+
downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD]
|
105
|
+
selected_indices = anomaly_indices + downsampled_non_anomaly_indices
|
106
|
+
selected_indices.sort()
|
107
|
+
downsampled_time_col = time_col[selected_indices]
|
108
|
+
|
91
109
|
columns = set(df.columns).difference({date_column})
|
92
110
|
for col in columns:
|
93
111
|
y = df[col].reset_index(drop=True)
|
112
|
+
|
113
|
+
downsampled_y = y[selected_indices]
|
114
|
+
|
94
115
|
fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
|
95
116
|
ax.grid()
|
96
|
-
ax.plot(
|
97
|
-
|
98
|
-
|
99
|
-
|
117
|
+
ax.plot(downsampled_time_col, downsampled_y, color="black")
|
118
|
+
# Plot anomalies
|
119
|
+
for i in anomaly_indices:
|
120
|
+
ax.scatter(time_col[i], y[i], color="red", marker="o")
|
100
121
|
plt.xlabel(date_column)
|
101
122
|
plt.ylabel(col)
|
102
123
|
plt.title(f"`{col}` with reference to anomalies")
|
103
124
|
figure_blocks.append(rc.Widget(ax))
|
104
|
-
|
125
|
+
|
126
|
+
blocks.append(rc.Group(*figure_blocks, label=target))
|
105
127
|
plots = rc.Select(blocks)
|
106
128
|
|
107
129
|
report_sections = []
|
@@ -114,7 +136,7 @@ class AnomalyOperatorBaseModel(ABC):
|
|
114
136
|
rc.Text(f"You selected the **`{self.spec.model}`** model."),
|
115
137
|
rc.Text(
|
116
138
|
"Based on your dataset, you could have also selected "
|
117
|
-
f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
|
139
|
+
f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`."
|
118
140
|
),
|
119
141
|
rc.Metric(
|
120
142
|
heading="Analysis was completed in ",
|
@@ -170,7 +192,9 @@ class AnomalyOperatorBaseModel(ABC):
|
|
170
192
|
|
171
193
|
for cat in anomaly_output.list_categories():
|
172
194
|
output = anomaly_output.category_map[cat][0]
|
173
|
-
date_col =
|
195
|
+
date_col = (
|
196
|
+
self.spec.datetime_column.name if self.spec.datetime_column else "index"
|
197
|
+
)
|
174
198
|
|
175
199
|
test_data_i = test_data.get_data_for_series(cat)
|
176
200
|
|
@@ -247,7 +271,7 @@ class AnomalyOperatorBaseModel(ABC):
|
|
247
271
|
if ObjectStorageDetails.is_oci_path(unique_output_dir):
|
248
272
|
storage_options = default_signer()
|
249
273
|
else:
|
250
|
-
storage_options =
|
274
|
+
storage_options = {}
|
251
275
|
|
252
276
|
# report-creator html report
|
253
277
|
with tempfile.TemporaryDirectory() as temp_dir:
|
@@ -301,12 +325,11 @@ class AnomalyOperatorBaseModel(ABC):
|
|
301
325
|
Fallback method for the sub model _build_model method.
|
302
326
|
"""
|
303
327
|
logger.warn(
|
304
|
-
"The build_model method has failed for the model: {}. "
|
305
|
-
"A fallback model will be built."
|
328
|
+
f"The build_model method has failed for the model: {self.spec.model}. "
|
329
|
+
"A fallback model will be built."
|
306
330
|
)
|
307
331
|
|
308
332
|
date_column = self.spec.datetime_column.name
|
309
|
-
dataset = self.datasets
|
310
333
|
|
311
334
|
anomaly_output = AnomalyOutput(date_column=date_column)
|
312
335
|
|
@@ -320,7 +343,9 @@ class AnomalyOperatorBaseModel(ABC):
|
|
320
343
|
y_pred = np.vectorize(self.outlier_map.get)(
|
321
344
|
est.predict(df[self.spec.target_column].fillna(0).values.reshape(-1, 1))
|
322
345
|
)
|
323
|
-
scores = est.score_samples(
|
346
|
+
scores = est.score_samples(
|
347
|
+
df[self.spec.target_column].fillna(0).values.reshape(-1, 1)
|
348
|
+
)
|
324
349
|
|
325
350
|
anomaly = pd.DataFrame(
|
326
351
|
{date_column: df[date_column], OutputColumns.ANOMALY_COL: y_pred}
|
@@ -1,26 +1,41 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*--
|
3
2
|
|
4
|
-
# Copyright (c) 2023 Oracle and/or its affiliates.
|
3
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
4
|
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
5
|
|
7
|
-
from
|
6
|
+
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
|
7
|
+
|
8
|
+
from ..const import NonTimeADSupportedModels, SupportedModels
|
8
9
|
from ..operator_config import AnomalyOperatorConfig
|
10
|
+
from .anomaly_dataset import AnomalyDatasets
|
9
11
|
from .automlx import AutoMLXOperatorModel
|
10
12
|
from .autots import AutoTSOperatorModel
|
11
|
-
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
|
12
13
|
|
13
14
|
# from .tods import TODSOperatorModel
|
14
15
|
from .base_model import AnomalyOperatorBaseModel
|
15
|
-
from .
|
16
|
+
from .isolationforest import IsolationForestOperatorModel
|
17
|
+
from .oneclasssvm import OneClassSVMOperatorModel
|
16
18
|
|
17
19
|
|
18
20
|
class UnSupportedModelError(Exception):
    """Exception raised when the model is not supported.

    The error message names the rejected model and lists the models that
    are supported for the given configuration.

    Parameters
    ----------
    operator_config : AnomalyOperatorConfig
        The operator configuration. When ``spec.datetime_column`` is truthy
        the time-based ``SupportedModels`` are listed, otherwise the
        ``NonTimeADSupportedModels``.
    model_type : str
        The type of the unsupported model.
    """

    def __init__(self, operator_config: AnomalyOperatorConfig, model_type: str):
        model_enum = (
            SupportedModels
            if operator_config.spec.datetime_column
            else NonTimeADSupportedModels
        )
        supported_models = model_enum.values
        # NOTE(review): the sibling accessor is invoked as `keys()` elsewhere,
        # so `values` is presumably a method too. Calling it keeps the message
        # from showing a bound-method repr; if it is a plain attribute the
        # value is used unchanged.
        if callable(supported_models):
            supported_models = supported_models()
        message = (
            f"Model: `{model_type}` is not supported. "
            f"Supported models: {supported_models}"
        )
        super().__init__(message)
|
24
39
|
|
25
40
|
|
26
41
|
class AnomalyOperatorModelFactory:
|
@@ -34,6 +49,13 @@ class AnomalyOperatorModelFactory:
|
|
34
49
|
SupportedModels.AutoTS: AutoTSOperatorModel,
|
35
50
|
}
|
36
51
|
|
52
|
+
_NonTime_MAP = {
|
53
|
+
NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel,
|
54
|
+
NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel,
|
55
|
+
# TODO: Add DBScan model for non time based anomaly
|
56
|
+
# NonTimeADSupportedModels.DBScan: DBScanOperatorModel,
|
57
|
+
}
|
58
|
+
|
37
59
|
@classmethod
|
38
60
|
def get_model(
|
39
61
|
cls, operator_config: AnomalyOperatorConfig, datasets: AnomalyDatasets
|
@@ -61,7 +83,13 @@ class AnomalyOperatorModelFactory:
|
|
61
83
|
"""
|
62
84
|
model_type = operator_config.spec.model
|
63
85
|
if model_type == "auto":
|
64
|
-
model_type = select_auto_model(
|
65
|
-
|
66
|
-
|
67
|
-
|
86
|
+
model_type = select_auto_model(operator_config)
|
87
|
+
|
88
|
+
model_map = (
|
89
|
+
cls._MAP if operator_config.spec.datetime_column else cls._NonTime_MAP
|
90
|
+
)
|
91
|
+
|
92
|
+
if model_type not in model_map:
|
93
|
+
raise UnSupportedModelError(operator_config, model_type)
|
94
|
+
|
95
|
+
return model_map[model_type](config=operator_config, datasets=datasets)
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*--
|
3
|
+
|
4
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
from ads.common.decorator.runtime_dependency import runtime_dependency
|
11
|
+
|
12
|
+
from .base_model import AnomalyOperatorBaseModel
|
13
|
+
from .anomaly_dataset import AnomalyOutput
|
14
|
+
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
|
15
|
+
|
16
|
+
|
17
|
+
class IsolationForestOperatorModel(AnomalyOperatorBaseModel):
    """Class representing Isolation Forest Anomaly Detection operator model."""

    @runtime_dependency(
        module="sklearn",
        err_msg=(
            "Please run `pip3 install scikit-learn` to "
            "install the required dependencies for IsolationForest."
        ),
    )
    def _build_model(self) -> AnomalyOutput:
        """Fit one IsolationForest per dataset and collect labels and scores.

        Returns
        -------
        AnomalyOutput
            One anomaly frame and one score frame per key of
            ``self.datasets.full_data_dict``.
        """
        from sklearn.ensemble import IsolationForest

        # Fall back to no extra arguments so `**` unpacking cannot fail when
        # model_kwargs is unset.
        model_kwargs = self.spec.model_kwargs or {}
        # map the output as per anomaly dataset class, 1: outlier, 0: inlier
        self.outlier_map = {1: 0, -1: 1}

        anomaly_output = AnomalyOutput(date_column="index")

        for target, df in self.datasets.full_data_dict.items():
            model = IsolationForest(**model_kwargs)
            model.fit(df)
            y_pred = np.vectorize(self.outlier_map.get)(model.predict(df))
            scores = model.score_samples(df)

            # The first column of the frame is carried into both outputs.
            index_col = df.columns[0]

            anomaly = pd.DataFrame(
                {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
            ).reset_index(drop=True)
            score = pd.DataFrame(
                {"index": df[index_col], OutputColumns.SCORE_COL: scores}
            ).reset_index(drop=True)

            anomaly_output.add_output(target, anomaly, score)

        return anomaly_output

    def _generate_report(self):
        """Generates the report.

        Returns
        -------
        tuple
            ``(model_description, other_sections)``: a text block describing
            the model and a list of additional report blocks.
        """
        import report_creator as rc

        other_sections = [
            rc.Heading("Selected Models Overview", level=2),
            rc.Text(
                "The following tables provide information regarding the chosen model."
            ),
        ]

        model_description = rc.Text(
            "The Isolation Forest is an ensemble of “Isolation Trees” that “isolate” observations by recursive random partitioning"
            " which can be represented by a tree structure. The number of splittings required to isolate a sample is lower for outliers and higher for inliers."
        )

        return (
            model_description,
            other_sections,
        )
|