oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- ads/aqua/__init__.py +12 -0
- ads/aqua/base.py +324 -0
- ads/aqua/cli.py +19 -0
- ads/aqua/config/deployment_config_defaults.json +9 -0
- ads/aqua/config/resource_limit_names.json +7 -0
- ads/aqua/constants.py +45 -0
- ads/aqua/data.py +40 -0
- ads/aqua/decorator.py +101 -0
- ads/aqua/deployment.py +643 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation.py +1751 -0
- ads/aqua/exception.py +82 -0
- ads/aqua/extension/__init__.py +40 -0
- ads/aqua/extension/base_handler.py +138 -0
- ads/aqua/extension/common_handler.py +21 -0
- ads/aqua/extension/deployment_handler.py +202 -0
- ads/aqua/extension/evaluation_handler.py +135 -0
- ads/aqua/extension/finetune_handler.py +66 -0
- ads/aqua/extension/model_handler.py +59 -0
- ads/aqua/extension/ui_handler.py +201 -0
- ads/aqua/extension/utils.py +23 -0
- ads/aqua/finetune.py +579 -0
- ads/aqua/job.py +29 -0
- ads/aqua/model.py +819 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +459 -0
- ads/aqua/ui.py +453 -0
- ads/aqua/utils.py +715 -0
- ads/cli.py +37 -6
- ads/common/auth.py +7 -0
- ads/common/decorator/__init__.py +7 -3
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/object_storage_details.py +166 -7
- ads/common/oci_client.py +18 -1
- ads/common/oci_logging.py +2 -2
- ads/common/oci_mixin.py +4 -5
- ads/common/serializer.py +34 -5
- ads/common/utils.py +75 -10
- ads/config.py +40 -1
- ads/dataset/correlation_plot.py +10 -12
- ads/jobs/ads_job.py +43 -25
- ads/jobs/builders/infrastructure/base.py +4 -2
- ads/jobs/builders/infrastructure/dsc_job.py +49 -39
- ads/jobs/builders/runtimes/base.py +71 -1
- ads/jobs/builders/runtimes/container_runtime.py +4 -4
- ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
- ads/jobs/templates/driver_pytorch.py +27 -10
- ads/model/artifact_downloader.py +84 -14
- ads/model/artifact_uploader.py +25 -23
- ads/model/datascience_model.py +388 -38
- ads/model/deployment/model_deployment.py +10 -2
- ads/model/generic_model.py +8 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_metadata.py +1 -1
- ads/model/service/oci_datascience_model.py +34 -5
- ads/opctl/config/merger.py +2 -2
- ads/opctl/operator/__init__.py +3 -1
- ads/opctl/operator/cli.py +7 -1
- ads/opctl/operator/cmd.py +3 -3
- ads/opctl/operator/common/errors.py +2 -1
- ads/opctl/operator/common/operator_config.py +22 -3
- ads/opctl/operator/common/utils.py +16 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
- ads/opctl/operator/lowcode/anomaly/README.md +209 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +88 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +96 -0
- ads/opctl/operator/lowcode/common/errors.py +41 -0
- ads/opctl/operator/lowcode/common/transformations.py +191 -0
- ads/opctl/operator/lowcode/common/utils.py +250 -0
- ads/opctl/operator/lowcode/forecast/README.md +3 -2
- ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
- ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
- ads/opctl/operator/lowcode/forecast/const.py +17 -1
- ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
- ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
- ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
- ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
- ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
- ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
- ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
- ads/opctl/operator/lowcode/forecast/utils.py +186 -356
- ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
- ads/opctl/operator/lowcode/pii/model/report.py +7 -7
- ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
- ads/opctl/operator/lowcode/pii/utils.py +0 -82
- ads/opctl/operator/runtime/runtime.py +3 -2
- ads/telemetry/base.py +62 -0
- ads/telemetry/client.py +105 -0
- ads/telemetry/telemetry.py +6 -3
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
- ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
ads/opctl/operator/lowcode/anomaly/__main__.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import json
+import os
+import sys
+from typing import Dict, List
+
+import yaml
+
+from ads.opctl import logger
+from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
+from ads.opctl.operator.common.utils import _parse_input_args
+
+from .model.anomaly_dataset import AnomalyDatasets, AnomalyData
+from .operator_config import AnomalyOperatorConfig
+
+
+def operate(operator_config: AnomalyOperatorConfig) -> None:
+    """Runs the anomaly detection operator."""
+    from .model.factory import AnomalyOperatorModelFactory
+
+    datasets = AnomalyDatasets(operator_config.spec)
+    try:
+        AnomalyOperatorModelFactory.get_model(
+            operator_config, datasets
+        ).generate_report()
+    except Exception as e:
+        if operator_config.spec.model == "auto":
+            logger.debug(
+                f"Failed to forecast with error {e.args}. Trying again with model `autots`."
+            )
+            operator_config.spec.model = "autots"
+            operator_config.spec.model_kwargs = dict()
+            datasets = AnomalyDatasets(operator_config.spec)
+            try:
+                AnomalyOperatorModelFactory.get_model(
+                    operator_config, datasets
+                ).generate_report()
+            except Exception as e2:
+                logger.debug(
+                    f"Failed to backup forecast with error {e2.args}. Raising original error."
+                )
+                raise e
+        else:
+            raise e
+
+
+def verify(spec: Dict, **kwargs: Dict) -> bool:
+    """Verifies the anomaly detection operator config."""
+    operator = AnomalyOperatorConfig.from_dict(spec)
+    msg_header = (
+        f"{'*' * 50} The operator config has been successfully verified {'*' * 50}"
+    )
+    print(msg_header)
+    print(operator.to_yaml())
+    print("*" * len(msg_header))
+
+
+def main(raw_args: List[str]):
+    """The entry point of the anomaly the operator."""
+    args, _ = _parse_input_args(raw_args)
+    if not args.file and not args.spec and not os.environ.get(ENV_OPERATOR_ARGS):
+        logger.info(
+            "Please specify -f[--file] or -s[--spec] or "
+            f"pass operator's arguments via {ENV_OPERATOR_ARGS} environment variable."
+        )
+        return
+
+    logger.info("-" * 100)
+    logger.info(
+        f"{'Running' if not args.verify else 'Verifying'} the anomaly detection operator."
+    )
+
+    # if spec provided as input string, then convert the string into YAML
+    yaml_string = ""
+    if args.spec or os.environ.get(ENV_OPERATOR_ARGS):
+        operator_spec_str = args.spec or os.environ.get(ENV_OPERATOR_ARGS)
+        try:
+            yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
+        except json.JSONDecodeError:
+            yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
+        except:
+            yaml_string = operator_spec_str
+
+    operator_config = AnomalyOperatorConfig.from_yaml(
+        uri=args.file,
+        yaml_string=yaml_string,
+    )
+
+    logger.info(operator_config.to_yaml())
+
+    # run operator
+    if args.verify:
+        verify(operator_config)
+    else:
+        operate(operator_config)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
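The new `__main__.py` accepts either a config file or a JSON spec passed through the `ENV_OPERATOR_ARGS` environment variable. A minimal sketch (not part of the package) of driving it programmatically; the `anomaly.yaml` file name is hypothetical:

```python
# Hypothetical driver for the anomaly operator entry point shown above;
# assumes an operator config already exists at ./anomaly.yaml.
import json
import os

import yaml

from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
from ads.opctl.operator.lowcode.anomaly.__main__ import main

# Option 1: point the operator at the on-disk YAML config.
main(["--file", "anomaly.yaml"])

# Option 2: pass the same spec as a JSON string via the environment variable;
# main() converts it back to YAML with json.loads + yaml.safe_dump.
with open("anomaly.yaml") as f:
    spec = yaml.safe_load(f)
os.environ[ENV_OPERATOR_ARGS] = json.dumps(spec)
main([])
```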
ads/opctl/operator/lowcode/anomaly/cmd.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from typing import Dict
+
+from ads.opctl import logger
+from ads.opctl.operator.common.utils import _load_yaml_from_uri
+from ads.opctl.operator.common.operator_yaml_generator import YamlGenerator
+
+
+def init(**kwargs: Dict) -> str:
+    """
+    Generates operator config by the schema.
+
+    Properties
+    ----------
+    kwargs: (Dict, optional).
+        Additional key value arguments.
+
+        - type: str
+            The type of the operator.
+
+    Returns
+    -------
+    str
+        The YAML specification generated based on the schema.
+    """
+    logger.info("==== Generating anomaly detection configuration yaml file ====")
+
+    return YamlGenerator(
+        schema=_load_yaml_from_uri(__file__.replace("cmd.py", "schema.yaml"))
+    ).generate_example_dict(values={"type": kwargs.get("type")})
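`init()` builds a starter configuration from the bundled `schema.yaml`. A small usage sketch, assuming the operator type string is `"anomaly"`:

```python
# Generate an example anomaly operator configuration from schema.yaml.
from ads.opctl.operator.lowcode.anomaly.cmd import init

example_spec = init(type="anomaly")
print(example_spec)
```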
ads/opctl/operator/lowcode/anomaly/const.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from ads.common.extended_enum import ExtendedEnumMeta
+from ads.opctl.operator.lowcode.common.const import DataColumns
+
+
+class SupportedModels(str, metaclass=ExtendedEnumMeta):
+    """Supported anomaly models."""
+
+    AutoMLX = "automlx"
+    AutoTS = "autots"
+    Auto = "auto"
+    # TODS = "tods"
+
+
+class TODSSubModels(str, metaclass=ExtendedEnumMeta):
+    """Supported TODS sub models."""
+
+    OCSVM = "ocsvm"
+    DeepLog = "deeplog"
+    Telemanom = "telemanom"
+    IsolationForest = "isolationforest"
+    LSTMODetector = "lstmodetector"
+    KNN = "knn"
+
+
+TODS_IMPORT_MODEL_MAP = {
+    TODSSubModels.OCSVM: ".OCSVM_skinterface",
+    TODSSubModels.DeepLog: ".DeepLog_skinterface",
+    TODSSubModels.Telemanom: ".Telemanom_skinterface",
+    TODSSubModels.IsolationForest: ".IsolationForest_skinterface",
+    TODSSubModels.LSTMODetector: ".LSTMODetector_skinterface",
+    TODSSubModels.KNN: ".KNN_skinterface",
+}
+
+TODS_MODEL_MAP = {
+    TODSSubModels.OCSVM: "OCSVMSKI",
+    TODSSubModels.DeepLog: "DeepLogSKI",
+    TODSSubModels.Telemanom: "TelemanomSKI",
+    TODSSubModels.IsolationForest: "IsolationForestSKI",
+    TODSSubModels.LSTMODetector: "LSTMODetectorSKI",
+    TODSSubModels.KNN: "KNNSKI",
+}
+
+
+class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
+    UNSUPERVISED_UNIFY95 = "unsupervised_unify95"
+    UNSUPERVISED_UNIFY95_LOG_LOSS = "unsupervised_unify95_log_loss"
+    UNSUPERVISED_N1_EXPERTS = "unsupervised_n-1_experts"
+    RECALL = "Recall"
+    PRECISION = "Precision"
+    ACCURACY = "Accuracy"
+    F1_SCORE = "f1_score"
+    FP = "False Positive"
+    FN = "False Negative"
+    TP = "True Positive"
+    TN = "True Negative"
+    ROC_AUC = "ROC_AUC"
+    PRC_AUC = "PRC_AUC"
+    MCC = "MCC"
+    MEAN_RECALL = "Mean Recall"
+    MEAN_PRECISION = "Mean Precision"
+    MEAN_ACCURACY = "Mean Accuracy"
+    MEAN_F1_SCORE = "Mean f1_score"
+    MEAN_ROC_AUC = "Mean ROC_AUC"
+    MEAN_PRC_AUC = "Mean PRC_AUC"
+    MEAN_MCC = "Mean MCC"
+    MEDIAN_RECALL = "Median Recall"
+    MEDIAN_PRECISION = "Median Precision"
+    MEDIAN_ACCURACY = "Median Accuracy"
+    MEDIAN_F1_SCORE = "Median f1_score"
+    MEDIAN_ROC_AUC = "Median ROC_AUC"
+    MEDIAN_PRC_AUC = "Median PRC_AUC"
+    MEDIAN_MCC = "Median MCC"
+    ELAPSED_TIME = "Elapsed Time"
+
+
+class OutputColumns(str, metaclass=ExtendedEnumMeta):
+    ANOMALY_COL = "anomaly"
+    SCORE_COL = "score"
+    Series = DataColumns.Series
+
+
+TODS_DEFAULT_MODEL = "ocsvm"
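The two TODS maps pair a sub-model key with its `*_skinterface` module and estimator class name. A hedged sketch of how they could be combined to resolve an estimator; the `tods.sk_interface.detection_algorithm` package path is an assumption about the TODS library layout, not something defined in this module:

```python
# Resolve a TODS estimator class from the maps above (sketch; the package
# path is assumed from the TODS project layout).
import importlib

from ads.opctl.operator.lowcode.anomaly.const import (
    TODS_DEFAULT_MODEL,
    TODS_IMPORT_MODEL_MAP,
    TODS_MODEL_MAP,
)


def load_tods_estimator(sub_model: str = TODS_DEFAULT_MODEL):
    """Import the sk-interface module for `sub_model` and return its class."""
    module = importlib.import_module(
        TODS_IMPORT_MODEL_MAP[sub_model],
        package="tods.sk_interface.detection_algorithm",
    )
    return getattr(module, TODS_MODEL_MAP[sub_model])
```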
ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from ..operator_config import AnomalyOperatorSpec
+from ads.opctl.operator.lowcode.common.utils import (
+    default_signer,
+    merge_category_columns,
+)
+from ads.opctl.operator.lowcode.common.data import AbstractData
+from ads.opctl.operator.lowcode.common.data import AbstractData
+from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
+from ads.opctl import logger
+import pandas as pd
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+
+class AnomalyData(AbstractData):
+    def __init__(self, spec: AnomalyOperatorSpec):
+        super().__init__(spec=spec, name="input_data")
+
+
+class TestData(AbstractData):
+    def __init__(self, spec: AnomalyOperatorSpec):
+        super().__init__(spec=spec, name="test_data")
+
+
+class ValidationData(AbstractData):
+    def __init__(self, spec: AnomalyOperatorSpec):
+        super().__init__(spec=spec, name="validation_data")
+
+    def _ingest_data(self, spec):
+        self.X_valid_dict = dict()
+        self.y_valid_dict = dict()
+        for s_id, df in self.get_dict_by_series().items():
+            self.X_valid_dict[s_id] = df.drop([OutputColumns.ANOMALY_COL], axis=1)
+            self.y_valid_dict[s_id] = df[OutputColumns.ANOMALY_COL]
+
+
+class AnomalyDatasets:
+    def __init__(self, spec: AnomalyOperatorSpec):
+        """Instantiates the DataIO instance.
+
+        Properties
+        ----------
+        spec: AnomalyOperatorSpec
+            The anomaly operator spec.
+        """
+        self._data = AnomalyData(spec)
+        self.data = self._data.get_data_long()
+        self.full_data_dict = self._data.get_dict_by_series()
+        if spec.validation_data is not None:
+            self.valid_data = ValidationData(spec)
+            self.X_valid_dict = self.valid_data.X_valid_dict
+            self.y_valid_dict = self.valid_data.y_valid_dict
+
+
+class AnomalyOutput:
+    def __init__(self, date_column):
+        self.category_map = dict()
+        self.date_column = date_column
+
+    def list_categories(self):
+        categories = list(self.category_map.keys())
+        categories.sort()
+        return categories
+
+    def add_output(self, category: str, anomalies: pd.DataFrame, scores: pd.DataFrame):
+        self.category_map[category] = (anomalies, scores)
+
+    def get_anomalies_by_cat(self, category: str):
+        return self.category_map[category][0]
+
+    def get_scores_by_cat(self, category: str):
+        return self.category_map[category][1]
+
+    def get_inliers_by_cat(self, category: str, data: pd.DataFrame):
+        anomaly = self.get_anomalies_by_cat(category)
+        scores = self.get_scores_by_cat(category)
+        inlier_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 0]
+        inliers = data.iloc[inlier_indices]
+        if scores is not None and not scores.empty:
+            inliers = pd.merge(inliers, scores, on=self.date_column, how="inner")
+        return inliers
+
+    def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
+        anomaly = self.get_anomalies_by_cat(category)
+        scores = self.get_scores_by_cat(category)
+        outliers_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 1]
+        outliers = data.iloc[outliers_indices]
+        if scores is not None and not scores.empty:
+            outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
+        return outliers
+
+    def get_inliers(self, data):
+        inliers = pd.DataFrame()
+
+        for category in self.list_categories():
+            inliers = pd.concat(
+                [
+                    inliers,
+                    self.get_inliers_by_cat(
+                        category,
+                        data[data[OutputColumns.Series] == category]
+                        .reset_index(drop=True)
+                        .drop(OutputColumns.Series, axis=1),
+                    ),
+                ],
+                axis=0,
+                ignore_index=True,
+            )
+        return inliers
+
+    def get_outliers(self, data):
+        outliers = pd.DataFrame()
+
+        for category in self.list_categories():
+            outliers = pd.concat(
+                [
+                    outliers,
+                    self.get_outliers_by_cat(
+                        category,
+                        data[data[OutputColumns.Series] == category]
+                        .reset_index(drop=True)
+                        .drop(OutputColumns.Series, axis=1),
+                    ),
+                ],
+                axis=0,
+                ignore_index=True,
+            )
+        return outliers
+
+    def get_scores(self, target_category_columns):
+        if target_category_columns is None:
+            return self.get_scores_by_cat(self.list_categories()[0])
+
+        scores = pd.DataFrame()
+        for category in self.list_categories():
+            score = self.get_scores_by_cat(category)
+            score[target_category_columns[0]] = category
+            scores = pd.concat([scores, score], axis=0, ignore_index=True)
+        return scores
+
+    def get_num_anomalies_by_cat(self, category: str):
+        return (self.category_map[category][0][OutputColumns.ANOMALY_COL] == 1).sum()
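`AnomalyOutput` is the container that each model implementation fills via `add_output` and that the reporting code queries. A toy illustration (not from the package) with a single series and a hypothetical `ds` date column:

```python
# Toy example: one series "s1", three rows, the middle row flagged as anomalous.
import pandas as pd

from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
from ads.opctl.operator.lowcode.anomaly.model.anomaly_dataset import AnomalyOutput

dates = pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"])
data = pd.DataFrame(
    {"ds": dates, "value": [1.0, 9.0, 1.2], OutputColumns.Series: "s1"}
)
anomalies = pd.DataFrame({"ds": dates, OutputColumns.ANOMALY_COL: [0, 1, 0]})
scores = pd.DataFrame({"ds": dates, OutputColumns.SCORE_COL: [0.1, 0.9, 0.2]})

output = AnomalyOutput(date_column="ds")
output.add_output("s1", anomalies, scores)

print(output.get_num_anomalies_by_cat("s1"))  # 1
print(output.get_outliers(data))              # the 2024-01-02 row joined with its score
```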
ads/opctl/operator/lowcode/anomaly/model/automlx.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import pandas as pd
+
+from ads.common.decorator.runtime_dependency import runtime_dependency
+from .anomaly_dataset import AnomalyOutput
+
+from .base_model import AnomalyOperatorBaseModel
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+
+class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
+    """Class representing AutoMLX operator model."""
+
+    @runtime_dependency(
+        module="automlx",
+        err_msg=(
+            "Please run `pip3 install oracle-automlx==23.4.1` and "
+            "`pip3 install oracle-automlx[classic]==23.4.1` "
+            "to install the required dependencies for automlx."
+        ),
+    )
+    def _build_model(self) -> pd.DataFrame:
+        from automlx import init
+        try:
+            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
+        except Exception as e:
+            logger.info("Ray already initialized")
+        date_column = self.spec.datetime_column.name
+        anomaly_output = AnomalyOutput(date_column=date_column)
+        time_budget = self.spec.model_kwargs.pop("time_budget", -1)
+
+        # Iterate over the full_data_dict items
+        for target, df in self.datasets.full_data_dict.items():
+            est = automlx.Pipeline(task="anomaly_detection", **self.spec.model_kwargs)
+            est.fit(
+                X=df,
+                X_valid=self.X_valid_dict[target]
+                if self.X_valid_dict is not None
+                else None,
+                y_valid=self.y_valid_dict[target]
+                if self.y_valid_dict is not None
+                else None,
+                contamination=self.spec.contamination
+                if self.y_valid_dict is not None
+                else None,
+                time_budget=time_budget,
+            )
+            y_pred = est.predict(df)
+            scores = est.predict_proba(df)
+
+            anomaly = pd.DataFrame(
+                {date_column: df[date_column], OutputColumns.ANOMALY_COL: y_pred}
+            ).reset_index(drop=True)
+            score = pd.DataFrame(
+                {
+                    date_column: df[date_column],
+                    OutputColumns.SCORE_COL: [item[1] for item in scores],
+                }
+            ).reset_index(drop=True)
+            anomaly_output.add_output(target, anomaly, score)
+
+        return anomaly_output
+
+    def _generate_report(self):
+        import datapane as dp
+
+        """The method that needs to be implemented on the particular model level."""
+        selected_models_text = dp.Text(
+            f"## Selected Models Overview \n "
+            "The following tables provide information regarding the chosen model."
+        )
+        all_sections = [selected_models_text]
+
+        model_description = dp.Text(
+            "The automlx model automatically pre-processes, selects and engineers "
+            "high-quality features in your dataset, which then given to an automatically "
+            "chosen and optimized machine learning model.."
+        )
+        other_sections = all_sections
+
+        return (
+            model_description,
+            other_sections,
+        )
ads/opctl/operator/lowcode/anomaly/model/autots.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import pandas as pd
+
+from ads.common.decorator.runtime_dependency import runtime_dependency
+
+from .base_model import AnomalyOperatorBaseModel
+from .anomaly_dataset import AnomalyOutput
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+
+class AutoTSOperatorModel(AnomalyOperatorBaseModel):
+    """Class representing AutoTS Anomaly Detection operator model."""
+
+    @runtime_dependency(
+        module="autots",
+        err_msg=(
+            "Please run `pip3 install autots` to "
+            "install the required dependencies for AutoTS."
+        ),
+    )
+    def _build_model(self) -> AnomalyOutput:
+        from autots.evaluator.anomaly_detector import AnomalyDetector
+
+        method = self.spec.model_kwargs.get("method")
+        transform_dict = self.spec.model_kwargs.get("transform_dict", {})
+
+        if method == "random" or method == "deep" or method == "fast":
+            new_params = AnomalyDetector.get_new_params(method=method)
+            transform_dict = new_params.pop("transform_dict")
+
+            for key, value in new_params.items():
+                self.spec.model_kwargs[key] = value
+
+        if self.spec.model_kwargs.get("output") is None:
+            self.spec.model_kwargs["output"] = "univariate"
+
+        if "transform_dict" not in self.spec.model_kwargs:
+            self.spec.model_kwargs["transform_dict"] = transform_dict
+
+        if self.spec.contamination != 0.1:  # TODO: remove hard-coding
+            self.spec.model_kwargs.get("method_params", {})[
+                "contamination"
+            ] = self.spec.contamination
+
+        model = AnomalyDetector(**self.spec.model_kwargs)
+
+        date_column = self.spec.datetime_column.name
+
+        anomaly_output = AnomalyOutput(date_column=date_column)
+
+        for target, df in self.datasets.full_data_dict.items():
+            data = df.set_index(date_column)
+
+            (anomaly, score) = model.detect(data)
+
+            if len(anomaly.columns) == 1:
+                score.rename(
+                    columns={score.columns.values[0]: OutputColumns.SCORE_COL},
+                    inplace=True,
+                )
+                score = 1 - score
+                score = score.reset_index(drop=False)
+
+                col = anomaly.columns.values[0]
+                anomaly[col] = anomaly[col].replace({1: 0, -1: 1})
+                anomaly.rename(columns={col: OutputColumns.ANOMALY_COL}, inplace=True)
+                anomaly = anomaly.reset_index(drop=False)
+
+                anomaly_output.add_output(target, anomaly, score)
+
+            else:
+                raise NotImplementedError(
+                    "Multi-Output Anomaly Detection is not yet supported in autots"
+                )
+
+        return anomaly_output
+
+    def _generate_report(self):
+        import datapane as dp
+
+        """The method that needs to be implemented on the particular model level."""
+        selected_models_text = dp.Text(
+            f"## Selected Models Overview \n "
+            "The following tables provide information regarding the chosen model."
+        )
+        all_sections = [selected_models_text]
+
+        model_description = dp.Text(
+            "The automlx model automatically pre-processes, selects and engineers "
+            "high-quality features in your dataset, which then given to an automatically "
+            "chosen and optimized machine learning model.."
+        )
+        other_sections = all_sections
+
+        return (
+            model_description,
+            other_sections,
+        )