oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- ads/aqua/__init__.py +12 -0
- ads/aqua/base.py +324 -0
- ads/aqua/cli.py +19 -0
- ads/aqua/config/deployment_config_defaults.json +9 -0
- ads/aqua/config/resource_limit_names.json +7 -0
- ads/aqua/constants.py +45 -0
- ads/aqua/data.py +40 -0
- ads/aqua/decorator.py +101 -0
- ads/aqua/deployment.py +643 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation.py +1751 -0
- ads/aqua/exception.py +82 -0
- ads/aqua/extension/__init__.py +40 -0
- ads/aqua/extension/base_handler.py +138 -0
- ads/aqua/extension/common_handler.py +21 -0
- ads/aqua/extension/deployment_handler.py +202 -0
- ads/aqua/extension/evaluation_handler.py +135 -0
- ads/aqua/extension/finetune_handler.py +66 -0
- ads/aqua/extension/model_handler.py +59 -0
- ads/aqua/extension/ui_handler.py +201 -0
- ads/aqua/extension/utils.py +23 -0
- ads/aqua/finetune.py +579 -0
- ads/aqua/job.py +29 -0
- ads/aqua/model.py +819 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +459 -0
- ads/aqua/ui.py +453 -0
- ads/aqua/utils.py +715 -0
- ads/cli.py +37 -6
- ads/common/auth.py +7 -0
- ads/common/decorator/__init__.py +7 -3
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/object_storage_details.py +166 -7
- ads/common/oci_client.py +18 -1
- ads/common/oci_logging.py +2 -2
- ads/common/oci_mixin.py +4 -5
- ads/common/serializer.py +34 -5
- ads/common/utils.py +75 -10
- ads/config.py +40 -1
- ads/dataset/correlation_plot.py +10 -12
- ads/jobs/ads_job.py +43 -25
- ads/jobs/builders/infrastructure/base.py +4 -2
- ads/jobs/builders/infrastructure/dsc_job.py +49 -39
- ads/jobs/builders/runtimes/base.py +71 -1
- ads/jobs/builders/runtimes/container_runtime.py +4 -4
- ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
- ads/jobs/templates/driver_pytorch.py +27 -10
- ads/model/artifact_downloader.py +84 -14
- ads/model/artifact_uploader.py +25 -23
- ads/model/datascience_model.py +388 -38
- ads/model/deployment/model_deployment.py +10 -2
- ads/model/generic_model.py +8 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_metadata.py +1 -1
- ads/model/service/oci_datascience_model.py +34 -5
- ads/opctl/config/merger.py +2 -2
- ads/opctl/operator/__init__.py +3 -1
- ads/opctl/operator/cli.py +7 -1
- ads/opctl/operator/cmd.py +3 -3
- ads/opctl/operator/common/errors.py +2 -1
- ads/opctl/operator/common/operator_config.py +22 -3
- ads/opctl/operator/common/utils.py +16 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
- ads/opctl/operator/lowcode/anomaly/README.md +209 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +88 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +96 -0
- ads/opctl/operator/lowcode/common/errors.py +41 -0
- ads/opctl/operator/lowcode/common/transformations.py +191 -0
- ads/opctl/operator/lowcode/common/utils.py +250 -0
- ads/opctl/operator/lowcode/forecast/README.md +3 -2
- ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
- ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
- ads/opctl/operator/lowcode/forecast/const.py +17 -1
- ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
- ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
- ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
- ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
- ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
- ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
- ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
- ads/opctl/operator/lowcode/forecast/utils.py +186 -356
- ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
- ads/opctl/operator/lowcode/pii/model/report.py +7 -7
- ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
- ads/opctl/operator/lowcode/pii/utils.py +0 -82
- ads/opctl/operator/runtime/runtime.py +3 -2
- ads/telemetry/base.py +62 -0
- ads/telemetry/client.py +105 -0
- ads/telemetry/telemetry.py +6 -3
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
- ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
--- /dev/null
+++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+import tempfile
+import time
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+import fsspec
+import pandas as pd
+import numpy as np
+from sklearn import linear_model
+
+from ads.opctl import logger
+
+from ..operator_config import AnomalyOperatorConfig, AnomalyOperatorSpec
+from .anomaly_dataset import AnomalyDatasets, AnomalyOutput, TestData
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
+from ..const import SupportedModels
+from ads.opctl.operator.lowcode.common.utils import (
+    human_time_friendly,
+    enable_print,
+    disable_print,
+    write_data,
+    merge_category_columns,
+    find_output_dirname,
+)
+from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
+from ads.common.object_storage_details import ObjectStorageDetails
+
+
+class AnomalyOperatorBaseModel(ABC):
+    """The base class for the anomaly detection operator models."""
+
+    def __init__(self, config: AnomalyOperatorConfig, datasets: AnomalyDatasets):
+        """Instantiates the AnomalyOperatorBaseModel instance.
+
+        Properties
+        ----------
+        config: AnomalyOperatorConfig
+            The anomaly detection operator configuration.
+        """
+
+        self.config: AnomalyOperatorConfig = config
+        self.spec: AnomalyOperatorSpec = config.spec
+        self.datasets = datasets
+        if self.spec.validation_data is not None:
+            self.X_valid_dict = self.datasets.valid_data.X_valid_dict
+            self.y_valid_dict = self.datasets.valid_data.y_valid_dict
+        else:
+            self.X_valid_dict = None
+            self.y_valid_dict = None
+
+    def generate_report(self):
+        """Generates the report."""
+        import datapane as dp
+        import matplotlib.pyplot as plt
+
+        start_time = time.time()
+        # fallback using sklearn oneclasssvm when the sub model _build_model fails
+        try:
+            anomaly_output = self._build_model()
+        except Exception as e:
+            anomaly_output = self._fallback_build_model()
+
+        elapsed_time = time.time() - start_time
+
+        summary_metrics = None
+        total_metrics = None
+        test_data = None
+
+        if self.spec.test_data:
+            test_data = TestData(self.spec)
+            total_metrics, summary_metrics = self._test_data_evaluate_metrics(
+                anomaly_output, test_data, elapsed_time
+            )
+        table_blocks = [
+            dp.DataTable(df, label=col)
+            for col, df in self.datasets.full_data_dict.items()
+        ]
+        data_table = (
+            dp.Select(blocks=table_blocks) if len(table_blocks) > 1 else table_blocks[0]
+        )
+        date_column = self.spec.datetime_column.name
+
+        blocks = []
+        for target, df in self.datasets.full_data_dict.items():
+            figure_blocks = []
+            time_col = df[date_column].reset_index(drop=True)
+            anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
+                OutputColumns.ANOMALY_COL
+            ]
+            columns = set(df.columns).difference({date_column})
+            for col in columns:
+                y = df[col].reset_index(drop=True)
+                fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
+                ax.grid()
+                ax.plot(time_col, y, color="black")
+                for i, index in enumerate(anomaly_col):
+                    if anomaly_col[i] == 1:
+                        ax.scatter(time_col[i], y[i], color="red", marker="o")
+                plt.xlabel(date_column)
+                plt.ylabel(col)
+                plt.title(f"`{col}` with reference to anomalies")
+                figure_blocks.append(ax)
+            blocks.append(dp.Group(blocks=figure_blocks, label=target))
+        plots = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+
+        report_sections = []
+        title_text = dp.Text("# Anomaly Detection Report")
+
+        yaml_appendix_title = dp.Text(f"## Reference: YAML File")
+        yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml")
+        summary = dp.Blocks(
+            blocks=[
+                dp.Group(
+                    dp.Text(f"You selected the **`{self.spec.model}`** model."),
+                    dp.Text(
+                        "Based on your dataset, you could have also selected "
+                        f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
+                    ),
+                    dp.BigNumber(
+                        heading="Analysis was completed in ",
+                        value=human_time_friendly(elapsed_time),
+                    ),
+                    label="Summary",
+                )
+            ]
+        )
+        sec_text = dp.Text(f"## Train Evaluation Metrics")
+        sec = dp.DataTable(self._evaluation_metrics(anomaly_output))
+        evaluation_metrics_sec = [sec_text, sec]
+
+        test_metrics_sections = []
+        if total_metrics is not None and not total_metrics.empty:
+            sec_text = dp.Text(f"## Test Data Evaluation Metrics")
+            sec = dp.DataTable(total_metrics)
+            test_metrics_sections = test_metrics_sections + [sec_text, sec]
+
+        if summary_metrics is not None and not summary_metrics.empty:
+            sec_text = dp.Text(f"## Test Data Summary Metrics")
+            sec = dp.DataTable(summary_metrics)
+            test_metrics_sections = test_metrics_sections + [sec_text, sec]
+
+        report_sections = (
+            [title_text, summary]
+            + [plots]
+            + [data_table]
+            + evaluation_metrics_sec
+            + test_metrics_sections
+            + [yaml_appendix_title, yaml_appendix]
+        )
+
+        # save the report and result CSV
+        self._save_report(
+            report_sections=report_sections,
+            anomaly_output=anomaly_output,
+            test_metrics=total_metrics,
+        )
+
+    def _evaluation_metrics(self, anomaly_output):
+        total_metrics = pd.DataFrame()
+        for cat in anomaly_output.list_categories():
+            num_anomalies = anomaly_output.get_num_anomalies_by_cat(cat)
+            metrics_df = pd.DataFrame.from_dict(
+                {"Num of Anomalies": num_anomalies}, orient="index", columns=[cat]
+            )
+            total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
+        return total_metrics
+
+    def _test_data_evaluate_metrics(self, anomaly_output, test_data, elapsed_time):
+        total_metrics = pd.DataFrame()
+        summary_metrics = pd.DataFrame()
+
+        for cat in anomaly_output.list_categories():
+            output = anomaly_output.category_map[cat][0]
+            date_col = self.spec.datetime_column.name
+
+            test_data_i = test_data.get_data_for_series(cat)
+
+            dates = output[output[date_col].isin(test_data_i[date_col])][date_col]
+
+            metrics_df = _build_metrics_df(
+                test_data_i[test_data_i[date_col].isin(dates)][
+                    OutputColumns.ANOMALY_COL
+                ].values,
+                output[output[date_col].isin(dates)][OutputColumns.ANOMALY_COL].values,
+                cat,
+            )
+            total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
+
+        if total_metrics.empty:
+            return total_metrics, summary_metrics
+
+        summary_metrics = pd.DataFrame(
+            {
+                SupportedMetrics.MEAN_RECALL: np.mean(
+                    total_metrics.loc[SupportedMetrics.RECALL]
+                ),
+                SupportedMetrics.MEDIAN_RECALL: np.median(
+                    total_metrics.loc[SupportedMetrics.RECALL]
+                ),
+                SupportedMetrics.MEAN_PRECISION: np.mean(
+                    total_metrics.loc[SupportedMetrics.PRECISION]
+                ),
+                SupportedMetrics.MEDIAN_PRECISION: np.median(
+                    total_metrics.loc[SupportedMetrics.PRECISION]
+                ),
+                SupportedMetrics.MEAN_ACCURACY: np.mean(
+                    total_metrics.loc[SupportedMetrics.ACCURACY]
+                ),
+                SupportedMetrics.MEDIAN_ACCURACY: np.median(
+                    total_metrics.loc[SupportedMetrics.ACCURACY]
+                ),
+                SupportedMetrics.MEAN_F1_SCORE: np.mean(
+                    total_metrics.loc[SupportedMetrics.F1_SCORE]
+                ),
+                SupportedMetrics.MEDIAN_F1_SCORE: np.median(
+                    total_metrics.loc[SupportedMetrics.F1_SCORE]
+                ),
+                SupportedMetrics.MEAN_ROC_AUC: np.mean(
+                    total_metrics.loc[SupportedMetrics.ROC_AUC]
+                ),
+                SupportedMetrics.MEDIAN_ROC_AUC: np.median(
+                    total_metrics.loc[SupportedMetrics.ROC_AUC]
+                ),
+                SupportedMetrics.MEAN_PRC_AUC: np.mean(
+                    total_metrics.loc[SupportedMetrics.PRC_AUC]
+                ),
+                SupportedMetrics.MEDIAN_PRC_AUC: np.median(
+                    total_metrics.loc[SupportedMetrics.PRC_AUC]
+                ),
+                SupportedMetrics.ELAPSED_TIME: elapsed_time,
+            },
+            index=["All Targets"],
+        )
+
+        return total_metrics, summary_metrics
+
+    def _save_report(
+        self,
+        report_sections: Tuple,
+        anomaly_output: AnomalyOutput,
+        test_metrics: pd.DataFrame,
+    ):
+        """Saves resulting reports to the given folder."""
+        import datapane as dp
+
+        unique_output_dir = find_output_dirname(self.spec.output_directory)
+
+        if ObjectStorageDetails.is_oci_path(unique_output_dir):
+            storage_options = default_signer()
+        else:
+            storage_options = dict()
+
+        # datapane html report
+        with tempfile.TemporaryDirectory() as temp_dir:
+            report_local_path = os.path.join(temp_dir, "___report.html")
+            disable_print()
+            dp.save_report(report_sections, report_local_path)
+            enable_print()
+            with open(report_local_path) as f1:
+                with fsspec.open(
+                    os.path.join(unique_output_dir, self.spec.report_file_name),
+                    "w",
+                    **storage_options,
+                ) as f2:
+                    f2.write(f1.read())
+
+        if self.spec.generate_inliers:
+            inliers = anomaly_output.get_inliers(self.datasets.data)
+            write_data(
+                data=inliers,
+                filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
+                format="csv",
+                storage_options=storage_options,
+            )
+
+        outliers = anomaly_output.get_outliers(self.datasets.data)
+        write_data(
+            data=outliers,
+            filename=os.path.join(unique_output_dir, self.spec.outliers_filename),
+            format="csv",
+            storage_options=storage_options,
+        )
+
+        if test_metrics is not None and not test_metrics.empty:
+            write_data(
+                data=test_metrics.rename_axis("metrics").reset_index(),
+                filename=os.path.join(
+                    unique_output_dir, self.spec.test_metrics_filename
+                ),
+                format="csv",
+                storage_options=storage_options,
+            )
+
+        logger.warn(
+            f"The report has been successfully "
+            f"generated and placed to the: {unique_output_dir}."
+        )
+
+    def _fallback_build_model(self):
+        """
+        Fallback method for the sub model _build_model method.
+        """
+        logger.warn(
+            "The build_model method has failed for the model: {}. "
+            "A fallback model will be built.".format(self.spec.model)
+        )
+
+        date_column = self.spec.datetime_column.name
+        dataset = self.datasets
+
+        anomaly_output = AnomalyOutput(date_column=date_column)
+
+        # map the output as per anomaly dataset class, 1: outlier, 0: inlier
+        self.outlier_map = {1: 0, -1: 1}
+
+        # Iterate over the full_data_dict items
+        for target, df in self.datasets.full_data_dict.items():
+            est = linear_model.SGDOneClassSVM(random_state=42)
+            est.fit(df[target].values.reshape(-1, 1))
+            y_pred = np.vectorize(self.outlier_map.get)(
+                est.predict(df[target].values.reshape(-1, 1))
+            )
+            scores = est.score_samples(df[target].values.reshape(-1, 1))
+
+            anomaly = pd.DataFrame(
+                {date_column: df[date_column], OutputColumns.ANOMALY_COL: y_pred}
+            ).reset_index(drop=True)
+            score = pd.DataFrame(
+                {date_column: df[date_column], OutputColumns.SCORE_COL: scores}
+            ).reset_index(drop=True)
+            anomaly_output.add_output(target, anomaly, score)
+
+        return anomaly_output
+
+    @abstractmethod
+    def _generate_report(self):
+        """
+        Generates the report for the particular model.
+        The method that needs to be implemented on the particular model level.
+        """
+
+    @abstractmethod
+    def _build_model(self) -> pd.DataFrame:
+        """
+        Build the model.
+        The method that needs to be implemented on the particular model level.
+        """
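For orientation, the sketch below (an editor's illustration, not code from the release) shows the minimum a concrete subclass has to supply: a `_build_model` that returns an `AnomalyOutput`, and a `_generate_report` that returns model-specific report blocks. The 3-sigma rule and the `(description, sections)` return shape are assumptions.

```python
# Editor's sketch, not part of the diff: a minimal concrete subclass of
# AnomalyOperatorBaseModel. The 3-sigma rule is purely illustrative.
import pandas as pd

from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
from ads.opctl.operator.lowcode.anomaly.model.anomaly_dataset import AnomalyOutput
from ads.opctl.operator.lowcode.anomaly.model.base_model import AnomalyOperatorBaseModel


class ZScoreOperatorModel(AnomalyOperatorBaseModel):
    """Flags points farther than three standard deviations from the mean."""

    def _build_model(self) -> AnomalyOutput:
        date_column = self.spec.datetime_column.name
        anomaly_output = AnomalyOutput(date_column=date_column)
        for target, df in self.datasets.full_data_dict.items():
            # Absolute z-score per observation; 1 marks an outlier, 0 an inlier,
            # matching the convention used by _fallback_build_model above.
            z = ((df[target] - df[target].mean()) / df[target].std()).abs()
            anomaly = pd.DataFrame(
                {date_column: df[date_column], OutputColumns.ANOMALY_COL: (z > 3).astype(int)}
            ).reset_index(drop=True)
            score = pd.DataFrame(
                {date_column: df[date_column], OutputColumns.SCORE_COL: z}
            ).reset_index(drop=True)
            anomaly_output.add_output(target, anomaly, score)
        return anomaly_output

    def _generate_report(self):
        import datapane as dp

        # Mirrors the (description, sections) shape returned by the commented-out
        # TODS model further down; the exact contract is an assumption.
        return dp.Text("Simple 3-sigma thresholding per target column."), []
```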
--- /dev/null
+++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from ..const import SupportedModels
+from ..operator_config import AnomalyOperatorConfig
+from .automlx import AutoMLXOperatorModel
+from .autots import AutoTSOperatorModel
+from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
+
+# from .tods import TODSOperatorModel
+from .base_model import AnomalyOperatorBaseModel
+from .anomaly_dataset import AnomalyDatasets
+
+
+class UnSupportedModelError(Exception):
+    def __init__(self, model_type: str):
+        super().__init__(
+            f"Model: `{model_type}` "
+            f"is not supported. Supported models: {SupportedModels.values}"
+        )
+
+
+class AnomalyOperatorModelFactory:
+    """
+    The factory class helps to instantiate proper model operator based on the model type.
+    """
+
+    _MAP = {
+        SupportedModels.AutoMLX: AutoMLXOperatorModel,
+        # SupportedModels.TODS: TODSOperatorModel,
+        SupportedModels.AutoTS: AutoTSOperatorModel,
+    }
+
+    @classmethod
+    def get_model(
+        cls, operator_config: AnomalyOperatorConfig, datasets: AnomalyDatasets
+    ) -> AnomalyOperatorBaseModel:
+        """
+        Gets the operator model based on the model type.
+
+        Parameters
+        ----------
+        operator_config: AnomalyOperatorConfig
+            The anomaly detection operator config.
+
+        datasets: AnomalyDatasets
+            Datasets for finding anomaly
+
+        Returns
+        -------
+        AnomalyOperatorBaseModel
+            The anomaly detection operator model.
+
+        Raises
+        ------
+        UnSupportedModelError
+            In case of not supported model.
+        """
+        model_type = operator_config.spec.model
+        if model_type == "auto":
+            model_type = select_auto_model(datasets, operator_config)
+        if model_type not in cls._MAP:
+            raise UnSupportedModelError(model_type)
+        return cls._MAP[model_type](config=operator_config, datasets=datasets)
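A hedged end-to-end sketch of how the factory is typically driven; `anomaly.yaml` is a placeholder path, and the `AnomalyDatasets` constructor signature is an assumption:

```python
# Editor's sketch, not part of the diff: load a config, build the datasets,
# and let the factory pick the concrete operator model.
from ads.opctl.operator.lowcode.anomaly.model.anomaly_dataset import AnomalyDatasets
from ads.opctl.operator.lowcode.anomaly.model.factory import AnomalyOperatorModelFactory
from ads.opctl.operator.lowcode.anomaly.operator_config import AnomalyOperatorConfig

# from_yaml is inherited from the ads serializer; the uri kwarg is assumed here.
config = AnomalyOperatorConfig.from_yaml(uri="anomaly.yaml")
datasets = AnomalyDatasets(config.spec)  # constructor signature assumed

# A spec model of "auto" is resolved to a concrete model via select_auto_model
# before the _MAP lookup; anything outside _MAP raises UnSupportedModelError.
model = AnomalyOperatorModelFactory.get_model(config, datasets)
model.generate_report()
```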
--- /dev/null
+++ b/ads/opctl/operator/lowcode/anomaly/model/tods.py
@@ -0,0 +1,119 @@
+# #!/usr/bin/env python
+# # -*- coding: utf-8 -*--
+
+# # Copyright (c) 2023 Oracle and/or its affiliates.
+# # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+# import importlib
+
+# import numpy as np
+# import pandas as pd
+
+# from ads.common.decorator.runtime_dependency import runtime_dependency
+# from .anomaly_dataset import AnomalyOutput
+
+# from ..const import (
+#     TODS_IMPORT_MODEL_MAP,
+#     TODS_MODEL_MAP,
+#     OutputColumns,
+#     TODS_DEFAULT_MODEL,
+# )
+# from .base_model import AnomalyOperatorBaseModel
+
+
+# class TODSOperatorModel(AnomalyOperatorBaseModel):
+#     """Class representing TODS Anomaly Detection operator model."""
+
+#     @runtime_dependency(
+#         module="tods",
+#         err_msg=(
+#             "Please run `pip3 install tods` to "
+#             "install the required dependencies for TODS."
+#         ),
+#     )
+#     def _build_model(self) -> pd.DataFrame:
+#         """
+#         Build the TODS model.
+
+#         Returns
+#         -------
+#         Tuple: model, predictions_train, and prediction_score_test
+#         """
+#         # Import the TODS module
+#         tods_module = importlib.import_module(
+#             name=TODS_IMPORT_MODEL_MAP.get(
+#                 self.spec.model_kwargs.get("sub_model", TODS_DEFAULT_MODEL)
+#             ),
+#             package="tods.sk_interface.detection_algorithm",
+#         )
+
+#         # Get the model kwargs
+#         model_kwargs = self.spec.model_kwargs
+#         sub_model = self.spec.model_kwargs.get("sub_model", TODS_DEFAULT_MODEL)
+#         model_kwargs.pop("sub_model", None)
+
+#         # Initialize variables
+#         models = {}
+#         predictions_train = {}
+#         prediction_score_train = {}
+#         predictions_test = {}
+#         prediction_score_test = {}
+#         date_column = self.spec.datetime_column.name
+#         anomaly_output = AnomalyOutput(date_column=date_column)
+
+#         # Iterate over the full_data_dict items
+#         for target, df in self.datasets.full_data_dict.items():
+#             # Instantiate the model
+#             model = getattr(tods_module, TODS_MODEL_MAP.get(sub_model))(**model_kwargs)
+
+#             # Fit the model
+#             model.fit(np.array(df[self.spec.target_column]).reshape(-1, 1))
+
+#             # Make predictions
+#             predictions_train[target] = model.predict(
+#                 np.array(df[self.spec.target_column]).reshape(-1, 1)
+#             )
+#             prediction_score_train[target] = model.predict_score(
+#                 np.array(df[self.spec.target_column]).reshape(-1, 1)
+#             )
+
+#             # Store the model and predictions in dictionaries
+#             models[target] = model
+
+#             anomaly = pd.DataFrame(
+#                 {
+#                     date_column: df[date_column],
+#                     OutputColumns.ANOMALY_COL: predictions_train[target],
+#                 }
+#             )
+#             score = pd.DataFrame(
+#                 {
+#                     date_column: df[date_column],
+#                     OutputColumns.SCORE_COL: prediction_score_train[target],
+#                 }
+#             )
+#             anomaly_output.add_output(target, anomaly, score)
+
+#         return anomaly_output
+
+#     def _generate_report(self):
+#         import datapane as dp
+
+#         """The method that needs to be implemented on the particular model level."""
+#         selected_models_text = dp.Text(
+#             f"## Selected Models Overview \n "
+#             "The following tables provide information regarding the chosen model."
+#         )
+#         all_sections = [selected_models_text]
+
+#         model_description = dp.Text(
+#             "The tods model is a full-stack automated machine learning system for outlier detection "
+#             "on univariate / multivariate time-series data. It provides exhaustive modules for building "
+#             "machine learning-based outlier detection systems and wide range of algorithms."
+#         )
+#         other_sections = all_sections
+
+#         return (
+#             model_description,
+#             other_sections,
+#         )
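The commented-out implementation resolves its TODS sub-model dynamically through `importlib`; below is a standalone sketch of that lookup pattern, with illustrative registries in place of the real `TODS_IMPORT_MODEL_MAP`/`TODS_MODEL_MAP` constants:

```python
# Editor's sketch of the dynamic-import pattern used above: map a user-supplied
# key to a module and class name, import lazily, and instantiate with kwargs.
# The registry contents here are illustrative, not the actual TODS constants.
import importlib
from typing import Any, Dict

IMPORT_MAP: Dict[str, str] = {"ocsvm": "OCSVM_skinterface"}  # key -> module name
CLASS_MAP: Dict[str, str] = {"ocsvm": "OCSVMSKI"}            # key -> class name


def build_detector(sub_model: str, package: str, **kwargs: Any):
    """Imports `<package>.<module>` relative to the anchor package and
    instantiates the mapped class with the remaining kwargs."""
    module = importlib.import_module(
        name=f".{IMPORT_MAP[sub_model]}", package=package
    )
    return getattr(module, CLASS_MAP[sub_model])(**kwargs)
```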
--- /dev/null
+++ b/ads/opctl/operator/lowcode/anomaly/operator_config.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+from dataclasses import dataclass, field
+from typing import Dict, List
+
+from ads.common.serializer import DataClassSerializable
+from ads.opctl.operator.common.utils import _load_yaml_from_uri
+from ads.opctl.operator.common.operator_config import (
+    OperatorConfig,
+    OutputDirectory,
+    InputData,
+)
+from .const import SupportedModels
+
+
+@dataclass(repr=True)
+class ValidationData(InputData):
+    """Class representing operator specification input data details."""
+
+
+@dataclass(repr=True)
+class DateTimeColumn(DataClassSerializable):
+    """Class representing operator specification date time column details."""
+
+    name: str = None
+    format: str = None
+
+
+@dataclass(repr=True)
+class TestData(InputData):
+    """Class representing operator specification test data details."""
+
+
+@dataclass(repr=True)
+class AnomalyOperatorSpec(DataClassSerializable):
+    """Class representing operator specification."""
+
+    input_data: InputData = field(default_factory=InputData)
+    datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
+    test_data: TestData = field(default_factory=TestData)
+    validation_data: ValidationData = field(default_factory=ValidationData)
+    output_directory: OutputDirectory = field(default_factory=OutputDirectory)
+    report_file_name: str = None
+    report_title: str = None
+    report_theme: str = None
+    metrics_filename: str = None
+    test_metrics_filename: str = None
+    inliers_filename: str = None
+    outliers_filename: str = None
+    target_column: str = None
+    target_category_columns: List[str] = field(default_factory=list)
+    preprocessing: bool = None
+    generate_report: bool = None
+    generate_metrics: bool = None
+    generate_inliers: bool = None
+    model: str = None
+    model_kwargs: Dict = field(default_factory=dict)
+    contamination: float = None
+
+    def __post_init__(self):
+        """Adjusts the specification details."""
+        self.report_file_name = self.report_file_name or "report.html"
+        self.report_theme = self.report_theme or "light"
+        self.inliers_filename = self.inliers_filename or "inliers.csv"
+        self.outliers_filename = self.outliers_filename or "outliers.csv"
+        self.test_metrics_filename = self.test_metrics_filename or "metrics.csv"
+        self.model = self.model or SupportedModels.Auto
+        self.generate_inliers = (
+            self.generate_inliers if self.generate_inliers is not None else False
+        )
+        self.model_kwargs = self.model_kwargs or dict()
+
+
+@dataclass(repr=True)
+class AnomalyOperatorConfig(OperatorConfig):
+    """Class representing operator config.
+
+    Attributes
+    ----------
+    kind: str
+        The kind of the resource. For operators it is always - `operator`.
+    type: str
+        The type of the operator.
+    version: str
+        The version of the operator.
+    spec: AnomalyOperatorSpec
+        The operator specification.
+    """
+
+    kind: str = "operator"
+    type: str = "anomaly"
+    version: str = "v1"
+    spec: AnomalyOperatorSpec = field(default_factory=AnomalyOperatorSpec)
+
+    @classmethod
+    def _load_schema(cls) -> str:
+        """Loads operator schema."""
+        return _load_yaml_from_uri(
+            os.path.join(os.path.dirname(os.path.abspath(__file__)), "schema.yaml")
+        )
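A small sketch (not from the release) showing the defaults `__post_init__` fills in when fields are omitted, assuming oracle-ads 2.11.0 is installed:

```python
# Editor's sketch: construct the config programmatically and observe the
# defaults applied by AnomalyOperatorSpec.__post_init__.
from ads.opctl.operator.lowcode.anomaly.operator_config import (
    AnomalyOperatorConfig,
    AnomalyOperatorSpec,
)

spec = AnomalyOperatorSpec(target_column="value")
config = AnomalyOperatorConfig(spec=spec)

print(spec.report_file_name)  # "report.html" -- default from __post_init__
print(spec.model)             # SupportedModels.Auto unless set explicitly
print(config.to_yaml())       # DataClassSerializable round-trips to YAML
```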