oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (117)
  1. ads/aqua/__init__.py +12 -0
  2. ads/aqua/base.py +324 -0
  3. ads/aqua/cli.py +19 -0
  4. ads/aqua/config/deployment_config_defaults.json +9 -0
  5. ads/aqua/config/resource_limit_names.json +7 -0
  6. ads/aqua/constants.py +45 -0
  7. ads/aqua/data.py +40 -0
  8. ads/aqua/decorator.py +101 -0
  9. ads/aqua/deployment.py +643 -0
  10. ads/aqua/dummy_data/icon.txt +1 -0
  11. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  12. ads/aqua/dummy_data/oci_models.json +1 -0
  13. ads/aqua/dummy_data/readme.md +26 -0
  14. ads/aqua/evaluation.py +1751 -0
  15. ads/aqua/exception.py +82 -0
  16. ads/aqua/extension/__init__.py +40 -0
  17. ads/aqua/extension/base_handler.py +138 -0
  18. ads/aqua/extension/common_handler.py +21 -0
  19. ads/aqua/extension/deployment_handler.py +202 -0
  20. ads/aqua/extension/evaluation_handler.py +135 -0
  21. ads/aqua/extension/finetune_handler.py +66 -0
  22. ads/aqua/extension/model_handler.py +59 -0
  23. ads/aqua/extension/ui_handler.py +201 -0
  24. ads/aqua/extension/utils.py +23 -0
  25. ads/aqua/finetune.py +579 -0
  26. ads/aqua/job.py +29 -0
  27. ads/aqua/model.py +819 -0
  28. ads/aqua/training/__init__.py +4 -0
  29. ads/aqua/training/exceptions.py +459 -0
  30. ads/aqua/ui.py +453 -0
  31. ads/aqua/utils.py +715 -0
  32. ads/cli.py +37 -6
  33. ads/common/auth.py +7 -0
  34. ads/common/decorator/__init__.py +7 -3
  35. ads/common/decorator/require_nonempty_arg.py +65 -0
  36. ads/common/object_storage_details.py +166 -7
  37. ads/common/oci_client.py +18 -1
  38. ads/common/oci_logging.py +2 -2
  39. ads/common/oci_mixin.py +4 -5
  40. ads/common/serializer.py +34 -5
  41. ads/common/utils.py +75 -10
  42. ads/config.py +40 -1
  43. ads/dataset/correlation_plot.py +10 -12
  44. ads/jobs/ads_job.py +43 -25
  45. ads/jobs/builders/infrastructure/base.py +4 -2
  46. ads/jobs/builders/infrastructure/dsc_job.py +49 -39
  47. ads/jobs/builders/runtimes/base.py +71 -1
  48. ads/jobs/builders/runtimes/container_runtime.py +4 -4
  49. ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
  50. ads/jobs/templates/driver_pytorch.py +27 -10
  51. ads/model/artifact_downloader.py +84 -14
  52. ads/model/artifact_uploader.py +25 -23
  53. ads/model/datascience_model.py +388 -38
  54. ads/model/deployment/model_deployment.py +10 -2
  55. ads/model/generic_model.py +8 -0
  56. ads/model/model_file_description_schema.json +68 -0
  57. ads/model/model_metadata.py +1 -1
  58. ads/model/service/oci_datascience_model.py +34 -5
  59. ads/opctl/config/merger.py +2 -2
  60. ads/opctl/operator/__init__.py +3 -1
  61. ads/opctl/operator/cli.py +7 -1
  62. ads/opctl/operator/cmd.py +3 -3
  63. ads/opctl/operator/common/errors.py +2 -1
  64. ads/opctl/operator/common/operator_config.py +22 -3
  65. ads/opctl/operator/common/utils.py +16 -0
  66. ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
  67. ads/opctl/operator/lowcode/anomaly/README.md +209 -0
  68. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  69. ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
  70. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  71. ads/opctl/operator/lowcode/anomaly/const.py +88 -0
  72. ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
  73. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  74. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
  75. ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
  76. ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
  77. ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
  78. ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
  79. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  80. ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
  81. ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
  82. ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
  83. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  84. ads/opctl/operator/lowcode/common/const.py +10 -0
  85. ads/opctl/operator/lowcode/common/data.py +96 -0
  86. ads/opctl/operator/lowcode/common/errors.py +41 -0
  87. ads/opctl/operator/lowcode/common/transformations.py +191 -0
  88. ads/opctl/operator/lowcode/common/utils.py +250 -0
  89. ads/opctl/operator/lowcode/forecast/README.md +3 -2
  90. ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
  91. ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
  92. ads/opctl/operator/lowcode/forecast/const.py +17 -1
  93. ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
  94. ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
  95. ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
  96. ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
  97. ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
  98. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
  99. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
  100. ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
  101. ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
  102. ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
  103. ads/opctl/operator/lowcode/forecast/utils.py +186 -356
  104. ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
  105. ads/opctl/operator/lowcode/pii/model/report.py +7 -7
  106. ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
  107. ads/opctl/operator/lowcode/pii/utils.py +0 -82
  108. ads/opctl/operator/runtime/runtime.py +3 -2
  109. ads/telemetry/base.py +62 -0
  110. ads/telemetry/client.py +105 -0
  111. ads/telemetry/telemetry.py +6 -3
  112. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
  113. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
  114. ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
  115. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
  116. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
  117. {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
ads/opctl/operator/lowcode/anomaly/model/base_model.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+import tempfile
+import time
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+import fsspec
+import pandas as pd
+import numpy as np
+from sklearn import linear_model
+
+from ads.opctl import logger
+
+from ..operator_config import AnomalyOperatorConfig, AnomalyOperatorSpec
+from .anomaly_dataset import AnomalyDatasets, AnomalyOutput, TestData
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
+from ..const import SupportedModels
+from ads.opctl.operator.lowcode.common.utils import (
+    human_time_friendly,
+    enable_print,
+    disable_print,
+    write_data,
+    merge_category_columns,
+    find_output_dirname,
+)
+from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
+from ads.common.object_storage_details import ObjectStorageDetails
+
+
+class AnomalyOperatorBaseModel(ABC):
+    """The base class for the anomaly detection operator models."""
+
+    def __init__(self, config: AnomalyOperatorConfig, datasets: AnomalyDatasets):
+        """Instantiates the AnomalyOperatorBaseModel instance.
+
+        Parameters
+        ----------
+        config: AnomalyOperatorConfig
+            The anomaly detection operator configuration.
+        """
+
+        self.config: AnomalyOperatorConfig = config
+        self.spec: AnomalyOperatorSpec = config.spec
+        self.datasets = datasets
+        if self.spec.validation_data is not None:
+            self.X_valid_dict = self.datasets.valid_data.X_valid_dict
+            self.y_valid_dict = self.datasets.valid_data.y_valid_dict
+        else:
+            self.X_valid_dict = None
+            self.y_valid_dict = None
+
+    def generate_report(self):
+        """Generates the report."""
+        import datapane as dp
+        import matplotlib.pyplot as plt
+
+        start_time = time.time()
+        # fall back to a sklearn SGDOneClassSVM model when the sub model's _build_model fails
+        try:
+            anomaly_output = self._build_model()
+        except Exception:
+            anomaly_output = self._fallback_build_model()
+
+        elapsed_time = time.time() - start_time
+
+        summary_metrics = None
+        total_metrics = None
+        test_data = None
+
+        if self.spec.test_data:
+            test_data = TestData(self.spec)
+            total_metrics, summary_metrics = self._test_data_evaluate_metrics(
+                anomaly_output, test_data, elapsed_time
+            )
+        table_blocks = [
+            dp.DataTable(df, label=col)
+            for col, df in self.datasets.full_data_dict.items()
+        ]
+        data_table = (
+            dp.Select(blocks=table_blocks) if len(table_blocks) > 1 else table_blocks[0]
+        )
+        date_column = self.spec.datetime_column.name
+
+        blocks = []
+        for target, df in self.datasets.full_data_dict.items():
+            figure_blocks = []
+            time_col = df[date_column].reset_index(drop=True)
+            anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
+                OutputColumns.ANOMALY_COL
+            ]
+            columns = set(df.columns).difference({date_column})
+            for col in columns:
+                y = df[col].reset_index(drop=True)
+                fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
+                ax.grid()
+                ax.plot(time_col, y, color="black")
+                for i, index in enumerate(anomaly_col):
+                    if anomaly_col[i] == 1:
+                        ax.scatter(time_col[i], y[i], color="red", marker="o")
+                plt.xlabel(date_column)
+                plt.ylabel(col)
+                plt.title(f"`{col}` with reference to anomalies")
+                figure_blocks.append(ax)
+            blocks.append(dp.Group(blocks=figure_blocks, label=target))
+        plots = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
+
+        report_sections = []
+        title_text = dp.Text("# Anomaly Detection Report")
+
+        yaml_appendix_title = dp.Text(f"## Reference: YAML File")
+        yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml")
+        summary = dp.Blocks(
+            blocks=[
+                dp.Group(
+                    dp.Text(f"You selected the **`{self.spec.model}`** model."),
+                    dp.Text(
+                        "Based on your dataset, you could have also selected "
+                        f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
+                    ),
+                    dp.BigNumber(
+                        heading="Analysis was completed in ",
+                        value=human_time_friendly(elapsed_time),
+                    ),
+                    label="Summary",
+                )
+            ]
+        )
+        sec_text = dp.Text(f"## Train Evaluation Metrics")
+        sec = dp.DataTable(self._evaluation_metrics(anomaly_output))
+        evaluation_metrics_sec = [sec_text, sec]
+
+        test_metrics_sections = []
+        if total_metrics is not None and not total_metrics.empty:
+            sec_text = dp.Text(f"## Test Data Evaluation Metrics")
+            sec = dp.DataTable(total_metrics)
+            test_metrics_sections = test_metrics_sections + [sec_text, sec]
+
+        if summary_metrics is not None and not summary_metrics.empty:
+            sec_text = dp.Text(f"## Test Data Summary Metrics")
+            sec = dp.DataTable(summary_metrics)
+            test_metrics_sections = test_metrics_sections + [sec_text, sec]
+
+        report_sections = (
+            [title_text, summary]
+            + [plots]
+            + [data_table]
+            + evaluation_metrics_sec
+            + test_metrics_sections
+            + [yaml_appendix_title, yaml_appendix]
+        )
+
+        # save the report and result CSV
+        self._save_report(
+            report_sections=report_sections,
+            anomaly_output=anomaly_output,
+            test_metrics=total_metrics,
+        )
+
+    def _evaluation_metrics(self, anomaly_output):
+        total_metrics = pd.DataFrame()
+        for cat in anomaly_output.list_categories():
+            num_anomalies = anomaly_output.get_num_anomalies_by_cat(cat)
+            metrics_df = pd.DataFrame.from_dict(
+                {"Num of Anomalies": num_anomalies}, orient="index", columns=[cat]
+            )
+            total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
+        return total_metrics
+
+    def _test_data_evaluate_metrics(self, anomaly_output, test_data, elapsed_time):
+        total_metrics = pd.DataFrame()
+        summary_metrics = pd.DataFrame()
+
+        for cat in anomaly_output.list_categories():
+            output = anomaly_output.category_map[cat][0]
+            date_col = self.spec.datetime_column.name
+
+            test_data_i = test_data.get_data_for_series(cat)
+
+            dates = output[output[date_col].isin(test_data_i[date_col])][date_col]
+
+            metrics_df = _build_metrics_df(
+                test_data_i[test_data_i[date_col].isin(dates)][
+                    OutputColumns.ANOMALY_COL
+                ].values,
+                output[output[date_col].isin(dates)][OutputColumns.ANOMALY_COL].values,
+                cat,
+            )
+            total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
+
+        if total_metrics.empty:
+            return total_metrics, summary_metrics
+
+        summary_metrics = pd.DataFrame(
+            {
+                SupportedMetrics.MEAN_RECALL: np.mean(
+                    total_metrics.loc[SupportedMetrics.RECALL]
+                ),
+                SupportedMetrics.MEDIAN_RECALL: np.median(
+                    total_metrics.loc[SupportedMetrics.RECALL]
+                ),
+                SupportedMetrics.MEAN_PRECISION: np.mean(
+                    total_metrics.loc[SupportedMetrics.PRECISION]
+                ),
+                SupportedMetrics.MEDIAN_PRECISION: np.median(
+                    total_metrics.loc[SupportedMetrics.PRECISION]
+                ),
+                SupportedMetrics.MEAN_ACCURACY: np.mean(
+                    total_metrics.loc[SupportedMetrics.ACCURACY]
+                ),
+                SupportedMetrics.MEDIAN_ACCURACY: np.median(
+                    total_metrics.loc[SupportedMetrics.ACCURACY]
+                ),
+                SupportedMetrics.MEAN_F1_SCORE: np.mean(
+                    total_metrics.loc[SupportedMetrics.F1_SCORE]
+                ),
+                SupportedMetrics.MEDIAN_F1_SCORE: np.median(
+                    total_metrics.loc[SupportedMetrics.F1_SCORE]
+                ),
+                SupportedMetrics.MEAN_ROC_AUC: np.mean(
+                    total_metrics.loc[SupportedMetrics.ROC_AUC]
+                ),
+                SupportedMetrics.MEDIAN_ROC_AUC: np.median(
+                    total_metrics.loc[SupportedMetrics.ROC_AUC]
+                ),
+                SupportedMetrics.MEAN_PRC_AUC: np.mean(
+                    total_metrics.loc[SupportedMetrics.PRC_AUC]
+                ),
+                SupportedMetrics.MEDIAN_PRC_AUC: np.median(
+                    total_metrics.loc[SupportedMetrics.PRC_AUC]
+                ),
+                SupportedMetrics.ELAPSED_TIME: elapsed_time,
+            },
+            index=["All Targets"],
+        )
+
+        return total_metrics, summary_metrics
+
+    def _save_report(
+        self,
+        report_sections: Tuple,
+        anomaly_output: AnomalyOutput,
+        test_metrics: pd.DataFrame,
+    ):
+        """Saves resulting reports to the given folder."""
+        import datapane as dp
+
+        unique_output_dir = find_output_dirname(self.spec.output_directory)
+
+        if ObjectStorageDetails.is_oci_path(unique_output_dir):
+            storage_options = default_signer()
+        else:
+            storage_options = dict()
+
+        # datapane html report
+        with tempfile.TemporaryDirectory() as temp_dir:
+            report_local_path = os.path.join(temp_dir, "___report.html")
+            disable_print()
+            dp.save_report(report_sections, report_local_path)
+            enable_print()
+            with open(report_local_path) as f1:
+                with fsspec.open(
+                    os.path.join(unique_output_dir, self.spec.report_file_name),
+                    "w",
+                    **storage_options,
+                ) as f2:
+                    f2.write(f1.read())
+
+        if self.spec.generate_inliers:
+            inliers = anomaly_output.get_inliers(self.datasets.data)
+            write_data(
+                data=inliers,
+                filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
+                format="csv",
+                storage_options=storage_options,
+            )
+
+        outliers = anomaly_output.get_outliers(self.datasets.data)
+        write_data(
+            data=outliers,
+            filename=os.path.join(unique_output_dir, self.spec.outliers_filename),
+            format="csv",
+            storage_options=storage_options,
+        )
+
+        if test_metrics is not None and not test_metrics.empty:
+            write_data(
+                data=test_metrics.rename_axis("metrics").reset_index(),
+                filename=os.path.join(
+                    unique_output_dir, self.spec.test_metrics_filename
+                ),
+                format="csv",
+                storage_options=storage_options,
+            )
+
+        logger.warn(
+            f"The report has been successfully "
+            f"generated and placed at: {unique_output_dir}."
+        )
+
+    def _fallback_build_model(self):
+        """
+        Fallback method for the sub model _build_model method.
+        """
+        logger.warn(
+            "The build_model method has failed for the model: {}. "
+            "A fallback model will be built.".format(self.spec.model)
+        )
+
+        date_column = self.spec.datetime_column.name
+        dataset = self.datasets
+
+        anomaly_output = AnomalyOutput(date_column=date_column)
+
+        # map the output as per the anomaly dataset class: 1 = outlier, 0 = inlier
+        self.outlier_map = {1: 0, -1: 1}
+
+        # Iterate over the full_data_dict items
+        for target, df in self.datasets.full_data_dict.items():
+            est = linear_model.SGDOneClassSVM(random_state=42)
+            est.fit(df[target].values.reshape(-1, 1))
+            y_pred = np.vectorize(self.outlier_map.get)(
+                est.predict(df[target].values.reshape(-1, 1))
+            )
+            scores = est.score_samples(df[target].values.reshape(-1, 1))
+
+            anomaly = pd.DataFrame(
+                {date_column: df[date_column], OutputColumns.ANOMALY_COL: y_pred}
+            ).reset_index(drop=True)
+            score = pd.DataFrame(
+                {date_column: df[date_column], OutputColumns.SCORE_COL: scores}
+            ).reset_index(drop=True)
+            anomaly_output.add_output(target, anomaly, score)
+
+        return anomaly_output
+
+    @abstractmethod
+    def _generate_report(self):
+        """
+        Generates the report for the particular model.
+        The method that needs to be implemented on the particular model level.
+        """
+
+    @abstractmethod
+    def _build_model(self) -> pd.DataFrame:
+        """
+        Build the model.
+        The method that needs to be implemented on the particular model level.
+        """
ads/opctl/operator/lowcode/anomaly/model/factory.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from ..const import SupportedModels
+from ..operator_config import AnomalyOperatorConfig
+from .automlx import AutoMLXOperatorModel
+from .autots import AutoTSOperatorModel
+from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
+
+# from .tods import TODSOperatorModel
+from .base_model import AnomalyOperatorBaseModel
+from .anomaly_dataset import AnomalyDatasets
+
+
+class UnSupportedModelError(Exception):
+    def __init__(self, model_type: str):
+        super().__init__(
+            f"Model: `{model_type}` "
+            f"is not supported. Supported models: {SupportedModels.values}"
+        )
+
+
+class AnomalyOperatorModelFactory:
+    """
+    The factory class that instantiates the proper model operator based on the model type.
+    """
+
+    _MAP = {
+        SupportedModels.AutoMLX: AutoMLXOperatorModel,
+        # SupportedModels.TODS: TODSOperatorModel,
+        SupportedModels.AutoTS: AutoTSOperatorModel,
+    }
+
+    @classmethod
+    def get_model(
+        cls, operator_config: AnomalyOperatorConfig, datasets: AnomalyDatasets
+    ) -> AnomalyOperatorBaseModel:
+        """
+        Gets the operator model based on the model type.
+
+        Parameters
+        ----------
+        operator_config: AnomalyOperatorConfig
+            The anomaly detection operator config.
+
+        datasets: AnomalyDatasets
+            Datasets for anomaly detection.
+
+        Returns
+        -------
+        AnomalyOperatorBaseModel
+            The anomaly detection operator model.
+
+        Raises
+        ------
+        UnSupportedModelError
+            In case of an unsupported model.
+        """
+        model_type = operator_config.spec.model
+        if model_type == "auto":
+            model_type = select_auto_model(datasets, operator_config)
+        if model_type not in cls._MAP:
+            raise UnSupportedModelError(model_type)
+        return cls._MAP[model_type](config=operator_config, datasets=datasets)
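
The factory resolves `spec.model` to a concrete operator class, first mapping the special `auto` value through `select_auto_model` and raising `UnSupportedModelError` for anything outside `_MAP`. A hedged sketch of how a caller might drive it, assuming `oracle-ads` 2.11.0 is installed; the `anomaly.yaml` file name and the `AnomalyDatasets(config.spec)` constructor signature are assumptions, not confirmed by this diff:

```python
from ads.opctl.operator.lowcode.anomaly.operator_config import AnomalyOperatorConfig
from ads.opctl.operator.lowcode.anomaly.model.anomaly_dataset import AnomalyDatasets
from ads.opctl.operator.lowcode.anomaly.model.factory import AnomalyOperatorModelFactory

# Load an operator spec from disk; from_yaml comes from the ADS serializer base
# class (the uri keyword is an assumption based on that API).
config = AnomalyOperatorConfig.from_yaml(uri="anomaly.yaml")

# Assumed constructor: loads input_data per the spec.
datasets = AnomalyDatasets(config.spec)

# "auto" is resolved via select_auto_model before the class lookup; an unknown
# model name raises UnSupportedModelError.
model = AnomalyOperatorModelFactory.get_model(config, datasets)
model.generate_report()  # builds the model (with the SVM fallback) and writes the report
```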
ads/opctl/operator/lowcode/anomaly/model/tods.py
@@ -0,0 +1,119 @@
+# #!/usr/bin/env python
+# # -*- coding: utf-8 -*--
+
+# # Copyright (c) 2023 Oracle and/or its affiliates.
+# # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+# import importlib
+
+# import numpy as np
+# import pandas as pd
+
+# from ads.common.decorator.runtime_dependency import runtime_dependency
+# from .anomaly_dataset import AnomalyOutput
+
+# from ..const import (
+#     TODS_IMPORT_MODEL_MAP,
+#     TODS_MODEL_MAP,
+#     OutputColumns,
+#     TODS_DEFAULT_MODEL,
+# )
+# from .base_model import AnomalyOperatorBaseModel
+
+
+# class TODSOperatorModel(AnomalyOperatorBaseModel):
+#     """Class representing TODS Anomaly Detection operator model."""
+
+#     @runtime_dependency(
+#         module="tods",
+#         err_msg=(
+#             "Please run `pip3 install tods` to "
+#             "install the required dependencies for TODS."
+#         ),
+#     )
+#     def _build_model(self) -> pd.DataFrame:
+#         """
+#         Build the TODS model.
+
+#         Returns
+#         -------
+#         Tuple: model, predictions_train, and prediction_score_test
+#         """
+#         # Import the TODS module
+#         tods_module = importlib.import_module(
+#             name=TODS_IMPORT_MODEL_MAP.get(
+#                 self.spec.model_kwargs.get("sub_model", TODS_DEFAULT_MODEL)
+#             ),
+#             package="tods.sk_interface.detection_algorithm",
+#         )
+
+#         # Get the model kwargs
+#         model_kwargs = self.spec.model_kwargs
+#         sub_model = self.spec.model_kwargs.get("sub_model", TODS_DEFAULT_MODEL)
+#         model_kwargs.pop("sub_model", None)
+
+#         # Initialize variables
+#         models = {}
+#         predictions_train = {}
+#         prediction_score_train = {}
+#         predictions_test = {}
+#         prediction_score_test = {}
+#         date_column = self.spec.datetime_column.name
+#         anomaly_output = AnomalyOutput(date_column=date_column)
+
+#         # Iterate over the full_data_dict items
+#         for target, df in self.datasets.full_data_dict.items():
+#             # Instantiate the model
+#             model = getattr(tods_module, TODS_MODEL_MAP.get(sub_model))(**model_kwargs)
+
+#             # Fit the model
+#             model.fit(np.array(df[self.spec.target_column]).reshape(-1, 1))
+
+#             # Make predictions
+#             predictions_train[target] = model.predict(
+#                 np.array(df[self.spec.target_column]).reshape(-1, 1)
+#             )
+#             prediction_score_train[target] = model.predict_score(
+#                 np.array(df[self.spec.target_column]).reshape(-1, 1)
+#             )
+
+#             # Store the model and predictions in dictionaries
+#             models[target] = model
+
+#             anomaly = pd.DataFrame(
+#                 {
+#                     date_column: df[date_column],
+#                     OutputColumns.ANOMALY_COL: predictions_train[target],
+#                 }
+#             )
+#             score = pd.DataFrame(
+#                 {
+#                     date_column: df[date_column],
+#                     OutputColumns.SCORE_COL: prediction_score_train[target],
+#                 }
+#             )
+#             anomaly_output.add_output(target, anomaly, score)
+
+#         return anomaly_output
+
+#     def _generate_report(self):
+#         import datapane as dp
+
+#         """The method that needs to be implemented on the particular model level."""
+#         selected_models_text = dp.Text(
+#             f"## Selected Models Overview \n "
+#             "The following tables provide information regarding the chosen model."
+#         )
+#         all_sections = [selected_models_text]
+
+#         model_description = dp.Text(
+#             "The tods model is a full-stack automated machine learning system for outlier detection "
+#             "on univariate / multivariate time-series data. It provides exhaustive modules for building "
+#             "machine learning-based outlier detection systems and wide range of algorithms."
+#         )
+#         other_sections = all_sections
+
+#         return (
+#             model_description,
+#             other_sections,
+#         )
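
Although the TODS integration ships commented out, it illustrates the `runtime_dependency` pattern ADS uses to defer an optional import until call time. A small sketch of that decorator on its own, using the same `module` and `err_msg` arguments shown above; `summarize` is an illustrative function, not part of the package:

```python
import pandas as pd

from ads.common.decorator.runtime_dependency import runtime_dependency


@runtime_dependency(
    module="tods",
    err_msg=(
        "Please run `pip3 install tods` to "
        "install the required dependencies for TODS."
    ),
)
def summarize(df: pd.DataFrame) -> pd.DataFrame:
    # The body only runs if `tods` imports successfully when summarize() is
    # called; otherwise the decorator raises with err_msg.
    return df.describe()
```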
ads/opctl/operator/lowcode/anomaly/operator_config.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+from dataclasses import dataclass, field
+from typing import Dict, List
+
+from ads.common.serializer import DataClassSerializable
+from ads.opctl.operator.common.utils import _load_yaml_from_uri
+from ads.opctl.operator.common.operator_config import (
+    OperatorConfig,
+    OutputDirectory,
+    InputData,
+)
+from .const import SupportedModels
+
+
+@dataclass(repr=True)
+class ValidationData(InputData):
+    """Class representing operator specification validation data details."""
+
+
+@dataclass(repr=True)
+class DateTimeColumn(DataClassSerializable):
+    """Class representing operator specification date time column details."""
+
+    name: str = None
+    format: str = None
+
+
+@dataclass(repr=True)
+class TestData(InputData):
+    """Class representing operator specification test data details."""
+
+
+@dataclass(repr=True)
+class AnomalyOperatorSpec(DataClassSerializable):
+    """Class representing operator specification."""
+
+    input_data: InputData = field(default_factory=InputData)
+    datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
+    test_data: TestData = field(default_factory=TestData)
+    validation_data: ValidationData = field(default_factory=ValidationData)
+    output_directory: OutputDirectory = field(default_factory=OutputDirectory)
+    report_file_name: str = None
+    report_title: str = None
+    report_theme: str = None
+    metrics_filename: str = None
+    test_metrics_filename: str = None
+    inliers_filename: str = None
+    outliers_filename: str = None
+    target_column: str = None
+    target_category_columns: List[str] = field(default_factory=list)
+    preprocessing: bool = None
+    generate_report: bool = None
+    generate_metrics: bool = None
+    generate_inliers: bool = None
+    model: str = None
+    model_kwargs: Dict = field(default_factory=dict)
+    contamination: float = None
+
+    def __post_init__(self):
+        """Adjusts the specification details."""
+        self.report_file_name = self.report_file_name or "report.html"
+        self.report_theme = self.report_theme or "light"
+        self.inliers_filename = self.inliers_filename or "inliers.csv"
+        self.outliers_filename = self.outliers_filename or "outliers.csv"
+        self.test_metrics_filename = self.test_metrics_filename or "metrics.csv"
+        self.model = self.model or SupportedModels.Auto
+        self.generate_inliers = (
+            self.generate_inliers if self.generate_inliers is not None else False
+        )
+        self.model_kwargs = self.model_kwargs or dict()
+
+
+@dataclass(repr=True)
+class AnomalyOperatorConfig(OperatorConfig):
+    """Class representing operator config.
+
+    Attributes
+    ----------
+    kind: str
+        The kind of the resource. For operators it is always - `operator`.
+    type: str
+        The type of the operator.
+    version: str
+        The version of the operator.
+    spec: AnomalyOperatorSpec
+        The operator specification.
+    """
+
+    kind: str = "operator"
+    type: str = "anomaly"
+    version: str = "v1"
+    spec: AnomalyOperatorSpec = field(default_factory=AnomalyOperatorSpec)
+
+    @classmethod
+    def _load_schema(cls) -> str:
+        """Loads operator schema."""
+        return _load_yaml_from_uri(
+            os.path.join(os.path.dirname(os.path.abspath(__file__)), "schema.yaml")
+        )
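
`AnomalyOperatorSpec.__post_init__` backfills the unset report, inlier/outlier, and model fields, so a config built with only a few fields still serializes completely. A sketch that constructs a config programmatically and prints the resulting YAML; the column names and model choice are illustrative:

```python
from ads.opctl.operator.lowcode.anomaly.const import SupportedModels
from ads.opctl.operator.lowcode.anomaly.operator_config import (
    AnomalyOperatorConfig,
    AnomalyOperatorSpec,
    DateTimeColumn,
)

spec = AnomalyOperatorSpec(
    datetime_column=DateTimeColumn(name="ds", format="%Y-%m-%d"),  # illustrative
    target_column="y",  # illustrative
    model=SupportedModels.AutoTS,
)
# __post_init__ has already filled report_file_name="report.html",
# report_theme="light", and the inliers/outliers/metrics file names.
config = AnomalyOperatorConfig(spec=spec)
print(config.to_yaml())  # to_yaml comes from the DataClassSerializable base
```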