oracle-ads 2.11.17__py3-none-any.whl → 2.11.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/common/utils.py +20 -3
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/config.py +28 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +282 -0
- ads/aqua/config/evaluation/evaluation_service_model_config.py +8 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +1 -1
- ads/aqua/evaluation/entities.py +1 -0
- ads/aqua/evaluation/evaluation.py +56 -88
- ads/aqua/extension/common_handler.py +2 -3
- ads/aqua/extension/common_ws_msg_handler.py +2 -2
- ads/aqua/extension/evaluation_handler.py +4 -3
- ads/aqua/extension/model_handler.py +26 -1
- ads/aqua/extension/utils.py +12 -1
- ads/aqua/modeldeployment/deployment.py +31 -51
- ads/aqua/ui.py +27 -25
- ads/common/auth.py +4 -4
- ads/jobs/builders/infrastructure/dsc_job.py +11 -5
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +12 -25
- ads/jobs/builders/runtimes/artifact.py +0 -5
- ads/jobs/builders/runtimes/container_runtime.py +26 -3
- ads/opctl/conda/cmds.py +100 -42
- ads/opctl/conda/pack.py +3 -2
- ads/opctl/operator/lowcode/anomaly/const.py +1 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +58 -37
- ads/opctl/operator/lowcode/anomaly/model/factory.py +2 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +116 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +1 -0
- ads/opctl/operator/lowcode/forecast/const.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/arima.py +6 -2
- ads/opctl/operator/lowcode/forecast/model/automlx.py +6 -1
- ads/opctl/operator/lowcode/forecast/model/autots.py +3 -1
- ads/opctl/operator/lowcode/forecast/model/factory.py +1 -1
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +24 -15
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +6 -1
- ads/opctl/operator/lowcode/forecast/model/prophet.py +3 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +1 -1
- {oracle_ads-2.11.17.dist-info → oracle_ads-2.11.19.dist-info}/METADATA +5 -1
- {oracle_ads-2.11.17.dist-info → oracle_ads-2.11.19.dist-info}/RECORD +44 -37
- {oracle_ads-2.11.17.dist-info → oracle_ads-2.11.19.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.17.dist-info → oracle_ads-2.11.19.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.17.dist-info → oracle_ads-2.11.19.dist-info}/entry_points.txt +0 -0
ads/opctl/conda/cmds.py
CHANGED
@@ -80,7 +80,7 @@ def _check_job_image_exists(gpu: bool) -> None:
 def _get_name(name: str, env_file: str) -> str:
     if not name and env_file:
         with open(env_file) as f:
-            name = yaml.safe_load(f.read()).get("name", None)
+            name = yaml.safe_load(f.read()).get("manifest").get("name", None)
     if not name:
         raise ValueError(
             "Either specify environment name in environment yaml or with `--name`."
@@ -146,7 +146,14 @@ def _create(
     if not os.path.exists(env_file):
         raise FileNotFoundError(f"Environment file {env_file} is not found.")

-
+    conda_dep = None
+    with open(env_file) as mfile:
+        conda_dep = yaml.safe_load(mfile.read())
+    # If manifest exists in the environment.yaml file, use that
+    manifest = conda_dep.get("manifest", {})
+    slug = manifest.get(
+        "slug", f"{name}_v{version}".replace(" ", "").replace(".", "_").lower()
+    )
     pack_folder_path = os.path.join(
         os.path.abspath(os.path.expanduser(conda_pack_folder)), slug
     )
@@ -171,27 +178,46 @@ def _create(

     os.makedirs(pack_folder_path, exist_ok=True)

-
-
-
-
-
-
-
-
+    logger.info(
+        f"Preparing manifest. Manifest in the environment: {conda_dep.get('manifest')}"
+    )
+    manifest_template = _fetch_manifest_template()
+    if "name" not in manifest:
+        manifest_template["manifest"]["name"] = name
+    manifest_template["manifest"]["slug"] = slug
+    if "type" not in manifest:
+        logger.info("Setting manifest to published")
+        manifest_template["manifest"]["type"] = "published"
+    if "version" not in manifest:
+        manifest_template["manifest"]["version"] = version
+    manifest_template["manifest"]["arch_type"] = "GPU" if gpu else "CPU"
+
+    manifest_template["manifest"]["create_date"] = datetime.utcnow().strftime(
         "%a, %b %d, %Y, %H:%M:%S %Z UTC"
     )
-
+
+    if not "manifest_version" in manifest:
+        manifest_template["manifest"]["manifest_version"] = "1.0"

     logger.info(f"Creating conda environment {slug}")
-
-
-
-
+    manifest_dict = {
+        k: manifest_template["manifest"][k]
+        for k in manifest_template["manifest"]
+        if manifest_template["manifest"][k]
+    }
+    if "manifest" in conda_dep:
+        conda_dep["manifest"].update(manifest_dict)
+    else:
+        conda_dep["manifest"] = manifest_dict
+    logger.info(f"Updated conda environment manifest: {conda_dep.get('manifest')}")

     if is_in_notebook_session() or NO_CONTAINER:
         command = f"conda env create --prefix {pack_folder_path} --file {os.path.abspath(os.path.expanduser(env_file))}"
-        run_command(command, shell=True)
+        proc = run_command(command, shell=True)
+        if proc.returncode != 0:
+            raise RuntimeError(
+                f"Failed to create conda environment. (exit code {proc.returncode})"
+            )
     else:
         _check_job_image_exists(gpu)
         docker_pack_folder_path = os.path.join(DEFAULT_IMAGE_HOME_DIR, slug)
@@ -200,13 +226,12 @@ def _create(
         )

         create_command = f"conda env create --prefix {docker_pack_folder_path} --file {docker_env_file_path}"
-
+
         volumes = {
             pack_folder_path: {"bind": docker_pack_folder_path},
             os.path.abspath(os.path.expanduser(env_file)): {
                 "bind": docker_env_file_path
             },
-
         }

         if gpu:
@@ -217,26 +242,42 @@ def _create(
         if prepare_publish:
             tmp_file = tempfile.NamedTemporaryFile(suffix=".yaml")
             # Save the manifest in the temp file that can be mounted inside the container so that archiving will work
-            with open(tmp_file.name,
-                yaml.safe_dump(conda_dep, f)
+            with open(tmp_file.name, "w") as f:
+                yaml.safe_dump(conda_dep, f)

-            pack_script = os.path.join(
+            pack_script = os.path.join(
+                os.path.dirname(os.path.abspath(__file__)), "pack.py"
+            )
             pack_command = f"python {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'pack.py')} --conda-path {docker_pack_folder_path} --manifest-location {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'manifest.yaml')}"

             # add pack script and manifest file to the mount so that archive can be created in the same container run
             condapack_script = {
-                pack_script: {
-
+                pack_script: {
+                    "bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "pack.py")
+                },
+                tmp_file.name: {
+                    "bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "manifest.yaml")
+                },
             }
-            volumes = {
+            volumes = {
+                **volumes,
+                **condapack_script,
+            }  # | not supported in python 3.8

             run_container(
-                image=image,
+                image=image,
+                bind_volumes=volumes,
+                entrypoint="/bin/bash -c ",
+                env_vars={},
+                command=f" '{create_command} && {pack_command}'",
             )
         else:
             run_container(
-                image=image,
-
+                image=image,
+                bind_volumes=volumes,
+                env_vars={},
+                command=create_command,
+            )
     except Exception:
         if os.path.exists(pack_folder_path):
             shutil.rmtree(pack_folder_path)
@@ -507,9 +548,11 @@ def publish(**kwargs) -> None:
             conda_pack_folder=exec_config["conda_pack_folder"],
             gpu=exec_config.get("gpu", False),
             overwrite=exec_config["overwrite"],
-            prepare_publish=True
+            prepare_publish=True,
+        )
+        skip_archive = (
+            True  # The conda pack archive is already created during create process.
         )
-        skip_archive = True  # The conda pack archive is already created during create process.
     else:
         slug = exec_config.get("slug")
         if not slug:
@@ -526,10 +569,10 @@ def publish(**kwargs) -> None:
         oci_profile=exec_config.get("oci_profile"),
         overwrite=exec_config["overwrite"],
         auth_type=exec_config["auth"],
-        skip_archive=skip_archive
+        skip_archive=skip_archive,
     )

-
+
 def _publish(
     conda_slug: str,
     conda_uri_prefix: str,
@@ -538,7 +581,7 @@ def _publish(
     oci_profile: str,
     overwrite: bool,
     auth_type: str,
-    skip_archive: bool = False
+    skip_archive: bool = False,
 ) -> None:
     """Publish a local conda pack to object storage location

@@ -616,8 +659,16 @@ def _publish(
     pack_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pack.py")
     if not skip_archive:
         if is_in_notebook_session() or NO_CONTAINER:
+            # Set the CONDA_PUBLISH_TYPE environment variable so that the `type` attribute inside the manifest is not changed
+            publish_type = os.environ.get("CONDA_PUBLISH_TYPE")
             command = f"python {pack_script} --conda-path {pack_folder_path}"
-
+            if publish_type:
+                command = f"CONDA_PUBLISH_TYPE={publish_type} {command}"
+            proc = run_command(command, shell=True)
+            if proc.returncode != 0:
+                raise RuntimeError(
+                    f"Failed to archive the conda environment. (exit code {proc.returncode})"
+                )
         else:
             volumes = {
                 pack_folder_path: {
@@ -641,7 +692,9 @@ def _publish(
     NOT_ALLOWED_CHARS = "@#$%^&*/"

     if any(chr in conda_slug for chr in NOT_ALLOWED_CHARS):
-        raise ValueError(
+        raise ValueError(
+            f"Invalid conda_slug. Found {NOT_ALLOWED_CHARS} in slug name. Please use a different slug name."
+        )
     pack_file = os.path.join(pack_folder_path, f"{conda_slug}.tar.gz")
     if not os.path.exists(pack_file):
         raise RuntimeError(f"Pack {pack_file} was not created.")
@@ -664,14 +717,19 @@ def _publish(
         str(manifest["version"]),
         publish_slug,
     )
-
-
-    manifest
-
-
-
-
-
+    if os.environ.get("CONDA_PUBLISH_TYPE") != "service":
+        # Set these values only for published conda pack
+        manifest["pack_path"] = os.path.join(
+            prefix,
+            manifest.get("arch_type", "CPU").lower(),
+            manifest["name"],
+            str(manifest["version"]),
+            publish_slug,
+        )
+        manifest["pack_uri"] = pack_uri
+    else:
+        manifest["type"] = "published"
+

     with open(manifest_location, "w") as f:
         yaml.safe_dump(env, f)
     if pack_size > 100:
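
Note: with these changes, `ads opctl conda create` reads an optional `manifest` block from the environment YAML: `_get_name` now looks the name up under `manifest`, and `_create` prefers a `slug` from the same block over the generated `<name>_v<version>` value. A minimal sketch of an environment file the new code paths would accept, using only the `manifest` keys the diff above actually reads (the dependencies are illustrative):

    import yaml

    env_yaml = """
    manifest:
      name: mycondapack        # read by the updated _get_name()
      slug: mycondapack_v1_0   # overrides the generated <name>_v<version> slug
      type: published          # preserved when already present
    dependencies:
      - python=3.8
      - pip
    """
    conda_dep = yaml.safe_load(env_yaml)
    name = conda_dep.get("manifest").get("name", None)  # new _get_name() lookup
    manifest = conda_dep.get("manifest", {})
    slug = manifest.get(
        "slug", f"{name}_v1.0".replace(" ", "").replace(".", "_").lower()
    )

One consequence of the new lookup: for an environment file without a `manifest` block, `conda_dep.get("manifest")` returns None, so `_get_name` would raise an AttributeError rather than fall back to `--name`.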
ads/opctl/conda/pack.py
CHANGED
@@ -20,7 +20,6 @@ import argparse


 def main(pack_folder_path, manifest_file=None):
-    slug = os.path.basename(pack_folder_path)
     manifest_path = (
         manifest_file or glob.glob(os.path.join(pack_folder_path, "*_manifest.yaml"))[0]
     )
@@ -47,7 +46,9 @@ def main(pack_folder_path, manifest_file=None):
         raise e

     manifest = env["manifest"]
-
+    slug = manifest.get("slug", os.path.basename(pack_folder_path))
+    if os.environ.get("CONDA_PUBLISH_TYPE") != "service":
+        manifest["type"] = "published"
     new_env_info["manifest"] = manifest
     with open(manifest_path, "w") as f:
         yaml.safe_dump(new_env_info, f)
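
Note: pack.py and the publish path in cmds.py now share the same gate: unless the CONDA_PUBLISH_TYPE environment variable is set to "service", the manifest `type` of the archived pack is forced to "published". A self-contained sketch of that gate (the manifest values are illustrative):

    import os

    manifest = {"name": "mycondapack", "type": "datascience"}
    if os.environ.get("CONDA_PUBLISH_TYPE") != "service":
        # Default path: publishing flips the pack type to "published".
        manifest["type"] = "published"
    print(manifest["type"])  # "published" unless CONDA_PUBLISH_TYPE=service is exported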
ads/opctl/operator/lowcode/anomaly/model/base_model.py
CHANGED
@@ -16,7 +16,11 @@ from sklearn import linear_model

 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.opctl import logger
-from ads.opctl.operator.lowcode.anomaly.const import
+from ads.opctl.operator.lowcode.anomaly.const import (
+    SUBSAMPLE_THRESHOLD,
+    OutputColumns,
+    SupportedMetrics,
+)
 from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
 from ads.opctl.operator.lowcode.common.utils import (
     disable_print,
@@ -55,6 +59,7 @@ class AnomalyOperatorBaseModel(ABC):
     def generate_report(self):
         """Generates the report."""
         import matplotlib.pyplot as plt
+        plt.rcParams.update({'figure.max_open_warning': 0})
         import report_creator as rc

         start_time = time.time()
@@ -87,43 +92,59 @@ class AnomalyOperatorBaseModel(ABC):
             self.spec.datetime_column.name if self.spec.datetime_column else "index"
         )

+        (
+            model_description,
+            other_sections,
+        ) = self._generate_report()
+
         blocks = []
         for target, df in self.datasets.full_data_dict.items():
-
-
-
-
-
-
-
-
-
-
-
-            if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            for
-
-
-
-
-
-
-
+            if target in anomaly_output.list_categories():
+                figure_blocks = []
+                time_col = df[date_column].reset_index(drop=True)
+                anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
+                    OutputColumns.ANOMALY_COL
+                ]
+                anomaly_indices = [
+                    i for i, index in enumerate(anomaly_col) if index == 1
+                ]
+                downsampled_time_col = time_col
+                selected_indices = list(range(len(time_col)))
+                if self.spec.subsample_report_data:
+                    non_anomaly_indices = [
+                        i for i in range(len(time_col)) if i not in anomaly_indices
+                    ]
+                    # Downsample non-anomalous data if it exceeds the threshold (1000)
+                    if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
+                        downsampled_non_anomaly_indices = non_anomaly_indices[
+                            :: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD
+                        ]
+                        selected_indices = (
+                            anomaly_indices + downsampled_non_anomaly_indices
+                        )
+                        selected_indices.sort()
+                    downsampled_time_col = time_col[selected_indices]
+
+                columns = set(df.columns).difference({date_column})
+                for col in columns:
+                    y = df[col].reset_index(drop=True)
+
+                    downsampled_y = y[selected_indices]
+
+                    fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
+                    ax.grid()
+                    ax.plot(downsampled_time_col, downsampled_y, color="black")
+                    # Plot anomalies
+                    for i in anomaly_indices:
+                        ax.scatter(time_col[i], y[i], color="red", marker="o")
+                    plt.xlabel(date_column)
+                    plt.ylabel(col)
+                    plt.title(f"`{col}` with reference to anomalies")
+                    figure_blocks.append(rc.Widget(ax))
+            else:
+                figure_blocks = None
+
+            blocks.append(rc.Group(*figure_blocks, label=target)) if figure_blocks else None
         plots = rc.Select(blocks)

         report_sections = []
@@ -133,7 +154,7 @@ class AnomalyOperatorBaseModel(ABC):
         yaml_appendix = rc.Yaml(self.config.to_dict())
         summary = rc.Block(
             rc.Group(
-                rc.Text(f"You selected the **`{self.spec.model}`** model."),
+                rc.Text(f"You selected the **`{self.spec.model}`** model.\n{model_description.text}\n"),
                 rc.Text(
                     "Based on your dataset, you could have also selected "
                     f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`."
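
Note: the rewritten report loop keeps every anomalous point and stride-samples the non-anomalous ones once their count exceeds SUBSAMPLE_THRESHOLD, so long series no longer overload the plots. A self-contained sketch of the same index arithmetic, assuming the threshold of 1000 mentioned in the diff's comment:

    SUBSAMPLE_THRESHOLD = 1000

    anomaly_indices = [5, 42, 4007]
    non_anomaly_indices = [i for i in range(5000) if i not in anomaly_indices]
    selected = list(range(5000))
    if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
        # Keep every (len // threshold)-th background point, plus all anomalies.
        downsampled = non_anomaly_indices[:: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD]
        selected = sorted(anomaly_indices + downsampled)
    print(len(selected))  # 1253: a strided background sample plus all 3 anomalies

The stride `len // SUBSAMPLE_THRESHOLD` bounds the background sample between the threshold and twice the threshold, which is cheap and keeps the plotted series shape intact.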
ads/opctl/operator/lowcode/anomaly/model/factory.py
CHANGED
@@ -15,6 +15,7 @@ from .autots import AutoTSOperatorModel
 from .base_model import AnomalyOperatorBaseModel
 from .isolationforest import IsolationForestOperatorModel
 from .oneclasssvm import OneClassSVMOperatorModel
+from .randomcutforest import RandomCutForestOperatorModel


 class UnSupportedModelError(Exception):
@@ -52,6 +53,7 @@ class AnomalyOperatorModelFactory:
     _NonTime_MAP = {
         NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel,
         NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel,
+        NonTimeADSupportedModels.RandomCutForest: RandomCutForestOperatorModel,
         # TODO: Add DBScan model for non time based anomaly
         # NonTimeADSupportedModels.DBScan: DBScanOperatorModel,
     }
ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py
ADDED
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import numpy as np
+import pandas as pd
+
+from ads.common.decorator.runtime_dependency import runtime_dependency
+from ads.opctl import logger
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+from .anomaly_dataset import AnomalyOutput
+from .base_model import AnomalyOperatorBaseModel
+
+
+class RandomCutForestOperatorModel(AnomalyOperatorBaseModel):
+    """
+    Class representing Random Cut Forest Anomaly Detection operator model.
+    """
+
+    @runtime_dependency(
+        module="rrcf",
+        err_msg=(
+            "Please run `pip install rrcf` to "
+            "install the required dependencies for RandomCutForest."
+        ),
+    )
+    def _build_model(self) -> AnomalyOutput:
+        from rrcf import RCTree
+
+        model_kwargs = self.spec.model_kwargs
+
+        anomaly_output = AnomalyOutput(date_column="index")
+
+        # Set tree parameters
+        num_trees = model_kwargs.get("num_trees", 200)
+        shingle_size = model_kwargs.get("shingle_size", None)
+        anomaly_threshold = model_kwargs.get("anamoly_threshold", 95)
+
+        for target, df in self.datasets.full_data_dict.items():
+            try:
+                if df.shape[0] == 1:
+                    raise ValueError("Dataset size must be greater than 1")
+                df_values = df[self.spec.target_column].astype(float).values
+
+                cal_shingle_size = (
+                    shingle_size
+                    if shingle_size
+                    else int(2 ** np.floor(np.log2(df.shape[0])) / 2)
+                )
+                points = np.vstack(list(rrcf.shingle(df_values, size=cal_shingle_size)))
+
+                sample_size_range = (1, points.shape[0])
+                n = points.shape[0]
+                avg_codisp = pd.Series(0.0, index=np.arange(n))
+                index = np.zeros(n)
+
+                forest = []
+                while len(forest) < num_trees:
+                    ixs = np.random.choice(n, size=sample_size_range, replace=False)
+                    trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs]
+                    forest.extend(trees)
+
+                for tree in forest:
+                    codisp = pd.Series(
+                        {leaf: tree.codisp(leaf) for leaf in tree.leaves}
+                    )
+                    avg_codisp[codisp.index] += codisp
+                    np.add.at(index, codisp.index.values, 1)
+
+                avg_codisp /= index
+                avg_codisp.index = df.iloc[(cal_shingle_size - 1) :].index
+                avg_codisp = (avg_codisp - avg_codisp.min()) / (
+                    avg_codisp.max() - avg_codisp.min()
+                )
+
+                y_pred = (
+                    avg_codisp > np.percentile(avg_codisp, anomaly_threshold)
+                ).astype(int)
+
+                index_col = df.columns[0]
+
+                anomaly = pd.DataFrame(
+                    {index_col: y_pred.index, OutputColumns.ANOMALY_COL: y_pred}
+                ).reset_index(drop=True)
+                score = pd.DataFrame(
+                    {"index": avg_codisp.index, OutputColumns.SCORE_COL: avg_codisp}
+                ).reset_index(drop=True)
+
+                anomaly_output.add_output(target, anomaly, score)
+            except Exception as e:
+                logger.warn(f"Encountered Error: {e}. Skipping series {target}.")
+
+        return anomaly_output
+
+    def _generate_report(self):
+        """Generates the report."""
+        import report_creator as rc
+
+        other_sections = [
+            rc.Heading("Selected Models Overview", level=2),
+            rc.Text(
+                "The following tables provide information regarding the chosen model."
+            ),
+        ]
+
+        model_description = rc.Text(
+            "The Random Cut Forest (RCF) is an unsupervised machine learning algorithm that is used for anomaly detection."
+            " It works by building an ensemble of binary trees (random cut trees) and using them to compute anomaly scores for data points."
+        )
+
+        return (
+            model_description,
+            other_sections,
+        )
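
Note: the new operator model uses the rrcf package in batch mode: build many random cut trees over row subsamples of the shingled series, then average each point's collusive displacement (CoDisp) across the trees as its anomaly score. `num_trees`, `shingle_size`, and the percentile threshold all come from `model_kwargs` (the threshold key is spelled `anamoly_threshold` in the released code). A stripped-down sketch of the same scoring loop on synthetic data, with illustrative constants:

    import numpy as np
    import pandas as pd
    import rrcf  # pip install rrcf

    rng = np.random.default_rng(42)
    values = np.sin(np.linspace(0, 20, 400)) + rng.normal(0, 0.1, 400)
    values[180] += 4  # plant an obvious anomaly

    points = np.vstack(list(rrcf.shingle(values, size=4)))  # sliding windows of 4
    n = points.shape[0]
    num_trees, tree_size = 100, 128

    avg_codisp = pd.Series(0.0, index=np.arange(n))
    hits = np.zeros(n)
    for _ in range(num_trees):
        ix = rng.choice(n, size=min(tree_size, n), replace=False)
        tree = rrcf.RCTree(points[ix], index_labels=ix)
        # CoDisp of each leaf in this tree; higher means easier to isolate.
        codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves})
        avg_codisp[codisp.index] += codisp
        np.add.at(hits, codisp.index.values, 1)
    avg_codisp /= np.maximum(hits, 1)

    is_anomaly = avg_codisp > np.percentile(avg_codisp, 95)
    print(int(is_anomaly.sum()), "points flagged")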
ads/opctl/operator/lowcode/forecast/model/arima.py
CHANGED
@@ -126,8 +126,12 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):

             logger.debug("===========Done===========")
         except Exception as e:
-            self.errors_dict[s_id] = {
-
+            self.errors_dict[s_id] = {
+                "model_name": self.spec.model,
+                "error": str(e),
+                "error_trace": traceback.format_exc()}
+            logger.warn(f"Encountered Error: {e}. Skipping.")
+            logger.warn(traceback.format_exc())

     def _build_model(self) -> pd.DataFrame:
         full_data_dict = self.datasets.get_data_by_series()
ads/opctl/operator/lowcode/forecast/model/automlx.py
CHANGED
@@ -149,6 +149,9 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
             if f"{target}_ci_lower" not in summary_frame:
                 summary_frame[f"{target}_ci_lower"] = np.NAN

+            if summary_frame[target].isna().all():
+                raise ValueError("The forecasts are completely NaN")
+
             self.forecast_output.populate_series_output(
                 series_id=s_id,
                 fit_val=fitted_values,
@@ -167,8 +170,10 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
             self.errors_dict[s_id] = {
                 "model_name": self.spec.model,
                 "error": str(e),
+                "error_trace": traceback.format_exc()
             }
-            logger.
+            logger.warn(f"Encountered Error: {e}. Skipping.")
+            logger.warn(traceback.format_exc())

         logger.debug("===========Forecast Generated===========")

ads/opctl/operator/lowcode/forecast/model/autots.py
CHANGED
@@ -208,8 +208,10 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
             self.errors_dict[s_id] = {
                 "model_name": self.spec.model,
                 "error": str(e),
+                "error_trace": traceback.format_exc()
             }
-            logger.
+            logger.warn(f"Encountered Error: {e}. Skipping.")
+            logger.warn(traceback.format_exc())

         logger.debug("===========Done===========")

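
Note: arima.py, automlx.py, and autots.py now record the same per-series failure payload, including the traceback under `error_trace`, and skip the failing series instead of aborting the run. A sketch of the shared pattern, with an illustrative series id and a stand-in failure:

    import traceback

    from ads.opctl import logger

    errors_dict = {}
    try:
        raise RuntimeError("fit failed")  # stand-in for a per-series model failure
    except Exception as e:
        errors_dict["series_1"] = {
            "model_name": "arima",
            "error": str(e),
            "error_trace": traceback.format_exc(),
        }
        logger.warn(f"Encountered Error: {e}. Skipping.")
        logger.warn(traceback.format_exc())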
ads/opctl/operator/lowcode/forecast/model/factory.py
CHANGED
@@ -33,7 +33,7 @@ class ForecastOperatorModelFactory:
         SupportedModels.Prophet: ProphetOperatorModel,
         SupportedModels.Arima: ArimaOperatorModel,
         SupportedModels.NeuralProphet: NeuralProphetOperatorModel,
-        SupportedModels.
+        SupportedModels.LGBForecast: MLForecastOperatorModel,
         SupportedModels.AutoMLX: AutoMLXOperatorModel,
         SupportedModels.AutoTS: AutoTSOperatorModel
     }