oracle-ads 2.11.16__py3-none-any.whl → 2.11.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. ads/aqua/app.py +5 -6
  2. ads/aqua/common/enums.py +9 -0
  3. ads/aqua/common/utils.py +128 -1
  4. ads/aqua/constants.py +1 -0
  5. ads/aqua/evaluation/evaluation.py +1 -1
  6. ads/aqua/extension/common_handler.py +75 -5
  7. ads/aqua/extension/deployment_handler.py +2 -0
  8. ads/aqua/extension/model_handler.py +113 -12
  9. ads/aqua/model/entities.py +20 -2
  10. ads/aqua/model/model.py +417 -172
  11. ads/aqua/modeldeployment/deployment.py +69 -55
  12. ads/common/auth.py +4 -4
  13. ads/jobs/builders/infrastructure/dsc_job.py +23 -14
  14. ads/jobs/builders/infrastructure/dsc_job_runtime.py +12 -25
  15. ads/jobs/builders/runtimes/artifact.py +0 -5
  16. ads/jobs/builders/runtimes/container_runtime.py +26 -3
  17. ads/opctl/conda/cmds.py +100 -42
  18. ads/opctl/conda/pack.py +3 -2
  19. ads/opctl/operator/lowcode/anomaly/const.py +1 -0
  20. ads/opctl/operator/lowcode/anomaly/model/base_model.py +58 -37
  21. ads/opctl/operator/lowcode/anomaly/model/factory.py +2 -0
  22. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +116 -0
  23. ads/opctl/operator/lowcode/anomaly/schema.yaml +1 -0
  24. ads/opctl/operator/lowcode/forecast/const.py +1 -1
  25. ads/opctl/operator/lowcode/forecast/model/arima.py +9 -3
  26. ads/opctl/operator/lowcode/forecast/model/automlx.py +6 -1
  27. ads/opctl/operator/lowcode/forecast/model/autots.py +3 -1
  28. ads/opctl/operator/lowcode/forecast/model/factory.py +1 -1
  29. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +24 -15
  30. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +6 -1
  31. ads/opctl/operator/lowcode/forecast/model/prophet.py +3 -1
  32. ads/opctl/operator/lowcode/forecast/schema.yaml +1 -1
  33. {oracle_ads-2.11.16.dist-info → oracle_ads-2.11.18.dist-info}/METADATA +5 -1
  34. {oracle_ads-2.11.16.dist-info → oracle_ads-2.11.18.dist-info}/RECORD +37 -36
  35. {oracle_ads-2.11.16.dist-info → oracle_ads-2.11.18.dist-info}/LICENSE.txt +0 -0
  36. {oracle_ads-2.11.16.dist-info → oracle_ads-2.11.18.dist-info}/WHEEL +0 -0
  37. {oracle_ads-2.11.16.dist-info → oracle_ads-2.11.18.dist-info}/entry_points.txt +0 -0
ads/opctl/conda/cmds.py CHANGED
@@ -80,7 +80,7 @@ def _check_job_image_exists(gpu: bool) -> None:
 def _get_name(name: str, env_file: str) -> str:
     if not name and env_file:
         with open(env_file) as f:
-            name = yaml.safe_load(f.read()).get("name", None)
+            name = yaml.safe_load(f.read()).get("manifest").get("name", None)
     if not name:
         raise ValueError(
             "Either specify environment name in environment yaml or with `--name`."
@@ -146,7 +146,14 @@ def _create(
     if not os.path.exists(env_file):
         raise FileNotFoundError(f"Environment file {env_file} is not found.")
 
-    slug = f"{name}_v{version}".replace(" ", "").replace(".", "_").lower()
+    conda_dep = None
+    with open(env_file) as mfile:
+        conda_dep = yaml.safe_load(mfile.read())
+    # If manifest exists in the environment.yaml file, use that
+    manifest = conda_dep.get("manifest", {})
+    slug = manifest.get(
+        "slug", f"{name}_v{version}".replace(" ", "").replace(".", "_").lower()
+    )
     pack_folder_path = os.path.join(
         os.path.abspath(os.path.expanduser(conda_pack_folder)), slug
     )
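
With this change, a manifest section inside the environment YAML can supply the slug directly (and _get_name now reads the name from manifest.name). A minimal sketch of the new resolution order, using a hypothetical environment file:

    import yaml

    # Hypothetical environment.yaml contents, for illustration only.
    env_yaml = """
    channels:
      - defaults
    dependencies:
      - python=3.9
    manifest:
      name: mycondaenv
      slug: mycondaenv_v1_custom
    """

    conda_dep = yaml.safe_load(env_yaml)
    manifest = conda_dep.get("manifest", {})
    name, version = "mycondaenv", "1"
    # Prefer the slug declared in the manifest; otherwise fall back to <name>_v<version>.
    slug = manifest.get(
        "slug", f"{name}_v{version}".replace(" ", "").replace(".", "_").lower()
    )
    print(slug)  # -> mycondaenv_v1_custom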
@@ -171,27 +178,46 @@ def _create(
 
     os.makedirs(pack_folder_path, exist_ok=True)
 
-    manifest = _fetch_manifest_template()
-    manifest["manifest"]["name"] = name
-    manifest["manifest"]["slug"] = slug
-    manifest["manifest"]["type"] = "published"
-    manifest["manifest"]["version"] = version
-    manifest["manifest"]["arch_type"] = "GPU" if gpu else "CPU"
-
-    manifest["manifest"]["create_date"] = datetime.utcnow().strftime(
+    logger.info(
+        f"Preparing manifest. Manifest in the environment: {conda_dep.get('manifest')}"
+    )
+    manifest_template = _fetch_manifest_template()
+    if "name" not in manifest:
+        manifest_template["manifest"]["name"] = name
+    manifest_template["manifest"]["slug"] = slug
+    if "type" not in manifest:
+        logger.info("Setting manifest to published")
+        manifest_template["manifest"]["type"] = "published"
+    if "version" not in manifest:
+        manifest_template["manifest"]["version"] = version
+    manifest_template["manifest"]["arch_type"] = "GPU" if gpu else "CPU"
+
+    manifest_template["manifest"]["create_date"] = datetime.utcnow().strftime(
         "%a, %b %d, %Y, %H:%M:%S %Z UTC"
     )
-    manifest["manifest"]["manifest_version"] = "1.0"
+
+    if not "manifest_version" in manifest:
+        manifest_template["manifest"]["manifest_version"] = "1.0"
 
     logger.info(f"Creating conda environment {slug}")
-    conda_dep = None
-    with open(env_file) as mfile:
-        conda_dep = yaml.safe_load(mfile.read())
-    conda_dep["manifest"] = manifest["manifest"]
+    manifest_dict = {
+        k: manifest_template["manifest"][k]
+        for k in manifest_template["manifest"]
+        if manifest_template["manifest"][k]
+    }
+    if "manifest" in conda_dep:
+        conda_dep["manifest"].update(manifest_dict)
+    else:
+        conda_dep["manifest"] = manifest_dict
+    logger.info(f"Updated conda environment manifest: {conda_dep.get('manifest')}")
 
     if is_in_notebook_session() or NO_CONTAINER:
         command = f"conda env create --prefix {pack_folder_path} --file {os.path.abspath(os.path.expanduser(env_file))}"
-        run_command(command, shell=True)
+        proc = run_command(command, shell=True)
+        if proc.returncode != 0:
+            raise RuntimeError(
+                f"Failed to create conda environment. (exit code {proc.returncode})"
+            )
     else:
         _check_job_image_exists(gpu)
         docker_pack_folder_path = os.path.join(DEFAULT_IMAGE_HOME_DIR, slug)
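
The merge above builds manifest_dict from the template, dropping falsy entries, and overlays it onto any manifest already present in the environment file. A condensed sketch of those semantics (values are illustrative, not from the package):

    # User-supplied manifest from environment.yaml.
    manifest = {"name": "mycondaenv", "type": "datascience"}
    # Template after the guards above: "name" and "type" were left unset (None)
    # because the user already provided them; the remaining fields were filled in.
    manifest_template = {
        "manifest": {
            "name": None,
            "type": None,
            "slug": "mycondaenv_v1",
            "version": "1",
            "arch_type": "CPU",
        }
    }

    # Falsy entries are filtered out, so template fields the guards skipped
    # never clobber user-provided values on update().
    manifest_dict = {k: v for k, v in manifest_template["manifest"].items() if v}
    conda_dep = {"manifest": dict(manifest)}
    conda_dep["manifest"].update(manifest_dict)
    print(conda_dep["manifest"])
    # {'name': 'mycondaenv', 'type': 'datascience', 'slug': 'mycondaenv_v1',
    #  'version': '1', 'arch_type': 'CPU'}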
@@ -200,13 +226,12 @@ def _create(
         )
 
         create_command = f"conda env create --prefix {docker_pack_folder_path} --file {docker_env_file_path}"
-
+
         volumes = {
             pack_folder_path: {"bind": docker_pack_folder_path},
             os.path.abspath(os.path.expanduser(env_file)): {
                 "bind": docker_env_file_path
             },
-
         }
 
         if gpu:
@@ -217,26 +242,42 @@ def _create(
             if prepare_publish:
                 tmp_file = tempfile.NamedTemporaryFile(suffix=".yaml")
                 # Save the manifest in the temp file that can be mounted inside the container so that archiving will work
-                with open(tmp_file.name, 'w') as f:
-                    yaml.safe_dump(conda_dep, f)
+                with open(tmp_file.name, "w") as f:
+                    yaml.safe_dump(conda_dep, f)
 
-                pack_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pack.py")
+                pack_script = os.path.join(
+                    os.path.dirname(os.path.abspath(__file__)), "pack.py"
+                )
                 pack_command = f"python {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'pack.py')} --conda-path {docker_pack_folder_path} --manifest-location {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'manifest.yaml')}"
 
                 # add pack script and manifest file to the mount so that archive can be created in the same container run
                 condapack_script = {
-                    pack_script: {"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "pack.py")},
-                    tmp_file.name: {"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "manifest.yaml")}
+                    pack_script: {
+                        "bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "pack.py")
+                    },
+                    tmp_file.name: {
+                        "bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "manifest.yaml")
+                    },
                 }
-                volumes = {**volumes, **condapack_script}  # | not supported in python 3.8
+                volumes = {
+                    **volumes,
+                    **condapack_script,
+                }  # | not supported in python 3.8
 
                 run_container(
-                    image=image, bind_volumes=volumes, entrypoint="/bin/bash -c ", env_vars={}, command=f" '{create_command} && {pack_command}'"
+                    image=image,
+                    bind_volumes=volumes,
+                    entrypoint="/bin/bash -c ",
+                    env_vars={},
+                    command=f" '{create_command} && {pack_command}'",
                 )
             else:
                 run_container(
-                    image=image, bind_volumes=volumes, env_vars={}, command=create_command
-                )
+                    image=image,
+                    bind_volumes=volumes,
+                    env_vars={},
+                    command=create_command,
+                )
         except Exception:
             if os.path.exists(pack_folder_path):
                 shutil.rmtree(pack_folder_path)
@@ -507,9 +548,11 @@ def publish(**kwargs) -> None:
             conda_pack_folder=exec_config["conda_pack_folder"],
             gpu=exec_config.get("gpu", False),
             overwrite=exec_config["overwrite"],
-            prepare_publish=True
+            prepare_publish=True,
+        )
+        skip_archive = (
+            True  # The conda pack archive is already created during create process.
         )
-        skip_archive = True  # The conda pack archive is already created during create process.
     else:
         slug = exec_config.get("slug")
         if not slug:
@@ -526,10 +569,10 @@ def publish(**kwargs) -> None:
         oci_profile=exec_config.get("oci_profile"),
         overwrite=exec_config["overwrite"],
         auth_type=exec_config["auth"],
-        skip_archive=skip_archive
+        skip_archive=skip_archive,
     )
 
-
+
 def _publish(
     conda_slug: str,
     conda_uri_prefix: str,
@@ -538,7 +581,7 @@ def _publish(
     oci_profile: str,
     overwrite: bool,
     auth_type: str,
-    skip_archive: bool = False
+    skip_archive: bool = False,
 ) -> None:
     """Publish a local conda pack to object storage location
 
@@ -616,8 +659,16 @@ def _publish(
     pack_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pack.py")
     if not skip_archive:
         if is_in_notebook_session() or NO_CONTAINER:
+            # Set the CONDA_PUBLISH_TYPE environment variable so that the `type` attribute inside the manifest is not changed
+            publish_type = os.environ.get("CONDA_PUBLISH_TYPE")
             command = f"python {pack_script} --conda-path {pack_folder_path}"
-            run_command(command, shell=True)
+            if publish_type:
+                command = f"CONDA_PUBLISH_TYPE={publish_type} {command}"
+            proc = run_command(command, shell=True)
+            if proc.returncode != 0:
+                raise RuntimeError(
+                    f"Failed to archive the conda environment. (exit code {proc.returncode})"
+                )
         else:
             volumes = {
                 pack_folder_path: {
@@ -641,7 +692,9 @@ def _publish(
     NOT_ALLOWED_CHARS = "@#$%^&*/"
 
     if any(chr in conda_slug for chr in NOT_ALLOWED_CHARS):
-        raise ValueError(f"Invalid conda_slug. Found {NOT_ALLOWED_CHARS} in slug name. Please use a different slug name.")
+        raise ValueError(
+            f"Invalid conda_slug. Found {NOT_ALLOWED_CHARS} in slug name. Please use a different slug name."
+        )
     pack_file = os.path.join(pack_folder_path, f"{conda_slug}.tar.gz")
     if not os.path.exists(pack_file):
         raise RuntimeError(f"Pack {pack_file} was not created.")
@@ -664,14 +717,19 @@ def _publish(
         str(manifest["version"]),
         publish_slug,
     )
-    manifest["pack_path"] = os.path.join(
-        prefix,
-        manifest.get("arch_type", "CPU").lower(),
-        manifest["name"],
-        str(manifest["version"]),
-        publish_slug,
-    )
-    manifest["pack_uri"] = pack_uri
+    if os.environ.get("CONDA_PUBLISH_TYPE") != "service":
+        # Set these values only for published conda pack
+        manifest["pack_path"] = os.path.join(
+            prefix,
+            manifest.get("arch_type", "CPU").lower(),
+            manifest["name"],
+            str(manifest["version"]),
+            publish_slug,
+        )
+        manifest["pack_uri"] = pack_uri
+    else:
+        manifest["type"] = "published"
+
     with open(manifest_location, "w") as f:
         yaml.safe_dump(env, f)
     if pack_size > 100:
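
Taken together, the CONDA_PUBLISH_TYPE checks above mean a publish run treats service packs differently: pack_path and pack_uri are written only for ordinary published packs, while CONDA_PUBLISH_TYPE=service leaves the location fields untouched. A condensed restatement of that branch as a standalone sketch (function name and values are illustrative):

    import os

    def finalize_manifest(manifest, prefix, publish_slug, pack_uri):
        # Mirrors the branch added in _publish above (simplified).
        if os.environ.get("CONDA_PUBLISH_TYPE") != "service":
            # Regular published packs record where the archive lives.
            manifest["pack_path"] = os.path.join(
                prefix,
                manifest.get("arch_type", "CPU").lower(),
                manifest["name"],
                str(manifest["version"]),
                publish_slug,
            )
            manifest["pack_uri"] = pack_uri
        else:
            manifest["type"] = "published"
        return manifest

    os.environ["CONDA_PUBLISH_TYPE"] = "service"
    print(finalize_manifest(
        {"name": "myenv", "version": "1"},
        "oci://bucket@namespace/conda",
        "myenv_v1",
        "oci://bucket@namespace/conda/cpu/myenv/1/myenv_v1",
    ))
    # {'name': 'myenv', 'version': '1', 'type': 'published'}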
ads/opctl/conda/pack.py CHANGED
@@ -20,7 +20,6 @@ import argparse
 
 
 def main(pack_folder_path, manifest_file=None):
-    slug = os.path.basename(pack_folder_path)
     manifest_path = (
         manifest_file or glob.glob(os.path.join(pack_folder_path, "*_manifest.yaml"))[0]
     )
@@ -47,7 +46,9 @@ def main(pack_folder_path, manifest_file=None):
         raise e
 
     manifest = env["manifest"]
-    manifest["type"] = "published"
+    slug = manifest.get("slug", os.path.basename(pack_folder_path))
+    if os.environ.get("CONDA_PUBLISH_TYPE") != "service":
+        manifest["type"] = "published"
     new_env_info["manifest"] = manifest
     with open(manifest_path, "w") as f:
         yaml.safe_dump(new_env_info, f)
ads/opctl/operator/lowcode/anomaly/const.py CHANGED
@@ -21,6 +21,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
 
     OneClassSVM = "oneclasssvm"
     IsolationForest = "isolationforest"
+    RandomCutForest = "randomcutforest"
    # TODO : Add DBScan
    # DBScan = "dbscan"
 
ads/opctl/operator/lowcode/anomaly/model/base_model.py CHANGED
@@ -16,7 +16,11 @@ from sklearn import linear_model
 
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.opctl import logger
-from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD
+from ads.opctl.operator.lowcode.anomaly.const import (
+    SUBSAMPLE_THRESHOLD,
+    OutputColumns,
+    SupportedMetrics,
+)
 from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
 from ads.opctl.operator.lowcode.common.utils import (
     disable_print,
@@ -55,6 +59,7 @@ class AnomalyOperatorBaseModel(ABC):
     def generate_report(self):
         """Generates the report."""
         import matplotlib.pyplot as plt
+        plt.rcParams.update({'figure.max_open_warning': 0})
         import report_creator as rc
 
         start_time = time.time()
@@ -87,43 +92,59 @@ class AnomalyOperatorBaseModel(ABC):
             self.spec.datetime_column.name if self.spec.datetime_column else "index"
         )
 
+        (
+            model_description,
+            other_sections,
+        ) = self._generate_report()
+
         blocks = []
         for target, df in self.datasets.full_data_dict.items():
-            figure_blocks = []
-            time_col = df[date_column].reset_index(drop=True)
-            anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
-                OutputColumns.ANOMALY_COL
-            ]
-            anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
-            downsampled_time_col = time_col
-            selected_indices = list(range(len(time_col)))
-            if self.spec.subsample_report_data:
-                non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
-                # Downsample non-anomalous data if it exceeds the threshold (1000)
-                if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
-                    downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD]
-                    selected_indices = anomaly_indices + downsampled_non_anomaly_indices
-                    selected_indices.sort()
-                downsampled_time_col = time_col[selected_indices]
-
-            columns = set(df.columns).difference({date_column})
-            for col in columns:
-                y = df[col].reset_index(drop=True)
-
-                downsampled_y = y[selected_indices]
-
-                fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
-                ax.grid()
-                ax.plot(downsampled_time_col, downsampled_y, color="black")
-                # Plot anomalies
-                for i in anomaly_indices:
-                    ax.scatter(time_col[i], y[i], color="red", marker="o")
-                plt.xlabel(date_column)
-                plt.ylabel(col)
-                plt.title(f"`{col}` with reference to anomalies")
-                figure_blocks.append(rc.Widget(ax))
-
-            blocks.append(rc.Group(*figure_blocks, label=target))
+            if target in anomaly_output.list_categories():
+                figure_blocks = []
+                time_col = df[date_column].reset_index(drop=True)
+                anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
+                    OutputColumns.ANOMALY_COL
+                ]
+                anomaly_indices = [
+                    i for i, index in enumerate(anomaly_col) if index == 1
+                ]
+                downsampled_time_col = time_col
+                selected_indices = list(range(len(time_col)))
+                if self.spec.subsample_report_data:
+                    non_anomaly_indices = [
+                        i for i in range(len(time_col)) if i not in anomaly_indices
+                    ]
+                    # Downsample non-anomalous data if it exceeds the threshold (1000)
+                    if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
+                        downsampled_non_anomaly_indices = non_anomaly_indices[
+                            :: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD
+                        ]
+                        selected_indices = (
+                            anomaly_indices + downsampled_non_anomaly_indices
+                        )
+                        selected_indices.sort()
+                    downsampled_time_col = time_col[selected_indices]
+
+                columns = set(df.columns).difference({date_column})
+                for col in columns:
+                    y = df[col].reset_index(drop=True)
+
+                    downsampled_y = y[selected_indices]
+
+                    fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
+                    ax.grid()
+                    ax.plot(downsampled_time_col, downsampled_y, color="black")
+                    # Plot anomalies
+                    for i in anomaly_indices:
+                        ax.scatter(time_col[i], y[i], color="red", marker="o")
+                    plt.xlabel(date_column)
+                    plt.ylabel(col)
+                    plt.title(f"`{col}` with reference to anomalies")
+                    figure_blocks.append(rc.Widget(ax))
+            else:
+                figure_blocks = None
+
+            blocks.append(rc.Group(*figure_blocks, label=target)) if figure_blocks else None
         plots = rc.Select(blocks)
 
         report_sections = []
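
The subsampling above always keeps anomalous points and thins only the non-anomalous ones with a stride slice, so large series render without plotting every point. A small sketch of the stride arithmetic, with illustrative sizes:

    # Illustrative sketch of the stride-based downsampling used in the report.
    SUBSAMPLE_THRESHOLD = 1000

    n_points = 10_000
    anomaly_indices = [17, 4242, 9001]
    non_anomaly_indices = [i for i in range(n_points) if i not in anomaly_indices]

    step = len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD  # -> 9
    downsampled = non_anomaly_indices[::step]               # every 9th normal point
    selected = sorted(anomaly_indices + downsampled)        # anomalies always kept
    print(len(selected))  # roughly 1.1k points instead of 10,000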
@@ -133,7 +154,7 @@ class AnomalyOperatorBaseModel(ABC):
         yaml_appendix = rc.Yaml(self.config.to_dict())
         summary = rc.Block(
             rc.Group(
-                rc.Text(f"You selected the **`{self.spec.model}`** model."),
+                rc.Text(f"You selected the **`{self.spec.model}`** model.\n{model_description.text}\n"),
                 rc.Text(
                     "Based on your dataset, you could have also selected "
                     f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`."
ads/opctl/operator/lowcode/anomaly/model/factory.py CHANGED
@@ -15,6 +15,7 @@ from .autots import AutoTSOperatorModel
 from .base_model import AnomalyOperatorBaseModel
 from .isolationforest import IsolationForestOperatorModel
 from .oneclasssvm import OneClassSVMOperatorModel
+from .randomcutforest import RandomCutForestOperatorModel
 
 
 class UnSupportedModelError(Exception):
@@ -52,6 +53,7 @@ class AnomalyOperatorModelFactory:
     _NonTime_MAP = {
         NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel,
         NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel,
+        NonTimeADSupportedModels.RandomCutForest: RandomCutForestOperatorModel,
         # TODO: Add DBScan model for non time based anomaly
         # NonTimeADSupportedModels.DBScan: DBScanOperatorModel,
     }
ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py ADDED
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import numpy as np
+import pandas as pd
+
+from ads.common.decorator.runtime_dependency import runtime_dependency
+from ads.opctl import logger
+from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+from .anomaly_dataset import AnomalyOutput
+from .base_model import AnomalyOperatorBaseModel
+
+
+class RandomCutForestOperatorModel(AnomalyOperatorBaseModel):
+    """
+    Class representing Random Cut Forest Anomaly Detection operator model.
+    """
+
+    @runtime_dependency(
+        module="rrcf",
+        err_msg=(
+            "Please run `pip install rrcf` to "
+            "install the required dependencies for RandomCutForest."
+        ),
+    )
+    def _build_model(self) -> AnomalyOutput:
+        from rrcf import RCTree
+
+        model_kwargs = self.spec.model_kwargs
+
+        anomaly_output = AnomalyOutput(date_column="index")
+
+        # Set tree parameters
+        num_trees = model_kwargs.get("num_trees", 200)
+        shingle_size = model_kwargs.get("shingle_size", None)
+        anomaly_threshold = model_kwargs.get("anamoly_threshold", 95)
+
+        for target, df in self.datasets.full_data_dict.items():
+            try:
+                if df.shape[0] == 1:
+                    raise ValueError("Dataset size must be greater than 1")
+                df_values = df[self.spec.target_column].astype(float).values
+
+                cal_shingle_size = (
+                    shingle_size
+                    if shingle_size
+                    else int(2 ** np.floor(np.log2(df.shape[0])) / 2)
+                )
+                points = np.vstack(list(rrcf.shingle(df_values, size=cal_shingle_size)))
+
+                sample_size_range = (1, points.shape[0])
+                n = points.shape[0]
+                avg_codisp = pd.Series(0.0, index=np.arange(n))
+                index = np.zeros(n)
+
+                forest = []
+                while len(forest) < num_trees:
+                    ixs = np.random.choice(n, size=sample_size_range, replace=False)
+                    trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs]
+                    forest.extend(trees)
+
+                for tree in forest:
+                    codisp = pd.Series(
+                        {leaf: tree.codisp(leaf) for leaf in tree.leaves}
+                    )
+                    avg_codisp[codisp.index] += codisp
+                    np.add.at(index, codisp.index.values, 1)
+
+                avg_codisp /= index
+                avg_codisp.index = df.iloc[(cal_shingle_size - 1) :].index
+                avg_codisp = (avg_codisp - avg_codisp.min()) / (
+                    avg_codisp.max() - avg_codisp.min()
+                )
+
+                y_pred = (
+                    avg_codisp > np.percentile(avg_codisp, anomaly_threshold)
+                ).astype(int)
+
+                index_col = df.columns[0]
+
+                anomaly = pd.DataFrame(
+                    {index_col: y_pred.index, OutputColumns.ANOMALY_COL: y_pred}
+                ).reset_index(drop=True)
+                score = pd.DataFrame(
+                    {"index": avg_codisp.index, OutputColumns.SCORE_COL: avg_codisp}
+                ).reset_index(drop=True)
+
+                anomaly_output.add_output(target, anomaly, score)
+            except Exception as e:
+                logger.warn(f"Encountered Error: {e}. Skipping series {target}.")
+
+        return anomaly_output
+
+    def _generate_report(self):
+        """Generates the report."""
+        import report_creator as rc
+
+        other_sections = [
+            rc.Heading("Selected Models Overview", level=2),
+            rc.Text(
+                "The following tables provide information regarding the chosen model."
+            ),
+        ]
+
+        model_description = rc.Text(
+            "The Random Cut Forest (RCF) is an unsupervised machine learning algorithm that is used for anomaly detection."
+            " It works by building an ensemble of binary trees (random cut trees) and using them to compute anomaly scores for data points."
+        )
+
+        return (
+            model_description,
+            other_sections,
+        )
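
The new operator delegates scoring to the rrcf package. A self-contained sketch of the underlying idea (shingle the series, build random cut trees on random subsamples, then average each point's collusive displacement), assuming pip install rrcf and purely illustrative parameters:

    import numpy as np
    import pandas as pd
    import rrcf

    rng = np.random.default_rng(42)
    series = np.sin(np.linspace(0, 20, 400)) + rng.normal(0, 0.1, 400)
    series[250] += 4  # inject an obvious spike

    points = np.vstack(list(rrcf.shingle(series, size=4)))
    n = points.shape[0]
    avg_codisp = pd.Series(0.0, index=np.arange(n))
    counts = np.zeros(n)

    for _ in range(50):  # a small forest of random cut trees
        ix = rng.choice(n, size=min(256, n), replace=False)
        tree = rrcf.RCTree(points[ix], index_labels=ix)
        codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves})
        avg_codisp[codisp.index] += codisp
        np.add.at(counts, codisp.index.values, 1)

    avg_codisp /= np.maximum(counts, 1)
    print(avg_codisp.idxmax())  # shingle index near the injected spike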
ads/opctl/operator/lowcode/anomaly/schema.yaml CHANGED
@@ -363,6 +363,7 @@ spec:
         - auto
         - oneclasssvm
         - isolationforest
+        - randomcutforest
       meta:
         description: "The model to be used for anomaly detection"
 
ads/opctl/operator/lowcode/forecast/const.py CHANGED
@@ -14,7 +14,7 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
     Prophet = "prophet"
     Arima = "arima"
     NeuralProphet = "neuralprophet"
-    MLForecast = "mlforecast"
+    LGBForecast = "lgbforecast"
     AutoMLX = "automlx"
     AutoTS = "autots"
     Auto = "auto"
ads/opctl/operator/lowcode/forecast/model/arima.py CHANGED
@@ -67,7 +67,9 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
         self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
         # If trend is constant, remove constant columns
         if "trend" not in model_kwargs or model_kwargs["trend"] == "c":
-            self.constant_cols[s_id] = df.columns[df.nunique() == 1]
+            self.constant_cols[s_id] = list(df.columns[df.nunique() == 1])
+            if target in self.constant_cols[s_id]:
+                self.constant_cols[s_id].remove(target)
             df = df.drop(columns=self.constant_cols[s_id])
 
         # format the dataframe for this target. Dropping NA on target[df] will remove all future data
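
The guard above still drops constant exogenous columns before fitting, but now ensures the target itself is never removed even when it is constant in the training window. A small illustration:

    import pandas as pd

    df = pd.DataFrame({
        "sales": [5, 5, 5, 5],    # target, constant in this window
        "promo": [0, 0, 0, 0],    # constant exogenous column
        "price": [9, 10, 9, 11],
    })
    target = "sales"

    constant_cols = list(df.columns[df.nunique() == 1])
    if target in constant_cols:
        constant_cols.remove(target)  # never drop the target column
    df = df.drop(columns=constant_cols)
    print(df.columns.tolist())  # -> ['sales', 'price']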
@@ -124,8 +126,12 @@ class ArimaOperatorModel(ForecastOperatorBaseModel):
 
             logger.debug("===========Done===========")
         except Exception as e:
-            self.errors_dict[s_id] = {"model_name": self.spec.model, "error": str(e)}
-            logger.debug(f"Encountered Error: {e}. Skipping.")
+            self.errors_dict[s_id] = {
+                "model_name": self.spec.model,
+                "error": str(e),
+                "error_trace": traceback.format_exc()}
+            logger.warn(f"Encountered Error: {e}. Skipping.")
+            logger.warn(traceback.format_exc())
 
     def _build_model(self) -> pd.DataFrame:
         full_data_dict = self.datasets.get_data_by_series()
ads/opctl/operator/lowcode/forecast/model/automlx.py CHANGED
@@ -149,6 +149,9 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
                 if f"{target}_ci_lower" not in summary_frame:
                     summary_frame[f"{target}_ci_lower"] = np.NAN
 
+                if summary_frame[target].isna().all():
+                    raise ValueError("The forecasts are completely NaN")
+
                 self.forecast_output.populate_series_output(
                     series_id=s_id,
                     fit_val=fitted_values,
@@ -167,8 +170,10 @@ class AutoMLXOperatorModel(ForecastOperatorBaseModel):
                 self.errors_dict[s_id] = {
                     "model_name": self.spec.model,
                     "error": str(e),
+                    "error_trace": traceback.format_exc()
                 }
-                logger.debug(f"Encountered Error: {e}. Skipping.")
+                logger.warn(f"Encountered Error: {e}. Skipping.")
+                logger.warn(traceback.format_exc())
 
         logger.debug("===========Forecast Generated===========")
 
ads/opctl/operator/lowcode/forecast/model/autots.py CHANGED
@@ -208,8 +208,10 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel):
                 self.errors_dict[s_id] = {
                     "model_name": self.spec.model,
                     "error": str(e),
+                    "error_trace": traceback.format_exc()
                 }
-                logger.debug(f"Encountered Error: {e}. Skipping.")
+                logger.warn(f"Encountered Error: {e}. Skipping.")
+                logger.warn(traceback.format_exc())
 
         logger.debug("===========Done===========")
 
ads/opctl/operator/lowcode/forecast/model/factory.py CHANGED
@@ -33,7 +33,7 @@ class ForecastOperatorModelFactory:
         SupportedModels.Prophet: ProphetOperatorModel,
         SupportedModels.Arima: ArimaOperatorModel,
         SupportedModels.NeuralProphet: NeuralProphetOperatorModel,
-        SupportedModels.MLForecast: MLForecastOperatorModel,
+        SupportedModels.LGBForecast: MLForecastOperatorModel,
         SupportedModels.AutoMLX: AutoMLXOperatorModel,
         SupportedModels.AutoTS: AutoTSOperatorModel
     }