oracle-ads 2.11.14__py3-none-any.whl → 2.11.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. ads/aqua/common/entities.py +17 -0
  2. ads/aqua/common/enums.py +5 -1
  3. ads/aqua/common/utils.py +109 -22
  4. ads/aqua/config/config.py +1 -1
  5. ads/aqua/config/deployment_config_defaults.json +29 -1
  6. ads/aqua/config/resource_limit_names.json +1 -0
  7. ads/aqua/constants.py +35 -18
  8. ads/aqua/evaluation/entities.py +0 -1
  9. ads/aqua/evaluation/evaluation.py +165 -121
  10. ads/aqua/extension/common_ws_msg_handler.py +57 -0
  11. ads/aqua/extension/deployment_handler.py +14 -13
  12. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  13. ads/aqua/extension/errors.py +1 -1
  14. ads/aqua/extension/evaluation_handler.py +4 -7
  15. ads/aqua/extension/evaluation_ws_msg_handler.py +28 -10
  16. ads/aqua/extension/model_handler.py +31 -6
  17. ads/aqua/extension/models/ws_models.py +78 -3
  18. ads/aqua/extension/models_ws_msg_handler.py +49 -0
  19. ads/aqua/extension/ui_websocket_handler.py +7 -1
  20. ads/aqua/model/entities.py +17 -9
  21. ads/aqua/model/model.py +260 -90
  22. ads/aqua/modeldeployment/constants.py +0 -16
  23. ads/aqua/modeldeployment/deployment.py +97 -74
  24. ads/aqua/modeldeployment/entities.py +9 -20
  25. ads/aqua/ui.py +152 -28
  26. ads/common/object_storage_details.py +2 -5
  27. ads/common/serializer.py +2 -3
  28. ads/jobs/builders/infrastructure/dsc_job.py +29 -3
  29. ads/jobs/builders/infrastructure/dsc_job_runtime.py +74 -27
  30. ads/jobs/builders/runtimes/container_runtime.py +83 -4
  31. ads/opctl/operator/common/operator_config.py +1 -0
  32. ads/opctl/operator/lowcode/anomaly/README.md +3 -3
  33. ads/opctl/operator/lowcode/anomaly/__main__.py +5 -6
  34. ads/opctl/operator/lowcode/anomaly/const.py +9 -0
  35. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +6 -2
  36. ads/opctl/operator/lowcode/anomaly/model/base_model.py +51 -26
  37. ads/opctl/operator/lowcode/anomaly/model/factory.py +41 -13
  38. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +79 -0
  39. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +79 -0
  40. ads/opctl/operator/lowcode/anomaly/operator_config.py +1 -0
  41. ads/opctl/operator/lowcode/anomaly/schema.yaml +16 -2
  42. ads/opctl/operator/lowcode/anomaly/utils.py +16 -13
  43. ads/opctl/operator/lowcode/common/data.py +2 -1
  44. ads/opctl/operator/lowcode/common/errors.py +6 -0
  45. ads/opctl/operator/lowcode/common/transformations.py +37 -9
  46. ads/opctl/operator/lowcode/common/utils.py +32 -10
  47. ads/opctl/operator/lowcode/forecast/model/base_model.py +21 -13
  48. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +14 -18
  49. ads/opctl/operator/lowcode/forecast/model_evaluator.py +15 -4
  50. ads/opctl/operator/lowcode/forecast/schema.yaml +9 -0
  51. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  52. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  53. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  54. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  55. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  56. ads/opctl/operator/lowcode/recommender/constant.py +25 -0
  57. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  58. ads/opctl/operator/lowcode/recommender/model/base_model.py +198 -0
  59. ads/opctl/operator/lowcode/recommender/model/factory.py +58 -0
  60. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  61. ads/opctl/operator/lowcode/recommender/model/svd.py +88 -0
  62. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  63. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  64. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  65. ads/pipeline/ads_pipeline_run.py +13 -2
  66. {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/METADATA +6 -1
  67. {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/RECORD +70 -50
  68. {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/LICENSE.txt +0 -0
  69. {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/WHEEL +0 -0
  70. {oracle_ads-2.11.14.dist-info → oracle_ads-2.11.16.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,79 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*--
+
+ # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+ import numpy as np
+ import pandas as pd
+
+ from ads.common.decorator.runtime_dependency import runtime_dependency
+
+ from .base_model import AnomalyOperatorBaseModel
+ from .anomaly_dataset import AnomalyOutput
+ from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
+
+
+ class OneClassSVMOperatorModel(AnomalyOperatorBaseModel):
+     """Class representing OneClassSVM Anomaly Detection operator model."""
+
+     @runtime_dependency(
+         module="sklearn",
+         err_msg=(
+             "Please run `pip3 install scikit-learn` to "
+             "install the required dependencies for OneClassSVM."
+         ),
+     )
+     def _build_model(self) -> AnomalyOutput:
+         from sklearn.svm import OneClassSVM
+
+         model_kwargs = self.spec.model_kwargs
+         # map the output as per anomaly dataset class, 1: outlier, 0: inlier
+         self.outlier_map = {1: 0, -1: 1}
+
+         anomaly_output = AnomalyOutput(date_column="index")
+
+         for target, df in self.datasets.full_data_dict.items():
+             model = OneClassSVM(**model_kwargs)
+             model.fit(df)
+             y_pred = np.vectorize(self.outlier_map.get)(
+                 model.predict(df)
+             )
+
+             scores = model.score_samples(
+                 df
+             )
+
+             index_col = df.columns[0]
+
+             anomaly = pd.DataFrame(
+                 {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
+             ).reset_index(drop=True)
+             score = pd.DataFrame(
+                 {"index": df[index_col], OutputColumns.SCORE_COL: scores}
+             ).reset_index(drop=True)
+
+             anomaly_output.add_output(target, anomaly, score)
+
+         return anomaly_output
+
+     def _generate_report(self):
+         """Generates the report."""
+         import report_creator as rc
+
+         other_sections = [
+             rc.Heading("Selected Models Overview", level=2),
+             rc.Text(
+                 "The following tables provide information regarding the chosen model."
+             ),
+         ]
+
+         model_description = rc.Text(
+             "The oneclasssvm model is a full-stack automated machine learning system for outlier detection. "
+             "It is best suited for novelty detection when the training set is not contaminated by outliers"
+         )
+
+         return (
+             model_description,
+             other_sections,
+         )
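
A note on the `{1: 0, -1: 1}` mapping in `_build_model` above: scikit-learn's `OneClassSVM.predict` returns +1 for inliers and -1 for outliers, while `AnomalyOutput` expects 1 for anomalies and 0 for normal rows. A minimal standalone sketch of that convention (the toy data and the `nu` value are illustrative, not taken from the package):

import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.default_rng(0)
train = rng.normal(size=(200, 2))              # clean training data
test = np.vstack([train[:5], [[8.0, 8.0]]])    # append one obvious outlier

model = OneClassSVM(nu=0.05).fit(train)
raw = model.predict(test)                      # values in {+1, -1}
mapped = np.vectorize({1: 0, -1: 1}.get)(raw)  # 1 = outlier, 0 = inlier
print(raw[-1], mapped[-1])                     # -1 1
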
@@ -77,6 +77,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
      model: str = None
      model_kwargs: Dict = field(default_factory=dict)
      contamination: float = None
+     subsample_report_data: bool = None
 
      def __post_init__(self):
          """Adjusts the specification details."""
@@ -29,7 +29,7 @@ spec:
    input_data:
      required: true
      type: dict
-     default: {"url": "data.csv"}
+     default: { "url": "data.csv" }
      meta:
        description: "The payload that the detector should evaluate."
      schema:
@@ -78,6 +78,9 @@ spec:
        limit:
          required: false
          type: integer
+       vault_secret_id:
+         required: false
+         type: string
 
    validation_data:
      required: false
@@ -130,10 +133,15 @@ spec:
        limit:
          required: false
          type: integer
+       vault_secret_id:
+         required: false
+         type: string
 
    datetime_column:
      type: dict
-     required: true
+     required: false
+     meta:
+       description: "`datetime_column` is required for time series anomaly detection; only non-time-based anomaly detection models can be run without `datetime_column`."
      schema:
        name:
          type: string
@@ -353,6 +361,8 @@ spec:
      allowed:
        - autots
        - auto
+       - oneclasssvm
+       - isolationforest
      meta:
        description: "The model to be used for anomaly detection"
 
@@ -367,4 +377,8 @@ spec:
      type: dict
      required: false
 
+   subsample_report_data:
+     type: boolean
+     required: false
+
    type: dict
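
Taken together, the anomaly schema changes above permit a spec with a non-time-based model and no `datetime_column`, plus the new optional `subsample_report_data` flag. A hypothetical spec exercising the new fields (field values are illustrative only; PyYAML is used here just to show the document parses):

import yaml

spec = yaml.safe_load("""
kind: operator
type: anomaly
version: v1
spec:
  input_data:
    url: data.csv
  model: oneclasssvm            # newly allowed, non-time-based model
  model_kwargs:
    nu: 0.1
  subsample_report_data: true   # new optional flag
""")
print(spec["spec"]["model"])    # oneclasssvm
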
@@ -1,31 +1,32 @@
  #!/usr/bin/env python
- # -*- coding: utf-8 -*--
 
  # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
  import os
+
  import pandas as pd
- import fsspec
- from .operator_config import AnomalyOperatorSpec
- from .const import SupportedMetrics, SupportedModels
+
  from ads.opctl import logger
 
+ from .const import NonTimeADSupportedModels, SupportedMetrics, SupportedModels
+ from .operator_config import AnomalyOperatorSpec
+
 
  def _build_metrics_df(y_true, y_pred, column_name):
      from sklearn.metrics import (
-         recall_score,
-         precision_score,
          accuracy_score,
-         f1_score,
-         confusion_matrix,
-         roc_auc_score,
-         precision_recall_curve,
          auc,
+         confusion_matrix,
+         f1_score,
          matthews_corrcoef,
+         precision_recall_curve,
+         precision_score,
+         recall_score,
+         roc_auc_score,
      )
 
-     metrics = dict()
+     metrics = {}
      metrics[SupportedMetrics.RECALL] = recall_score(y_true, y_pred)
      metrics[SupportedMetrics.PRECISION] = precision_score(y_true, y_pred)
      metrics[SupportedMetrics.ACCURACY] = accuracy_score(y_true, y_pred)
@@ -78,5 +79,7 @@ def default_signer(**kwargs):
      return default_signer(**kwargs)
 
 
- def select_auto_model(datasets, operator_config):
-     return SupportedModels.AutoTS
+ def select_auto_model(operator_config):
+     if operator_config.spec.datetime_column is not None:
+         return SupportedModels.AutoTS
+     return NonTimeADSupportedModels.IsolationForest
@@ -25,6 +25,7 @@ class AbstractData(ABC):
          self.data = None
          self._data_dict = dict()
          self.name = name
+         self.spec = spec
          self.load_transform_ingest_data(spec)
 
      def get_raw_data_by_cat(self, category):
@@ -36,7 +37,7 @@ class AbstractData(ABC):
          for col, val in mapping[category].items():
              condition &= (self.raw_data[col] == val)
          data_by_cat = self.raw_data[condition].reset_index(drop=True)
-         data_by_cat = self._data_transformer._format_datetime_col(data_by_cat)
+         data_by_cat = self._data_transformer._format_datetime_col(data_by_cat) if self.spec.datetime_column else data_by_cat
          return data_by_cat
 
 
@@ -39,3 +39,9 @@ class PermissionsError(Exception):
          "complies with the required schema for the operator. \n"
          f"{error}"
      )
+
+
+ class InsufficientDataError(Exception):
+     def __init__(self, message: str):
+         self.message = message
+         super().__init__(message)
@@ -32,8 +32,14 @@ class Transformations(ABC):
          self.dataset_info = dataset_info
          self.target_category_columns = dataset_info.target_category_columns
          self.target_column_name = dataset_info.target_column
-         self.dt_column_name = dataset_info.datetime_column.name
-         self.dt_column_format = dataset_info.datetime_column.format
+         self.dt_column_name = (
+             dataset_info.datetime_column.name if dataset_info.datetime_column else None
+         )
+         self.dt_column_format = (
+             dataset_info.datetime_column.format
+             if dataset_info.datetime_column
+             else None
+         )
          self.preprocessing = dataset_info.preprocessing
 
      def run(self, data):
@@ -55,8 +61,10 @@ class Transformations(ABC):
          if self.name == "historical_data":
              self._check_historical_dataset(clean_df)
          clean_df = self._set_series_id_column(clean_df)
-         clean_df = self._format_datetime_col(clean_df)
+         if self.dt_column_name:
+             clean_df = self._format_datetime_col(clean_df)
          clean_df = self._set_multi_index(clean_df)
+         clean_df = self._fill_na(clean_df) if not self.dt_column_name else clean_df
 
          if self.preprocessing and self.preprocessing.enabled:
              if self.name == "historical_data":
@@ -66,7 +74,9 @@ class Transformations(ABC):
                  except Exception as e:
                      logger.debug(f"Missing value imputation failed with {e.args}")
              else:
-                 logger.info("Skipping missing value imputation because it is disabled")
+                 logger.info(
+                     "Skipping missing value imputation because it is disabled"
+                 )
              if self.preprocessing.steps.outlier_treatment:
                  try:
                      clean_df = self._outlier_treatment(clean_df)
@@ -77,7 +87,9 @@ class Transformations(ABC):
          elif self.name == "additional_data":
              clean_df = self._missing_value_imputation_add(clean_df)
          else:
-             logger.info("Skipping all preprocessing steps because preprocessing is disabled")
+             logger.info(
+                 "Skipping all preprocessing steps because preprocessing is disabled"
+             )
          return clean_df
 
      def _remove_trailing_whitespace(self, df):
@@ -95,7 +107,14 @@ class Transformations(ABC):
          merged_values = df[DataColumns.Series].unique().tolist()
          if self.target_category_columns:
              for value in merged_values:
-                 self._target_category_columns_map[value] = df[df[DataColumns.Series] == value][self.target_category_columns].drop_duplicates().iloc[0].to_dict()
+                 self._target_category_columns_map[value] = (
+                     df[df[DataColumns.Series] == value][
+                         self.target_category_columns
+                     ]
+                     .drop_duplicates()
+                     .iloc[0]
+                     .to_dict()
+                 )
 
          if self.target_category_columns != [DataColumns.Series]:
              df = df.drop(self.target_category_columns, axis=1)
@@ -124,8 +143,12 @@ class Transformations(ABC):
          -------
          A new Pandas DataFrame with sorted dates for each series
          """
-         df = df.set_index([self.dt_column_name, DataColumns.Series])
-         return df.sort_values([self.dt_column_name, DataColumns.Series], ascending=True)
+         if self.dt_column_name:
+             df = df.set_index([self.dt_column_name, DataColumns.Series])
+             return df.sort_values(
+                 [self.dt_column_name, DataColumns.Series], ascending=True
+             )
+         return df.set_index([df.index, DataColumns.Series])
 
      def _missing_value_imputation_hist(self, df):
          """
@@ -222,5 +245,10 @@ class Transformations(ABC):
 
          }
          """
+
      def get_target_category_columns_map(self):
-         return self._target_category_columns_map
+         return self._target_category_columns_map
+
+     def _fill_na(self, df: pd.DataFrame, na_value=0) -> pd.DataFrame:
+         """Fill nans in dataframe"""
+         return df.fillna(value=na_value)
@@ -7,7 +7,9 @@
  import argparse
  import logging
  import os
+ import shutil
  import sys
+ import tempfile
  import time
  from string import Template
  from typing import Any, Dict, List, Tuple
@@ -28,6 +30,7 @@ from ads.opctl.operator.lowcode.common.errors import (
  )
  from ads.opctl.operator.common.operator_config import OutputDirectory
  from ads.common.object_storage_details import ObjectStorageDetails
+ from ads.secrets import ADBSecretKeeper
 
 
  def call_pandas_fsspec(pd_fn, filename, storage_options, **kwargs):
@@ -53,10 +56,12 @@ def load_data(data_spec, storage_options=None, **kwargs):
      sql = data_spec.sql
      table_name = data_spec.table_name
      limit = data_spec.limit
-
+     vault_secret_id = data_spec.vault_secret_id
      storage_options = storage_options or (
          default_signer() if ObjectStorageDetails.is_oci_path(filename) else {}
      )
+     if vault_secret_id is not None and connect_args is None:
+         connect_args = dict()
 
      if filename is not None:
          if not format:
@@ -76,15 +81,32 @@ def load_data(data_spec, storage_options=None, **kwargs):
              f"The format {format} is not currently supported for reading data. Please reformat the data source: {filename} ."
          )
      elif connect_args is not None:
-         con = oracledb.connect(**connect_args)
-         if table_name is not None:
-             data = pd.read_sql_table(table_name, con)
-         elif sql is not None:
-             data = pd.read_sql(sql, con)
-         else:
-             raise InvalidParameterError(
-                 f"Database `connect_args` provided without sql query or table name. Please specify either `sql` or `table_name`."
-             )
+         with tempfile.TemporaryDirectory() as temp_dir:
+             if vault_secret_id is not None:
+                 try:
+                     with ADBSecretKeeper.load_secret(vault_secret_id, wallet_dir=temp_dir) as adwsecret:
+                         if 'wallet_location' in adwsecret and 'wallet_location' not in connect_args:
+                             shutil.unpack_archive(adwsecret["wallet_location"], temp_dir)
+                             connect_args['wallet_location'] = temp_dir
+                         if 'user_name' in adwsecret and 'user' not in connect_args:
+                             connect_args['user'] = adwsecret['user_name']
+                         if 'password' in adwsecret and 'password' not in connect_args:
+                             connect_args['password'] = adwsecret['password']
+                         if 'service_name' in adwsecret and 'service_name' not in connect_args:
+                             connect_args['service_name'] = adwsecret['service_name']
+
+                 except Exception as e:
+                     raise Exception(f"Could not retrieve database credentials from vault {vault_secret_id}: {e}")
+
+             con = oracledb.connect(**connect_args)
+             if table_name is not None:
+                 data = pd.read_sql(f"SELECT * FROM {table_name}", con)
+             elif sql is not None:
+                 data = pd.read_sql(sql, con)
+             else:
+                 raise InvalidParameterError(
+                     f"Database `connect_args` provided without sql query or table name. Please specify either `sql` or `table_name`."
+                 )
      else:
          raise InvalidParameterError(
              f"No filename/url provided, and no connect_args provided. Please specify one of these if you want to read data from a file or a database respectively."
@@ -249,20 +249,28 @@ class ForecastOperatorBaseModel(ABC):
          train_metrics_sections = [sec9_text, sec9]
 
          backtest_sections = []
+         output_dir = self.spec.output_directory.url
+         backtest_report_name = "backtest_stats.csv"
+         file_path = f"{output_dir}/{backtest_report_name}"
          if self.spec.model == AUTO_SELECT:
-             output_dir = self.spec.output_directory.url
-             backtest_report_name = "backtest_stats.csv"
-             backtest_stats = pd.read_csv(f"{output_dir}/{backtest_report_name}")
-             average_dict = backtest_stats.mean().to_dict()
-             del average_dict['backtest']
-             best_model = min(average_dict, key=average_dict.get)
-             backtest_text = rc.Heading("Back Testing Metrics", level=2)
-             summary_text = rc.Text(
-                 f"Overall, the average scores for the models are {average_dict}, with {best_model}"
-                 f" being identified as the top-performing model during backtesting.")
-             backtest_table = rc.DataTable(backtest_stats, index=True)
-             liner_plot = get_auto_select_plot(backtest_stats)
-             backtest_sections = [backtest_text, backtest_table, summary_text, liner_plot]
+             backtest_sections.append(rc.Heading("Auto-select statistics", level=2))
+             if not os.path.exists(file_path):
+                 failure_msg = rc.Text("auto-select could not be executed. Please check the "
+                                       "logs for more details.")
+                 backtest_sections.append(failure_msg)
+             else:
+                 backtest_stats = pd.read_csv(file_path)
+                 average_dict = backtest_stats.mean().to_dict()
+                 del average_dict['backtest']
+                 best_model = min(average_dict, key=average_dict.get)
+                 backtest_text = rc.Heading("Back Testing Metrics", level=3)
+                 summary_text = rc.Text(
+                     f"Overall, the average scores for the models are {average_dict}, with {best_model}"
+                     f" being identified as the top-performing model during backtesting.")
+                 backtest_table = rc.DataTable(backtest_stats, index=True)
+                 liner_plot = get_auto_select_plot(backtest_stats)
+                 backtest_sections.extend([backtest_text, backtest_table, summary_text,
+                                           liner_plot])
 
 
          forecast_plots = []
@@ -61,6 +61,18 @@ class MLForecastOperatorModel(ForecastOperatorBaseModel):
              "verbosity": -1,
              "num_leaves": 512,
          }
+         additional_data_params = {}
+         if len(self.datasets.get_additional_data_column_names()) > 0:
+             additional_data_params = {
+                 "target_transforms": [Differences([12])],
+                 "lags": model_kwargs.get("lags", [1, 6, 12]),
+                 "lag_transforms": (
+                     {
+                         1: [ExpandingMean()],
+                         12: [RollingMean(window_size=24)],
+                     }
+                 ),
+             }
 
          fcst = MLForecast(
              models={
@@ -80,24 +92,7 @@ class MLForecastOperatorModel(ForecastOperatorBaseModel):
              },
              freq=pd.infer_freq(data_train[self.date_col].drop_duplicates())
              or pd.infer_freq(data_train[self.date_col].drop_duplicates()[-5:]),
-             target_transforms=[Differences([12])],
-             lags=model_kwargs.get(
-                 "lags",
-                 (
-                     [1, 6, 12]
-                     if len(self.datasets.get_additional_data_column_names()) > 0
-                     else []
-                 ),
-             ),
-             lag_transforms=(
-                 {
-                     1: [ExpandingMean()],
-                     12: [RollingMean(window_size=24)],
-                 }
-                 if len(self.datasets.get_additional_data_column_names()) > 0
-                 else {}
-             ),
-             # date_features=[hour_index],
+             **additional_data_params,
          )
 
          num_models = model_kwargs.get("recursive_models", False)
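
The refactor above swaps three inline conditionals for a single `additional_data_params` dict that is unpacked into the `MLForecast` constructor, so when no additional data columns exist, the lag arguments are simply not passed. The pattern in isolation (stand-in function and values, not the operator's code):

def build_forecaster(models, freq, **optional):
    # Stand-in for MLForecast(...): just echoes what it received.
    return {"models": models, "freq": freq, **optional}

has_additional_data = True  # stand-in for the additional-columns check
extra = {"lags": [1, 6, 12]} if has_additional_data else {}

fcst = build_forecaster(models=["lgbm"], freq="MS", **extra)
print(fcst)  # {'models': ['lgbm'], 'freq': 'MS', 'lags': [1, 6, 12]}
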
@@ -164,6 +159,7 @@ class MLForecastOperatorModel(ForecastOperatorBaseModel):
                  "error": str(e),
              }
              logger.debug(f"Encountered Error: {e}. Skipping.")
+             raise e
 
      def _build_model(self) -> pd.DataFrame:
          data_train = self.datasets.get_all_data_long(include_horizon=False)
@@ -12,7 +12,8 @@ from ads.opctl import logger
  from ads.opctl.operator.lowcode.common.const import DataColumns
  from .model.forecast_datasets import ForecastDatasets
  from .operator_config import ForecastOperatorConfig
-
+ from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
+ from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
 
  class ModelEvaluator:
      """
@@ -39,7 +40,7 @@ class ModelEvaluator:
      def generate_cutoffs(self, unique_dates, horizon):
          sorted_dates = np.sort(unique_dates)
          train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(self.k)]
-         valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 3]
+         valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]
          if len(valid_train_window_size) < self.k:
              logger.warn(f"Only {valid_train_window_size} backtests can be created")
          cut_offs = sorted_dates[-horizon - 1:-horizon * (self.k + 1):-horizon][:len(valid_train_window_size)]
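
A worked example of the relaxed cutoff rule above: with 50 unique dates, a horizon of 10, and k = 5 requested backtests, the candidate training windows are [40, 30, 20, 10, 0]; the old `horizon * 3` threshold kept two of them, while the new `horizon * 2` threshold keeps three:

sorted_dates, horizon, k = list(range(50)), 10, 5
train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(k)]
print(train_window_size)                                      # [40, 30, 20, 10, 0]
print([ws for ws in train_window_size if ws >= horizon * 3])  # old rule: [40, 30]
print([ws for ws in train_window_size if ws >= horizon * 2])  # new rule: [40, 30, 20]
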
@@ -61,6 +62,9 @@ class ModelEvaluator:
          unique_dates = min_series_data[date_col].unique()
 
          cut_offs = self.generate_cutoffs(unique_dates, horizon)
+         if not len(cut_offs):
+             raise InsufficientDataError("Insufficient data to evaluate multiple models. Please specify a model "
+                                         "instead of using auto-select.")
          training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date
                               in cut_offs]
          test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
@@ -95,7 +99,9 @@ class ModelEvaluator:
          backtest_op_config_draft = operator_config.to_dict()
          backtest_spec = backtest_op_config_draft["spec"]
          backtest_spec["historical_data"]["url"] = historical_data_url
-         backtest_spec["additional_data"]["url"] = additional_data_url
+         if backtest_spec["additional_data"]:
+             backtest_spec["additional_data"]["url"] = additional_data_url
+         backtest_spec["test_data"] = {}
          backtest_spec["test_data"]["url"] = test_data_url
          backtest_spec["model"] = model
          backtest_spec['model_kwargs'] = None
@@ -135,7 +141,12 @@ class ModelEvaluator:
          return metrics
 
      def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
-         metrics = self.run_all_models(datasets, operator_config)
+         try:
+             metrics = self.run_all_models(datasets, operator_config)
+         except InsufficientDataError as e:
+             model = SupportedModels.Prophet
+             logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
+             return model
          avg_backtests_metrics = {key: sum(value.values()) / len(value.values()) for key, value in metrics.items()}
          best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
          logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
@@ -78,6 +78,9 @@ spec:
        limit:
          required: false
          type: integer
+       vault_secret_id:
+         required: false
+         type: string
 
    additional_data:
      required: false
@@ -130,6 +133,9 @@ spec:
        limit:
          required: false
          type: integer
+       vault_secret_id:
+         required: false
+         type: string
 
    test_data:
      required: false
@@ -181,6 +187,9 @@ spec:
        limit:
          required: false
          type: integer
+       vault_secret_id:
+         required: false
+         type: string
      type: dict
 
    output_directory:
@@ -0,0 +1,16 @@
+ type: recommender
+ version: v1
+ conda_type: service
+ name: Recommender Operator
+ gpu: no
+ keywords:
+   - Recommender
+ backends:
+   - job
+   - operator.local
+ description: |
+   Recommender Systems are designed to suggest relevant items, products, or content to users based on their
+   preferences and behaviors. These systems are widely used in various industries such as e-commerce, entertainment,
+   and social media to enhance user experience by providing personalized recommendations. They help in increasing user
+   engagement, satisfaction, and sales by predicting what users might like or need based on their past interactions
+   and the preferences of similar users.
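
The file list above adds model/svd.py to the new recommender operator. Its implementation is not shown in this diff, but the SVD idea the filename refers to can be sketched in a few lines of numpy: factor the user-item rating matrix into latent factors and read predicted scores off a low-rank reconstruction (toy matrix, illustrative only, not the operator's code):

import numpy as np

R = np.array([[5, 3, 0],      # rows = users, cols = items, 0 = unrated
              [4, 0, 0],
              [1, 1, 5]], dtype=float)
U, s, Vt = np.linalg.svd(R, full_matrices=False)
k = 2                          # keep the top-k latent factors
R_hat = U[:, :k] @ np.diag(s[:k]) @ Vt[:k, :]
print(round(R_hat[1, 2], 2))   # predicted score for user 1 on item 2
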