oracle-ads 2.13.2__py3-none-any.whl → 2.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. ads/aqua/app.py +2 -1
  2. ads/aqua/evaluation/evaluation.py +11 -10
  3. ads/aqua/finetuning/finetuning.py +2 -3
  4. ads/opctl/operator/lowcode/anomaly/model/base_model.py +3 -3
  5. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +1 -1
  6. ads/opctl/operator/lowcode/anomaly/utils.py +1 -1
  7. ads/opctl/operator/lowcode/common/transformations.py +5 -1
  8. ads/opctl/operator/lowcode/common/utils.py +7 -2
  9. ads/opctl/operator/lowcode/forecast/model/arima.py +15 -10
  10. ads/opctl/operator/lowcode/forecast/model/automlx.py +31 -9
  11. ads/opctl/operator/lowcode/forecast/model/autots.py +7 -5
  12. ads/opctl/operator/lowcode/forecast/model/base_model.py +127 -101
  13. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +14 -6
  14. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +2 -2
  15. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +46 -32
  16. ads/opctl/operator/lowcode/forecast/model/prophet.py +82 -29
  17. ads/opctl/operator/lowcode/forecast/model_evaluator.py +136 -54
  18. ads/opctl/operator/lowcode/forecast/operator_config.py +29 -3
  19. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +103 -58
  20. {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/METADATA +1 -1
  21. {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/RECORD +24 -24
  22. {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/WHEEL +0 -0
  23. {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/entry_points.txt +0 -0
  24. {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/licenses/LICENSE.txt +0 -0
ads/opctl/operator/lowcode/forecast/model/prophet.py

@@ -22,7 +22,6 @@ from ads.opctl.operator.lowcode.forecast.utils import (
  from ..const import (
  DEFAULT_TRIALS,
  PROPHET_INTERNAL_DATE_COL,
- ForecastOutputColumns,
  SupportedModels,
  )
  from .base_model import ForecastOperatorBaseModel
@@ -44,12 +43,23 @@ def _fit_model(data, params, additional_regressors):
  from prophet import Prophet

  monthly_seasonality = params.pop("monthly_seasonality", False)
+ data_floor = params.pop("min", None)
+ data_cap = params.pop("max", None)
+ if data_cap or data_floor:
+ params["growth"] = "logistic"
  model = Prophet(**params)
  if monthly_seasonality:
  model.add_seasonality(name="monthly", period=30.5, fourier_order=5)
  params["monthly_seasonality"] = monthly_seasonality
  for add_reg in additional_regressors:
  model.add_regressor(add_reg)
+ if data_floor:
+ data["floor"] = float(data_floor)
+ params["floor"] = data_floor
+ if data_cap:
+ data["cap"] = float(data_cap)
+ params["cap"] = data_cap
+
  model.fit(data)
  return model
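Note: the cap/floor handling above relies on Prophet's saturating (logistic) growth mode, where both the training frame and the prediction frame carry `cap`/`floor` columns. A minimal, self-contained sketch of that Prophet API (the dates and bound values below are illustrative, not taken from the package):

    # Minimal sketch of Prophet logistic growth with cap/floor (illustrative values).
    import pandas as pd
    from prophet import Prophet

    df = pd.DataFrame({
        "ds": pd.date_range("2024-01-01", periods=60, freq="D"),
        "y": list(range(60)),
    })
    df["cap"] = 100.0    # saturating maximum, analogous to the "max" kwarg above
    df["floor"] = 0.0    # saturating minimum, analogous to the "min" kwarg above

    m = Prophet(growth="logistic")
    m.fit(df)

    future = m.make_future_dataframe(periods=14)
    future["cap"] = 100.0    # cap/floor must also be present on the prediction frame
    future["floor"] = 0.0
    forecast = m.predict(future)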
@@ -112,6 +122,41 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  upper_bound=self.get_horizon(forecast["yhat_upper"]).values,
  lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
  )
+ # Get all features that make up the forecast. Exclude CI (upper/lower)
+ core_columns = forecast.columns[
+ ~forecast.columns.str.endswith("_lower")
+ & ~forecast.columns.str.endswith("_upper")
+ ]
+ core_columns = set(core_columns) - {
+ "additive_terms",
+ "extra_regressors_additive",
+ "multiplicative_terms",
+ "extra_regressors_multiplicative",
+ "cap",
+ "floor",
+ "yhat",
+ }
+ combine_terms = list(
+ core_columns.intersection(
+ {
+ "trend",
+ "daily",
+ "weekly",
+ "yearly",
+ "monthly",
+ "holidays",
+ "zeros",
+ }
+ )
+ )
+
+ temp_df = (
+ forecast[list(core_columns)]
+ .rename({"ds": "Date"}, axis=1)
+ .set_index("Date")
+ )
+ temp_df[self.spec.target_column] = temp_df[combine_terms].sum(axis=1)
+ self.explanations_info[series_id] = temp_df.drop(combine_terms, axis=1)

  self.models[series_id] = {}
  self.models[series_id]["model"] = model
@@ -133,13 +178,14 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  "error": str(e),
  "error_trace": traceback.format_exc(),
  }
- logger.warn(f"Encountered Error: {e}. Skipping.")
- logger.warn(traceback.format_exc())
+ logger.warning(f"Encountered Error: {e}. Skipping.")
+ logger.warning(traceback.format_exc())

  def _build_model(self) -> pd.DataFrame:
  full_data_dict = self.datasets.get_data_by_series()
  self.models = {}
  self.outputs = {}
+ self.explanations_info = {}
  self.additional_regressors = self.datasets.get_additional_data_column_names()
  model_kwargs = self.set_kwargs()
  self.forecast_output = ForecastOutput(
@@ -149,9 +195,6 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  dt_column=self.spec.datetime_column.name,
  )

- # if os.environ["OCI__IS_SPARK"]:
- # pass
- # else:
  Parallel(n_jobs=-1, require="sharedmem")(
  delayed(ProphetOperatorModel._train_model)(
  self, i, series_id, df, model_kwargs.copy()
@@ -222,7 +265,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  try:
  return np.mean(df_p[self.spec.metric])
  except KeyError:
- logger.warn(
+ logger.warning(
  f"Could not find the metric {self.spec.metric} within "
  f"the performance metrics: {df_p.columns}. Defaulting to `rmse`"
  )
@@ -249,6 +292,25 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  model_kwargs_i = study.best_params
  return model_kwargs_i

+ def explain_model(self):
+ self.local_explanation = {}
+ global_expl = []
+
+ for s_id, expl_df in self.explanations_info.items():
+ # Local Expl
+ self.local_explanation[s_id] = self.get_horizon(expl_df)
+ self.local_explanation[s_id]["Series"] = s_id
+ self.local_explanation[s_id].index.rename(self.dt_column_name, inplace=True)
+ # Global Expl
+ g_expl = self.drop_horizon(expl_df).mean()
+ g_expl.name = s_id
+ global_expl.append(g_expl)
+ self.global_explanation = pd.concat(global_expl, axis=1)
+ self.formatted_global_explanation = (
+ self.global_explanation / self.global_explanation.sum(axis=0) * 100
+ )
+ self.formatted_local_explanation = pd.concat(self.local_explanation.values())
+
  def _generate_report(self):
  import report_creator as rc
  from prophet.plot import add_changepoints_to_plot
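Note: the global explanation produced by `explain_model` above is a column-wise normalization of each series' averaged component contributions into percentages. A small sketch of just that normalization step, using made-up component values:

    # Illustrative only: per-series component contributions normalized to percentages.
    import pandas as pd

    global_expl = pd.DataFrame(
        {"series_a": {"trend": 40.0, "weekly": 8.0, "temperature": 2.0},
         "series_b": {"trend": 25.0, "weekly": 20.0, "temperature": 5.0}}
    )
    formatted = global_expl / global_expl.sum(axis=0) * 100
    print(formatted.round(1))
    #              series_a  series_b
    # trend            80.0      50.0
    # weekly           16.0      40.0
    # temperature       4.0      10.0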
@@ -274,7 +336,9 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  )

  sec2 = _select_plot_list(
- lambda s_id: self.models[s_id]["model"].plot_components(self.outputs[s_id]),
+ lambda s_id: self.models[s_id]["model"].plot_components(
+ self.outputs[s_id]
+ ),
  series_ids=series_ids,
  target_category_column=self.target_cat_col,
  )
@@ -283,11 +347,14 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  )

  sec3_figs = {
- s_id: self.models[s_id]["model"].plot(self.outputs[s_id]) for s_id in series_ids
+ s_id: self.models[s_id]["model"].plot(self.outputs[s_id])
+ for s_id in series_ids
  }
  for s_id in series_ids:
  add_changepoints_to_plot(
- sec3_figs[s_id].gca(), self.models[s_id]["model"], self.outputs[s_id]
+ sec3_figs[s_id].gca(),
+ self.models[s_id]["model"],
+ self.outputs[s_id],
  )
  sec3 = _select_plot_list(
  lambda s_id: sec3_figs[s_id],
@@ -322,22 +389,6 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  # If the key is present, call the "explain_model" method
  self.explain_model()

- # Convert the global explanation data to a DataFrame
- global_explanation_df = pd.DataFrame(self.global_explanation)
-
- self.formatted_global_explanation = (
- global_explanation_df / global_explanation_df.sum(axis=0) * 100
- )
-
- aggregate_local_explanations = pd.DataFrame()
- for s_id, local_ex_df in self.local_explanation.items():
- local_ex_df_copy = local_ex_df.copy()
- local_ex_df_copy[ForecastOutputColumns.SERIES] = s_id
- aggregate_local_explanations = pd.concat(
- [aggregate_local_explanations, local_ex_df_copy], axis=0
- )
- self.formatted_local_explanation = aggregate_local_explanations
-
  if not self.target_cat_col:
  self.formatted_global_explanation = (
  self.formatted_global_explanation.rename(
@@ -351,7 +402,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):

  # Create a markdown section for the global explainability
  global_explanation_section = rc.Block(
- rc.Heading("Global Explanation of Models", level=2),
+ rc.Heading("Global Explainability", level=2),
  rc.Text(
  "The following tables provide the feature attribution for the global explainability."
  ),
@@ -360,7 +411,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):

  blocks = [
  rc.DataTable(
- local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+ local_ex_df.drop("Series", axis=1),
  label=s_id if self.target_cat_col else None,
  index=True,
  )
@@ -378,8 +429,10 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
  ]
  except Exception as e:
  # Do not fail the whole run due to explanations failure
- logger.warn(f"Failed to generate Explanations with error: {e}.")
+ logger.warning(f"Failed to generate Explanations with error: {e}.")
  logger.debug(f"Full Traceback: {traceback.format_exc()}")
+ self.errors_dict["explainer_error"] = str(e)
+ self.errors_dict["explainer_error_error"] = traceback.format_exc()

  model_description = rc.Text(
  """Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well."""
ads/opctl/operator/lowcode/forecast/model_evaluator.py

@@ -1,20 +1,21 @@
- # -*- coding: utf-8; -*-
-
  # Copyright (c) 2023 Oracle and/or its affiliates.
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


+ from pathlib import Path
+
  import numpy as np
  import pandas as pd
- from pathlib import Path

  from ads.opctl import logger
  from ads.opctl.operator.lowcode.common.const import DataColumns
+ from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
  from ads.opctl.operator.lowcode.forecast.const import BACKTEST_REPORT_NAME
+ from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
+
  from .model.forecast_datasets import ForecastDatasets
  from .operator_config import ForecastOperatorConfig
- from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
- from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
+

  class ModelEvaluator:
  """
@@ -23,6 +24,7 @@ class ModelEvaluator:
  This class is responsible for comparing different models or frameworks based on specified evaluation
  metrics and returning the best-performing option.
  """
+
  def __init__(self, models, k=5, subsample_ratio=0.20):
  """
  Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.
@@ -40,23 +42,33 @@ class ModelEvaluator:

  def generate_cutoffs(self, unique_dates, horizon):
  sorted_dates = np.sort(unique_dates)
- train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(self.k)]
+ train_window_size = [
+ len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
+ ]
  valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]
  if len(valid_train_window_size) < self.k:
- logger.warn(f"Only {valid_train_window_size} backtests can be created")
- cut_offs = sorted_dates[-horizon - 1:-horizon * (self.k + 1):-horizon][:len(valid_train_window_size)]
+ logger.warning(f"Only {valid_train_window_size} backtests can be created")
+ cut_offs = sorted_dates[-horizon - 1 : -horizon * (self.k + 1) : -horizon][
+ : len(valid_train_window_size)
+ ]
  return cut_offs

- def generate_k_fold_data(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+ def generate_k_fold_data(
+ self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+ ):
  date_col = operator_config.spec.datetime_column.name
  horizon = operator_config.spec.horizon
  historical_data = datasets.historical_data.data.reset_index()
  series_col = DataColumns.Series
  group_counts = historical_data[series_col].value_counts()

- sample_count = max(self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio))
+ sample_count = max(
+ self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio)
+ )
  sampled_groups = group_counts.head(sample_count)
- sampled_historical_data = historical_data[historical_data[series_col].isin(sampled_groups.index)]
+ sampled_historical_data = historical_data[
+ historical_data[series_col].isin(sampled_groups.index)
+ ]

  min_group = group_counts.idxmin()
  min_series_data = historical_data[historical_data[series_col] == min_group]
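Note: `generate_cutoffs` above walks backwards from the newest date in steps of `horizon`, keeping at most `k` cutoffs that still leave at least `2 * horizon` of training history. A worked example of the slice with toy inputs (30 daily dates, horizon 5, k 3; names are local stand-ins for the method's attributes):

    # Worked example of the cutoff slice in generate_cutoffs (toy inputs).
    import numpy as np
    import pandas as pd

    unique_dates = pd.date_range("2024-01-01", periods=30, freq="D").values
    horizon, k = 5, 3

    sorted_dates = np.sort(unique_dates)
    train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(k)]  # [25, 20, 15]
    valid = [ws for ws in train_window_size if ws >= horizon * 2]                  # all >= 10, so 3 backtests
    cut_offs = sorted_dates[-horizon - 1 : -horizon * (k + 1) : -horizon][: len(valid)]
    # positions 24, 19, 14 -> 2024-01-25, 2024-01-20, 2024-01-15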
@@ -64,32 +76,62 @@ class ModelEvaluator:

  cut_offs = self.generate_cutoffs(unique_dates, horizon)
  if not len(cut_offs):
- raise InsufficientDataError("Insufficient data to evaluate multiple models. Please specify a model "
- "instead of using auto-select.")
- training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date
- in cut_offs]
- test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
+ raise InsufficientDataError(
+ "Insufficient data to evaluate multiple models. Please specify a model "
+ "instead of using auto-select."
+ )
+ training_datasets = [
+ sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
+ for cut_off_date in cut_offs
+ ]
+ test_datasets = [
+ sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]
+ ]
  for i, current in enumerate(cut_offs[1:]):
- test_datasets.append(sampled_historical_data[(current < sampled_historical_data[date_col]) & (
- sampled_historical_data[date_col] <= cut_offs[i])])
+ test_datasets.append(
+ sampled_historical_data[
+ (current < sampled_historical_data[date_col])
+ & (sampled_historical_data[date_col] <= cut_offs[i])
+ ]
+ )
  all_additional = datasets.additional_data.data.reset_index()
- sampled_additional_data = all_additional[all_additional[series_col].isin(sampled_groups.index)]
+ sampled_additional_data = all_additional[
+ all_additional[series_col].isin(sampled_groups.index)
+ ]
  max_historical_date = sampled_historical_data[date_col].max()
- additional_data = [sampled_additional_data[sampled_additional_data[date_col] <= max_historical_date]]
+ additional_data = [
+ sampled_additional_data[
+ sampled_additional_data[date_col] <= max_historical_date
+ ]
+ ]
  for cut_off in cut_offs[:-1]:
- trimmed_additional_data = sampled_additional_data[sampled_additional_data[date_col] <= cut_off]
+ trimmed_additional_data = sampled_additional_data[
+ sampled_additional_data[date_col] <= cut_off
+ ]
  additional_data.append(trimmed_additional_data)
  return cut_offs, training_datasets, additional_data, test_datasets

  def remove_none_values(self, obj):
  if isinstance(obj, dict):
- return {k: self.remove_none_values(v) for k, v in obj.items() if k is not None and v is not None}
+ return {
+ k: self.remove_none_values(v)
+ for k, v in obj.items()
+ if k is not None and v is not None
+ }
  else:
  return obj

- def create_operator_config(self, operator_config, backtest, model, historical_data, additional_data, test_data):
+ def create_operator_config(
+ self,
+ operator_config,
+ backtest,
+ model,
+ historical_data,
+ additional_data,
+ test_data,
+ ):
  output_dir = operator_config.spec.output_directory.url
- output_file_path = f'{output_dir}/back_testing/{model}/{backtest}'
+ output_file_path = f"{output_dir}/back_testing/{model}/{backtest}"
  Path(output_file_path).mkdir(parents=True, exist_ok=True)
  backtest_op_config_draft = operator_config.to_dict()
  backtest_spec = backtest_op_config_draft["spec"]
@@ -99,62 +141,102 @@ class ModelEvaluator:
  backtest_spec.pop("historical_data")
  backtest_spec["generate_report"] = False
  backtest_spec["model"] = model
- backtest_spec['model_kwargs'] = None
+ backtest_spec["model_kwargs"] = None
  backtest_spec["output_directory"] = {"url": output_file_path}
  backtest_spec["target_category_columns"] = [DataColumns.Series]
- backtest_spec['generate_explanations'] = False
+ backtest_spec["generate_explanations"] = False
  cleaned_config = self.remove_none_values(backtest_op_config_draft)

- backtest_op_config = ForecastOperatorConfig.from_dict(
- obj_dict=cleaned_config)
+ backtest_op_config = ForecastOperatorConfig.from_dict(obj_dict=cleaned_config)
  return backtest_op_config

- def run_all_models(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
- cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(datasets, operator_config)
+ def run_all_models(
+ self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+ ):
+ cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
+ datasets, operator_config
+ )
  metrics = {}
  date_col = operator_config.spec.datetime_column.name
  for model in self.models:
  from .model.factory import ForecastOperatorModelFactory
+
  metrics[model] = {}
  for i in range(len(cut_offs)):
  try:
- backtest_historical_data = train_sets[i].set_index([date_col, DataColumns.Series])
- backtest_additional_data = additional_data[i].set_index([date_col, DataColumns.Series])
- backtest_test_data = test_sets[i].set_index([date_col, DataColumns.Series])
- backtest_operator_config = self.create_operator_config(operator_config, i, model,
- backtest_historical_data,
- backtest_additional_data,
- backtest_test_data)
- datasets = ForecastDatasets(backtest_operator_config,
- backtest_historical_data,
- backtest_additional_data,
- backtest_test_data)
+ backtest_historical_data = train_sets[i].set_index(
+ [date_col, DataColumns.Series]
+ )
+ backtest_additional_data = additional_data[i].set_index(
+ [date_col, DataColumns.Series]
+ )
+ backtest_test_data = test_sets[i].set_index(
+ [date_col, DataColumns.Series]
+ )
+ backtest_operator_config = self.create_operator_config(
+ operator_config,
+ i,
+ model,
+ backtest_historical_data,
+ backtest_additional_data,
+ backtest_test_data,
+ )
+ datasets = ForecastDatasets(
+ backtest_operator_config,
+ backtest_historical_data,
+ backtest_additional_data,
+ backtest_test_data,
+ )
  ForecastOperatorModelFactory.get_model(
  backtest_operator_config, datasets
  ).generate_report()
- test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
+ test_metrics_filename = (
+ backtest_operator_config.spec.test_metrics_filename
+ )
  metrics_df = pd.read_csv(
- f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}")
- metrics_df["average_across_series"] = metrics_df.drop('metrics', axis=1).mean(axis=1)
- metrics_average_dict = dict(zip(metrics_df['metrics'].str.lower(), metrics_df['average_across_series']))
- metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
+ f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
+ )
+ metrics_df["average_across_series"] = metrics_df.drop(
+ "metrics", axis=1
+ ).mean(axis=1)
+ metrics_average_dict = dict(
+ zip(
+ metrics_df["metrics"].str.lower(),
+ metrics_df["average_across_series"],
+ )
+ )
+ metrics[model][i] = metrics_average_dict[
+ operator_config.spec.metric
+ ]
  except:
- logger.warn(f"Failed to calculate metrics for {model} and {i} backtest")
+ logger.warning(
+ f"Failed to calculate metrics for {model} and {i} backtest"
+ )
  return metrics

- def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+ def find_best_model(
+ self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+ ):
  try:
  metrics = self.run_all_models(datasets, operator_config)
  except InsufficientDataError as e:
  model = SupportedModels.Prophet
- logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
+ logger.error(
+ f"Running {model} model as auto-select failed with the following error: {e.message}"
+ )
  return model
- nonempty_metrics = {model: metric for model, metric in metrics.items() if metric != {}}
- avg_backtests_metric = {model: sum(value.values()) / len(value.values())
- for model, value in nonempty_metrics.items()}
+ nonempty_metrics = {
+ model: metric for model, metric in metrics.items() if metric != {}
+ }
+ avg_backtests_metric = {
+ model: sum(value.values()) / len(value.values())
+ for model, value in nonempty_metrics.items()
+ }
  best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
- logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
- backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis('backtest')
+ logger.info(
+ f"Among models {self.models}, {best_model} model shows better performance during backtesting."
+ )
+ backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis("backtest")
  backtest_stats["metric"] = operator_config.spec.metric
  backtest_stats.reset_index(inplace=True)
  output_dir = operator_config.spec.output_directory.url
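Note: `find_best_model` above averages each model's metric over the backtests that completed and then takes the minimum, which presumes a lower-is-better metric such as SMAPE or RMSE. An illustrative sketch of that selection step with made-up backtest results:

    # Illustrative only: averaging per-backtest metrics and picking the best model.
    metrics = {
        "prophet": {0: 12.1, 1: 10.4, 2: 11.0},
        "arima": {0: 14.8, 1: 13.2},   # one backtest failed, so only two entries
        "automlx": {},                 # all backtests failed -> dropped below
    }
    nonempty = {m: v for m, v in metrics.items() if v != {}}
    avg = {m: sum(v.values()) / len(v.values()) for m, v in nonempty.items()}
    best_model = min(avg, key=avg.get)   # "prophet" (average ~11.2 vs 14.0)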
ads/opctl/operator/lowcode/forecast/operator_config.py

@@ -1,6 +1,6 @@
  #!/usr/bin/env python

- # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+ # Copyright (c) 2023, 2025 Oracle and/or its affiliates.
  # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

  import os
@@ -18,9 +18,11 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname

  from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels

+
  @dataclass
  class AutoScaling(DataClassSerializable):
  """Class representing simple autoscaling policy"""
+
  minimum_instance: int = 1
  maximum_instance: int = None
  cool_down_in_seconds: int = 600
@@ -28,9 +30,11 @@ class AutoScaling(DataClassSerializable):
  scale_out_threshold: int = 80
  scaling_metric: str = "CPU_UTILIZATION"

+
  @dataclass(repr=True)
  class ModelDeploymentServer(DataClassSerializable):
  """Class representing model deployment server specification for whatif-analysis."""
+
  display_name: str = None
  initial_shape: str = None
  description: str = None
@@ -42,10 +46,13 @@ class ModelDeploymentServer(DataClassSerializable):
  @dataclass(repr=True)
  class WhatIfAnalysis(DataClassSerializable):
  """Class representing operator specification for whatif-analysis."""
+
  model_display_name: str = None
  compartment_id: str = None
  project_id: str = None
- model_deployment: ModelDeploymentServer = field(default_factory=ModelDeploymentServer)
+ model_deployment: ModelDeploymentServer = field(
+ default_factory=ModelDeploymentServer
+ )


  @dataclass(repr=True)
@@ -106,8 +113,11 @@ class ForecastOperatorSpec(DataClassSerializable):
  datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
  target_category_columns: List[str] = field(default_factory=list)
  generate_report: bool = None
+ generate_forecast_file: bool = None
  generate_metrics: bool = None
+ generate_metrics_file: bool = None
  generate_explanations: bool = None
+ generate_explanation_files: bool = None
  explanations_accuracy_mode: str = None
  horizon: int = None
  model: str = None
@@ -126,7 +136,7 @@ class ForecastOperatorSpec(DataClassSerializable):
  self.output_directory = self.output_directory or OutputDirectory(
  url=find_output_dirname(self.output_directory)
  )
- self.generate_model_pickle = True if self.generate_model_pickle or self.what_if_analysis else False
+ self.generate_model_pickle = self.generate_model_pickle or self.what_if_analysis
  self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
  self.model = self.model or SupportedModels.Prophet
  self.confidence_interval_width = self.confidence_interval_width or 0.80
@@ -144,6 +154,21 @@ class ForecastOperatorSpec(DataClassSerializable):
  self.generate_metrics = (
  self.generate_metrics if self.generate_metrics is not None else True
  )
+ self.generate_metrics_file = (
+ self.generate_metrics_file
+ if self.generate_metrics_file is not None
+ else True
+ )
+ self.generate_forecast_file = (
+ self.generate_forecast_file
+ if self.generate_forecast_file is not None
+ else True
+ )
+ self.generate_explanation_files = (
+ self.generate_explanation_files
+ if self.generate_explanation_files is not None
+ else True
+ )
  # For Explanations Generation. When user doesn't specify defaults to False
  self.generate_explanations = (
  self.generate_explanations
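Note: the new generate_metrics_file / generate_forecast_file / generate_explanation_files flags default to True only when left unset; the explicit `is not None` check preserves a user-supplied False, which a plain `or True` would silently override. A two-line illustration of that pattern:

    # Why the explicit None check matters: a user-supplied False must survive.
    generate_forecast_file = False
    generate_forecast_file = (
        generate_forecast_file if generate_forecast_file is not None else True
    )  # stays False; `generate_forecast_file or True` would flip it to True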
@@ -164,6 +189,7 @@ class ForecastOperatorSpec(DataClassSerializable):
  if self.generate_model_pickle is not None
  else False
  )
+ self.report_title = self.report_title or "Forecast Report"
  self.report_theme = self.report_theme or "light"
  self.metrics_filename = self.metrics_filename or "metrics.csv"
  self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv"