oracle-ads 2.13.2__py3-none-any.whl → 2.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/app.py +2 -1
- ads/aqua/evaluation/evaluation.py +11 -10
- ads/aqua/finetuning/finetuning.py +2 -3
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +3 -3
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +1 -1
- ads/opctl/operator/lowcode/anomaly/utils.py +1 -1
- ads/opctl/operator/lowcode/common/transformations.py +5 -1
- ads/opctl/operator/lowcode/common/utils.py +7 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +15 -10
- ads/opctl/operator/lowcode/forecast/model/automlx.py +31 -9
- ads/opctl/operator/lowcode/forecast/model/autots.py +7 -5
- ads/opctl/operator/lowcode/forecast/model/base_model.py +127 -101
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +14 -6
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +2 -2
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +46 -32
- ads/opctl/operator/lowcode/forecast/model/prophet.py +82 -29
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +136 -54
- ads/opctl/operator/lowcode/forecast/operator_config.py +29 -3
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +103 -58
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/METADATA +1 -1
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/RECORD +24 -24
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/WHEEL +0 -0
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/entry_points.txt +0 -0
- {oracle_ads-2.13.2.dist-info → oracle_ads-2.13.3.dist-info}/licenses/LICENSE.txt +0 -0
ads/opctl/operator/lowcode/forecast/model/prophet.py:

```diff
@@ -22,7 +22,6 @@ from ads.opctl.operator.lowcode.forecast.utils import (
 from ..const import (
     DEFAULT_TRIALS,
     PROPHET_INTERNAL_DATE_COL,
-    ForecastOutputColumns,
     SupportedModels,
 )
 from .base_model import ForecastOperatorBaseModel
```
```diff
@@ -44,12 +43,23 @@ def _fit_model(data, params, additional_regressors):
     from prophet import Prophet
 
     monthly_seasonality = params.pop("monthly_seasonality", False)
+    data_floor = params.pop("min", None)
+    data_cap = params.pop("max", None)
+    if data_cap or data_floor:
+        params["growth"] = "logistic"
     model = Prophet(**params)
     if monthly_seasonality:
         model.add_seasonality(name="monthly", period=30.5, fourier_order=5)
         params["monthly_seasonality"] = monthly_seasonality
     for add_reg in additional_regressors:
         model.add_regressor(add_reg)
+    if data_floor:
+        data["floor"] = float(data_floor)
+        params["floor"] = data_floor
+    if data_cap:
+        data["cap"] = float(data_cap)
+        params["cap"] = data_cap
+
     model.fit(data)
     return model
 
```
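Note: the new `min`/`max` model kwargs map onto Prophet's saturating-forecast API. With `growth="logistic"`, Prophet requires a `cap` column (and accepts a `floor` column) on both the training frame and the future frame. A minimal standalone sketch of that behavior, using synthetic data rather than the operator's code path:

```python
import pandas as pd
from prophet import Prophet

# Synthetic daily series that should stay between 0 and 100.
df = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=90, freq="D")})
df["y"] = 50 + 30 * (df.index / len(df))

# Logistic growth needs cap/floor columns on the training data ...
df["cap"] = 100.0
df["floor"] = 0.0
m = Prophet(growth="logistic")
m.fit(df)

# ... and on the future frame as well, or predict() fails.
future = m.make_future_dataframe(periods=30)
future["cap"] = 100.0
future["floor"] = 0.0
forecast = m.predict(future)
```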
```diff
@@ -112,6 +122,41 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
                 upper_bound=self.get_horizon(forecast["yhat_upper"]).values,
                 lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
             )
+            # Get all features that make up the forecast. Exclude CI (upper/lower)
+            core_columns = forecast.columns[
+                ~forecast.columns.str.endswith("_lower")
+                & ~forecast.columns.str.endswith("_upper")
+            ]
+            core_columns = set(core_columns) - {
+                "additive_terms",
+                "extra_regressors_additive",
+                "multiplicative_terms",
+                "extra_regressors_multiplicative",
+                "cap",
+                "floor",
+                "yhat",
+            }
+            combine_terms = list(
+                core_columns.intersection(
+                    {
+                        "trend",
+                        "daily",
+                        "weekly",
+                        "yearly",
+                        "monthly",
+                        "holidays",
+                        "zeros",
+                    }
+                )
+            )
+
+            temp_df = (
+                forecast[list(core_columns)]
+                .rename({"ds": "Date"}, axis=1)
+                .set_index("Date")
+            )
+            temp_df[self.spec.target_column] = temp_df[combine_terms].sum(axis=1)
+            self.explanations_info[series_id] = temp_df.drop(combine_terms, axis=1)
 
             self.models[series_id] = {}
             self.models[series_id]["model"] = model
```
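The new explanation bookkeeping reuses Prophet's own additive decomposition as feature attributions: trend and seasonal components are folded back into the target column, while each extra regressor keeps its own contribution column. A small illustration with a hypothetical forecast frame (column names here are examples, not the operator's data):

```python
import pandas as pd

# Hypothetical Prophet-style component columns for three forecast dates.
forecast = pd.DataFrame({
    "ds": pd.date_range("2024-01-01", periods=3, freq="D"),
    "trend": [10.0, 10.5, 11.0],
    "weekly": [1.0, -0.5, 0.2],
    "temperature": [2.0, 2.2, 1.8],  # an additional regressor
})
combine_terms = ["trend", "weekly"]

expl = forecast.rename({"ds": "Date"}, axis=1).set_index("Date")
# Baseline terms collapse into the target column; regressors stay
# as per-feature contributions.
expl["target"] = expl[combine_terms].sum(axis=1)
expl = expl.drop(combine_terms, axis=1)
print(expl)  # columns: temperature, target
```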
```diff
@@ -133,13 +178,14 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
                 "error": str(e),
                 "error_trace": traceback.format_exc(),
             }
-            logger.warn(f"Encountered Error: {e}. Skipping.")
-            logger.warn(traceback.format_exc())
+            logger.warning(f"Encountered Error: {e}. Skipping.")
+            logger.warning(traceback.format_exc())
 
     def _build_model(self) -> pd.DataFrame:
         full_data_dict = self.datasets.get_data_by_series()
         self.models = {}
         self.outputs = {}
+        self.explanations_info = {}
         self.additional_regressors = self.datasets.get_additional_data_column_names()
         model_kwargs = self.set_kwargs()
         self.forecast_output = ForecastOutput(
```
```diff
@@ -149,9 +195,6 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
             dt_column=self.spec.datetime_column.name,
         )
 
-        # if os.environ["OCI__IS_SPARK"]:
-        # pass
-        # else:
         Parallel(n_jobs=-1, require="sharedmem")(
             delayed(ProphetOperatorModel._train_model)(
                 self, i, series_id, df, model_kwargs.copy()
```
```diff
@@ -222,7 +265,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
         try:
             return np.mean(df_p[self.spec.metric])
         except KeyError:
-            logger.warn(
+            logger.warning(
                 f"Could not find the metric {self.spec.metric} within "
                 f"the performance metrics: {df_p.columns}. Defaulting to `rmse`"
             )
```
```diff
@@ -249,6 +292,25 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
         model_kwargs_i = study.best_params
         return model_kwargs_i
 
+    def explain_model(self):
+        self.local_explanation = {}
+        global_expl = []
+
+        for s_id, expl_df in self.explanations_info.items():
+            # Local Expl
+            self.local_explanation[s_id] = self.get_horizon(expl_df)
+            self.local_explanation[s_id]["Series"] = s_id
+            self.local_explanation[s_id].index.rename(self.dt_column_name, inplace=True)
+            # Global Expl
+            g_expl = self.drop_horizon(expl_df).mean()
+            g_expl.name = s_id
+            global_expl.append(g_expl)
+        self.global_explanation = pd.concat(global_expl, axis=1)
+        self.formatted_global_explanation = (
+            self.global_explanation / self.global_explanation.sum(axis=0) * 100
+        )
+        self.formatted_local_explanation = pd.concat(self.local_explanation.values())
+
     def _generate_report(self):
         import report_creator as rc
         from prophet.plot import add_changepoints_to_plot
```
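The rewritten `explain_model` normalizes each series' mean component contributions column-wise into percentages. An illustrative sketch with made-up numbers:

```python
import pandas as pd

# Hypothetical mean contributions per component, one column per series.
global_explanation = pd.DataFrame(
    {"series_a": [3.0, 1.0], "series_b": [2.0, 6.0]},
    index=["temperature", "holiday_effect"],
)
# Scale each column so the component shares sum to 100%.
formatted = global_explanation / global_explanation.sum(axis=0) * 100
print(formatted)
#                 series_a  series_b
# temperature         75.0      25.0
# holiday_effect      25.0      75.0
```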
```diff
@@ -274,7 +336,9 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
         )
 
         sec2 = _select_plot_list(
-            lambda s_id: self.models[s_id]["model"].plot_components(self.outputs[s_id]),
+            lambda s_id: self.models[s_id]["model"].plot_components(
+                self.outputs[s_id]
+            ),
             series_ids=series_ids,
             target_category_column=self.target_cat_col,
         )
```
```diff
@@ -283,11 +347,14 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
         )
 
         sec3_figs = {
-            s_id: self.models[s_id]["model"].plot(self.outputs[s_id]) for s_id in series_ids
+            s_id: self.models[s_id]["model"].plot(self.outputs[s_id])
+            for s_id in series_ids
         }
         for s_id in series_ids:
             add_changepoints_to_plot(
-                sec3_figs[s_id].gca(), self.models[s_id]["model"], self.outputs[s_id]
+                sec3_figs[s_id].gca(),
+                self.models[s_id]["model"],
+                self.outputs[s_id],
             )
         sec3 = _select_plot_list(
             lambda s_id: sec3_figs[s_id],
```
```diff
@@ -322,22 +389,6 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
                 # If the key is present, call the "explain_model" method
                 self.explain_model()
 
-                # Convert the global explanation data to a DataFrame
-                global_explanation_df = pd.DataFrame(self.global_explanation)
-
-                self.formatted_global_explanation = (
-                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
-                )
-
-                aggregate_local_explanations = pd.DataFrame()
-                for s_id, local_ex_df in self.local_explanation.items():
-                    local_ex_df_copy = local_ex_df.copy()
-                    local_ex_df_copy[ForecastOutputColumns.SERIES] = s_id
-                    aggregate_local_explanations = pd.concat(
-                        [aggregate_local_explanations, local_ex_df_copy], axis=0
-                    )
-                self.formatted_local_explanation = aggregate_local_explanations
-
                 if not self.target_cat_col:
                     self.formatted_global_explanation = (
                         self.formatted_global_explanation.rename(
```
```diff
@@ -351,7 +402,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
 
                 # Create a markdown section for the global explainability
                 global_explanation_section = rc.Block(
-                    rc.Heading("Global
+                    rc.Heading("Global Explainability", level=2),
                     rc.Text(
                         "The following tables provide the feature attribution for the global explainability."
                     ),
```
```diff
@@ -360,7 +411,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
 
                 blocks = [
                     rc.DataTable(
-                        local_ex_df.
+                        local_ex_df.drop("Series", axis=1),
                         label=s_id if self.target_cat_col else None,
                         index=True,
                     )
```
```diff
@@ -378,8 +429,10 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
                 ]
             except Exception as e:
                 # Do not fail the whole run due to explanations failure
-                logger.warn(f"Failed to generate Explanations with error: {e}.")
+                logger.warning(f"Failed to generate Explanations with error: {e}.")
                 logger.debug(f"Full Traceback: {traceback.format_exc()}")
+                self.errors_dict["explainer_error"] = str(e)
+                self.errors_dict["explainer_error_error"] = traceback.format_exc()
 
         model_description = rc.Text(
             """Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well."""
```
ads/opctl/operator/lowcode/forecast/model_evaluator.py:

```diff
@@ -1,20 +1,21 @@
-# -*- coding: utf-8; -*-
-
 # Copyright (c) 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
-from pathlib import Path
 
 from ads.opctl import logger
 from ads.opctl.operator.lowcode.common.const import DataColumns
+from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
 from ads.opctl.operator.lowcode.forecast.const import BACKTEST_REPORT_NAME
+from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
+
 from .model.forecast_datasets import ForecastDatasets
 from .operator_config import ForecastOperatorConfig
-
-from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
+
 
 class ModelEvaluator:
     """
```
```diff
@@ -23,6 +24,7 @@ class ModelEvaluator:
     This class is responsible for comparing different models or frameworks based on specified evaluation
     metrics and returning the best-performing option.
     """
+
     def __init__(self, models, k=5, subsample_ratio=0.20):
         """
         Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.
```
```diff
@@ -40,23 +42,33 @@ class ModelEvaluator:
 
     def generate_cutoffs(self, unique_dates, horizon):
         sorted_dates = np.sort(unique_dates)
-        train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(self.k)]
+        train_window_size = [
+            len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
+        ]
         valid_train_window_size = [ws for ws in train_window_size if ws >= horizon * 2]
         if len(valid_train_window_size) < self.k:
-            logger.warn(f"Only {valid_train_window_size} backtests can be created")
-        cut_offs = sorted_dates[-horizon - 1:-horizon * (self.k + 1):-horizon][:len(valid_train_window_size)]
+            logger.warning(f"Only {valid_train_window_size} backtests can be created")
+        cut_offs = sorted_dates[-horizon - 1 : -horizon * (self.k + 1) : -horizon][
+            : len(valid_train_window_size)
+        ]
         return cut_offs
 
-    def generate_k_fold_data(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+    def generate_k_fold_data(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         date_col = operator_config.spec.datetime_column.name
         horizon = operator_config.spec.horizon
         historical_data = datasets.historical_data.data.reset_index()
         series_col = DataColumns.Series
         group_counts = historical_data[series_col].value_counts()
 
-        sample_count = max(self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio))
+        sample_count = max(
+            self.minimum_sample_count, int(len(group_counts) * self.subsample_ratio)
+        )
         sampled_groups = group_counts.head(sample_count)
-        sampled_historical_data = historical_data[historical_data[series_col].isin(sampled_groups.index)]
+        sampled_historical_data = historical_data[
+            historical_data[series_col].isin(sampled_groups.index)
+        ]
 
         min_group = group_counts.idxmin()
         min_series_data = historical_data[historical_data[series_col] == min_group]
```
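For reference, `generate_cutoffs` steps backwards from the end of the series in strides of `horizon` and keeps only cutoffs whose training window spans at least `2 * horizon` points. A standalone sketch with stand-in dates:

```python
import numpy as np

horizon, k = 3, 5
sorted_dates = np.arange(1, 21)  # stand-in for 20 sorted unique dates

train_window_size = [len(sorted_dates) - (i + 1) * horizon for i in range(k)]
valid = [ws for ws in train_window_size if ws >= horizon * 2]
# Step backwards by `horizon` from the last in-sample date, then keep
# only as many cutoffs as there are valid training windows.
cut_offs = sorted_dates[-horizon - 1 : -horizon * (k + 1) : -horizon][: len(valid)]
print(train_window_size)  # [17, 14, 11, 8, 5]
print(cut_offs)           # [17 14 11  8] -> 4 backtests instead of 5
```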
```diff
@@ -64,32 +76,62 @@ class ModelEvaluator:
 
         cut_offs = self.generate_cutoffs(unique_dates, horizon)
         if not len(cut_offs):
-            raise InsufficientDataError(
-                "Insufficient data to evaluate multiple models. Please specify a model instead of using auto-select."
-            )
-        training_datasets = [sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date] for cut_off_date in cut_offs]
-        test_datasets = [sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]]
+            raise InsufficientDataError(
+                "Insufficient data to evaluate multiple models. Please specify a model "
+                "instead of using auto-select."
+            )
+        training_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
+            for cut_off_date in cut_offs
+        ]
+        test_datasets = [
+            sampled_historical_data[sampled_historical_data[date_col] > cut_offs[0]]
+        ]
         for i, current in enumerate(cut_offs[1:]):
-            test_datasets.append(
-                sampled_historical_data[(current < sampled_historical_data[date_col]) & (sampled_historical_data[date_col] <= cut_offs[i])])
+            test_datasets.append(
+                sampled_historical_data[
+                    (current < sampled_historical_data[date_col])
+                    & (sampled_historical_data[date_col] <= cut_offs[i])
+                ]
+            )
         all_additional = datasets.additional_data.data.reset_index()
-        sampled_additional_data = all_additional[all_additional[series_col].isin(sampled_groups.index)]
+        sampled_additional_data = all_additional[
+            all_additional[series_col].isin(sampled_groups.index)
+        ]
         max_historical_date = sampled_historical_data[date_col].max()
-        additional_data = [sampled_additional_data[sampled_additional_data[date_col] <= max_historical_date]]
+        additional_data = [
+            sampled_additional_data[
+                sampled_additional_data[date_col] <= max_historical_date
+            ]
+        ]
         for cut_off in cut_offs[:-1]:
-            trimmed_additional_data = sampled_additional_data[sampled_additional_data[date_col] <= cut_off]
+            trimmed_additional_data = sampled_additional_data[
+                sampled_additional_data[date_col] <= cut_off
+            ]
             additional_data.append(trimmed_additional_data)
         return cut_offs, training_datasets, additional_data, test_datasets
 
     def remove_none_values(self, obj):
         if isinstance(obj, dict):
-            return {k: self.remove_none_values(v) for k, v in obj.items() if k is not None and v is not None}
+            return {
+                k: self.remove_none_values(v)
+                for k, v in obj.items()
+                if k is not None and v is not None
+            }
         else:
             return obj
 
-    def create_operator_config(self, operator_config, backtest, model, historical_data, additional_data, test_data):
+    def create_operator_config(
+        self,
+        operator_config,
+        backtest,
+        model,
+        historical_data,
+        additional_data,
+        test_data,
+    ):
         output_dir = operator_config.spec.output_directory.url
-        output_file_path = f'{output_dir}/back_testing/{model}/{backtest}'
+        output_file_path = f"{output_dir}/back_testing/{model}/{backtest}"
         Path(output_file_path).mkdir(parents=True, exist_ok=True)
         backtest_op_config_draft = operator_config.to_dict()
         backtest_spec = backtest_op_config_draft["spec"]
```
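`remove_none_values` recursively strips `None` keys and values from nested dicts before the spec is re-hydrated via `from_dict`; note that it only descends into dicts, not lists. An equivalent standalone sketch:

```python
def remove_none_values(obj):
    # Recursively strip None keys/values from nested dicts (lists untouched).
    if isinstance(obj, dict):
        return {
            k: remove_none_values(v)
            for k, v in obj.items()
            if k is not None and v is not None
        }
    return obj

# Hypothetical spec fragment:
spec = {"model": "prophet", "model_kwargs": None, "tuning": {"n_trials": None, "seed": 7}}
print(remove_none_values(spec))
# {'model': 'prophet', 'tuning': {'seed': 7}}
```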
```diff
@@ -99,62 +141,102 @@ class ModelEvaluator:
         backtest_spec.pop("historical_data")
         backtest_spec["generate_report"] = False
         backtest_spec["model"] = model
-        backtest_spec['model_kwargs'] = None
+        backtest_spec["model_kwargs"] = None
         backtest_spec["output_directory"] = {"url": output_file_path}
         backtest_spec["target_category_columns"] = [DataColumns.Series]
-        backtest_spec['generate_explanations'] = False
+        backtest_spec["generate_explanations"] = False
         cleaned_config = self.remove_none_values(backtest_op_config_draft)
 
-        backtest_op_config = ForecastOperatorConfig.from_dict(
-            obj_dict=cleaned_config)
+        backtest_op_config = ForecastOperatorConfig.from_dict(obj_dict=cleaned_config)
         return backtest_op_config
 
-    def run_all_models(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
-        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(datasets, operator_config)
+    def run_all_models(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
+        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
+            datasets, operator_config
+        )
         metrics = {}
         date_col = operator_config.spec.datetime_column.name
         for model in self.models:
             from .model.factory import ForecastOperatorModelFactory
+
             metrics[model] = {}
             for i in range(len(cut_offs)):
                 try:
-                    backtest_historical_data = train_sets[i].set_index(
-                        [date_col, DataColumns.Series])
-                    backtest_additional_data = additional_data[i].set_index(
-                        [date_col, DataColumns.Series])
-                    backtest_test_data = test_sets[i].set_index(
-                        [date_col, DataColumns.Series])
-                    backtest_operator_config = self.create_operator_config(
-                        operator_config, i, model, backtest_historical_data,
-                        backtest_additional_data, backtest_test_data)
-                    datasets = ForecastDatasets(backtest_operator_config,
-                        backtest_historical_data, backtest_additional_data,
-                        backtest_test_data)
+                    backtest_historical_data = train_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_additional_data = additional_data[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_test_data = test_sets[i].set_index(
+                        [date_col, DataColumns.Series]
+                    )
+                    backtest_operator_config = self.create_operator_config(
+                        operator_config,
+                        i,
+                        model,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
+                    datasets = ForecastDatasets(
+                        backtest_operator_config,
+                        backtest_historical_data,
+                        backtest_additional_data,
+                        backtest_test_data,
+                    )
                     ForecastOperatorModelFactory.get_model(
                         backtest_operator_config, datasets
                     ).generate_report()
-                    test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
+                    test_metrics_filename = (
+                        backtest_operator_config.spec.test_metrics_filename
+                    )
                     metrics_df = pd.read_csv(
-                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}")
-                    metrics_df["average_across_series"] = metrics_df.drop("metrics", axis=1).mean(axis=1)
-                    metrics_average_dict = dict(zip(metrics_df["metrics"].str.lower(), metrics_df["average_across_series"]))
-                    metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
+                        f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}"
+                    )
+                    metrics_df["average_across_series"] = metrics_df.drop(
+                        "metrics", axis=1
+                    ).mean(axis=1)
+                    metrics_average_dict = dict(
+                        zip(
+                            metrics_df["metrics"].str.lower(),
+                            metrics_df["average_across_series"],
+                        )
+                    )
+                    metrics[model][i] = metrics_average_dict[
+                        operator_config.spec.metric
+                    ]
                 except:
-                    logger.warn(f"Failed to calculate metrics for {model} and {i} backtest")
+                    logger.warning(
+                        f"Failed to calculate metrics for {model} and {i} backtest"
+                    )
         return metrics
 
-    def find_best_model(self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig):
+    def find_best_model(
+        self, datasets: ForecastDatasets, operator_config: ForecastOperatorConfig
+    ):
         try:
             metrics = self.run_all_models(datasets, operator_config)
         except InsufficientDataError as e:
             model = SupportedModels.Prophet
-            logger.error(f"Running {model} model as auto-select failed with the following error: {e.message}")
+            logger.error(
+                f"Running {model} model as auto-select failed with the following error: {e.message}"
+            )
             return model
-        nonempty_metrics = {model: metric for model, metric in metrics.items() if metric != {}}
-        avg_backtests_metric = {model: sum(value.values()) / len(value.values())
-                                for model, value in nonempty_metrics.items()}
+        nonempty_metrics = {
+            model: metric for model, metric in metrics.items() if metric != {}
+        }
+        avg_backtests_metric = {
+            model: sum(value.values()) / len(value.values())
+            for model, value in nonempty_metrics.items()
+        }
         best_model = min(avg_backtests_metric, key=avg_backtests_metric.get)
-        logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
-        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis('backtest')
+        logger.info(
+            f"Among models {self.models}, {best_model} model shows better performance during backtesting."
+        )
+        backtest_stats = pd.DataFrame(nonempty_metrics).rename_axis("backtest")
         backtest_stats["metric"] = operator_config.spec.metric
         backtest_stats.reset_index(inplace=True)
         output_dir = operator_config.spec.output_directory.url
```
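`find_best_model` averages each model's per-backtest scores and takes the minimum, which assumes a lower-is-better metric (the spec defaults to SMAPE). A sketch with made-up scores:

```python
# Hypothetical backtest scores keyed by model, then by backtest index.
metrics = {
    "prophet": {0: 12.1, 1: 10.4, 2: 11.0},
    "arima": {0: 14.9, 1: 13.2, 2: 12.8},
    "automlx": {},  # failed on every backtest -> dropped
}
nonempty = {m: v for m, v in metrics.items() if v != {}}
avg = {m: sum(v.values()) / len(v.values()) for m, v in nonempty.items()}
best_model = min(avg, key=avg.get)
print(best_model)  # 'prophet' (avg ~11.17 vs arima's ~13.63)
```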
ads/opctl/operator/lowcode/forecast/operator_config.py:

```diff
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import os
```
```diff
@@ -18,9 +18,11 @@ from ads.opctl.operator.lowcode.common.utils import find_output_dirname
 
 from .const import SpeedAccuracyMode, SupportedMetrics, SupportedModels
 
+
 @dataclass
 class AutoScaling(DataClassSerializable):
     """Class representing simple autoscaling policy"""
+
     minimum_instance: int = 1
     maximum_instance: int = None
     cool_down_in_seconds: int = 600
```
```diff
@@ -28,9 +30,11 @@ class AutoScaling(DataClassSerializable):
     scale_out_threshold: int = 80
     scaling_metric: str = "CPU_UTILIZATION"
 
+
 @dataclass(repr=True)
 class ModelDeploymentServer(DataClassSerializable):
     """Class representing model deployment server specification for whatif-analysis."""
+
     display_name: str = None
     initial_shape: str = None
     description: str = None
```
```diff
@@ -42,10 +46,13 @@ class ModelDeploymentServer(DataClassSerializable):
 @dataclass(repr=True)
 class WhatIfAnalysis(DataClassSerializable):
     """Class representing operator specification for whatif-analysis."""
+
     model_display_name: str = None
     compartment_id: str = None
     project_id: str = None
-    model_deployment: ModelDeploymentServer = field(default_factory=ModelDeploymentServer)
+    model_deployment: ModelDeploymentServer = field(
+        default_factory=ModelDeploymentServer
+    )
 
 
 @dataclass(repr=True)
```
```diff
@@ -106,8 +113,11 @@ class ForecastOperatorSpec(DataClassSerializable):
     datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn)
     target_category_columns: List[str] = field(default_factory=list)
     generate_report: bool = None
+    generate_forecast_file: bool = None
     generate_metrics: bool = None
+    generate_metrics_file: bool = None
     generate_explanations: bool = None
+    generate_explanation_files: bool = None
     explanations_accuracy_mode: str = None
     horizon: int = None
     model: str = None
```
```diff
@@ -126,7 +136,7 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.output_directory = self.output_directory or OutputDirectory(
             url=find_output_dirname(self.output_directory)
         )
-        self.generate_model_pickle =
+        self.generate_model_pickle = self.generate_model_pickle or self.what_if_analysis
         self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
         self.model = self.model or SupportedModels.Prophet
         self.confidence_interval_width = self.confidence_interval_width or 0.80
```
```diff
@@ -144,6 +154,21 @@ class ForecastOperatorSpec(DataClassSerializable):
         self.generate_metrics = (
             self.generate_metrics if self.generate_metrics is not None else True
         )
+        self.generate_metrics_file = (
+            self.generate_metrics_file
+            if self.generate_metrics_file is not None
+            else True
+        )
+        self.generate_forecast_file = (
+            self.generate_forecast_file
+            if self.generate_forecast_file is not None
+            else True
+        )
+        self.generate_explanation_files = (
+            self.generate_explanation_files
+            if self.generate_explanation_files is not None
+            else True
+        )
         # For Explanations Generation. When user doesn't specify defaults to False
         self.generate_explanations = (
             self.generate_explanations
```
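The `x if x is not None else True` pattern matters for boolean flags: unlike `x or True`, it preserves an explicit `False` from the user's config. A standalone illustration:

```python
# Why `is not None` instead of `or` when defaulting boolean flags:
for flag in (None, False, True):
    with_or = flag or True                       # loses an explicit False
    with_is = flag if flag is not None else True
    print(flag, with_or, with_is)
# None  True  True
# False True  False   <- `or` would silently re-enable the feature
# True  True  True
```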
```diff
@@ -164,6 +189,7 @@ class ForecastOperatorSpec(DataClassSerializable):
             if self.generate_model_pickle is not None
             else False
         )
+        self.report_title = self.report_title or "Forecast Report"
         self.report_theme = self.report_theme or "light"
         self.metrics_filename = self.metrics_filename or "metrics.csv"
         self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv"
```