oracle-ads 2.11.9__py3-none-any.whl → 2.11.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +1 -1
- ads/aqua/{base.py → app.py} +27 -7
- ads/aqua/cli.py +59 -17
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/{decorator.py → common/decorator.py} +14 -8
- ads/aqua/common/enums.py +69 -0
- ads/aqua/{exception.py → common/errors.py} +28 -0
- ads/aqua/{utils.py → common/utils.py} +168 -77
- ads/aqua/config/config.py +18 -0
- ads/aqua/constants.py +51 -33
- ads/aqua/data.py +15 -26
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +170 -0
- ads/aqua/evaluation/errors.py +71 -0
- ads/aqua/{evaluation.py → evaluation/evaluation.py} +122 -370
- ads/aqua/extension/__init__.py +2 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +97 -0
- ads/aqua/extension/base_handler.py +0 -7
- ads/aqua/extension/common_handler.py +12 -6
- ads/aqua/extension/deployment_handler.py +70 -4
- ads/aqua/extension/errors.py +10 -0
- ads/aqua/extension/evaluation_handler.py +5 -3
- ads/aqua/extension/evaluation_ws_msg_handler.py +43 -0
- ads/aqua/extension/finetune_handler.py +41 -3
- ads/aqua/extension/model_handler.py +56 -4
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +69 -0
- ads/aqua/extension/ui_handler.py +65 -4
- ads/aqua/extension/ui_websocket_handler.py +124 -0
- ads/aqua/extension/utils.py +1 -1
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +17 -0
- ads/aqua/finetuning/entities.py +102 -0
- ads/aqua/{finetune.py → finetuning/finetuning.py} +162 -136
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +46 -0
- ads/aqua/model/entities.py +266 -0
- ads/aqua/model/enums.py +26 -0
- ads/aqua/{model.py → model/model.py} +401 -309
- ads/aqua/modeldeployment/__init__.py +8 -0
- ads/aqua/modeldeployment/constants.py +26 -0
- ads/aqua/{deployment.py → modeldeployment/deployment.py} +288 -227
- ads/aqua/modeldeployment/entities.py +142 -0
- ads/aqua/modeldeployment/inference.py +75 -0
- ads/aqua/ui.py +88 -8
- ads/cli.py +55 -7
- ads/common/serializer.py +2 -2
- ads/config.py +2 -1
- ads/jobs/builders/infrastructure/dsc_job.py +49 -6
- ads/model/datascience_model.py +1 -1
- ads/model/deployment/model_deployment.py +11 -0
- ads/model/model_metadata.py +17 -6
- ads/opctl/operator/lowcode/anomaly/README.md +0 -2
- ads/opctl/operator/lowcode/anomaly/__main__.py +3 -3
- ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -2
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +2 -2
- ads/opctl/operator/lowcode/anomaly/model/autots.py +1 -1
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +13 -17
- ads/opctl/operator/lowcode/anomaly/operator_config.py +2 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +1 -2
- ads/opctl/operator/lowcode/anomaly/utils.py +3 -2
- ads/opctl/operator/lowcode/common/transformations.py +2 -1
- ads/opctl/operator/lowcode/common/utils.py +1 -1
- ads/opctl/operator/lowcode/forecast/README.md +1 -3
- ads/opctl/operator/lowcode/forecast/__main__.py +3 -18
- ads/opctl/operator/lowcode/forecast/const.py +2 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +1 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +1 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +7 -4
- ads/opctl/operator/lowcode/forecast/model/autots.py +1 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +38 -22
- ads/opctl/operator/lowcode/forecast/model/factory.py +33 -4
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +15 -1
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +234 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +9 -1
- ads/opctl/operator/lowcode/forecast/model/prophet.py +1 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +147 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +2 -1
- ads/opctl/operator/lowcode/forecast/schema.yaml +7 -2
- ads/opctl/operator/lowcode/forecast/utils.py +18 -44
- {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.10.dist-info}/METADATA +9 -12
- {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.10.dist-info}/RECORD +86 -61
- ads/aqua/job.py +0 -29
- {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.10.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.10.dist-info}/WHEEL +0 -0
- {oracle_ads-2.11.9.dist-info → oracle_ads-2.11.10.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,234 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*--
|
3
|
+
|
4
|
+
# Copyright (c) 2024 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
import pandas as pd
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from ads.opctl import logger
|
10
|
+
from ads.common.decorator import runtime_dependency
|
11
|
+
from ads.opctl.operator.lowcode.forecast.utils import _select_plot_list
|
12
|
+
from .base_model import ForecastOperatorBaseModel
|
13
|
+
from .forecast_datasets import ForecastDatasets, ForecastOutput
|
14
|
+
from ..operator_config import ForecastOperatorConfig
|
15
|
+
from ..const import ForecastOutputColumns, SupportedModels
|
16
|
+
|
17
|
+
|
18
|
+
class MLForecastOperatorModel(ForecastOperatorBaseModel):
    """Class representing MLForecast operator model.

    Fits one ``MLForecast`` object holding three LightGBM regressors: a point
    forecast model named ``"forecast"`` and two quantile-objective models named
    ``"upper"`` and ``"lower"`` whose predictions are used as the bounds.
    """

    def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
        """Initialize the model and empty explanation containers.

        Parameters
        ----------
        config: ForecastOperatorConfig
            Operator configuration; ``config.spec.datetime_column.name`` is
            stored as ``self.date_col``.
        datasets: ForecastDatasets
            Datasets handed to the base class.
        """
        super().__init__(config=config, datasets=datasets)
        self.global_explanation = {}
        self.local_explanation = {}
        self.formatted_global_explanation = None
        self.formatted_local_explanation = None
        self.date_col = config.spec.datetime_column.name

    def set_kwargs(self):
        """
        Returns the model parameters.

        The quantile levels are derived from ``spec.confidence_interval_width``
        (centered on 0.5, rounded to two decimals) and written into
        ``spec.model_kwargs`` in place before it is returned.
        """
        model_kwargs = self.spec.model_kwargs

        half_width = self.spec.confidence_interval_width / 2
        upper_quantile = round(0.5 + half_width, 2)
        lower_quantile = round(0.5 - half_width, 2)

        model_kwargs["lower_quantile"] = lower_quantile
        # NOTE: the key is misspelled ("uppper") but it is a runtime key that
        # `_train_model` reads back, so it is kept as-is.
        model_kwargs["uppper_quantile"] = upper_quantile
        return model_kwargs

    def preprocess(self, df, series_id):
        """No-op: this model applies no per-series preprocessing here."""
        pass

    @runtime_dependency(
        module="mlforecast",
        err_msg="MLForecast is not installed, please install it with 'pip install mlforecast'",
    )
    @runtime_dependency(
        module="lightgbm",
        err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'",
    )
    def _train_model(self, data_train, data_test, model_kwargs):
        """Fit the models, predict the horizon and populate ``forecast_output``.

        Parameters
        ----------
        data_train: pd.DataFrame
            Long-format training data containing the series id column, the
            datetime column and the target column.
        data_test: pd.DataFrame
            Long-format data for the forecast horizon, used as future
            exogenous values (``X_df``).
        model_kwargs: dict
            Must contain ``"lower_quantile"`` and ``"uppper_quantile"``; may
            contain ``"lags"``, ``"recursive_models"`` and ``"static_features"``.

        Any exception raised here is recorded in ``self.errors_dict`` under the
        configured model name and logged at debug level; it is not re-raised.
        """
        try:
            import lightgbm as lgb
            from mlforecast import MLForecast
            from mlforecast.lag_transforms import ExpandingMean, RollingMean
            from mlforecast.target_transforms import Differences

            lgb_params = {
                "verbosity": -1,
                "num_leaves": 512,
            }

            # Infer the frequency from all unique timestamps, falling back to
            # the last five when inference over the full column yields nothing.
            unique_dates = data_train[self.date_col].drop_duplicates()
            freq = pd.infer_freq(unique_dates) or pd.infer_freq(unique_dates[-5:])

            # Default lags and lag transforms are only enabled when additional
            # data columns are present.
            has_additional_columns = (
                len(self.datasets.get_additional_data_column_names()) > 0
            )
            default_lags = [1, 6, 12] if has_additional_columns else []
            lag_transforms = (
                {
                    1: [ExpandingMean()],
                    12: [RollingMean(window_size=24)],
                }
                if has_additional_columns
                else {}
            )

            fcst = MLForecast(
                models={
                    "forecast": lgb.LGBMRegressor(**lgb_params),
                    "upper": lgb.LGBMRegressor(
                        **lgb_params,
                        objective="quantile",
                        alpha=model_kwargs["uppper_quantile"],
                    ),
                    "lower": lgb.LGBMRegressor(
                        **lgb_params,
                        objective="quantile",
                        alpha=model_kwargs["lower_quantile"],
                    ),
                },
                freq=freq,
                target_transforms=[Differences([12])],
                lags=model_kwargs.get("lags", default_lags),
                lag_transforms=lag_transforms,
            )

            num_models = model_kwargs.get("recursive_models", False)

            # Series id column plus every non-object column of the training data.
            self.model_columns = [
                ForecastOutputColumns.SERIES
            ] + data_train.select_dtypes(exclude=["object"]).columns.to_list()
            fcst.fit(
                data_train[self.model_columns],
                static_features=model_kwargs.get("static_features", []),
                id_col=ForecastOutputColumns.SERIES,
                time_col=self.date_col,
                target_col=self.spec.target_column,
                fitted=True,
                max_horizon=None if num_models is False else self.spec.horizon,
            )

            # Future rows absent from data_test are appended and every missing
            # value is filled with 0 before predicting.
            future_features = data_test[self.model_columns]
            self.outputs = fcst.predict(
                h=self.spec.horizon,
                X_df=pd.concat(
                    [
                        future_features,
                        fcst.get_missing_future(
                            h=self.spec.horizon, X_df=future_features
                        ),
                    ],
                    axis=0,
                    ignore_index=True,
                ).fillna(0),
            )
            self.fitted_values = fcst.forecast_fitted_values()
            for s_id in self.datasets.list_series_ids():
                self.forecast_output.init_series_output(
                    series_id=s_id,
                    data_at_series=self.datasets.get_data_at_series(s_id),
                )

                series_fitted = self.fitted_values[
                    self.fitted_values[ForecastOutputColumns.SERIES] == s_id
                ]
                series_outputs = self.outputs[
                    self.outputs[ForecastOutputColumns.SERIES] == s_id
                ]
                self.forecast_output.populate_series_output(
                    series_id=s_id,
                    fit_val=series_fitted.forecast.values,
                    forecast_val=series_outputs.forecast.values,
                    upper_bound=series_outputs.upper.values,
                    lower_bound=series_outputs.lower.values,
                )

                self.model_parameters[s_id] = {
                    "framework": SupportedModels.MLForecast,
                    **lgb_params,
                }

            logger.debug("===========Done===========")

        except Exception as e:
            self.errors_dict[self.spec.model] = {
                "model_name": self.spec.model,
                "error": str(e),
            }
            logger.debug(f"Encountered Error: {e}. Skipping.")

    def _build_model(self) -> pd.DataFrame:
        """Train on the historical data and return the forecast in long format."""
        data_train = self.datasets.get_all_data_long(include_horizon=False)
        data_test = self.datasets.get_all_data_long_forecast_horizon()
        self.models = dict()
        model_kwargs = self.set_kwargs()
        self.forecast_output = ForecastOutput(
            confidence_interval_width=self.spec.confidence_interval_width,
            horizon=self.spec.horizon,
            target_column=self.original_target_column,
            dt_column=self.date_col,
        )
        self._train_model(data_train, data_test, model_kwargs)
        return self.forecast_output.get_forecast_long()

    def _generate_report(self):
        """
        Generates the report for the model

        Returns
        -------
        tuple
            ``(model_description, all_sections)``: a text block describing
            mlforecast and the list of report sections (forecast overview
            plots followed by the per-series model parameters).
        """
        import report_creator as rc
        from utilsforecast.plotting import plot_series

        # Section 1: Forecast Overview
        sec1_text = rc.Block(
            rc.Heading("Forecast Overview", level=2),
            rc.Text(
                "These plots show your forecast in the context of historical data."
            ),
        )
        sec_1 = _select_plot_list(
            lambda s_id: plot_series(
                self.datasets.get_all_data_long(include_horizon=False),
                pd.concat(
                    [self.fitted_values, self.outputs], axis=0, ignore_index=True
                ),
                id_col=ForecastOutputColumns.SERIES,
                time_col=self.spec.datetime_column.name,
                target_col=self.original_target_column,
                seed=42,
                ids=[s_id],
            ),
            self.datasets.list_series_ids(),
        )

        # Section 2: MlForecast Model Parameters
        sec2_text = rc.Block(
            rc.Heading("MlForecast Model Parameters", level=2),
            rc.Text("These are the parameters used for the MlForecast model."),
        )

        # One block per series, labelled with the series id.
        blocks = [
            rc.Html(
                str(parameters),
                label=series_id,
            )
            for series_id, parameters in self.model_parameters.items()
        ]
        sec_2 = rc.Select(blocks=blocks)

        all_sections = [sec1_text, sec_1, sec2_text, sec_2]
        # Adjacent literals are concatenated verbatim, so each one carries its
        # own trailing space to keep the sentences apart.
        model_description = rc.Text(
            "mlforecast is a framework to perform time series forecasting using machine learning models "
            "with the option to scale to massive amounts of data using remote clusters. "
            "Fastest implementations of feature engineering for time series forecasting in Python. "
            "Support for exogenous variables and static covariates."
        )

        return model_description, all_sections
|
@@ -75,6 +75,8 @@ def _fit_model(data, params, additional_regressors, select_metric):
|
|
75
75
|
m = m.add_future_regressor(name=add_reg)
|
76
76
|
m.fit(df=data)
|
77
77
|
accepted_regressors_config = m.config_regressors or dict()
|
78
|
+
if hasattr(accepted_regressors_config, "regressors"):
|
79
|
+
accepted_regressors_config = accepted_regressors_config.regressors or dict()
|
78
80
|
|
79
81
|
enable_print()
|
80
82
|
return m, list(accepted_regressors_config.keys())
|
@@ -122,7 +124,13 @@ class NeuralProphetOperatorModel(ForecastOperatorBaseModel):
|
|
122
124
|
|
123
125
|
if self.loaded_models is not None and s_id in self.loaded_models:
|
124
126
|
model = self.loaded_models[s_id]
|
125
|
-
accepted_regressors_config =
|
127
|
+
accepted_regressors_config = (
|
128
|
+
model.config_regressors.regressors or dict()
|
129
|
+
)
|
130
|
+
if hasattr(accepted_regressors_config, "regressors"):
|
131
|
+
accepted_regressors_config = (
|
132
|
+
accepted_regressors_config.regressors or dict()
|
133
|
+
)
|
126
134
|
self.accepted_regressors[s_id] = list(accepted_regressors_config.keys())
|
127
135
|
if self.loaded_trainers is not None and s_id in self.loaded_trainers:
|
128
136
|
model.trainer = self.loaded_trainers[s_id]
|
@@ -131,6 +131,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel):
|
|
131
131
|
"model_name": self.spec.model,
|
132
132
|
"error": str(e),
|
133
133
|
}
|
134
|
+
logger.debug(f"Encountered Error: {e}. Skipping.")
|
134
135
|
|
135
136
|
def _build_model(self) -> pd.DataFrame:
|
136
137
|
full_data_dict = self.datasets.get_data_by_series()
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
|
3
|
+
# Copyright (c) 2023 Oracle and/or its affiliates.
|
4
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
from pathlib import Path
|
10
|
+
|
11
|
+
from ads.opctl import logger
|
12
|
+
from ads.opctl.operator.lowcode.common.const import DataColumns
|
13
|
+
from .model.forecast_datasets import ForecastDatasets
|
14
|
+
from .operator_config import ForecastOperatorConfig
|
15
|
+
|
16
|
+
|
17
|
+
class ModelEvaluator:
    """
    A class used to evaluate and determine the best model or framework from a given set of candidates.

    This class is responsible for comparing different models or frameworks based on specified evaluation
    metrics and returning the best-performing option.
    """

    def __init__(self, models, k=5, subsample_ratio=0.20):
        """
        Initializes the ModelEvaluator with a list of models, number of backtests and subsample ratio.

        Properties:
        ----------
        models (list): The list of model to be evaluated.
        k (int): The number of times each model is backtested to verify its performance.
        subsample_ratio (float): The proportion of the data used in the evaluation process.
        """
        self.models = models
        self.k = k
        self.subsample_ratio = subsample_ratio
        # Lower bound on how many series are sampled for backtesting.
        self.minimum_sample_count = 5

    def generate_cutoffs(self, unique_dates, horizon):
        """Return the training cut-off dates, most recent first.

        Cut-offs are taken every ``horizon`` steps backwards from the end of
        the sorted dates. A backtest is only kept when its training window
        holds at least ``3 * horizon`` dates, so fewer than ``k`` cut-offs may
        be returned (a warning is logged in that case).
        """
        sorted_dates = np.sort(unique_dates)
        train_window_size = [
            len(sorted_dates) - (i + 1) * horizon for i in range(self.k)
        ]
        valid_train_window_size = [
            ws for ws in train_window_size if ws >= horizon * 3
        ]
        backtest_count = len(valid_train_window_size)
        if backtest_count < self.k:
            # Report the number of backtests, not the list of window sizes.
            logger.warning(f"Only {backtest_count} backtests can be created")
        cut_offs = sorted_dates[-horizon - 1:-horizon * (self.k + 1):-horizon][
            :backtest_count
        ]
        return cut_offs

    def generate_k_fold_data(
        self,
        datasets: "ForecastDatasets",
        operator_config: "ForecastOperatorConfig",
    ):
        """Split a sample of the series into per-backtest train/additional/test frames.

        Returns
        -------
        tuple
            ``(cut_offs, training_datasets, additional_data, test_datasets)``;
            the three lists are aligned with ``cut_offs`` by position.
        """
        date_col = operator_config.spec.datetime_column.name
        horizon = operator_config.spec.horizon
        historical_data = datasets.historical_data.data.reset_index()
        series_col = DataColumns.Series
        group_counts = historical_data[series_col].value_counts()

        # Keep the first `sample_count` entries of the per-series row counts.
        sample_count = max(
            self.minimum_sample_count,
            int(len(group_counts) * self.subsample_ratio),
        )
        sampled_groups = group_counts.head(sample_count)
        sampled_historical_data = historical_data[
            historical_data[series_col].isin(sampled_groups.index)
        ]

        # Cut-offs are derived from the dates of the series with the fewest rows.
        min_group = group_counts.idxmin()
        min_series_data = historical_data[historical_data[series_col] == min_group]
        unique_dates = min_series_data[date_col].unique()

        cut_offs = self.generate_cutoffs(unique_dates, horizon)
        sampled_dates = sampled_historical_data[date_col]
        training_datasets = [
            sampled_historical_data[sampled_dates <= cut_off_date]
            for cut_off_date in cut_offs
        ]
        # First test set: everything after the latest cut-off. Each later one
        # is the window between a cut-off and the next more recent cut-off.
        test_datasets = [sampled_historical_data[sampled_dates > cut_offs[0]]]
        for i, current in enumerate(cut_offs[1:]):
            test_datasets.append(
                sampled_historical_data[
                    (current < sampled_dates) & (sampled_dates <= cut_offs[i])
                ]
            )

        all_additional = datasets.additional_data.data.reset_index()
        sampled_additional_data = all_additional[
            all_additional[series_col].isin(sampled_groups.index)
        ]
        max_historical_date = sampled_historical_data[date_col].max()
        # Additional data for backtest i extends to the end of its test window:
        # the last historical date for the first, the previous cut-off otherwise.
        additional_data = [
            sampled_additional_data[
                sampled_additional_data[date_col] <= max_historical_date
            ]
        ]
        for cut_off in cut_offs[:-1]:
            trimmed_additional_data = sampled_additional_data[
                sampled_additional_data[date_col] <= cut_off
            ]
            additional_data.append(trimmed_additional_data)
        return cut_offs, training_datasets, additional_data, test_datasets

    def remove_none_values(self, obj):
        """Recursively drop dict entries whose key or value is ``None``.

        Only dicts are traversed; any other object is returned unchanged.
        """
        if isinstance(obj, dict):
            return {
                k: self.remove_none_values(v)
                for k, v in obj.items()
                if k is not None and v is not None
            }
        else:
            return obj

    def create_operator_config(self, operator_config, backtest, model, historical_data, additional_data, test_data):
        """Write one backtest's data to CSV and build an operator config pointing at it.

        The files are written under
        ``<output_directory>/back_testing/<model>/<backtest>``. The returned
        config uses ``model``, has ``model_kwargs`` cleared and explanations
        disabled, and writes its output to that same directory.
        """
        output_dir = operator_config.spec.output_directory.url
        output_file_path = f'{output_dir}/back_testing/{model}/{backtest}'
        Path(output_file_path).mkdir(parents=True, exist_ok=True)
        historical_data_url = f'{output_file_path}/historical.csv'
        additional_data_url = f'{output_file_path}/additional.csv'
        test_data_url = f'{output_file_path}/test.csv'
        historical_data.to_csv(historical_data_url, index=False)
        additional_data.to_csv(additional_data_url, index=False)
        test_data.to_csv(test_data_url, index=False)
        backtest_op_config_draft = operator_config.to_dict()
        backtest_spec = backtest_op_config_draft["spec"]
        backtest_spec["historical_data"]["url"] = historical_data_url
        backtest_spec["additional_data"]["url"] = additional_data_url
        backtest_spec["test_data"]["url"] = test_data_url
        backtest_spec["model"] = model
        # Set to None so that remove_none_values() drops the key below.
        backtest_spec['model_kwargs'] = None
        backtest_spec["output_directory"] = {"url": output_file_path}
        backtest_spec["target_category_columns"] = [DataColumns.Series]
        backtest_spec['generate_explanations'] = False
        cleaned_config = self.remove_none_values(backtest_op_config_draft)

        backtest_op_config = ForecastOperatorConfig.from_dict(
            obj_dict=cleaned_config)
        return backtest_op_config

    def run_all_models(
        self,
        datasets: "ForecastDatasets",
        operator_config: "ForecastOperatorConfig",
    ):
        """Run every candidate model on every backtest and collect its metric.

        Returns
        -------
        dict
            ``{model: {backtest_index: value}}`` where ``value`` is the
            configured ``spec.metric`` averaged across series, read from the
            test-metrics file each backtest run writes.
        """
        # Imported inside the method, as in the original code; presumably to
        # avoid a circular import with the factory module - TODO confirm.
        from .model.factory import ForecastOperatorModelFactory

        cut_offs, train_sets, additional_data, test_sets = self.generate_k_fold_data(
            datasets, operator_config
        )
        metrics = {}
        for model in self.models:
            metrics[model] = {}
            for i, (backtest_historical_data, backtest_additional_data, backtest_test_data) in enumerate(
                zip(train_sets, additional_data, test_sets)
            ):
                backtest_operator_config = self.create_operator_config(
                    operator_config,
                    i,
                    model,
                    backtest_historical_data,
                    backtest_additional_data,
                    backtest_test_data,
                )
                # Separate name so the `datasets` argument is not rebound.
                backtest_datasets = ForecastDatasets(backtest_operator_config)
                ForecastOperatorModelFactory.get_model(
                    backtest_operator_config, backtest_datasets
                ).generate_report()
                test_metrics_filename = backtest_operator_config.spec.test_metrics_filename
                metrics_df = pd.read_csv(
                    f"{backtest_operator_config.spec.output_directory.url}/{test_metrics_filename}")
                metrics_df["average_across_series"] = metrics_df.drop('metrics', axis=1).mean(axis=1)
                metrics_average_dict = dict(
                    zip(metrics_df['metrics'].str.lower(), metrics_df['average_across_series'])
                )
                metrics[model][i] = metrics_average_dict[operator_config.spec.metric]
        return metrics

    def find_best_model(
        self,
        datasets: "ForecastDatasets",
        operator_config: "ForecastOperatorConfig",
    ):
        """Backtest all models, save the statistics and return the best model.

        The best model is the one with the lowest metric averaged over its
        backtests. The per-backtest values are written to
        ``backtest_stats.csv`` in the operator's output directory.
        """
        metrics = self.run_all_models(datasets, operator_config)
        avg_backtests_metrics = {
            key: sum(value.values()) / len(value.values())
            for key, value in metrics.items()
        }
        best_model = min(avg_backtests_metrics, key=avg_backtests_metrics.get)
        logger.info(f"Among models {self.models}, {best_model} model shows better performance during backtesting.")
        backtest_stats = pd.DataFrame(metrics).rename_axis('backtest')
        backtest_stats.reset_index(inplace=True)
        output_dir = operator_config.spec.output_directory.url
        backtest_report_name = "backtest_stats.csv"
        backtest_stats.to_csv(f"{output_dir}/{backtest_report_name}", index=False)
        return best_model
|
@@ -14,7 +14,7 @@ from ads.opctl.operator.common.operator_config import OperatorConfig, OutputDire
|
|
14
14
|
|
15
15
|
from .const import SupportedMetrics, SpeedAccuracyMode
|
16
16
|
from .const import SupportedModels
|
17
|
-
|
17
|
+
from ads.opctl.operator.lowcode.common.utils import find_output_dirname
|
18
18
|
|
19
19
|
@dataclass(repr=True)
|
20
20
|
class TestData(InputData):
|
@@ -90,6 +90,7 @@ class ForecastOperatorSpec(DataClassSerializable):
|
|
90
90
|
|
91
91
|
def __post_init__(self):
|
92
92
|
"""Adjusts the specification details."""
|
93
|
+
self.output_directory = self.output_directory or OutputDirectory(url=find_output_dirname(self.output_directory))
|
93
94
|
self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower()
|
94
95
|
self.model = self.model or SupportedModels.Auto
|
95
96
|
self.confidence_interval_width = self.confidence_interval_width or 0.80
|
@@ -365,14 +365,15 @@ spec:
|
|
365
365
|
model:
|
366
366
|
type: string
|
367
367
|
required: false
|
368
|
-
default: auto
|
368
|
+
default: auto-select
|
369
369
|
allowed:
|
370
370
|
- prophet
|
371
371
|
- arima
|
372
372
|
- neuralprophet
|
373
|
+
- mlforecast
|
373
374
|
- automlx
|
374
375
|
- autots
|
375
|
-
- auto
|
376
|
+
- auto-select
|
376
377
|
|
377
378
|
model_kwargs:
|
378
379
|
type: dict
|
@@ -413,4 +414,8 @@ spec:
|
|
413
414
|
- RMSE
|
414
415
|
- MSE
|
415
416
|
- SMAPE
|
417
|
+
- mape
|
418
|
+
- rmse
|
419
|
+
- mse
|
420
|
+
- smape
|
416
421
|
type: dict
|
@@ -19,7 +19,6 @@ from sklearn.metrics import (
|
|
19
19
|
mean_absolute_percentage_error,
|
20
20
|
mean_squared_error,
|
21
21
|
)
|
22
|
-
|
23
22
|
try:
|
24
23
|
from scipy.stats import linregress
|
25
24
|
except:
|
@@ -34,8 +33,7 @@ from .errors import ForecastInputDataError, ForecastSchemaYamlError
|
|
34
33
|
from .operator_config import ForecastOperatorSpec, ForecastOperatorConfig
|
35
34
|
from ads.opctl.operator.lowcode.common.utils import merge_category_columns
|
36
35
|
from ads.opctl.operator.lowcode.forecast.const import ForecastOutputColumns
|
37
|
-
|
38
|
-
# from ads.opctl.operator.lowcode.forecast.model.forecast_datasets import TestData, ForecastOutput
|
36
|
+
import report_creator as rc
|
39
37
|
|
40
38
|
|
41
39
|
def _label_encode_dataframe(df, no_encode=set()):
|
@@ -258,8 +256,6 @@ def evaluate_train_metrics(output, metrics_col_name=None):
|
|
258
256
|
|
259
257
|
|
260
258
|
def _select_plot_list(fn, series_ids):
|
261
|
-
import report_creator as rc
|
262
|
-
|
263
259
|
blocks = [rc.Widget(fn(s_id=s_id), label=s_id) for s_id in series_ids]
|
264
260
|
return rc.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
|
265
261
|
|
@@ -267,6 +263,23 @@ def _select_plot_list(fn, series_ids):
|
|
267
263
|
def _add_unit(num, unit):
|
268
264
|
return f"{num} {unit}"
|
269
265
|
|
266
|
+
def get_auto_select_plot(backtest_results):
    """Plot each model's backtest metric as a line and wrap it in a report widget.

    Parameters
    ----------
    backtest_results: pd.DataFrame
        Must contain a ``"backtest"`` column, used as the x axis; every other
        column is drawn as one line trace named after the column.

    Returns
    -------
    rc.Widget
        Widget wrapping the figure.

    Raises
    ------
    ValueError
        If ``backtest_results`` has no ``"backtest"`` column.
    """
    # `go` is not imported in this block; presumably plotly.graph_objects
    # from the file-level imports - TODO confirm.
    fig = go.Figure()
    columns = backtest_results.columns.tolist()
    back_test_column = "backtest"
    columns.remove(back_test_column)
    for column in columns:
        fig.add_trace(
            go.Scatter(
                x=backtest_results[back_test_column],
                y=backtest_results[column],
                mode="lines",
                name=column,
            )
        )

    return rc.Widget(fig)
|
282
|
+
|
270
283
|
|
271
284
|
def get_forecast_plots(
|
272
285
|
forecast_output,
|
@@ -371,45 +384,6 @@ def get_forecast_plots(
|
|
371
384
|
|
372
385
|
return _select_plot_list(plot_forecast_plotly, forecast_output.list_series_ids())
|
373
386
|
|
374
|
-
|
375
|
-
def select_auto_model(
|
376
|
-
datasets: "ForecastDatasets", operator_config: ForecastOperatorConfig
|
377
|
-
) -> str:
|
378
|
-
"""
|
379
|
-
Selects AutoMLX or Arima model based on column count.
|
380
|
-
|
381
|
-
If the number of columns is less than or equal to the maximum allowed for AutoMLX,
|
382
|
-
returns 'AutoMLX'. Otherwise, returns 'Arima'.
|
383
|
-
|
384
|
-
Parameters
|
385
|
-
------------
|
386
|
-
datasets: ForecastDatasets
|
387
|
-
Datasets for predictions
|
388
|
-
|
389
|
-
Returns
|
390
|
-
--------
|
391
|
-
str
|
392
|
-
The type of the model.
|
393
|
-
"""
|
394
|
-
freq_in_secs = datasets.get_datetime_frequency_in_seconds()
|
395
|
-
num_of_additional_cols = len(datasets.get_additional_data_column_names())
|
396
|
-
row_count = datasets.get_num_rows()
|
397
|
-
number_of_series = len(datasets.list_series_ids())
|
398
|
-
if (
|
399
|
-
num_of_additional_cols < 15
|
400
|
-
and row_count < 10000
|
401
|
-
and number_of_series < 10
|
402
|
-
and freq_in_secs > 3600
|
403
|
-
):
|
404
|
-
return SupportedModels.AutoMLX
|
405
|
-
elif row_count < 10000 and number_of_series > 10:
|
406
|
-
return SupportedModels.AutoTS
|
407
|
-
elif row_count > 20000:
|
408
|
-
return SupportedModels.NeuralProphet
|
409
|
-
else:
|
410
|
-
return SupportedModels.NeuralProphet
|
411
|
-
|
412
|
-
|
413
387
|
def convert_target(target: str, target_col: str):
|
414
388
|
"""
|
415
389
|
Removes the target_column that got appended to target.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: oracle_ads
|
3
|
-
Version: 2.11.9
|
3
|
+
Version: 2.11.10
|
4
4
|
Summary: Oracle Accelerated Data Science SDK
|
5
5
|
Keywords: Oracle Cloud Infrastructure,OCI,Machine Learning,ML,Artificial Intelligence,AI,Data Science,Cloud,Oracle
|
6
6
|
Author: Oracle Data Science
|
@@ -20,7 +20,7 @@ Requires-Dist: cloudpickle>=1.6.0
|
|
20
20
|
Requires-Dist: fsspec>=0.8.7
|
21
21
|
Requires-Dist: gitpython>=3.1.2
|
22
22
|
Requires-Dist: jinja2>=2.11.2
|
23
|
-
Requires-Dist: matplotlib>=3.1.3
|
23
|
+
Requires-Dist: matplotlib>=3.1.3, <=3.8.4
|
24
24
|
Requires-Dist: numpy>=1.19.2
|
25
25
|
Requires-Dist: oci>=2.125.3
|
26
26
|
Requires-Dist: ocifs>=1.1.3
|
@@ -34,10 +34,8 @@ Requires-Dist: tabulate>=0.8.9
|
|
34
34
|
Requires-Dist: tqdm>=4.59.0
|
35
35
|
Requires-Dist: oracle_ads[opctl] ; extra == "anomaly"
|
36
36
|
Requires-Dist: autots ; extra == "anomaly"
|
37
|
-
Requires-Dist: oracle-automlx[forecasting]==23.4.1 ; extra == "anomaly"
|
38
|
-
Requires-Dist: oracle-automlx[classic]==23.4.1 ; extra == "anomaly"
|
39
37
|
Requires-Dist: oracledb ; extra == "anomaly"
|
40
|
-
Requires-Dist: report-creator ; extra == "anomaly"
|
38
|
+
Requires-Dist: report-creator==1.0.9 ; extra == "anomaly"
|
41
39
|
Requires-Dist: jupyter_server ; extra == "aqua"
|
42
40
|
Requires-Dist: hdfs[kerberos] ; extra == "bds"
|
43
41
|
Requires-Dist: ibis-framework[impala] ; extra == "bds"
|
@@ -61,14 +59,12 @@ Requires-Dist: oci-cli ; extra == "forecast"
|
|
61
59
|
Requires-Dist: py-cpuinfo ; extra == "forecast"
|
62
60
|
Requires-Dist: rich ; extra == "forecast"
|
63
61
|
Requires-Dist: autots[additional] ; extra == "forecast"
|
64
|
-
Requires-Dist:
|
65
|
-
Requires-Dist: neuralprophet ; extra == "forecast"
|
62
|
+
Requires-Dist: mlforecast ; extra == "forecast"
|
63
|
+
Requires-Dist: neuralprophet>=0.7.0 ; extra == "forecast"
|
66
64
|
Requires-Dist: numpy ; extra == "forecast"
|
67
65
|
Requires-Dist: oci-cli ; extra == "forecast"
|
68
|
-
Requires-Dist: optuna
|
66
|
+
Requires-Dist: optuna ; extra == "forecast"
|
69
67
|
Requires-Dist: oracle-ads ; extra == "forecast"
|
70
|
-
Requires-Dist: oracle-automlx[forecasting]==23.4.1 ; extra == "forecast"
|
71
|
-
Requires-Dist: oracle-automlx[classic]==23.4.1 ; extra == "forecast"
|
72
68
|
Requires-Dist: pmdarima ; extra == "forecast"
|
73
69
|
Requires-Dist: prophet ; extra == "forecast"
|
74
70
|
Requires-Dist: shap ; extra == "forecast"
|
@@ -76,13 +72,14 @@ Requires-Dist: sktime ; extra == "forecast"
|
|
76
72
|
Requires-Dist: statsmodels ; extra == "forecast"
|
77
73
|
Requires-Dist: plotly ; extra == "forecast"
|
78
74
|
Requires-Dist: oracledb ; extra == "forecast"
|
79
|
-
Requires-Dist: report-creator ; extra == "forecast"
|
75
|
+
Requires-Dist: report-creator==1.0.9 ; extra == "forecast"
|
80
76
|
Requires-Dist: geopandas ; extra == "geo"
|
81
77
|
Requires-Dist: oracle_ads[viz] ; extra == "geo"
|
82
78
|
Requires-Dist: transformers ; extra == "huggingface"
|
83
79
|
Requires-Dist: langchain-community<0.0.32 ; extra == "llm"
|
84
80
|
Requires-Dist: langchain>=0.1.10,<0.1.14 ; extra == "llm"
|
85
81
|
Requires-Dist: evaluate>=0.4.0 ; extra == "llm"
|
82
|
+
Requires-Dist: langchain-core<0.1.51 ; extra == "llm"
|
86
83
|
Requires-Dist: ipython>=7.23.1, <8.0 ; extra == "notebook"
|
87
84
|
Requires-Dist: ipywidgets~=7.6.3 ; extra == "notebook"
|
88
85
|
Requires-Dist: lightgbm<4.0.0 ; extra == "onnx"
|
@@ -115,7 +112,7 @@ Requires-Dist: scrubadub==2.0.1 ; extra == "pii"
|
|
115
112
|
Requires-Dist: scrubadub_spacy ; extra == "pii"
|
116
113
|
Requires-Dist: spacy-transformers==1.2.5 ; extra == "pii"
|
117
114
|
Requires-Dist: spacy==3.6.1 ; extra == "pii"
|
118
|
-
Requires-Dist: report-creator ; extra == "pii"
|
115
|
+
Requires-Dist: report-creator==1.0.9 ; extra == "pii"
|
119
116
|
Requires-Dist: pyspark>=3.0.0 ; extra == "spark"
|
120
117
|
Requires-Dist: oracle_ads[viz] ; extra == "tensorflow"
|
121
118
|
Requires-Dist: tensorflow ; extra == "tensorflow"
|