oracle-ads 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +12 -0
- ads/aqua/base.py +324 -0
- ads/aqua/cli.py +19 -0
- ads/aqua/config/deployment_config_defaults.json +9 -0
- ads/aqua/config/resource_limit_names.json +7 -0
- ads/aqua/constants.py +45 -0
- ads/aqua/data.py +40 -0
- ads/aqua/decorator.py +101 -0
- ads/aqua/deployment.py +643 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation.py +1751 -0
- ads/aqua/exception.py +82 -0
- ads/aqua/extension/__init__.py +40 -0
- ads/aqua/extension/base_handler.py +138 -0
- ads/aqua/extension/common_handler.py +21 -0
- ads/aqua/extension/deployment_handler.py +202 -0
- ads/aqua/extension/evaluation_handler.py +135 -0
- ads/aqua/extension/finetune_handler.py +66 -0
- ads/aqua/extension/model_handler.py +59 -0
- ads/aqua/extension/ui_handler.py +201 -0
- ads/aqua/extension/utils.py +23 -0
- ads/aqua/finetune.py +579 -0
- ads/aqua/job.py +29 -0
- ads/aqua/model.py +819 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +459 -0
- ads/aqua/ui.py +453 -0
- ads/aqua/utils.py +715 -0
- ads/cli.py +37 -6
- ads/common/auth.py +7 -0
- ads/common/decorator/__init__.py +7 -3
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/object_storage_details.py +166 -7
- ads/common/oci_client.py +18 -1
- ads/common/oci_logging.py +2 -2
- ads/common/oci_mixin.py +4 -5
- ads/common/serializer.py +34 -5
- ads/common/utils.py +75 -10
- ads/config.py +40 -1
- ads/dataset/correlation_plot.py +10 -12
- ads/jobs/ads_job.py +43 -25
- ads/jobs/builders/infrastructure/base.py +4 -2
- ads/jobs/builders/infrastructure/dsc_job.py +49 -39
- ads/jobs/builders/runtimes/base.py +71 -1
- ads/jobs/builders/runtimes/container_runtime.py +4 -4
- ads/jobs/builders/runtimes/pytorch_runtime.py +10 -63
- ads/jobs/templates/driver_pytorch.py +27 -10
- ads/model/artifact_downloader.py +84 -14
- ads/model/artifact_uploader.py +25 -23
- ads/model/datascience_model.py +388 -38
- ads/model/deployment/model_deployment.py +10 -2
- ads/model/generic_model.py +8 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_metadata.py +1 -1
- ads/model/service/oci_datascience_model.py +34 -5
- ads/opctl/config/merger.py +2 -2
- ads/opctl/operator/__init__.py +3 -1
- ads/opctl/operator/cli.py +7 -1
- ads/opctl/operator/cmd.py +3 -3
- ads/opctl/operator/common/errors.py +2 -1
- ads/opctl/operator/common/operator_config.py +22 -3
- ads/opctl/operator/common/utils.py +16 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +15 -0
- ads/opctl/operator/lowcode/anomaly/README.md +209 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +104 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +88 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +12 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +147 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +89 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +103 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +354 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +67 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +105 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +359 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +81 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +96 -0
- ads/opctl/operator/lowcode/common/errors.py +41 -0
- ads/opctl/operator/lowcode/common/transformations.py +191 -0
- ads/opctl/operator/lowcode/common/utils.py +250 -0
- ads/opctl/operator/lowcode/forecast/README.md +3 -2
- ads/opctl/operator/lowcode/forecast/__main__.py +18 -2
- ads/opctl/operator/lowcode/forecast/cmd.py +8 -7
- ads/opctl/operator/lowcode/forecast/const.py +17 -1
- ads/opctl/operator/lowcode/forecast/environment.yaml +3 -2
- ads/opctl/operator/lowcode/forecast/model/arima.py +106 -117
- ads/opctl/operator/lowcode/forecast/model/automlx.py +204 -180
- ads/opctl/operator/lowcode/forecast/model/autots.py +144 -253
- ads/opctl/operator/lowcode/forecast/model/base_model.py +326 -259
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +325 -176
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +293 -237
- ads/opctl/operator/lowcode/forecast/model/prophet.py +191 -208
- ads/opctl/operator/lowcode/forecast/operator_config.py +24 -33
- ads/opctl/operator/lowcode/forecast/schema.yaml +116 -29
- ads/opctl/operator/lowcode/forecast/utils.py +186 -356
- ads/opctl/operator/lowcode/pii/model/guardrails.py +18 -15
- ads/opctl/operator/lowcode/pii/model/report.py +7 -7
- ads/opctl/operator/lowcode/pii/operator_config.py +1 -8
- ads/opctl/operator/lowcode/pii/utils.py +0 -82
- ads/opctl/operator/runtime/runtime.py +3 -2
- ads/telemetry/base.py +62 -0
- ads/telemetry/client.py +105 -0
- ads/telemetry/telemetry.py +6 -3
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/METADATA +44 -7
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/RECORD +116 -59
- ads/opctl/operator/lowcode/forecast/model/transformations.py +0 -125
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/LICENSE.txt +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/WHEEL +0 -0
- {oracle_ads-2.10.0.dist-info → oracle_ads-2.11.0.dist-info}/entry_points.txt +0 -0
Diff of ads/opctl/operator/lowcode/forecast/model/neuralprophet.py (+293, -237); removed lines shown blank or truncated were elided by the diff viewer:

```diff
--- a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
+++ b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
@@ -7,6 +7,7 @@
 import numpy as np
 import optuna
 import pandas as pd
+from joblib import Parallel, delayed
 from torch import Tensor
 from torchmetrics.regression import (
     MeanAbsoluteError,
@@ -22,8 +23,18 @@ from ads.common.decorator.runtime_dependency import (
 )
 from ads.opctl import logger

-from ..const import DEFAULT_TRIALS, ForecastOutputColumns
-from
+from ..const import DEFAULT_TRIALS, ForecastOutputColumns, SupportedModels
+from ads.opctl.operator.lowcode.forecast.utils import (
+    load_pkl,
+    write_pkl,
+    _select_plot_list,
+    _label_encode_dataframe,
+)
+from ads.opctl.operator.lowcode.common.utils import (
+    disable_print,
+    enable_print,
+    seconds_to_datetime,
+)
 from .base_model import ForecastOperatorBaseModel
 from ..operator_config import ForecastOperatorConfig
 from .forecast_datasets import ForecastDatasets, ForecastOutput
@@ -52,7 +63,11 @@ def _get_np_metrics_dict(selected_metric):
     install_from=OptionalDependency.FORECAST,
 )
 def _fit_model(data, params, additional_regressors, select_metric):
-    from neuralprophet import NeuralProphet
+    from neuralprophet import NeuralProphet, set_log_level
+
+    if logger.level > 10:
+        set_log_level(logger.level)
+        disable_print()

     m = NeuralProphet(**params)
     m.metrics = _get_np_metrics_dict(select_metric)
@@ -60,6 +75,8 @@ def _fit_model(data, params, additional_regressors, select_metric):
         m = m.add_future_regressor(name=add_reg)
     m.fit(df=data)
     accepted_regressors_config = m.config_regressors or dict()
+
+    enable_print()
     return m, list(accepted_regressors_config.keys())


@@ -68,21 +85,21 @@ class NeuralProphetOperatorModel(ForecastOperatorBaseModel):

     def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
         super().__init__(config=config, datasets=datasets)
-        self.train_metrics = True
         self.forecast_col_name = "yhat1"
+        self.loaded_trainers = None
+        self.trainers = None
+
+    def _load_model(self):
+        try:
+            self.loaded_models = load_pkl(self.spec.previous_output_dir + "/model.pkl")
+            self.loaded_trainers = load_pkl(
+                self.spec.previous_output_dir + "/trainer.pkl"
+            )
+        except:
+            logger.debug("model.pkl/trainer.pkl is not present")

-    def
-
-
-        full_data_dict = self.datasets.full_data_dict
-        models = []
-        outputs = dict()
-        outputs_legacy = []
-
-        # Extract the Confidence Interval Width and
-        # convert to neural prophets equivalent - quantiles
-        model_kwargs = self.spec.model_kwargs
-
+    def set_kwargs(self):
+        # Extract the Confidence Interval Width and convert to prophet's equivalent - interval_width
         if self.spec.confidence_interval_width is None:
             quantiles = model_kwargs.get("quantiles", [0.05, 0.95])
             self.spec.confidence_interval_width = float(quantiles[1]) - float(
@@ -92,243 +109,262 @@ class NeuralProphetOperatorModel(ForecastOperatorBaseModel):
         boundaries = round((1 - self.spec.confidence_interval_width) / 2, 2)
         quantiles = [boundaries, self.spec.confidence_interval_width + boundaries]

+        model_kwargs = self.spec.model_kwargs
         model_kwargs["quantiles"] = quantiles
-
-
-
-
-
-
-
-            )
-
-
-
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-                def objective(trial):
-                    params = {
-                        # 'seasonality_mode': trial.suggest_categorical('seasonality_mode', ['additive', 'multiplicative']),
-                        # 'seasonality_reg': trial.suggest_float('seasonality_reg', 0.1, 500, log=True),
-                        # 'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.1, log=True),
-                        "newer_samples_start": trial.suggest_float(
-                            "newer_samples_start", 0.001, 0.999
-                        ),
-                        "newer_samples_weight": trial.suggest_float(
-                            "newer_samples_weight", 0, 100
-                        ),
-                        "changepoints_range": trial.suggest_float(
-                            "changepoints_range", 0.8, 0.95
-                        ),
-                    }
-                    # trend_reg, trend_reg_threshold, ar_reg, impute_rolling/impute_linear,
-                    params.update(model_kwargs_i)
-
-                    folds = NeuralProphet(**params).crossvalidation_split_df(
-                        data_i, k=3
-                    )
-                    test_metrics_total_i = []
-                    for df_train, df_test in folds:
-                        m, accepted_regressors = _fit_model(
-                            data=df_train,
-                            params=params,
-                            additional_regressors=additional_regressors,
-                            select_metric=self.spec.metric,
-                        )
-                        df_test = df_test[["y", "ds"] + accepted_regressors]
-
-                        test_forecast_i = m.predict(df=df_test)
-                        fold_metric_i = (
-                            m.metrics[self.spec.metric]
-                            .forward(
-                                Tensor(test_forecast_i["yhat1"]),
-                                Tensor(test_forecast_i["y"]),
-                            )
-                            .item()
-                        )
-                        test_metrics_total_i.append(fold_metric_i)
-                        logger.debug(
-                            f"----------------------{np.asarray(test_metrics_total_i).mean()}----------------------"
-                        )
-                    return np.asarray(test_metrics_total_i).mean()
-
-                study = optuna.create_study(direction="minimize")
-                m_params = NeuralProphet().parameters()
-                study.enqueue_trial(
-                    {
-                        # 'seasonality_mode': m_params['seasonality_mode'],
-                        # 'seasonality_reg': m_params['seasonality_reg'],
-                        # 'learning_rate': m_params['learning_rate'],
-                        "newer_samples_start": m_params["newer_samples_start"],
-                        "newer_samples_weight": m_params["newer_samples_weight"],
-                        "changepoints_range": m_params["changepoints_range"],
-                    }
-                )
-                study.optimize(
-                    objective,
-                    n_trials=self.spec.tuning.n_trials
-                    if self.spec.tuning
-                    else DEFAULT_TRIALS,
-                    n_jobs=-1,
+        return model_kwargs
+
+    def _train_model(self, i, s_id, df, model_kwargs):
+        try:
+            self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
+
+            data = self.preprocess(df, s_id)
+            data_i = self.drop_horizon(data)
+
+            if self.loaded_models is not None:
+                model = self.loaded_models[s_id]
+                accepted_regressors_config = model.config_regressors or dict()
+                self.accepted_regressors[s_id] = list(accepted_regressors_config.keys())
+                if self.loaded_trainers is not None:
+                    model.trainer = self.loaded_trainers[s_id]
+            else:
+                if self.perform_tuning:
+                    model_kwargs = self.run_tuning(data_i, model_kwargs)
+
+                # Build and fit model
+                model, self.accepted_regressors[s_id] = _fit_model(
+                    data=data_i,
+                    params=model_kwargs,
+                    additional_regressors=self.additional_regressors,
+                    select_metric=self.spec.metric,
                 )

-                selected_params = study.best_params
-                selected_params.update(model_kwargs_i)
-                model_kwargs_i = selected_params
-
-            # Build and fit model
-            model, accepted_regressors = _fit_model(
-                data=training_data,
-                params=model_kwargs_i,
-                additional_regressors=additional_regressors,
-                select_metric=self.spec.metric,
-            )
-            logger.debug(
-                f"Found the following additional data columns: {additional_regressors}"
-            )
             logger.debug(
-                f"
-                f"discarded. Only using the columns: {accepted_regressors}"
+                f"Found the following additional data columns: {self.additional_regressors}"
             )
-
+            if set(self.additional_regressors) - set(self.accepted_regressors[s_id]):
+                logger.debug(
+                    f"While fitting the model, some additional data may have been "
+                    f"discarded. Only using the columns: {self.accepted_regressors[s_id]}"
+                )
             # Build future dataframe
-            future =
+            future = data[self.accepted_regressors[s_id] + ["ds"]].reset_index(
+                drop=True
+            )
             future["y"] = None
-            future = future[["y", "ds"] + list(accepted_regressors)]

-            # Forecast model and collect outputs
             forecast = model.predict(future)
             logger.debug(f"-----------------Model {i}----------------------")
             logger.debug(forecast.tail())
-
-
-
-
-        self.models = models
-        self.outputs = outputs_legacy
-
-        logger.debug("===========Done===========")
-
-        # Merge the outputs from each model into 1 df with all outputs by target and category
-        col = self.original_target_column
-        output_col = pd.DataFrame()
-        yhat_upper_name = ForecastOutputColumns.UPPER_BOUND
-        yhat_lower_name = ForecastOutputColumns.LOWER_BOUND
-        for cat in self.categories:
-            output_i = pd.DataFrame()
-
-            output_i["Date"] = outputs[f"{col}_{cat}"]["ds"]
-            output_i["Series"] = cat
-            output_i[f"input_value"] = full_data_dict[f"{col}_{cat}"][f"{col}_{cat}"]
-
-            output_i[f"fitted_value"] = float("nan")
-            output_i[f"forecast_value"] = float("nan")
-            output_i[yhat_lower_name] = float("nan")
-            output_i[yhat_upper_name] = float("nan")
-
-            output_i.iloc[
-                : -self.spec.horizon, output_i.columns.get_loc(f"fitted_value")
-            ] = (outputs[f"{col}_{cat}"]["yhat1"].iloc[: -self.spec.horizon].values)
-            output_i.iloc[
-                -self.spec.horizon :,
-                output_i.columns.get_loc(f"forecast_value"),
-            ] = (
-                outputs[f"{col}_{cat}"]["yhat1"].iloc[-self.spec.horizon :].values
-            )
-            output_i.iloc[
-                -self.spec.horizon :,
-                output_i.columns.get_loc(yhat_upper_name),
-            ] = (
-                outputs[f"{col}_{cat}"][f"yhat1 {quantiles[1]*100}%"]
-                .iloc[-self.spec.horizon :]
-                .values
+
+            # TODO; could also extract trend and seasonality?
+            cols_to_read = filter(
+                lambda x: x.startswith("future_regressor"), forecast.columns
             )
-
-
-
-
-                outputs[f"{col}_{cat}"][f"yhat1 {quantiles[0]*100}%"]
-                .iloc[-self.spec.horizon :]
-                .values
+            self.explanations_info[s_id] = forecast[cols_to_read]
+            self.explanations_info[s_id]["Date"] = forecast["ds"]
+            self.explanations_info[s_id] = self.explanations_info[s_id].set_index(
+                "Date"
             )
-            output_col = pd.concat([output_col, output_i])

-            self.
-
+            self.outputs[s_id] = forecast
+            self.forecast_output.populate_series_output(
+                series_id=s_id,
+                fit_val=self.drop_horizon(forecast["yhat1"]).values,
+                forecast_val=self.get_horizon(forecast["yhat1"]).values,
+                upper_bound=self.get_horizon(
+                    forecast[f"yhat1 {model_kwargs['quantiles'][1]*100}%"]
+                ).values,
+                lower_bound=self.get_horizon(
+                    forecast[f"yhat1 {model_kwargs['quantiles'][0]*100}%"]
+                ).values,
            )

-
+            self.models[s_id] = model
+            self.trainers[s_id] = model.trainer
+
+            self.model_parameters[s_id] = {
+                "framework": SupportedModels.NeuralProphet,
+                "config": model.config,
+                "config_trend": model.config_trend,
+                "config_train": model.config_train,
+                "config_seasonality": model.config_seasonality,
+                "config_regressors": model.config_regressors,
+                "config_ar": model.config_ar,
+                "config_events": model.config_events,
+                "config_country_holidays": model.config_country_holidays,
+                "config_lagged_regressors": model.config_lagged_regressors,
+                "config_normalization": model.config_normalization,
+                "config_missing": model.config_missing,
+                "config_model": model.config_model,
+                "config_normalization": model.config_normalization,
+                "data_freq": model.data_freq,
+                "fitted": model.fitted,
+                "data_params": model.data_params,
+                "future_periods": model.future_periods,
+                "predict_steps": model.predict_steps,
+                "highlight_forecast_step_n": model.highlight_forecast_step_n,
+                "true_ar_weights": model.true_ar_weights,
+            }
+
+            logger.debug("===========Done===========")
+        except Exception as e:
+            self.errors_dict[s_id] = {"model_name": self.spec.model, "error": str(e)}
+
+    def _build_model(self) -> pd.DataFrame:
+        full_data_dict = self.datasets.get_data_by_series()
+        self.models = dict()
+        self.trainers = dict()
+        self.outputs = dict()
+        self.errors_dict = dict()
+        self.explanations_info = dict()
+        self.accepted_regressors = dict()
+        self.additional_regressors = self.datasets.get_additional_data_column_names()
+        model_kwargs = self.set_kwargs()
+        self.forecast_output = ForecastOutput(
+            confidence_interval_width=self.spec.confidence_interval_width,
+            horizon=self.spec.horizon,
+            target_column=self.original_target_column,
+            dt_column=self.spec.datetime_column.name,
+        )
+
+        for i, (s_id, df) in enumerate(full_data_dict.items()):
+            self._train_model(i, s_id, df, model_kwargs=model_kwargs.copy())

-
+        # Parallel(n_jobs=-1, require="sharedmem")(
+        #     delayed(NeuralProphetOperatorModel._train_model)(self, i, s_id, df, model_kwargs=model_kwargs.copy())
+        #     for self, (i, (s_id, df)) in zip(
+        #         [self] * len(full_data_dict), enumerate(full_data_dict.items())
+        #     )
+        # )

-
-        import datapane as dp
+        return self.forecast_output.get_forecast_long()

-
-
-
+    def run_tuning(self, data, model_kwargs):
+        from neuralprophet import NeuralProphet
+
+        def objective(trial):
+            params = {
+                # 'seasonality_mode': trial.suggest_categorical('seasonality_mode', ['additive', 'multiplicative']),
+                # 'seasonality_reg': trial.suggest_float('seasonality_reg', 0.1, 500, log=True),
+                # 'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.1, log=True),
+                "newer_samples_start": trial.suggest_float(
+                    "newer_samples_start", 0.001, 0.999
+                ),
+                "newer_samples_weight": trial.suggest_float(
+                    "newer_samples_weight", 0, 100
+                ),
+                "changepoints_range": trial.suggest_float(
+                    "changepoints_range", 0.8, 0.95
+                ),
+            }
+            # trend_reg, trend_reg_threshold, ar_reg, impute_rolling/impute_linear,
+            params.update(model_kwargs)
+
+            folds = NeuralProphet(**params).crossvalidation_split_df(data, k=3)
+            test_metrics_total_i = []
+            for df_train, df_test in folds:
+                m, accepted_regressors = _fit_model(
+                    data=df_train,
+                    params=params,
+                    additional_regressors=self.additional_regressors,
+                    select_metric=self.spec.metric,
+                )
+                df_test = df_test[["y", "ds"] + accepted_regressors]
+
+                test_forecast_i = m.predict(df=df_test)
+                fold_metric_i = (
+                    m.metrics[self.spec.metric]
+                    .forward(
+                        Tensor(test_forecast_i["yhat1"]),
+                        Tensor(test_forecast_i["y"]),
+                    )
+                    .item()
+                )
+                test_metrics_total_i.append(fold_metric_i)
+                logger.debug(
+                    f"----------------------{np.asarray(test_metrics_total_i).mean()}----------------------"
+                )
+            return np.asarray(test_metrics_total_i).mean()
+
+        study = optuna.create_study(direction="minimize")
+        m_params = NeuralProphet().parameters()
+        study.enqueue_trial(
+            {
+                # 'seasonality_mode': m_params['seasonality_mode'],
+                # 'seasonality_reg': m_params['seasonality_reg'],
+                # 'learning_rate': m_params['learning_rate'],
+                "newer_samples_start": m_params["newer_samples_start"],
+                "newer_samples_weight": m_params["newer_samples_weight"],
+                "changepoints_range": m_params["changepoints_range"],
+            }
         )
-
-
-
+        study.optimize(
+            objective,
+            n_trials=self.spec.tuning.n_trials if self.spec.tuning else DEFAULT_TRIALS,
+            n_jobs=-1,
         )

-
-
-
-            target_columns=self.target_columns,
-        )
+        selected_params = study.best_params
+        selected_params.update(model_kwargs)
+        return selected_params

-
-
-
-
-
+    def _generate_report(self):
+        import datapane as dp
+
+        all_sections = []
+
+        try:
+            sec1_text = dp.Text(
+                "## Forecast Overview \nThese plots show your "
+                "forecast in the context of historical data."
+            )
+            sec1 = _select_plot_list(
+                lambda s_id: self.models[s_id].plot(self.outputs[s_id]),
+                series_ids=self.datasets.list_series_ids(),
+            )
+            all_sections = all_sections + [sec1_text, sec1]
+        except Exception as e:
+            logger.debug(f"Failed to plot with exception: {e.args}")
+
+        try:
+            sec2_text = dp.Text(f"## Forecast Broken Down by Trend Component")
+            sec2 = _select_plot_list(
+                lambda s_id: self.models[s_id].plot_components(self.outputs[s_id]),
+                series_ids=self.datasets.list_series_ids(),
+            )
+            all_sections = all_sections + [sec2_text, sec2]
+        except Exception as e:
+            logger.debug(f"Failed to plot with exception: {e.args}")
+
+        try:
+            sec3_text = dp.Text(f"## Forecast Parameter Plots")
+            sec3 = _select_plot_list(
+                lambda s_id: self.models[s_id].plot_parameters(),
+                series_ids=self.datasets.list_series_ids(),
+            )
+            all_sections = all_sections + [sec3_text, sec3]
+        except Exception as e:
+            logger.debug(f"Failed to plot with exception: {e.args}")

         sec5_text = dp.Text(f"## Neural Prophet Model Parameters")
         model_states = []
-        for i, m in enumerate(self.models):
+        for i, (s_id, m) in enumerate(self.models.items()):
             model_states.append(
                 pd.Series(
                     m.state_dict(),
                     index=m.state_dict().keys(),
-                    name=
+                    name=s_id,
                 )
             )
         all_model_states = pd.concat(model_states, axis=1)
         sec5 = dp.DataTable(all_model_states)

-
-        all_sections = [
-            sec1_text,
-            sec1,
-            sec2_text,
-            sec2,
-            sec3_text,
-            sec3,
-            sec5_text,
-            sec5,
-        ]
+        all_sections = all_sections + [sec5_text, sec5]

         if self.spec.generate_explanations:
             try:
                 # If the key is present, call the "explain_model" method
-                self.explain_model(
-                    datetime_col_name="ds",
-                    explain_predict_fn=self._custom_predict_neuralprophet,
-                )
+                self.explain_model()

                 # Create a markdown text block for the global explanation section
                 global_explanation_text = dp.Text(
@@ -336,32 +372,16 @@ class NeuralProphetOperatorModel(ForecastOperatorBaseModel):
                     "The following tables provide the feature attribution for the global explainability."
                 )

-                # Convert the global explanation data to a DataFrame
-                global_explanation_df = pd.DataFrame(self.global_explanation)
-
-                self.formatted_global_explanation = (
-                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
-                )
-
                 # Create a markdown section for the global explainability
                 global_explanation_section = dp.Blocks(
                     "### Global Explainability ",
                     dp.DataTable(self.formatted_global_explanation),
                 )

-                aggregate_local_explanations = pd.DataFrame()
-                for s_id, local_ex_df in self.local_explanation.items():
-                    local_ex_df_copy = local_ex_df.copy()
-                    local_ex_df_copy["Series"] = s_id
-                    aggregate_local_explanations = pd.concat(
-                        [aggregate_local_explanations, local_ex_df_copy], axis=0
-                    )
-                self.formatted_local_explanation = aggregate_local_explanations
-
                 local_explanation_text = dp.Text(f"## Local Explanation of Models \n ")
                 blocks = [
                     dp.DataTable(
-                        local_ex_df.
+                        local_ex_df.drop("Series", axis=1),
                         label=s_id,
                     )
                     for s_id, local_ex_df in self.local_explanation.items()
@@ -395,9 +415,45 @@ class NeuralProphetOperatorModel(ForecastOperatorBaseModel):
             other_sections,
         )

-    def
-
-
-
-
-
+    def _save_model(self, output_dir, storage_options):
+        write_pkl(
+            obj=self.models,
+            filename="model.pkl",
+            output_dir=output_dir,
+            storage_options=storage_options,
+        )
+        write_pkl(
+            obj=self.trainers,
+            filename="trainer.pkl",
+            output_dir=output_dir,
+            storage_options=storage_options,
+        )
+
+    def explain_model(self):
+        self.local_explanation = dict()
+        global_expl = []
+        rename_cols = {
+            f"future_regressor_{col}": col
+            for col in self.datasets.get_additional_data_column_names()
+        }
+
+        for s_id, expl_df in self.explanations_info.items():
+            expl_df = expl_df.rename(rename_cols, axis=1)
+            # Local Expl
+            self.local_explanation[s_id] = self.get_horizon(expl_df).drop(
+                ["future_regressors_additive"], axis=1
+            )
+            self.local_explanation[s_id]["Series"] = s_id
+            self.local_explanation[s_id].index.rename(self.dt_column_name, inplace=True)
+            # Global Expl
+            g_expl = self.drop_horizon(expl_df).mean()
+            g_expl.name = s_id
+            global_expl.append(g_expl)
+        self.global_explanation = pd.concat(global_expl, axis=1)
+        self.global_explanation = self.global_explanation.drop(
+            index=["future_regressors_additive"], axis=0
+        )
+        self.formatted_global_explanation = (
+            self.global_explanation / self.global_explanation.sum(axis=0) * 100
+        )
+        self.formatted_local_explanation = pd.concat(self.local_explanation.values())
```