emhass 0.13.1__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import logging
4
+ import time
5
+ import warnings
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from skforecast.model_selection import (
10
+ TimeSeriesFold,
11
+ backtesting_forecaster,
12
+ bayesian_search_forecaster,
13
+ )
14
+ from skforecast.recursive import ForecasterRecursive
15
+ from sklearn.linear_model import ElasticNet, LinearRegression
16
+ from sklearn.metrics import r2_score
17
+ from sklearn.neighbors import KNeighborsRegressor
18
+
19
+ from emhass import utils
20
+
21
# Install a process-wide filter that drops every DeprecationWarning once this
# module has been imported.
# NOTE(review): presumably aimed at warnings raised by the ML dependencies
# imported above - confirm it is still needed.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
22
+
23
+
24
+ class MLForecaster:
25
+ r"""
26
+ A forecaster class using machine learning models with auto-regressive approach and features\
27
+ based on timestamp information (hour, day, week, etc).
28
+
29
+ This class uses the `skforecast` module and the machine learning models are from `scikit-learn`.
30
+
31
+ It exposes three main methods:
32
+
33
+ - `fit`: to train a model with the passed data.
34
+
35
+ - `predict`: to obtain a forecast from a pre-trained model.
36
+
37
+ - `tune`: to optimize the models hyperparameters using bayesian optimization.
38
+
39
+ """
40
+
41
+ def __init__(
42
+ self,
43
+ data: pd.DataFrame,
44
+ model_type: str,
45
+ var_model: str,
46
+ sklearn_model: str,
47
+ num_lags: int,
48
+ emhass_conf: dict,
49
+ logger: logging.Logger,
50
+ ) -> None:
51
+ r"""Define constructor for the forecast class.
52
+
53
+ :param data: The data that will be used for train/test
54
+ :type data: pd.DataFrame
55
+ :param model_type: A unique name defining this model and useful to identify \
56
+ for what it will be used for.
57
+ :type model_type: str
58
+ :param var_model: The name of the sensor to retrieve data from Home Assistant. \
59
+ Example: `sensor.power_load_no_var_loads`.
60
+ :type var_model: str
61
+ :param sklearn_model: The `scikit-learn` model that will be used. For now only \
62
+ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
63
+ :type sklearn_model: str
64
+ :param num_lags: The number of auto-regression lags to consider. A good starting point \
65
+ is to fix this as one day. For example if your time step is 30 minutes, then fix this \
66
+ to 48, if the time step is 1 hour the fix this to 24 and so on.
67
+ :type num_lags: int
68
+ :param emhass_conf: Dictionary containing the needed emhass paths
69
+ :type emhass_conf: dict
70
+ :param logger: The passed logger object
71
+ :type logger: logging.Logger
72
+ """
73
+ self.data = data
74
+ self.model_type = model_type
75
+ self.var_model = var_model
76
+ self.sklearn_model = sklearn_model
77
+ self.num_lags = num_lags
78
+ self.emhass_conf = emhass_conf
79
+ self.logger = logger
80
+ self.is_tuned = False
81
+ # A quick data preparation
82
+ self.data.index = pd.to_datetime(self.data.index)
83
+ self.data.sort_index(inplace=True)
84
+ self.data = self.data[~self.data.index.duplicated(keep="first")]
85
+
86
+ @staticmethod
87
+ def neg_r2_score(y_true, y_pred):
88
+ """The negative of the r2 score."""
89
+ return -r2_score(y_true, y_pred)
90
+
91
+ @staticmethod
92
+ def generate_exog(data_last_window, periods, var_name):
93
+ """Generate the exogenous data for future timestamps."""
94
+ forecast_dates = pd.date_range(
95
+ start=data_last_window.index[-1] + data_last_window.index.freq,
96
+ periods=periods,
97
+ freq=data_last_window.index.freq,
98
+ )
99
+ exog = pd.DataFrame({var_name: [np.nan] * periods}, index=forecast_dates)
100
+ exog = utils.add_date_features(exog)
101
+ return exog
102
+
103
+ def fit(
104
+ self,
105
+ split_date_delta: str | None = "48h",
106
+ perform_backtest: bool | None = False,
107
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
108
+ r"""The fit method to train the ML model.
109
+
110
+ :param split_date_delta: The delta from now to `split_date_delta` that will be used \
111
+ as the test period to evaluate the model, defaults to '48h'
112
+ :type split_date_delta: Optional[str], optional
113
+ :param perform_backtest: If `True` then a back testing routine is performed to evaluate \
114
+ the performance of the model on the complete train set, defaults to False
115
+ :type perform_backtest: Optional[bool], optional
116
+ :return: The DataFrame containing the forecast data results without and with backtest
117
+ :rtype: Tuple[pd.DataFrame, pd.DataFrame]
118
+ """
119
+ self.logger.info("Performing a forecast model fit for " + self.model_type)
120
+ # Preparing the data: adding exogenous features
121
+ self.data_exo = pd.DataFrame(index=self.data.index)
122
+ self.data_exo = utils.add_date_features(self.data_exo)
123
+ self.data_exo[self.var_model] = self.data[self.var_model]
124
+ self.data_exo = self.data_exo.interpolate(method="linear", axis=0, limit=None)
125
+ # train/test split
126
+ self.date_train = (
127
+ self.data_exo.index[-1] - pd.Timedelta("5days") + self.data_exo.index.freq
128
+ ) # The last 5 days
129
+ self.date_split = (
130
+ self.data_exo.index[-1]
131
+ - pd.Timedelta(split_date_delta)
132
+ + self.data_exo.index.freq
133
+ ) # The last 48h
134
+ self.data_train = self.data_exo.loc[
135
+ : self.date_split - self.data_exo.index.freq, :
136
+ ]
137
+ self.data_test = self.data_exo.loc[self.date_split :, :]
138
+ self.steps = len(self.data_test)
139
+ # Pick correct sklearn model
140
+ if self.sklearn_model == "LinearRegression":
141
+ base_model = LinearRegression()
142
+ elif self.sklearn_model == "ElasticNet":
143
+ base_model = ElasticNet()
144
+ elif self.sklearn_model == "KNeighborsRegressor":
145
+ base_model = KNeighborsRegressor()
146
+ else:
147
+ self.logger.error(
148
+ "Passed sklearn model "
149
+ + self.sklearn_model
150
+ + " is not valid. Defaulting to KNeighborsRegressor"
151
+ )
152
+ base_model = KNeighborsRegressor()
153
+ # Define the forecaster object
154
+ self.forecaster = ForecasterRecursive(regressor=base_model, lags=self.num_lags)
155
+ # Fit and time it
156
+ self.logger.info("Training a " + self.sklearn_model + " model")
157
+ start_time = time.time()
158
+ self.forecaster.fit(
159
+ y=self.data_train[self.var_model],
160
+ exog=self.data_train.drop(self.var_model, axis=1),
161
+ )
162
+ self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}")
163
+ # Make a prediction to print metrics
164
+ predictions = self.forecaster.predict(
165
+ steps=self.steps, exog=self.data_test.drop(self.var_model, axis=1)
166
+ )
167
+ pred_metric = r2_score(self.data_test[self.var_model], predictions)
168
+ self.logger.info(
169
+ f"Prediction R2 score of fitted model on test data: {pred_metric}"
170
+ )
171
+ # Packing results in a DataFrame
172
+ df_pred = pd.DataFrame(
173
+ index=self.data_exo.index, columns=["train", "test", "pred"]
174
+ )
175
+ df_pred["train"] = self.data_train[self.var_model]
176
+ df_pred["test"] = self.data_test[self.var_model]
177
+ df_pred["pred"] = predictions
178
+ df_pred_backtest = None
179
+ if perform_backtest is True:
180
+ # Using backtesting tool to evaluate the model
181
+ self.logger.info("Performing simple backtesting of fitted model")
182
+ start_time = time.time()
183
+ cv = TimeSeriesFold(
184
+ steps=self.num_lags,
185
+ initial_train_size=None,
186
+ fixed_train_size=False,
187
+ gap=0,
188
+ allow_incomplete_fold=True,
189
+ refit=False,
190
+ )
191
+ metric, predictions_backtest = backtesting_forecaster(
192
+ forecaster=self.forecaster,
193
+ y=self.data_train[self.var_model],
194
+ exog=self.data_train.drop(self.var_model, axis=1),
195
+ cv=cv,
196
+ metric=MLForecaster.neg_r2_score,
197
+ verbose=False,
198
+ show_progress=True,
199
+ )
200
+ self.logger.info(f"Elapsed backtesting time: {time.time() - start_time}")
201
+ self.logger.info(f"Backtest R2 score: {-metric}")
202
+ df_pred_backtest = pd.DataFrame(
203
+ index=self.data_exo.index, columns=["train", "pred"]
204
+ )
205
+ df_pred_backtest["train"] = self.data_exo[self.var_model]
206
+ df_pred_backtest["pred"] = predictions_backtest
207
+ return df_pred, df_pred_backtest
208
+
209
+ def predict(self, data_last_window: pd.DataFrame | None = None) -> pd.Series:
210
+ """The predict method to generate forecasts from a previously fitted ML model.
211
+
212
+ :param data_last_window: The data that will be used to generate the new forecast, this \
213
+ will be freshly retrieved from Home Assistant. This data is needed because the forecast \
214
+ model is an auto-regressive model with lags. If not passed then the data used during the \
215
+ model train is used, defaults to None
216
+ :type data_last_window: Optional[pd.DataFrame], optional
217
+ :return: A pandas series containing the generated forecasts.
218
+ :rtype: pd.Series
219
+ """
220
+ if data_last_window is None:
221
+ predictions = self.forecaster.predict(
222
+ steps=self.num_lags, exog=self.data_test.drop(self.var_model, axis=1)
223
+ )
224
+ else:
225
+ data_last_window = data_last_window.interpolate(
226
+ method="linear", axis=0, limit=None
227
+ )
228
+ if self.is_tuned:
229
+ exog = MLForecaster.generate_exog(
230
+ data_last_window, self.lags_opt, self.var_model
231
+ )
232
+ predictions = self.forecaster.predict(
233
+ steps=self.lags_opt,
234
+ last_window=data_last_window[self.var_model],
235
+ exog=exog.drop(self.var_model, axis=1),
236
+ )
237
+ else:
238
+ exog = MLForecaster.generate_exog(
239
+ data_last_window, self.num_lags, self.var_model
240
+ )
241
+ predictions = self.forecaster.predict(
242
+ steps=self.num_lags,
243
+ last_window=data_last_window[self.var_model],
244
+ exog=exog.drop(self.var_model, axis=1),
245
+ )
246
+ return predictions
247
+
248
+ def tune(self, debug: bool | None = False) -> pd.DataFrame:
249
+ """Tuning a previously fitted model using bayesian optimization.
250
+
251
+ :param debug: Set to True for testing and faster optimizations, defaults to False
252
+ :type debug: Optional[bool], optional
253
+ :return: The DataFrame with the forecasts using the optimized model.
254
+ :rtype: pd.DataFrame
255
+ """
256
+ # Regressor hyperparameters search space
257
+ if self.sklearn_model == "LinearRegression":
258
+ if debug:
259
+
260
+ def search_space(trial):
261
+ search_space = {
262
+ "fit_intercept": trial.suggest_categorical(
263
+ "fit_intercept", [True]
264
+ ),
265
+ "lags": trial.suggest_categorical("lags", [3]),
266
+ }
267
+ return search_space
268
+ else:
269
+
270
+ def search_space(trial):
271
+ search_space = {
272
+ "fit_intercept": trial.suggest_categorical(
273
+ "fit_intercept", [True, False]
274
+ ),
275
+ "lags": trial.suggest_categorical(
276
+ "lags", [6, 12, 24, 36, 48, 60, 72]
277
+ ),
278
+ }
279
+ return search_space
280
+ elif self.sklearn_model == "ElasticNet":
281
+ if debug:
282
+
283
+ def search_space(trial):
284
+ search_space = {
285
+ "selection": trial.suggest_categorical("selection", ["random"]),
286
+ "lags": trial.suggest_categorical("lags", [3]),
287
+ }
288
+ return search_space
289
+ else:
290
+
291
+ def search_space(trial):
292
+ search_space = {
293
+ "alpha": trial.suggest_float("alpha", 0.0, 2.0),
294
+ "l1_ratio": trial.suggest_float("l1_ratio", 0.0, 1.0),
295
+ "selection": trial.suggest_categorical(
296
+ "selection", ["cyclic", "random"]
297
+ ),
298
+ "lags": trial.suggest_categorical(
299
+ "lags", [6, 12, 24, 36, 48, 60, 72]
300
+ ),
301
+ }
302
+ return search_space
303
+ elif self.sklearn_model == "KNeighborsRegressor":
304
+ if debug:
305
+
306
+ def search_space(trial):
307
+ search_space = {
308
+ "weights": trial.suggest_categorical("weights", ["uniform"]),
309
+ "lags": trial.suggest_categorical("lags", [3]),
310
+ }
311
+ return search_space
312
+ else:
313
+
314
+ def search_space(trial):
315
+ search_space = {
316
+ "n_neighbors": trial.suggest_int("n_neighbors", 2, 20),
317
+ "leaf_size": trial.suggest_int("leaf_size", 20, 40),
318
+ "weights": trial.suggest_categorical(
319
+ "weights", ["uniform", "distance"]
320
+ ),
321
+ "lags": trial.suggest_categorical(
322
+ "lags", [6, 12, 24, 36, 48, 60, 72]
323
+ ),
324
+ }
325
+ return search_space
326
+
327
+ # Bayesian search hyperparameter and lags with skforecast/optuna
328
+ # Lags used as predictors
329
+ if debug:
330
+ refit = False
331
+ num_lags = 3
332
+ else:
333
+ refit = True
334
+ num_lags = self.num_lags
335
+ # The optimization routine call
336
+ self.logger.info("Bayesian hyperparameter optimization with backtesting")
337
+ start_time = time.time()
338
+ cv = TimeSeriesFold(
339
+ steps=num_lags,
340
+ initial_train_size=len(self.data_exo.loc[: self.date_train]),
341
+ fixed_train_size=True,
342
+ gap=0,
343
+ skip_folds=None,
344
+ allow_incomplete_fold=True,
345
+ refit=refit,
346
+ )
347
+ self.optimize_results, self.optimize_results_object = (
348
+ bayesian_search_forecaster(
349
+ forecaster=self.forecaster,
350
+ y=self.data_train[self.var_model],
351
+ exog=self.data_train.drop(self.var_model, axis=1),
352
+ cv=cv,
353
+ search_space=search_space,
354
+ metric=MLForecaster.neg_r2_score,
355
+ n_trials=10,
356
+ random_state=123,
357
+ return_best=True,
358
+ )
359
+ )
360
+ self.logger.info(f"Elapsed time: {time.time() - start_time}")
361
+ self.is_tuned = True
362
+ predictions_opt = self.forecaster.predict(
363
+ steps=self.num_lags, exog=self.data_test.drop(self.var_model, axis=1)
364
+ )
365
+ freq_hours = self.data_exo.index.freq.delta.seconds / 3600
366
+ self.lags_opt = int(np.round(len(self.optimize_results.iloc[0]["lags"])))
367
+ self.days_needed = int(np.round(self.lags_opt * freq_hours / 24))
368
+ df_pred_opt = pd.DataFrame(
369
+ index=self.data_exo.index, columns=["train", "test", "pred_optim"]
370
+ )
371
+ df_pred_opt["train"] = self.data_train[self.var_model]
372
+ df_pred_opt["test"] = self.data_test[self.var_model]
373
+ df_pred_opt["pred_optim"] = predictions_opt
374
+ pred_optim_metric_train = -self.optimize_results.iloc[0]["neg_r2_score"]
375
+ self.logger.info(
376
+ f"R2 score for optimized prediction in train period: {pred_optim_metric_train}"
377
+ )
378
+ pred_optim_metric_test = r2_score(
379
+ df_pred_opt.loc[predictions_opt.index, "test"],
380
+ df_pred_opt.loc[predictions_opt.index, "pred_optim"],
381
+ )
382
+ self.logger.info(
383
+ f"R2 score for optimized prediction in test period: {pred_optim_metric_test}"
384
+ )
385
+ self.logger.info("Number of optimal lags obtained: " + str(self.lags_opt))
386
+ return df_pred_opt
@@ -0,0 +1,245 @@
1
+ """Machine learning regressor module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ import warnings
7
+ from typing import TYPE_CHECKING
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from sklearn.ensemble import (
12
+ AdaBoostRegressor,
13
+ GradientBoostingRegressor,
14
+ RandomForestRegressor,
15
+ )
16
+ from sklearn.linear_model import Lasso, LinearRegression, Ridge
17
+ from sklearn.metrics import r2_score
18
+ from sklearn.model_selection import GridSearchCV, train_test_split
19
+ from sklearn.pipeline import make_pipeline
20
+ from sklearn.preprocessing import StandardScaler
21
+
22
+ from emhass import utils
23
+
24
+ if TYPE_CHECKING:
25
+ import logging
26
+
27
# Install a process-wide filter that drops every DeprecationWarning once this
# module has been imported.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Lookup table from the public regression-method name to a base estimator and
# the hyperparameter grid explored for it. Grid keys carry the lowercase
# estimator class name as prefix, the step name under which the estimator sits
# in the pipeline built with `make_pipeline`.
REGRESSION_METHODS = {
    method_name: {"model": estimator, "param_grid": grid}
    for method_name, estimator, grid in (
        (
            "LinearRegression",
            LinearRegression(),
            {
                "linearregression__fit_intercept": [True, False],
                "linearregression__positive": [True, False],
            },
        ),
        (
            "RidgeRegression",
            Ridge(),
            {"ridge__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]},
        ),
        (
            "LassoRegression",
            Lasso(),
            {"lasso__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]},
        ),
        (
            "RandomForestRegression",
            RandomForestRegressor(),
            {"randomforestregressor__n_estimators": [50, 100, 200]},
        ),
        (
            "GradientBoostingRegression",
            GradientBoostingRegressor(),
            {
                "gradientboostingregressor__n_estimators": [50, 100, 200],
                "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2],
            },
        ),
        (
            "AdaBoostRegression",
            AdaBoostRegressor(),
            {
                "adaboostregressor__n_estimators": [50, 100, 200],
                "adaboostregressor__learning_rate": [0.01, 0.1, 0.2],
            },
        ),
    )
}
64
+
65
+
66
class MLRegressor:
    """A forecaster class using machine learning models.

    This class uses the `sklearn` module and the machine learning models are
    from `scikit-learn`.

    It exposes two main methods:

    - `fit`: to train a model with the passed data.

    - `predict`: to obtain a forecast from a pre-trained model.

    """

    def __init__(
        self: MLRegressor,
        data: pd.DataFrame,
        model_type: str,
        regression_model: str,
        features: list,
        target: str,
        timestamp: str,
        logger: logging.Logger,
    ) -> None:
        """Define constructor for the forecast class.

        The data is stored sorted by index, with duplicated index entries
        removed (first occurrence kept).

        :param data: The data that will be used for train/test
        :type data: pd.DataFrame
        :param model_type: A unique name defining this model and useful to identify
            for what it will be used for.
        :type model_type: str
        :param regression_model: The model that will be used. For now only
            these options are possible: `LinearRegression`, `RidgeRegression`,
            `LassoRegression`, `RandomForestRegression`,
            `GradientBoostingRegression` and `AdaBoostRegression`.
        :type regression_model: str
        :param features: A list of features.
            Example: [`solar_production`, `degree_days`].
        :type features: list
        :param target: The target (to be predicted).
            Example: `heating_hours`.
        :type target: str
        :param timestamp: If defined, the column key that has to be used as timestamp.
        :type timestamp: str
        :param logger: The passed logger object
        :type logger: logging.Logger
        """
        self.features = features
        self.target = target
        self.timestamp = timestamp
        self.model_type = model_type
        self.regression_model = regression_model
        self.logger = logger
        ordered = data.sort_index()
        self.data = ordered[~ordered.index.duplicated(keep="first")]
        # Populated by `fit`
        self.data_exo = None
        self.steps = None
        self.model = None
        self.grid_search = None

    def get_regression_model(self: MLRegressor) -> tuple[object | None, dict | None]:
        """
        Get the base model and parameter grid for the specified regression model.

        Both values are read from the module-level `REGRESSION_METHODS` table.

        :param self: The instance of the MLRegressor class.
        :type self: MLRegressor
        :return: A tuple containing the base model and parameter grid, or
            `(None, None)` when the model name is not known (an error is logged).
        :rtype: tuple
        """
        method = REGRESSION_METHODS.get(self.regression_model)
        if method is None:
            self.logger.error(
                "Passed model %s is not valid",
                self.regression_model,
            )
            return None, None
        return method["model"], method["param_grid"]

    def fit(self: MLRegressor, date_features: list | None = None) -> bool:
        """Fit the model using the provided data.

        The data is split 80/20 with a fixed seed. A 5-fold grid search scored
        on the negative mean squared error selects the hyperparameters of a
        `StandardScaler` + estimator pipeline, and the refitted best pipeline
        is kept as `self.model`.

        :param date_features: A list of 'date_features' to take into account when
            fitting the model. Only usable when a timestamp column was given.
        :type date_features: list
        :return: `True` if successful, `False` when the regression model name is
            not valid.
        :rtype: bool
        :raises KeyError: If a feature or the target column is missing from the data.
        """
        self.logger.info("Performing a MLRegressor fit for %s", self.model_type)
        # Reject an unknown model before spending any time preparing the data
        base_model, param_grid = self.get_regression_model()
        if base_model is None:
            return False
        missing = [
            name
            for name in (*self.features, self.target)
            if name not in self.data.columns
        ]
        if missing:
            raise KeyError(f"Columns missing from the training data: {missing}")
        keep_columns = list(self.features)
        if self.timestamp is not None:
            keep_columns.append(self.timestamp)
        keep_columns.append(self.target)
        # NOTE(review): the kept columns follow the DataFrame's column order, not
        # the order of `features`, while `predict` documents the latter - confirm
        # both orders agree for the data in use.
        self.data_exo = self.data[self.data.columns.intersection(keep_columns)]
        self.data_exo = self.data_exo.reset_index(drop=True)
        if date_features is not None:
            if self.timestamp is not None:
                self.data_exo = utils.add_date_features(
                    self.data_exo, timestamp=self.timestamp, date_features=date_features
                )
            else:
                # Adjacent literals instead of a backslash continuation, which
                # would embed the source indentation in the message.
                self.logger.error(
                    "If no timestamp provided, you can't use date_features, going "
                    "further without date_features.",
                )
        y = self.data_exo[self.target]
        self.data_exo = self.data_exo.drop(self.target, axis=1)
        if self.timestamp is not None:
            self.data_exo = self.data_exo.drop(self.timestamp, axis=1)
        X = self.data_exo
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        self.steps = len(X_test)
        self.model = make_pipeline(StandardScaler(), base_model)
        # Create a grid search object
        self.grid_search = GridSearchCV(
            self.model,
            param_grid,
            cv=5,
            scoring="neg_mean_squared_error",
            refit=True,
            verbose=0,
            n_jobs=-1,
        )
        # Fit the grid search object to the data
        self.logger.info("Training a %s model", self.regression_model)
        start_time = time.time()
        self.grid_search.fit(X_train.values, y_train.values)
        self.logger.info("Elapsed time for model fit: %s", time.time() - start_time)
        self.model = self.grid_search.best_estimator_
        # Make predictions
        predictions = self.model.predict(X_test.values)
        predictions = pd.Series(predictions, index=X_test.index)
        pred_metric = r2_score(y_test, predictions)
        self.logger.info(
            "Prediction R2 score of fitted model on test data: %s",
            pred_metric,
        )
        return True

    def predict(self: MLRegressor, new_values: list) -> np.ndarray:
        """Predict a new value.

        :param new_values: The new values for the features
            (in the same order as the features list).
            Example: [2.24, 5.68]. A list of such rows is also accepted and
            yields one prediction per row.
        :type new_values: list
        :return: The np.ndarray containing the predicted value(s).
        :rtype: np.ndarray
        """
        self.logger.info("Performing a prediction for %s", self.model_type)
        # A flat list becomes a single row; a list of rows is passed unchanged
        samples = np.atleast_2d(np.asarray(new_values))
        return self.model.predict(samples)