emhass 0.8.5__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emhass/command_line.py +705 -272
- emhass/forecast.py +114 -45
- emhass/machine_learning_forecaster.py +4 -4
- emhass/machine_learning_regressor.py +290 -0
- emhass/optimization.py +4 -3
- emhass/retrieve_hass.py +235 -103
- emhass/static/advanced.html +3 -0
- emhass/static/script.js +2 -0
- emhass/utils.py +605 -305
- emhass/web_server.py +48 -26
- {emhass-0.8.5.dist-info → emhass-0.9.0.dist-info}/METADATA +19 -5
- emhass-0.9.0.dist-info/RECORD +26 -0
- emhass-0.8.5.dist-info/RECORD +0 -25
- {emhass-0.8.5.dist-info → emhass-0.9.0.dist-info}/LICENSE +0 -0
- {emhass-0.8.5.dist-info → emhass-0.9.0.dist-info}/WHEEL +0 -0
- {emhass-0.8.5.dist-info → emhass-0.9.0.dist-info}/entry_points.txt +0 -0
- {emhass-0.8.5.dist-info → emhass-0.9.0.dist-info}/top_level.txt +0 -0
emhass/machine_learning_regressor.py
NEW
@@ -0,0 +1,290 @@

```python
"""Machine learning regressor module."""

from __future__ import annotations

import copy
import time
import warnings
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

if TYPE_CHECKING:
    import logging

warnings.filterwarnings("ignore", category=DeprecationWarning)

REGRESSION_METHODS = {
    "LinearRegression": {
        "model": LinearRegression(),
        "param_grid": {
            "linearregression__fit_intercept": [True, False],
            "linearregression__positive": [True, False],
        },
    },
    "RidgeRegression": {
        "model": Ridge(),
        "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]},
    },
    "LassoRegression": {
        "model": Lasso(),
        "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]},
    },
    "RandomForestRegression": {
        "model": RandomForestRegressor(),
        "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]},
    },
    "GradientBoostingRegression": {
        "model": GradientBoostingRegressor(),
        "param_grid": {
            "gradientboostingregressor__n_estimators": [50, 100, 200],
            "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2],
        },
    },
    "AdaBoostRegression": {
        "model": AdaBoostRegressor(),
        "param_grid": {
            "adaboostregressor__n_estimators": [50, 100, 200],
            "adaboostregressor__learning_rate": [0.01, 0.1, 0.2],
        },
    },
}
```
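The double-underscore keys in each `param_grid` follow scikit-learn's pipeline parameter convention: `make_pipeline` names every step after its lowercased class name, and `<step>__<param>` routes a grid value to that step's parameter. A minimal check of that convention (plain scikit-learn, independent of emhass):

```python
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

pipe = make_pipeline(StandardScaler(), Ridge())
print(list(pipe.named_steps))       # ['standardscaler', 'ridge']
pipe.set_params(ridge__alpha=10.0)  # same addressing GridSearchCV uses for "ridge__alpha"
```

This is why the grids above are keyed by `linearregression__`, `ridge__`, and so on, rather than by the bare parameter names.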
```python
class MLRegressor:
    r"""A forecaster class using machine learning models.

    This class uses the `sklearn` module and the machine learning models are \
    from `scikit-learn`.

    It exposes two main methods:

    - `fit`: to train a model with the passed data.

    - `predict`: to obtain a forecast from a pre-trained model.

    """

    def __init__(  # noqa: PLR0913
        self: MLRegressor,
        data: pd.DataFrame,
        model_type: str,
        regression_model: str,
        features: list,
        target: str,
        timestamp: str,
        logger: logging.Logger,
    ) -> None:
        r"""Define constructor for the forecast class.

        :param data: The data that will be used for train/test
        :type data: pd.DataFrame
        :param model_type: A unique name defining this model and useful to identify \
            what it will be used for.
        :type model_type: str
        :param regression_model: The model that will be used. For now only \
            these options are possible: `LinearRegression`, `RidgeRegression`, \
            `LassoRegression`, `RandomForestRegression`, \
            `GradientBoostingRegression` and `AdaBoostRegression`.
        :type regression_model: str
        :param features: A list of features. \
            Example: [`solar_production`, `degree_days`].
        :type features: list
        :param target: The target (to be predicted). \
            Example: `heating_hours`.
        :type target: str
        :param timestamp: If defined, the column key to use as timestamp.
        :type timestamp: str
        :param logger: The passed logger object
        :type logger: logging.Logger
        """
        self.data = data
        self.features = features
        self.target = target
        self.timestamp = timestamp
        self.model_type = model_type
        self.regression_model = regression_model
        self.logger = logger
        self.data = self.data.sort_index()
        self.data = self.data[~self.data.index.duplicated(keep="first")]
        self.data_exo = None
        self.steps = None
        self.model = None
        self.grid_search = None

    @staticmethod
    def add_date_features(
        data: pd.DataFrame,
        date_features: list,
        timestamp: str,
    ) -> pd.DataFrame:
        """Add date features from the input DataFrame timestamp.

        :param data: The input DataFrame
        :type data: pd.DataFrame
        :param date_features: The list of date features to add; supported values \
            are `year`, `month`, `day_of_week`, `day_of_year`, `day` and `hour`
        :type date_features: list
        :param timestamp: The column containing the timestamp
        :type timestamp: str
        :return: The DataFrame with the added features
        :rtype: pd.DataFrame
        """
        df = copy.deepcopy(data)  # noqa: PD901
        df[timestamp] = pd.to_datetime(df["timestamp"])
        if "year" in date_features:
            df["year"] = [i.year for i in df["timestamp"]]
        if "month" in date_features:
            df["month"] = [i.month for i in df["timestamp"]]
        if "day_of_week" in date_features:
            df["day_of_week"] = [i.dayofweek for i in df["timestamp"]]
        if "day_of_year" in date_features:
            df["day_of_year"] = [i.dayofyear for i in df["timestamp"]]
        if "day" in date_features:
            df["day"] = [i.day for i in df["timestamp"]]
        if "hour" in date_features:
            df["hour"] = [i.hour for i in df["timestamp"]]

        return df
```
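Note that `add_date_features` reads the literal `df["timestamp"]` column regardless of the `timestamp` argument it receives. A minimal sketch of a call, assuming a DataFrame that does have a `timestamp` column (the data values are illustrative):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "timestamp": pd.date_range("2024-01-01", periods=3, freq="h").astype(str),
        "degree_days": [5.1, 4.8, 4.2],
    }
)
out = MLRegressor.add_date_features(df, ["month", "day_of_week", "hour"], "timestamp")
print(out[["month", "day_of_week", "hour"]])
```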
```python
    def get_regression_model(self: MLRegressor) -> tuple | None:
        """Get the base model and parameter grid for the specified regression model.

        Returns a tuple containing the base model and parameter grid corresponding to \
            the specified regression model, or None if the model name is not valid.

        Args:
        ----
            self: The instance of the MLRegressor class.

        Returns:
        -------
            A tuple containing the base model and parameter grid.

        """
        if self.regression_model == "LinearRegression":
            base_model = REGRESSION_METHODS["LinearRegression"]["model"]
            param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"]
        elif self.regression_model == "RidgeRegression":
            base_model = REGRESSION_METHODS["RidgeRegression"]["model"]
            param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"]
        elif self.regression_model == "LassoRegression":
            base_model = REGRESSION_METHODS["LassoRegression"]["model"]
            param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"]
        elif self.regression_model == "RandomForestRegression":
            base_model = REGRESSION_METHODS["RandomForestRegression"]["model"]
            param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"]
        elif self.regression_model == "GradientBoostingRegression":
            base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"]
            param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"]
        elif self.regression_model == "AdaBoostRegression":
            base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"]
            param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"]
        else:
            self.logger.error(
                "Passed model %s is not valid",
                self.regression_model,
            )
            return None
        return base_model, param_grid
```
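Every branch above simply indexes `REGRESSION_METHODS` with the model name, so the same behavior can be expressed as a single lookup. A sketch of the equivalent form, written as a free function for illustration (not the shipped code):

```python
def lookup_regression_model(regression_model: str, logger) -> tuple | None:
    """Lookup-based equivalent of MLRegressor.get_regression_model."""
    entry = REGRESSION_METHODS.get(regression_model)
    if entry is None:
        logger.error("Passed model %s is not valid", regression_model)
        return None
    return entry["model"], entry["param_grid"]
```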
```python
    def fit(self: MLRegressor, date_features: list | None = None) -> None:
        """Fit the model using the provided data.

        :param date_features: A list of 'date_features' to take into account when \
            fitting the model.
        :type date_features: list
        """
        self.logger.info("Performing a MLRegressor fit for %s", self.model_type)
        self.data_exo = pd.DataFrame(self.data)
        self.data_exo[self.features] = self.data[self.features]
        self.data_exo[self.target] = self.data[self.target]
        keep_columns = []
        keep_columns.extend(self.features)
        if self.timestamp is not None:
            keep_columns.append(self.timestamp)
        keep_columns.append(self.target)
        self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)]
        self.data_exo = self.data_exo.reset_index(drop=True)
        if date_features is not None:
            if self.timestamp is not None:
                self.data_exo = MLRegressor.add_date_features(
                    self.data_exo,
                    date_features,
                    self.timestamp,
                )
            else:
                self.logger.error(
                    "If no timestamp is provided, date_features cannot be used; "
                    "continuing without date_features.",
                )

        y = self.data_exo[self.target]
        self.data_exo = self.data_exo.drop(self.target, axis=1)
        if self.timestamp is not None:
            self.data_exo = self.data_exo.drop(self.timestamp, axis=1)
        X = self.data_exo  # noqa: N806

        X_train, X_test, y_train, y_test = train_test_split(  # noqa: N806
            X,
            y,
            test_size=0.2,
            random_state=42,
        )

        self.steps = len(X_test)

        base_model, param_grid = self.get_regression_model()

        self.model = make_pipeline(StandardScaler(), base_model)

        # Create a grid search object
        self.grid_search = GridSearchCV(
            self.model,
            param_grid,
            cv=5,
            scoring="neg_mean_squared_error",
            refit=True,
            verbose=0,
            n_jobs=-1,
        )

        # Fit the grid search object to the data
        self.logger.info("Training a %s model", self.regression_model)
        start_time = time.time()
        self.grid_search.fit(X_train.values, y_train.values)
        self.logger.info("Elapsed time for model fit: %s", time.time() - start_time)

        self.model = self.grid_search.best_estimator_

        # Make predictions
        predictions = self.model.predict(X_test.values)
        predictions = pd.Series(predictions, index=X_test.index)
        pred_metric = r2_score(y_test, predictions)
        self.logger.info(
            "Prediction R2 score of fitted model on test data: %s",
            pred_metric,
        )
```
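After `fit` completes, the best cross-validated pipeline is kept on `self.model` and the full search object stays on `self.grid_search`, so the selected hyperparameters can be inspected through standard `GridSearchCV` attributes. Note also that `train_test_split` shuffles by default, so the held-out 20% behind the logged R² score is a random sample rather than a chronological tail. A sketch of inspecting the search, assuming `mlr` is a fitted `MLRegressor` (the variable name is illustrative):

```python
best_params = mlr.grid_search.best_params_   # e.g. {"ridge__alpha": 1.0} for RidgeRegression
best_cv_mse = -mlr.grid_search.best_score_   # scoring is "neg_mean_squared_error", so negate
print(best_params, best_cv_mse)
```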
```python
    def predict(self: MLRegressor, new_values: list) -> np.ndarray:
        """Predict a new value.

        :param new_values: The new values for the features \
            (in the same order as the features list). \
            Example: [2.24, 5.68].
        :type new_values: list
        :return: The np.ndarray containing the predicted value.
        :rtype: np.ndarray
        """
        self.logger.info("Performing a prediction for %s", self.model_type)
        new_values = np.array([new_values])

        return self.model.predict(new_values)
```
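A minimal end-to-end sketch of the class on toy data, assuming a standard `logging` logger; the DataFrame contents, the `model_type` string, and the predicted feature values are illustrative, not taken from the diff. Once date features are used in `fit`, `predict` expects their values appended after the regular features, in column order:

```python
import logging

import pandas as pd

logger = logging.getLogger(__name__)

df = pd.DataFrame(
    {
        "timestamp": pd.date_range("2024-01-01", periods=200, freq="h").astype(str),
        "degree_days": [0.1 * i for i in range(200)],
        "solar_production": [0.5 * i for i in range(200)],
        "heating_hours": [0.2 * i for i in range(200)],
    }
)
mlr = MLRegressor(
    data=df,
    model_type="heating_hours_degree_days",
    regression_model="RidgeRegression",
    features=["degree_days", "solar_production"],
    target="heating_hours",
    timestamp="timestamp",
    logger=logger,
)
mlr.fit(date_features=["month", "day_of_week", "hour"])
# degree_days, solar_production, then month, day_of_week, hour
prediction = mlr.predict([12.5, 40.0, 2, 3, 14])
```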
emhass/optimization.py
CHANGED
```diff
@@ -31,7 +31,7 @@ class Optimization:
 
     def __init__(self, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict,
                  var_load_cost: str, var_prod_price: str,
-                 costfun: str,
+                 costfun: str, emhass_conf: dict, logger: logging.Logger,
                  opt_time_delta: Optional[int] = 24) -> None:
         r"""
         Define constructor for Optimization class.
@@ -50,8 +50,8 @@ class Optimization:
         :type var_prod_price: str
         :param costfun: The type of cost function to use for optimization problem
         :type costfun: str
-        :param
-        :type
+        :param emhass_conf: Dictionary containing the needed emhass paths
+        :type emhass_conf: dict
         :param logger: The passed logger object
         :type logger: logging object
         :param opt_time_delta: The number of hours to optimize. If days_list has \
@@ -71,6 +71,7 @@ class Optimization:
         self.var_load = self.retrieve_hass_conf['var_load']
         self.var_load_new = self.var_load+'_positive'
         self.costfun = costfun
+        # self.emhass_conf = emhass_conf
         self.logger = logger
         self.var_load_cost = var_load_cost
         self.var_prod_price = var_prod_price
```
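With this change, callers must now pass an `emhass_conf` dictionary of paths to the constructor (even though the corresponding attribute assignment ships commented out in this release). A hypothetical call-site sketch; the dictionary keys and path values are illustrative assumptions, not taken from the diff:

```python
import logging
from pathlib import Path

logger = logging.getLogger(__name__)
emhass_conf = {
    "config_path": Path("/app/config_emhass.yaml"),  # illustrative paths
    "data_path": Path("/app/data/"),
    "root_path": Path("/app/src/emhass/"),
}
# retrieve_hass_conf, optim_conf, plant_conf, var_load_cost, var_prod_price
# and costfun are assumed to be built elsewhere, as before this change
opt = Optimization(retrieve_hass_conf, optim_conf, plant_conf,
                   var_load_cost, var_prod_price,
                   costfun, emhass_conf, logger)
```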