emhass 0.10.6__py3-none-any.whl → 0.15.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emhass/command_line.py +1827 -735
- emhass/connection_manager.py +108 -0
- emhass/data/associations.csv +98 -0
- emhass/data/cec_inverters.pbz2 +0 -0
- emhass/data/cec_modules.pbz2 +0 -0
- emhass/data/config_defaults.json +120 -0
- emhass/forecast.py +1482 -622
- emhass/img/emhass_icon.png +0 -0
- emhass/machine_learning_forecaster.py +565 -212
- emhass/machine_learning_regressor.py +162 -122
- emhass/optimization.py +1724 -590
- emhass/retrieve_hass.py +1104 -248
- emhass/static/advanced.html +9 -1
- emhass/static/basic.html +4 -2
- emhass/static/configuration_list.html +48 -0
- emhass/static/configuration_script.js +956 -0
- emhass/static/data/param_definitions.json +592 -0
- emhass/static/script.js +377 -322
- emhass/static/style.css +270 -13
- emhass/templates/configuration.html +77 -0
- emhass/templates/index.html +23 -14
- emhass/templates/template.html +4 -5
- emhass/utils.py +1797 -428
- emhass/web_server.py +850 -448
- emhass/websocket_client.py +224 -0
- emhass-0.15.5.dist-info/METADATA +164 -0
- emhass-0.15.5.dist-info/RECORD +34 -0
- {emhass-0.10.6.dist-info → emhass-0.15.5.dist-info}/WHEEL +1 -2
- emhass-0.15.5.dist-info/entry_points.txt +2 -0
- emhass-0.10.6.dist-info/METADATA +0 -622
- emhass-0.10.6.dist-info/RECORD +0 -26
- emhass-0.10.6.dist-info/entry_points.txt +0 -2
- emhass-0.10.6.dist-info/top_level.txt +0 -1
- {emhass-0.10.6.dist-info → emhass-0.15.5.dist-info/licenses}/LICENSE +0 -0
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import
|
|
5
|
+
import asyncio
|
|
6
6
|
import time
|
|
7
7
|
import warnings
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
@@ -11,20 +11,36 @@ import numpy as np
|
|
|
11
11
|
import pandas as pd
|
|
12
12
|
from sklearn.ensemble import (
|
|
13
13
|
AdaBoostRegressor,
|
|
14
|
+
ExtraTreesRegressor,
|
|
14
15
|
GradientBoostingRegressor,
|
|
15
16
|
RandomForestRegressor,
|
|
16
17
|
)
|
|
17
|
-
from sklearn.linear_model import Lasso, LinearRegression, Ridge
|
|
18
|
+
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge
|
|
18
19
|
from sklearn.metrics import r2_score
|
|
19
20
|
from sklearn.model_selection import GridSearchCV, train_test_split
|
|
21
|
+
from sklearn.neighbors import KNeighborsRegressor
|
|
22
|
+
from sklearn.neural_network import MLPRegressor
|
|
20
23
|
from sklearn.pipeline import make_pipeline
|
|
21
24
|
from sklearn.preprocessing import StandardScaler
|
|
25
|
+
from sklearn.svm import SVR
|
|
26
|
+
from sklearn.tree import DecisionTreeRegressor
|
|
27
|
+
|
|
28
|
+
from emhass import utils
|
|
22
29
|
|
|
23
30
|
if TYPE_CHECKING:
|
|
24
31
|
import logging
|
|
25
32
|
|
|
26
33
|
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
|
27
34
|
|
|
35
|
+
|
|
36
|
+
# AUTHORITATIVE SOURCE: Supported regression models for MLRegressor and adjust_pv_forecast
|
|
37
|
+
# When adding/removing models, also update:
|
|
38
|
+
# - src/emhass/static/data/param_definitions.json (adjusted_pv_regression_model select_options)
|
|
39
|
+
# - docs/config.md (adjusted_pv_regression_model description)
|
|
40
|
+
# - docs/forecasts.md (Model Training section)
|
|
41
|
+
# - src/emhass/forecast.py (adjust_pv_forecast_fit docstring)
|
|
42
|
+
# Define a seed for reproducibility
|
|
43
|
+
seed = 42
|
|
28
44
|
REGRESSION_METHODS = {
|
|
29
45
|
"LinearRegression": {
|
|
30
46
|
"model": LinearRegression(),
|
|
@@ -35,30 +51,81 @@ REGRESSION_METHODS = {
|
|
|
35
51
|
},
|
|
36
52
|
"RidgeRegression": {
|
|
37
53
|
"model": Ridge(),
|
|
38
|
-
"param_grid": {"ridge__alpha": [
|
|
54
|
+
"param_grid": {"ridge__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]},
|
|
39
55
|
},
|
|
40
56
|
"LassoRegression": {
|
|
41
|
-
"model": Lasso(),
|
|
42
|
-
"param_grid": {"lasso__alpha": [
|
|
57
|
+
"model": Lasso(random_state=seed),
|
|
58
|
+
"param_grid": {"lasso__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]},
|
|
59
|
+
},
|
|
60
|
+
"ElasticNet": {
|
|
61
|
+
"model": ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=seed),
|
|
62
|
+
"param_grid": {
|
|
63
|
+
"elasticnet__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100],
|
|
64
|
+
"elasticnet__l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
|
|
65
|
+
},
|
|
66
|
+
},
|
|
67
|
+
"KNeighborsRegressor": {
|
|
68
|
+
"model": KNeighborsRegressor(),
|
|
69
|
+
"param_grid": {
|
|
70
|
+
"kneighborsregressor__n_neighbors": [3, 5, 7, 10, 15],
|
|
71
|
+
"kneighborsregressor__weights": ["uniform", "distance"],
|
|
72
|
+
},
|
|
73
|
+
},
|
|
74
|
+
"DecisionTreeRegressor": {
|
|
75
|
+
"model": DecisionTreeRegressor(ccp_alpha=0.0, random_state=seed),
|
|
76
|
+
"param_grid": {
|
|
77
|
+
"decisiontreeregressor__max_depth": [None, 5, 10, 20],
|
|
78
|
+
"decisiontreeregressor__min_samples_split": [2, 5, 10],
|
|
79
|
+
},
|
|
80
|
+
},
|
|
81
|
+
"SVR": {
|
|
82
|
+
"model": SVR(),
|
|
83
|
+
"param_grid": {
|
|
84
|
+
"svr__C": [0.1, 1, 10, 100],
|
|
85
|
+
"svr__gamma": ["scale", "auto"],
|
|
86
|
+
"svr__kernel": ["rbf", "linear"],
|
|
87
|
+
},
|
|
43
88
|
},
|
|
44
|
-
"
|
|
45
|
-
"model": RandomForestRegressor(),
|
|
46
|
-
"param_grid": {
|
|
89
|
+
"RandomForestRegressor": {
|
|
90
|
+
"model": RandomForestRegressor(min_samples_leaf=1, max_features=1.0, random_state=seed),
|
|
91
|
+
"param_grid": {
|
|
92
|
+
"randomforestregressor__n_estimators": [50, 100, 200],
|
|
93
|
+
"randomforestregressor__max_depth": [None, 10, 20],
|
|
94
|
+
"randomforestregressor__max_features": ["sqrt", "log2", None],
|
|
95
|
+
},
|
|
47
96
|
},
|
|
48
|
-
"
|
|
49
|
-
"model":
|
|
97
|
+
"ExtraTreesRegressor": {
|
|
98
|
+
"model": ExtraTreesRegressor(min_samples_leaf=1, max_features=1.0, random_state=seed),
|
|
99
|
+
"param_grid": {
|
|
100
|
+
"extratreesregressor__n_estimators": [50, 100, 200],
|
|
101
|
+
"extratreesregressor__max_depth": [None, 10, 20],
|
|
102
|
+
"extratreesregressor__max_features": ["sqrt", "log2", None],
|
|
103
|
+
},
|
|
104
|
+
},
|
|
105
|
+
"GradientBoostingRegressor": {
|
|
106
|
+
"model": GradientBoostingRegressor(learning_rate=0.1, random_state=seed),
|
|
50
107
|
"param_grid": {
|
|
51
108
|
"gradientboostingregressor__n_estimators": [50, 100, 200],
|
|
52
109
|
"gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2],
|
|
110
|
+
"gradientboostingregressor__max_depth": [3, 5, 10],
|
|
53
111
|
},
|
|
54
112
|
},
|
|
55
|
-
"
|
|
56
|
-
"model": AdaBoostRegressor(),
|
|
113
|
+
"AdaBoostRegressor": {
|
|
114
|
+
"model": AdaBoostRegressor(learning_rate=1.0, random_state=seed),
|
|
57
115
|
"param_grid": {
|
|
58
116
|
"adaboostregressor__n_estimators": [50, 100, 200],
|
|
59
117
|
"adaboostregressor__learning_rate": [0.01, 0.1, 0.2],
|
|
60
118
|
},
|
|
61
119
|
},
|
|
120
|
+
"MLPRegressor": {
|
|
121
|
+
"model": MLPRegressor(hidden_layer_sizes=(100,), random_state=seed),
|
|
122
|
+
"param_grid": {
|
|
123
|
+
"mlpregressor__hidden_layer_sizes": [(50,), (100,), (50, 50)],
|
|
124
|
+
"mlpregressor__activation": ["relu", "tanh"],
|
|
125
|
+
"mlpregressor__alpha": [1e-4, 1e-3],
|
|
126
|
+
"mlpregressor__max_iter": [500],
|
|
127
|
+
},
|
|
128
|
+
},
|
|
62
129
|
}
|
|
63
130
|
|
|
64
131
|
|
|
@@ -76,8 +143,16 @@ class MLRegressor:
|
|
|
76
143
|
|
|
77
144
|
"""
|
|
78
145
|
|
|
79
|
-
def __init__(
|
|
80
|
-
|
|
146
|
+
def __init__(
|
|
147
|
+
self: MLRegressor,
|
|
148
|
+
data: pd.DataFrame,
|
|
149
|
+
model_type: str,
|
|
150
|
+
regression_model: str,
|
|
151
|
+
features: list[str],
|
|
152
|
+
target: str,
|
|
153
|
+
timestamp: str,
|
|
154
|
+
logger: logging.Logger,
|
|
155
|
+
) -> None:
|
|
81
156
|
r"""Define constructor for the forecast class.
|
|
82
157
|
|
|
83
158
|
:param data: The data that will be used for train/test
|
|
@@ -101,48 +176,45 @@ class MLRegressor:
|
|
|
101
176
|
:param logger: The passed logger object
|
|
102
177
|
:type logger: logging.Logger
|
|
103
178
|
"""
|
|
104
|
-
self.data = data
|
|
179
|
+
self.data = data.sort_index()
|
|
105
180
|
self.features = features
|
|
106
181
|
self.target = target
|
|
107
182
|
self.timestamp = timestamp
|
|
108
183
|
self.model_type = model_type
|
|
109
184
|
self.regression_model = regression_model
|
|
110
185
|
self.logger = logger
|
|
111
|
-
|
|
186
|
+
|
|
112
187
|
self.data = self.data[~self.data.index.duplicated(keep="first")]
|
|
113
|
-
self.data_exo = None
|
|
114
|
-
self.steps = None
|
|
188
|
+
self.data_exo: pd.DataFrame | None = None
|
|
189
|
+
self.steps: int | None = None
|
|
115
190
|
self.model = None
|
|
116
|
-
self.grid_search = None
|
|
191
|
+
self.grid_search: GridSearchCV | None = None
|
|
117
192
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
193
|
+
def _prepare_data(self, date_features: list[str] | None) -> tuple[pd.DataFrame, pd.Series]:
|
|
194
|
+
self.data_exo = self.data.copy()
|
|
195
|
+
self.data_exo[self.features] = self.data[self.features]
|
|
196
|
+
self.data_exo[self.target] = self.data[self.target]
|
|
121
197
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
df["hour"] = [i.day for i in df["timestamp"]]
|
|
143
|
-
return df
|
|
144
|
-
|
|
145
|
-
def get_regression_model(self: MLRegressor) -> tuple[str, str]:
|
|
198
|
+
keep_columns = list(self.features)
|
|
199
|
+
if self.timestamp:
|
|
200
|
+
keep_columns.append(self.timestamp)
|
|
201
|
+
keep_columns.append(self.target)
|
|
202
|
+
self.data_exo = self.data_exo[keep_columns].reset_index(drop=True)
|
|
203
|
+
|
|
204
|
+
if date_features and self.timestamp:
|
|
205
|
+
self.data_exo = utils.add_date_features(
|
|
206
|
+
self.data_exo, timestamp=self.timestamp, date_features=date_features
|
|
207
|
+
)
|
|
208
|
+
elif date_features:
|
|
209
|
+
self.logger.warning("Timestamp is required for date_features. Skipping date features.")
|
|
210
|
+
|
|
211
|
+
y = self.data_exo[self.target]
|
|
212
|
+
X = self.data_exo.drop(
|
|
213
|
+
columns=[self.target, self.timestamp] if self.timestamp else [self.target]
|
|
214
|
+
)
|
|
215
|
+
return X, y
|
|
216
|
+
|
|
217
|
+
def _get_model_and_params(self) -> tuple[GridSearchCV, dict] | tuple[None, None]:
|
|
146
218
|
r"""
|
|
147
219
|
Get the base model and parameter grid for the specified regression model.
|
|
148
220
|
Returns a tuple containing the base model and parameter grid corresponding to \
|
|
@@ -153,90 +225,57 @@ class MLRegressor:
|
|
|
153
225
|
:return: A tuple containing the base model and parameter grid.
|
|
154
226
|
:rtype: tuple[str, str]
|
|
155
227
|
"""
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"]
|
|
165
|
-
elif self.regression_model == "RandomForestRegression":
|
|
166
|
-
base_model = REGRESSION_METHODS["RandomForestRegression"]["model"]
|
|
167
|
-
param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"]
|
|
168
|
-
elif self.regression_model == "GradientBoostingRegression":
|
|
169
|
-
base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"]
|
|
170
|
-
param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"]
|
|
171
|
-
elif self.regression_model == "AdaBoostRegression":
|
|
172
|
-
base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"]
|
|
173
|
-
param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"]
|
|
174
|
-
else:
|
|
175
|
-
self.logger.error(
|
|
176
|
-
"Passed model %s is not valid",
|
|
177
|
-
self.regression_model,
|
|
178
|
-
)
|
|
179
|
-
return None
|
|
180
|
-
return base_model, param_grid
|
|
228
|
+
method = REGRESSION_METHODS.get(self.regression_model)
|
|
229
|
+
if not method:
|
|
230
|
+
self.logger.error("Invalid regression model: %s", self.regression_model)
|
|
231
|
+
return None, None
|
|
232
|
+
|
|
233
|
+
pipeline = make_pipeline(StandardScaler(), method["model"])
|
|
234
|
+
param_grid = method["param_grid"]
|
|
235
|
+
return pipeline, param_grid
|
|
181
236
|
|
|
182
|
-
def fit(self: MLRegressor, date_features: list | None = None) ->
|
|
237
|
+
async def fit(self: MLRegressor, date_features: list[str] | None = None) -> bool:
|
|
183
238
|
r"""Fit the model using the provided data.
|
|
184
239
|
|
|
185
240
|
:param date_features: A list of 'date_features' to take into account when \
|
|
186
241
|
fitting the model.
|
|
187
242
|
:type data: list
|
|
243
|
+
:return: bool if successful
|
|
244
|
+
:rtype: bool
|
|
188
245
|
"""
|
|
189
|
-
self.logger.info("
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
self.data_exo[self.target] = self.data[self.target]
|
|
193
|
-
keep_columns = []
|
|
194
|
-
keep_columns.extend(self.features)
|
|
195
|
-
if self.timestamp is not None:
|
|
196
|
-
keep_columns.append(self.timestamp)
|
|
197
|
-
keep_columns.append(self.target)
|
|
198
|
-
self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)]
|
|
199
|
-
self.data_exo = self.data_exo.reset_index(drop=True)
|
|
200
|
-
if date_features is not None:
|
|
201
|
-
if self.timestamp is not None:
|
|
202
|
-
self.data_exo = MLRegressor.add_date_features(
|
|
203
|
-
self.data_exo,
|
|
204
|
-
date_features,
|
|
205
|
-
self.timestamp,
|
|
206
|
-
)
|
|
207
|
-
else:
|
|
208
|
-
self.logger.error(
|
|
209
|
-
"If no timestamp provided, you can't use date_features, going \
|
|
210
|
-
further without date_features.",
|
|
211
|
-
)
|
|
212
|
-
y = self.data_exo[self.target]
|
|
213
|
-
self.data_exo = self.data_exo.drop(self.target, axis=1)
|
|
214
|
-
if self.timestamp is not None:
|
|
215
|
-
self.data_exo = self.data_exo.drop(self.timestamp, axis=1)
|
|
216
|
-
X = self.data_exo
|
|
246
|
+
self.logger.info("Fitting MLRegressor model for %s", self.model_type)
|
|
247
|
+
|
|
248
|
+
X, y = self._prepare_data(date_features)
|
|
217
249
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
218
250
|
self.steps = len(X_test)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
predictions = pd.Series(predictions, index=X_test.index)
|
|
233
|
-
pred_metric = r2_score(y_test, predictions)
|
|
234
|
-
self.logger.info(
|
|
235
|
-
"Prediction R2 score of fitted model on test data: %s",
|
|
236
|
-
pred_metric,
|
|
251
|
+
|
|
252
|
+
model_pipeline, param_grid = self._get_model_and_params()
|
|
253
|
+
if model_pipeline is None:
|
|
254
|
+
return False
|
|
255
|
+
|
|
256
|
+
self.grid_search = GridSearchCV(
|
|
257
|
+
model_pipeline,
|
|
258
|
+
param_grid,
|
|
259
|
+
cv=5,
|
|
260
|
+
scoring="neg_mean_squared_error",
|
|
261
|
+
refit=True,
|
|
262
|
+
verbose=0,
|
|
263
|
+
n_jobs=-1,
|
|
237
264
|
)
|
|
238
265
|
|
|
239
|
-
|
|
266
|
+
self.logger.info("Training model: %s", self.regression_model)
|
|
267
|
+
start = time.time()
|
|
268
|
+
await asyncio.to_thread(self.grid_search.fit, X_train.values, y_train.values)
|
|
269
|
+
self.logger.info("Model fit completed in %.2f seconds", time.time() - start)
|
|
270
|
+
|
|
271
|
+
self.model = self.grid_search.best_estimator_
|
|
272
|
+
|
|
273
|
+
predictions = await asyncio.to_thread(self.model.predict, X_test.values)
|
|
274
|
+
r2 = r2_score(y_test, predictions)
|
|
275
|
+
self.logger.info("R2 score on test set: %.4f", r2)
|
|
276
|
+
return True
|
|
277
|
+
|
|
278
|
+
async def predict(self: MLRegressor, new_values: list[float]) -> np.ndarray:
|
|
240
279
|
"""Predict a new value.
|
|
241
280
|
|
|
242
281
|
:param new_values: The new values for the features \
|
|
@@ -246,6 +285,7 @@ class MLRegressor:
|
|
|
246
285
|
:return: The np.ndarray containing the predicted value.
|
|
247
286
|
:rtype: np.ndarray
|
|
248
287
|
"""
|
|
249
|
-
self.logger.info("
|
|
250
|
-
|
|
251
|
-
|
|
288
|
+
self.logger.info("Making prediction with model %s", self.model_type)
|
|
289
|
+
new_values_array = np.array([new_values])
|
|
290
|
+
prediction = await asyncio.to_thread(self.model.predict, new_values_array)
|
|
291
|
+
return prediction
|