emhass 0.11.4__py3-none-any.whl → 0.15.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emhass/command_line.py +1481 -811
- emhass/connection_manager.py +108 -0
- emhass/data/associations.csv +37 -2
- emhass/data/cec_inverters.pbz2 +0 -0
- emhass/data/cec_modules.pbz2 +0 -0
- emhass/data/config_defaults.json +53 -49
- emhass/forecast.py +1264 -731
- emhass/img/emhass_icon.png +0 -0
- emhass/machine_learning_forecaster.py +534 -281
- emhass/machine_learning_regressor.py +141 -125
- emhass/optimization.py +1173 -585
- emhass/retrieve_hass.py +958 -263
- emhass/static/advanced.html +7 -0
- emhass/static/configuration_list.html +5 -1
- emhass/static/configuration_script.js +146 -62
- emhass/static/data/param_definitions.json +215 -48
- emhass/static/script.js +58 -26
- emhass/static/style.css +6 -8
- emhass/templates/configuration.html +5 -3
- emhass/templates/index.html +8 -6
- emhass/templates/template.html +4 -5
- emhass/utils.py +1152 -403
- emhass/web_server.py +565 -379
- emhass/websocket_client.py +224 -0
- emhass-0.15.5.dist-info/METADATA +164 -0
- emhass-0.15.5.dist-info/RECORD +34 -0
- {emhass-0.11.4.dist-info → emhass-0.15.5.dist-info}/WHEEL +1 -2
- emhass-0.15.5.dist-info/entry_points.txt +2 -0
- emhass-0.11.4.dist-info/METADATA +0 -666
- emhass-0.11.4.dist-info/RECORD +0 -32
- emhass-0.11.4.dist-info/entry_points.txt +0 -2
- emhass-0.11.4.dist-info/top_level.txt +0 -1
- {emhass-0.11.4.dist-info → emhass-0.15.5.dist-info/licenses}/LICENSE +0 -0
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import
|
|
5
|
+
import asyncio
|
|
6
6
|
import time
|
|
7
7
|
import warnings
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
@@ -11,20 +11,36 @@ import numpy as np
|
|
|
11
11
|
import pandas as pd
|
|
12
12
|
from sklearn.ensemble import (
|
|
13
13
|
AdaBoostRegressor,
|
|
14
|
+
ExtraTreesRegressor,
|
|
14
15
|
GradientBoostingRegressor,
|
|
15
16
|
RandomForestRegressor,
|
|
16
17
|
)
|
|
17
|
-
from sklearn.linear_model import Lasso, LinearRegression, Ridge
|
|
18
|
+
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge
|
|
18
19
|
from sklearn.metrics import r2_score
|
|
19
20
|
from sklearn.model_selection import GridSearchCV, train_test_split
|
|
21
|
+
from sklearn.neighbors import KNeighborsRegressor
|
|
22
|
+
from sklearn.neural_network import MLPRegressor
|
|
20
23
|
from sklearn.pipeline import make_pipeline
|
|
21
24
|
from sklearn.preprocessing import StandardScaler
|
|
25
|
+
from sklearn.svm import SVR
|
|
26
|
+
from sklearn.tree import DecisionTreeRegressor
|
|
27
|
+
|
|
28
|
+
from emhass import utils
|
|
22
29
|
|
|
23
30
|
if TYPE_CHECKING:
|
|
24
31
|
import logging
|
|
25
32
|
|
|
26
33
|
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
|
27
34
|
|
|
35
|
+
|
|
36
|
+
# AUTHORITATIVE SOURCE: Supported regression models for MLRegressor and adjust_pv_forecast
|
|
37
|
+
# When adding/removing models, also update:
|
|
38
|
+
# - src/emhass/static/data/param_definitions.json (adjusted_pv_regression_model select_options)
|
|
39
|
+
# - docs/config.md (adjusted_pv_regression_model description)
|
|
40
|
+
# - docs/forecasts.md (Model Training section)
|
|
41
|
+
# - src/emhass/forecast.py (adjust_pv_forecast_fit docstring)
|
|
42
|
+
# Define a seed for reproducibility
|
|
43
|
+
seed = 42
|
|
28
44
|
REGRESSION_METHODS = {
|
|
29
45
|
"LinearRegression": {
|
|
30
46
|
"model": LinearRegression(),
|
|
@@ -35,30 +51,81 @@ REGRESSION_METHODS = {
|
|
|
35
51
|
},
|
|
36
52
|
"RidgeRegression": {
|
|
37
53
|
"model": Ridge(),
|
|
38
|
-
"param_grid": {"ridge__alpha": [
|
|
54
|
+
"param_grid": {"ridge__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]},
|
|
39
55
|
},
|
|
40
56
|
"LassoRegression": {
|
|
41
|
-
"model": Lasso(),
|
|
42
|
-
"param_grid": {"lasso__alpha": [
|
|
57
|
+
"model": Lasso(random_state=seed),
|
|
58
|
+
"param_grid": {"lasso__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]},
|
|
59
|
+
},
|
|
60
|
+
"ElasticNet": {
|
|
61
|
+
"model": ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=seed),
|
|
62
|
+
"param_grid": {
|
|
63
|
+
"elasticnet__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100],
|
|
64
|
+
"elasticnet__l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
|
|
65
|
+
},
|
|
66
|
+
},
|
|
67
|
+
"KNeighborsRegressor": {
|
|
68
|
+
"model": KNeighborsRegressor(),
|
|
69
|
+
"param_grid": {
|
|
70
|
+
"kneighborsregressor__n_neighbors": [3, 5, 7, 10, 15],
|
|
71
|
+
"kneighborsregressor__weights": ["uniform", "distance"],
|
|
72
|
+
},
|
|
73
|
+
},
|
|
74
|
+
"DecisionTreeRegressor": {
|
|
75
|
+
"model": DecisionTreeRegressor(ccp_alpha=0.0, random_state=seed),
|
|
76
|
+
"param_grid": {
|
|
77
|
+
"decisiontreeregressor__max_depth": [None, 5, 10, 20],
|
|
78
|
+
"decisiontreeregressor__min_samples_split": [2, 5, 10],
|
|
79
|
+
},
|
|
80
|
+
},
|
|
81
|
+
"SVR": {
|
|
82
|
+
"model": SVR(),
|
|
83
|
+
"param_grid": {
|
|
84
|
+
"svr__C": [0.1, 1, 10, 100],
|
|
85
|
+
"svr__gamma": ["scale", "auto"],
|
|
86
|
+
"svr__kernel": ["rbf", "linear"],
|
|
87
|
+
},
|
|
43
88
|
},
|
|
44
|
-
"
|
|
45
|
-
"model": RandomForestRegressor(),
|
|
46
|
-
"param_grid": {
|
|
89
|
+
"RandomForestRegressor": {
|
|
90
|
+
"model": RandomForestRegressor(min_samples_leaf=1, max_features=1.0, random_state=seed),
|
|
91
|
+
"param_grid": {
|
|
92
|
+
"randomforestregressor__n_estimators": [50, 100, 200],
|
|
93
|
+
"randomforestregressor__max_depth": [None, 10, 20],
|
|
94
|
+
"randomforestregressor__max_features": ["sqrt", "log2", None],
|
|
95
|
+
},
|
|
47
96
|
},
|
|
48
|
-
"
|
|
49
|
-
"model":
|
|
97
|
+
"ExtraTreesRegressor": {
|
|
98
|
+
"model": ExtraTreesRegressor(min_samples_leaf=1, max_features=1.0, random_state=seed),
|
|
99
|
+
"param_grid": {
|
|
100
|
+
"extratreesregressor__n_estimators": [50, 100, 200],
|
|
101
|
+
"extratreesregressor__max_depth": [None, 10, 20],
|
|
102
|
+
"extratreesregressor__max_features": ["sqrt", "log2", None],
|
|
103
|
+
},
|
|
104
|
+
},
|
|
105
|
+
"GradientBoostingRegressor": {
|
|
106
|
+
"model": GradientBoostingRegressor(learning_rate=0.1, random_state=seed),
|
|
50
107
|
"param_grid": {
|
|
51
108
|
"gradientboostingregressor__n_estimators": [50, 100, 200],
|
|
52
109
|
"gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2],
|
|
110
|
+
"gradientboostingregressor__max_depth": [3, 5, 10],
|
|
53
111
|
},
|
|
54
112
|
},
|
|
55
|
-
"
|
|
56
|
-
"model": AdaBoostRegressor(),
|
|
113
|
+
"AdaBoostRegressor": {
|
|
114
|
+
"model": AdaBoostRegressor(learning_rate=1.0, random_state=seed),
|
|
57
115
|
"param_grid": {
|
|
58
116
|
"adaboostregressor__n_estimators": [50, 100, 200],
|
|
59
117
|
"adaboostregressor__learning_rate": [0.01, 0.1, 0.2],
|
|
60
118
|
},
|
|
61
119
|
},
|
|
120
|
+
"MLPRegressor": {
|
|
121
|
+
"model": MLPRegressor(hidden_layer_sizes=(100,), random_state=seed),
|
|
122
|
+
"param_grid": {
|
|
123
|
+
"mlpregressor__hidden_layer_sizes": [(50,), (100,), (50, 50)],
|
|
124
|
+
"mlpregressor__activation": ["relu", "tanh"],
|
|
125
|
+
"mlpregressor__alpha": [1e-4, 1e-3],
|
|
126
|
+
"mlpregressor__max_iter": [500],
|
|
127
|
+
},
|
|
128
|
+
},
|
|
62
129
|
}
|
|
63
130
|
|
|
64
131
|
|
|
@@ -81,7 +148,7 @@ class MLRegressor:
|
|
|
81
148
|
data: pd.DataFrame,
|
|
82
149
|
model_type: str,
|
|
83
150
|
regression_model: str,
|
|
84
|
-
features: list,
|
|
151
|
+
features: list[str],
|
|
85
152
|
target: str,
|
|
86
153
|
timestamp: str,
|
|
87
154
|
logger: logging.Logger,
|
|
@@ -109,50 +176,45 @@ class MLRegressor:
|
|
|
109
176
|
:param logger: The passed logger object
|
|
110
177
|
:type logger: logging.Logger
|
|
111
178
|
"""
|
|
112
|
-
self.data = data
|
|
179
|
+
self.data = data.sort_index()
|
|
113
180
|
self.features = features
|
|
114
181
|
self.target = target
|
|
115
182
|
self.timestamp = timestamp
|
|
116
183
|
self.model_type = model_type
|
|
117
184
|
self.regression_model = regression_model
|
|
118
185
|
self.logger = logger
|
|
119
|
-
|
|
186
|
+
|
|
120
187
|
self.data = self.data[~self.data.index.duplicated(keep="first")]
|
|
121
|
-
self.data_exo = None
|
|
122
|
-
self.steps = None
|
|
188
|
+
self.data_exo: pd.DataFrame | None = None
|
|
189
|
+
self.steps: int | None = None
|
|
123
190
|
self.model = None
|
|
124
|
-
self.grid_search = None
|
|
191
|
+
self.grid_search: GridSearchCV | None = None
|
|
125
192
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
"""Add date features from the input DataFrame timestamp.
|
|
193
|
+
def _prepare_data(self, date_features: list[str] | None) -> tuple[pd.DataFrame, pd.Series]:
|
|
194
|
+
self.data_exo = self.data.copy()
|
|
195
|
+
self.data_exo[self.features] = self.data[self.features]
|
|
196
|
+
self.data_exo[self.target] = self.data[self.target]
|
|
131
197
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
df["hour"] = [i.day for i in df["timestamp"]]
|
|
153
|
-
return df
|
|
154
|
-
|
|
155
|
-
def get_regression_model(self: MLRegressor) -> tuple[str, str]:
|
|
198
|
+
keep_columns = list(self.features)
|
|
199
|
+
if self.timestamp:
|
|
200
|
+
keep_columns.append(self.timestamp)
|
|
201
|
+
keep_columns.append(self.target)
|
|
202
|
+
self.data_exo = self.data_exo[keep_columns].reset_index(drop=True)
|
|
203
|
+
|
|
204
|
+
if date_features and self.timestamp:
|
|
205
|
+
self.data_exo = utils.add_date_features(
|
|
206
|
+
self.data_exo, timestamp=self.timestamp, date_features=date_features
|
|
207
|
+
)
|
|
208
|
+
elif date_features:
|
|
209
|
+
self.logger.warning("Timestamp is required for date_features. Skipping date features.")
|
|
210
|
+
|
|
211
|
+
y = self.data_exo[self.target]
|
|
212
|
+
X = self.data_exo.drop(
|
|
213
|
+
columns=[self.target, self.timestamp] if self.timestamp else [self.target]
|
|
214
|
+
)
|
|
215
|
+
return X, y
|
|
216
|
+
|
|
217
|
+
def _get_model_and_params(self) -> tuple[GridSearchCV, dict] | tuple[None, None]:
|
|
156
218
|
r"""
|
|
157
219
|
Get the base model and parameter grid for the specified regression model.
|
|
158
220
|
Returns a tuple containing the base model and parameter grid corresponding to \
|
|
@@ -163,33 +225,16 @@ class MLRegressor:
|
|
|
163
225
|
:return: A tuple containing the base model and parameter grid.
|
|
164
226
|
:rtype: tuple[str, str]
|
|
165
227
|
"""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
elif self.regression_model == "RidgeRegression":
|
|
170
|
-
base_model = REGRESSION_METHODS["RidgeRegression"]["model"]
|
|
171
|
-
param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"]
|
|
172
|
-
elif self.regression_model == "LassoRegression":
|
|
173
|
-
base_model = REGRESSION_METHODS["LassoRegression"]["model"]
|
|
174
|
-
param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"]
|
|
175
|
-
elif self.regression_model == "RandomForestRegression":
|
|
176
|
-
base_model = REGRESSION_METHODS["RandomForestRegression"]["model"]
|
|
177
|
-
param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"]
|
|
178
|
-
elif self.regression_model == "GradientBoostingRegression":
|
|
179
|
-
base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"]
|
|
180
|
-
param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"]
|
|
181
|
-
elif self.regression_model == "AdaBoostRegression":
|
|
182
|
-
base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"]
|
|
183
|
-
param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"]
|
|
184
|
-
else:
|
|
185
|
-
self.logger.error(
|
|
186
|
-
"Passed model %s is not valid",
|
|
187
|
-
self.regression_model,
|
|
188
|
-
)
|
|
228
|
+
method = REGRESSION_METHODS.get(self.regression_model)
|
|
229
|
+
if not method:
|
|
230
|
+
self.logger.error("Invalid regression model: %s", self.regression_model)
|
|
189
231
|
return None, None
|
|
190
|
-
return base_model, param_grid
|
|
191
232
|
|
|
192
|
-
|
|
233
|
+
pipeline = make_pipeline(StandardScaler(), method["model"])
|
|
234
|
+
param_grid = method["param_grid"]
|
|
235
|
+
return pipeline, param_grid
|
|
236
|
+
|
|
237
|
+
async def fit(self: MLRegressor, date_features: list[str] | None = None) -> bool:
|
|
193
238
|
r"""Fit the model using the provided data.
|
|
194
239
|
|
|
195
240
|
:param date_features: A list of 'date_features' to take into account when \
|
|
@@ -198,45 +243,18 @@ class MLRegressor:
|
|
|
198
243
|
:return: bool if successful
|
|
199
244
|
:rtype: bool
|
|
200
245
|
"""
|
|
201
|
-
self.logger.info("
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
keep_columns = []
|
|
206
|
-
keep_columns.extend(self.features)
|
|
207
|
-
if self.timestamp is not None:
|
|
208
|
-
keep_columns.append(self.timestamp)
|
|
209
|
-
keep_columns.append(self.target)
|
|
210
|
-
self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)]
|
|
211
|
-
self.data_exo = self.data_exo.reset_index(drop=True)
|
|
212
|
-
if date_features is not None:
|
|
213
|
-
if self.timestamp is not None:
|
|
214
|
-
self.data_exo = MLRegressor.add_date_features(
|
|
215
|
-
self.data_exo,
|
|
216
|
-
date_features,
|
|
217
|
-
self.timestamp,
|
|
218
|
-
)
|
|
219
|
-
else:
|
|
220
|
-
self.logger.error(
|
|
221
|
-
"If no timestamp provided, you can't use date_features, going \
|
|
222
|
-
further without date_features.",
|
|
223
|
-
)
|
|
224
|
-
y = self.data_exo[self.target]
|
|
225
|
-
self.data_exo = self.data_exo.drop(self.target, axis=1)
|
|
226
|
-
if self.timestamp is not None:
|
|
227
|
-
self.data_exo = self.data_exo.drop(self.timestamp, axis=1)
|
|
228
|
-
X = self.data_exo
|
|
229
|
-
X_train, X_test, y_train, y_test = train_test_split(
|
|
230
|
-
X, y, test_size=0.2, random_state=42
|
|
231
|
-
)
|
|
246
|
+
self.logger.info("Fitting MLRegressor model for %s", self.model_type)
|
|
247
|
+
|
|
248
|
+
X, y = self._prepare_data(date_features)
|
|
249
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
232
250
|
self.steps = len(X_test)
|
|
233
|
-
|
|
234
|
-
|
|
251
|
+
|
|
252
|
+
model_pipeline, param_grid = self._get_model_and_params()
|
|
253
|
+
if model_pipeline is None:
|
|
235
254
|
return False
|
|
236
|
-
|
|
237
|
-
# Create a grid search object
|
|
255
|
+
|
|
238
256
|
self.grid_search = GridSearchCV(
|
|
239
|
-
|
|
257
|
+
model_pipeline,
|
|
240
258
|
param_grid,
|
|
241
259
|
cv=5,
|
|
242
260
|
scoring="neg_mean_squared_error",
|
|
@@ -244,23 +262,20 @@ class MLRegressor:
|
|
|
244
262
|
verbose=0,
|
|
245
263
|
n_jobs=-1,
|
|
246
264
|
)
|
|
247
|
-
|
|
248
|
-
self.logger.info("Training
|
|
249
|
-
|
|
250
|
-
self.grid_search.fit
|
|
251
|
-
self.logger.info("
|
|
265
|
+
|
|
266
|
+
self.logger.info("Training model: %s", self.regression_model)
|
|
267
|
+
start = time.time()
|
|
268
|
+
await asyncio.to_thread(self.grid_search.fit, X_train.values, y_train.values)
|
|
269
|
+
self.logger.info("Model fit completed in %.2f seconds", time.time() - start)
|
|
270
|
+
|
|
252
271
|
self.model = self.grid_search.best_estimator_
|
|
253
|
-
|
|
254
|
-
predictions = self.model.predict
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
self.logger.info(
|
|
258
|
-
"Prediction R2 score of fitted model on test data: %s",
|
|
259
|
-
pred_metric,
|
|
260
|
-
)
|
|
272
|
+
|
|
273
|
+
predictions = await asyncio.to_thread(self.model.predict, X_test.values)
|
|
274
|
+
r2 = r2_score(y_test, predictions)
|
|
275
|
+
self.logger.info("R2 score on test set: %.4f", r2)
|
|
261
276
|
return True
|
|
262
277
|
|
|
263
|
-
def predict(self: MLRegressor, new_values: list) -> np.ndarray:
|
|
278
|
+
async def predict(self: MLRegressor, new_values: list[float]) -> np.ndarray:
|
|
264
279
|
"""Predict a new value.
|
|
265
280
|
|
|
266
281
|
:param new_values: The new values for the features \
|
|
@@ -270,6 +285,7 @@ class MLRegressor:
|
|
|
270
285
|
:return: The np.ndarray containing the predicted value.
|
|
271
286
|
:rtype: np.ndarray
|
|
272
287
|
"""
|
|
273
|
-
self.logger.info("
|
|
274
|
-
|
|
275
|
-
|
|
288
|
+
self.logger.info("Making prediction with model %s", self.model_type)
|
|
289
|
+
new_values_array = np.array([new_values])
|
|
290
|
+
prediction = await asyncio.to_thread(self.model.predict, new_values_array)
|
|
291
|
+
return prediction
|