emhass 0.8.5__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ """Machine learning regressor module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import time
7
+ import warnings
8
+ from typing import TYPE_CHECKING
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ from sklearn.ensemble import (
13
+ AdaBoostRegressor,
14
+ GradientBoostingRegressor,
15
+ RandomForestRegressor,
16
+ )
17
+ from sklearn.linear_model import Lasso, LinearRegression, Ridge
18
+ from sklearn.metrics import r2_score
19
+ from sklearn.model_selection import GridSearchCV, train_test_split
20
+ from sklearn.pipeline import make_pipeline
21
+ from sklearn.preprocessing import StandardScaler
22
+
23
+ if TYPE_CHECKING:
24
+ import logging
25
+
26
# Deprecation warnings are silenced for the whole process: the filter is
# installed in the global ``warnings`` registry at import time.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Registry of the supported regression methods. Each entry pairs an estimator
# instance with the hyper-parameter grid explored for it. The grid keys carry
# the lower-cased estimator class name as a prefix because ``MLRegressor.fit``
# wraps the estimator with ``make_pipeline`` before running the grid search.
REGRESSION_METHODS = {
    method_name: {"model": estimator, "param_grid": search_space}
    for method_name, estimator, search_space in (
        (
            "LinearRegression",
            LinearRegression(),
            {
                "linearregression__fit_intercept": [True, False],
                "linearregression__positive": [True, False],
            },
        ),
        (
            "RidgeRegression",
            Ridge(),
            {"ridge__alpha": [0.1, 1.0, 10.0]},
        ),
        (
            "LassoRegression",
            Lasso(),
            {"lasso__alpha": [0.1, 1.0, 10.0]},
        ),
        (
            "RandomForestRegression",
            RandomForestRegressor(),
            {"randomforestregressor__n_estimators": [50, 100, 200]},
        ),
        (
            "GradientBoostingRegression",
            GradientBoostingRegressor(),
            {
                "gradientboostingregressor__n_estimators": [50, 100, 200],
                "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2],
            },
        ),
        (
            "AdaBoostRegression",
            AdaBoostRegressor(),
            {
                "adaboostregressor__n_estimators": [50, 100, 200],
                "adaboostregressor__learning_rate": [0.01, 0.1, 0.2],
            },
        ),
    )
}
63
+
64
+
65
class MLRegressor:
    """A forecaster class using machine learning models.

    The regression models come from `scikit-learn`. The class exposes two
    main methods:

    - `fit`: train a model on the data passed to the constructor.
    - `predict`: obtain a forecast from the trained model.
    """

    # (date feature name, ``pandas.Timestamp`` attribute) pairs. The tuple
    # order is the order in which the derived columns are appended, whatever
    # the order of the caller's ``date_features`` list.
    _DATE_FEATURE_ATTRS = (
        ("year", "year"),
        ("month", "month"),
        ("day_of_week", "dayofweek"),
        ("day_of_year", "dayofyear"),
        ("day", "day"),
        ("hour", "hour"),
    )

    def __init__(  # noqa: PLR0913
        self: MLRegressor,
        data: pd.DataFrame,
        model_type: str,
        regression_model: str,
        features: list,
        target: str,
        timestamp: str,
        logger: logging.Logger,
    ) -> None:
        """Define constructor for the forecast class.

        :param data: The data that will be used for train/test. It is stored
            sorted by index, keeping only the first row of each duplicated
            index value.
        :type data: pd.DataFrame
        :param model_type: A unique name defining this model and useful to
            identify what it will be used for.
        :type model_type: str
        :param regression_model: The model that will be used. For now only
            these options are possible: `LinearRegression`, `RidgeRegression`,
            `LassoRegression`, `RandomForestRegression`,
            `GradientBoostingRegression` and `AdaBoostRegression`.
        :type regression_model: str
        :param features: A list of features.
            Example: [`solar_production`, `degree_days`].
        :type features: list
        :param target: The target (to be predicted).
            Example: `heating_hours`.
        :type target: str
        :param timestamp: If defined, the key of the column holding the
            timestamps; `None` when the data has no timestamp column.
        :type timestamp: str
        :param logger: The passed logger object
        :type logger: logging.Logger
        """
        self.features = features
        self.target = target
        self.timestamp = timestamp
        self.model_type = model_type
        self.regression_model = regression_model
        self.logger = logger
        ordered = data.sort_index()
        self.data = ordered[~ordered.index.duplicated(keep="first")]
        # Populated by ``fit``.
        self.data_exo = None
        self.steps = None
        self.model = None
        self.grid_search = None

    @staticmethod
    def add_date_features(
        data: pd.DataFrame,
        date_features: list,
        timestamp: str,
    ) -> pd.DataFrame:
        """Add date features derived from the timestamp column.

        The input frame is left untouched; a copy is returned in which the
        ``timestamp`` column is converted to datetimes and one integer column
        is appended per requested date feature.

        :param data: The input DataFrame
        :type data: pd.DataFrame
        :param date_features: The date features to add. Recognised names are
            `year`, `month`, `day_of_week`, `day_of_year`, `day` and `hour`;
            any other name is ignored.
        :type date_features: list
        :param timestamp: The column containing the timestamp
        :type timestamp: str
        :return: The DataFrame with the added features
        :rtype: pd.DataFrame
        """
        df = copy.deepcopy(data)  # noqa: PD901
        # Read the column named by ``timestamp`` (not a hard-coded name) and
        # keep a handle on it so later column writes cannot shadow the source.
        stamps = pd.to_datetime(df[timestamp])
        df[timestamp] = stamps
        for column, attribute in MLRegressor._DATE_FEATURE_ATTRS:
            if column in date_features:
                df[column] = [getattr(stamp, attribute) for stamp in stamps]
        return df

    def get_regression_model(self: MLRegressor) -> tuple[object, dict] | None:
        """Get the base model and parameter grid for the configured model.

        The lookup is done in the module-level ``REGRESSION_METHODS`` registry
        using ``self.regression_model`` as key.

        :return: A ``(base_model, param_grid)`` tuple, or `None` (after
            logging an error) when the configured model name is unknown.
        :rtype: tuple | None
        """
        method = REGRESSION_METHODS.get(self.regression_model)
        if method is None:
            self.logger.error(
                "Passed model %s is not valid",
                self.regression_model,
            )
            return None
        return method["model"], method["param_grid"]

    def _prepare_training_data(
        self: MLRegressor,
        date_features: list | None,
    ) -> tuple[pd.DataFrame, pd.Series]:
        """Build the feature matrix and target vector used for training.

        The feature matrix is also stored in ``self.data_exo``.

        :param date_features: Date features to derive from the timestamp
            column, or `None` for none.
        :type date_features: list | None
        :return: The feature matrix and the target series.
        :rtype: tuple[pd.DataFrame, pd.Series]
        """
        keep_columns = list(self.features)
        if self.timestamp is not None:
            keep_columns.append(self.timestamp)
        keep_columns.append(self.target)
        # Select explicitly, in ``features`` order, so the trained model
        # expects its inputs in the order documented by ``predict``.
        # ``dict.fromkeys`` drops repeated names while keeping that order.
        keep_columns = list(dict.fromkeys(keep_columns))
        self.data_exo = self.data[keep_columns].reset_index(drop=True)

        if date_features is not None:
            if self.timestamp is not None:
                self.data_exo = MLRegressor.add_date_features(
                    self.data_exo,
                    date_features,
                    self.timestamp,
                )
            else:
                self.logger.error(
                    "If no timestamp provided, you can't use date_features, "
                    "going further without date_features.",
                )

        y = self.data_exo[self.target]
        self.data_exo = self.data_exo.drop(self.target, axis=1)
        if self.timestamp is not None:
            # The raw timestamp is not a numeric feature; only the derived
            # date columns (if any) are kept.
            self.data_exo = self.data_exo.drop(self.timestamp, axis=1)
        return self.data_exo, y

    def fit(self: MLRegressor, date_features: list | None = None) -> None:
        """Fit the model using the provided data.

        The data is split 80/20 into train and test sets, a grid search with
        5-fold cross-validation selects the hyper-parameters on the train set,
        and the R2 score of the best estimator on the test set is logged.

        :param date_features: A list of 'date_features' to take into account
            when fitting the model.
        :type date_features: list
        :raises ValueError: If ``regression_model`` is not a supported model.
        """
        self.logger.info("Performing a MLRegressor fit for %s", self.model_type)

        # Resolve the model first so an invalid name fails fast with a clear
        # error instead of an unpacking error after the data preparation.
        regression = self.get_regression_model()
        if regression is None:
            msg = f"Unsupported regression model: {self.regression_model!r}"
            raise ValueError(msg)
        base_model, param_grid = regression

        X, y = self._prepare_training_data(date_features)  # noqa: N806

        X_train, X_test, y_train, y_test = train_test_split(  # noqa: N806
            X,
            y,
            test_size=0.2,
            random_state=42,
        )

        self.steps = len(X_test)

        self.model = make_pipeline(StandardScaler(), base_model)

        # Create a grid search object
        self.grid_search = GridSearchCV(
            self.model,
            param_grid,
            cv=5,
            scoring="neg_mean_squared_error",
            refit=True,
            verbose=0,
            n_jobs=-1,
        )

        # Fit the grid search object to the data
        self.logger.info("Training a %s model", self.regression_model)
        start_time = time.time()
        self.grid_search.fit(X_train.values, y_train.values)
        self.logger.info("Elapsed time for model fit: %s", time.time() - start_time)

        self.model = self.grid_search.best_estimator_

        # Make predictions
        predictions = self.model.predict(X_test.values)
        predictions = pd.Series(predictions, index=X_test.index)
        pred_metric = r2_score(y_test, predictions)
        self.logger.info(
            "Prediction R2 score of fitted model on test data: %s",
            pred_metric,
        )

    def predict(self: MLRegressor, new_values: list) -> np.ndarray:
        """Predict a new value.

        :param new_values: The new values for the features
            (in the same order as the features list), followed by the values
            of any date features that were added when fitting.
            Example: [2.24, 5.68].
        :type new_values: list
        :return: The np.ndarray containing the predicted value.
        :rtype: np.ndarray
        """
        self.logger.info("Performing a prediction for %s", self.model_type)
        # Wrap in an outer list: the model receives a single-sample 2-D array.
        new_values = np.array([new_values])

        return self.model.predict(new_values)
emhass/optimization.py CHANGED
@@ -31,7 +31,7 @@ class Optimization:
31
31
 
32
32
  def __init__(self, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict,
33
33
  var_load_cost: str, var_prod_price: str,
34
- costfun: str, base_path: str, logger: logging.Logger,
34
+ costfun: str, emhass_conf: dict, logger: logging.Logger,
35
35
  opt_time_delta: Optional[int] = 24) -> None:
36
36
  r"""
37
37
  Define constructor for Optimization class.
@@ -50,8 +50,8 @@ class Optimization:
50
50
  :type var_prod_price: str
51
51
  :param costfun: The type of cost function to use for optimization problem
52
52
  :type costfun: str
53
- :param base_path: The path to the yaml configuration file
54
- :type base_path: str
53
+ :param emhass_conf: Dictionary containing the needed emhass paths
54
+ :type emhass_conf: dict
55
55
  :param logger: The passed logger object
56
56
  :type logger: logging object
57
57
  :param opt_time_delta: The number of hours to optimize. If days_list has \
@@ -71,6 +71,7 @@ class Optimization:
71
71
  self.var_load = self.retrieve_hass_conf['var_load']
72
72
  self.var_load_new = self.var_load+'_positive'
73
73
  self.costfun = costfun
74
+ # self.emhass_conf = emhass_conf
74
75
  self.logger = logger
75
76
  self.var_load_cost = var_load_cost
76
77
  self.var_prod_price = var_prod_price