emhass 0.8.6__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emhass/command_line.py +629 -293
- emhass/forecast.py +58 -31
- emhass/machine_learning_forecaster.py +4 -4
- emhass/machine_learning_regressor.py +251 -0
- emhass/optimization.py +4 -3
- emhass/retrieve_hass.py +161 -102
- emhass/static/advanced.html +3 -0
- emhass/static/script.js +2 -0
- emhass/utils.py +588 -303
- emhass/web_server.py +48 -26
- {emhass-0.8.6.dist-info → emhass-0.9.1.dist-info}/METADATA +29 -12
- emhass-0.9.1.dist-info/RECORD +26 -0
- emhass-0.8.6.dist-info/RECORD +0 -25
- {emhass-0.8.6.dist-info → emhass-0.9.1.dist-info}/LICENSE +0 -0
- {emhass-0.8.6.dist-info → emhass-0.9.1.dist-info}/WHEEL +0 -0
- {emhass-0.8.6.dist-info → emhass-0.9.1.dist-info}/entry_points.txt +0 -0
- {emhass-0.8.6.dist-info → emhass-0.9.1.dist-info}/top_level.txt +0 -0
emhass/forecast.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
4
|
import pathlib
|
5
|
+
import os
|
5
6
|
import pickle
|
6
7
|
import copy
|
7
8
|
import logging
|
@@ -23,7 +24,7 @@ from pvlib.irradiance import disc
|
|
23
24
|
|
24
25
|
from emhass.retrieve_hass import RetrieveHass
|
25
26
|
from emhass.machine_learning_forecaster import MLForecaster
|
26
|
-
from emhass.utils import get_days_list,
|
27
|
+
from emhass.utils import get_days_list, set_df_index_freq
|
27
28
|
|
28
29
|
|
29
30
|
class Forecast(object):
|
@@ -98,25 +99,25 @@ class Forecast(object):
|
|
98
99
|
"""
|
99
100
|
|
100
101
|
def __init__(self, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict,
|
101
|
-
params: str,
|
102
|
+
params: str, emhass_conf: dict, logger: logging.Logger,
|
102
103
|
opt_time_delta: Optional[int] = 24,
|
103
104
|
get_data_from_file: Optional[bool] = False) -> None:
|
104
105
|
"""
|
105
106
|
Define constructor for the forecast class.
|
106
107
|
|
107
|
-
:param retrieve_hass_conf:
|
108
|
+
:param retrieve_hass_conf: Dictionary containing the needed configuration
|
108
109
|
data from the configuration file, specific to retrieve data from HASS
|
109
110
|
:type retrieve_hass_conf: dict
|
110
|
-
:param optim_conf:
|
111
|
+
:param optim_conf: Dictionary containing the needed configuration
|
111
112
|
data from the configuration file, specific for the optimization task
|
112
113
|
:type optim_conf: dict
|
113
|
-
:param plant_conf:
|
114
|
+
:param plant_conf: Dictionary containing the needed configuration
|
114
115
|
data from the configuration file, specific for the modeling of the PV plant
|
115
116
|
:type plant_conf: dict
|
116
117
|
:param params: Configuration parameters passed from data/options.json
|
117
118
|
:type params: str
|
118
|
-
:param
|
119
|
-
:type
|
119
|
+
:param emhass_conf: Dictionary containing the needed emhass paths
|
120
|
+
:type emhass_conf: dict
|
120
121
|
:param logger: The passed logger object
|
121
122
|
:type logger: logging object
|
122
123
|
:param opt_time_delta: The time delta in hours used to generate forecasts,
|
@@ -141,7 +142,7 @@ class Forecast(object):
|
|
141
142
|
self.var_load_new = self.var_load+'_positive'
|
142
143
|
self.lat = self.retrieve_hass_conf['lat']
|
143
144
|
self.lon = self.retrieve_hass_conf['lon']
|
144
|
-
self.
|
145
|
+
self.emhass_conf = emhass_conf
|
145
146
|
self.logger = logger
|
146
147
|
self.get_data_from_file = get_data_from_file
|
147
148
|
self.var_load_cost = 'unit_load_cost'
|
@@ -169,7 +170,7 @@ class Forecast(object):
|
|
169
170
|
|
170
171
|
|
171
172
|
def get_weather_forecast(self, method: Optional[str] = 'scrapper',
|
172
|
-
csv_path: Optional[str] = "
|
173
|
+
csv_path: Optional[str] = "data_weather_forecast.csv") -> pd.DataFrame:
|
173
174
|
r"""
|
174
175
|
Get and generate weather forecast data.
|
175
176
|
|
@@ -180,6 +181,8 @@ class Forecast(object):
|
|
180
181
|
:rtype: pd.DataFrame
|
181
182
|
|
182
183
|
"""
|
184
|
+
csv_path = self.emhass_conf['data_path'] / csv_path
|
185
|
+
|
183
186
|
self.logger.info("Retrieving weather forecast data using method = "+method)
|
184
187
|
self.weather_forecast_method = method # Saving this attribute for later use to identify csv method usage
|
185
188
|
if method == 'scrapper':
|
@@ -292,7 +295,7 @@ class Forecast(object):
|
|
292
295
|
else:
|
293
296
|
data = data + data_tmp
|
294
297
|
elif method == 'csv': # reading from a csv file
|
295
|
-
weather_csv_file_path =
|
298
|
+
weather_csv_file_path = csv_path
|
296
299
|
# Loading the csv file, we will consider that this is the PV power in W
|
297
300
|
data = pd.read_csv(weather_csv_file_path, header=None, names=['ts', 'yhat'])
|
298
301
|
# Check if the passed data has the correct length
|
@@ -414,9 +417,9 @@ class Forecast(object):
|
|
414
417
|
# Setting the main parameters of the PV plant
|
415
418
|
location = Location(latitude=self.lat, longitude=self.lon)
|
416
419
|
temp_params = TEMPERATURE_MODEL_PARAMETERS['sapm']['close_mount_glass_glass']
|
417
|
-
cec_modules =
|
420
|
+
cec_modules = bz2.BZ2File(pathlib.Path(__file__).parent / 'data/cec_modules.pbz2', "rb")
|
418
421
|
cec_modules = cPickle.load(cec_modules)
|
419
|
-
cec_inverters = bz2.BZ2File(
|
422
|
+
cec_inverters = bz2.BZ2File(pathlib.Path(__file__).parent / 'data/cec_inverters.pbz2', "rb")
|
420
423
|
cec_inverters = cPickle.load(cec_inverters)
|
421
424
|
if type(self.plant_conf['module_model']) == list:
|
422
425
|
P_PV_forecast = pd.Series(0, index=df_weather.index)
|
@@ -518,7 +521,9 @@ class Forecast(object):
|
|
518
521
|
else:
|
519
522
|
days_list = df_csv.index.day.unique().tolist()
|
520
523
|
else:
|
521
|
-
|
524
|
+
if not os.path.exists(csv_path):
|
525
|
+
csv_path = self.emhass_conf['data_path'] / csv_path
|
526
|
+
load_csv_file_path = csv_path
|
522
527
|
df_csv = pd.read_csv(load_csv_file_path, header=None, names=['ts', 'yhat'])
|
523
528
|
df_csv.index = forecast_dates_csv
|
524
529
|
df_csv.drop(['ts'], axis=1, inplace=True)
|
@@ -572,7 +577,7 @@ class Forecast(object):
|
|
572
577
|
return forecast_out
|
573
578
|
|
574
579
|
def get_load_forecast(self, days_min_load_forecast: Optional[int] = 3, method: Optional[str] = 'naive',
|
575
|
-
csv_path: Optional[str] = "
|
580
|
+
csv_path: Optional[str] = "data_load_forecast.csv",
|
576
581
|
set_mix_forecast:Optional[bool] = False, df_now:Optional[pd.DataFrame] = pd.DataFrame(),
|
577
582
|
use_last_window: Optional[bool] = True, mlf: Optional[MLForecaster] = None,
|
578
583
|
debug: Optional[bool] = False) -> pd.Series:
|
@@ -610,6 +615,8 @@ class Forecast(object):
|
|
610
615
|
:rtype: pd.DataFrame
|
611
616
|
|
612
617
|
"""
|
618
|
+
csv_path = self.emhass_conf['data_path'] / csv_path
|
619
|
+
|
613
620
|
if method == 'naive' or method == 'mlforecaster': # retrieving needed data for these methods
|
614
621
|
self.logger.info("Retrieving data from hass for load forecast using method = "+method)
|
615
622
|
var_list = [self.var_load]
|
@@ -618,18 +625,24 @@ class Forecast(object):
|
|
618
625
|
time_zone_load_foreacast = None
|
619
626
|
# We will need to retrieve a new set of load data according to the days_min_load_forecast parameter
|
620
627
|
rh = RetrieveHass(self.retrieve_hass_conf['hass_url'], self.retrieve_hass_conf['long_lived_token'],
|
621
|
-
self.freq, time_zone_load_foreacast, self.params, self.
|
628
|
+
self.freq, time_zone_load_foreacast, self.params, self.emhass_conf, self.logger)
|
622
629
|
if self.get_data_from_file:
|
623
|
-
|
624
|
-
|
630
|
+
filename_path = self.emhass_conf['data_path'] / 'test_df_final.pkl'
|
631
|
+
with open(filename_path, 'rb') as inp:
|
632
|
+
rh.df_final, days_list, var_list = pickle.load(inp)
|
633
|
+
self.var_load = var_list[0]
|
634
|
+
self.retrieve_hass_conf['var_load'] = self.var_load
|
635
|
+
var_interp = [var_list[0]]
|
636
|
+
self.var_list = [var_list[0]]
|
637
|
+
self.var_load_new = self.var_load+'_positive'
|
625
638
|
else:
|
626
639
|
days_list = get_days_list(days_min_load_forecast)
|
627
640
|
if not rh.get_data(days_list, var_list):
|
628
641
|
return False
|
629
|
-
if not rh.prepare_data(
|
630
|
-
|
631
|
-
|
632
|
-
|
642
|
+
if not rh.prepare_data(
|
643
|
+
self.retrieve_hass_conf['var_load'], load_negative = self.retrieve_hass_conf['load_negative'],
|
644
|
+
set_zero_min = self.retrieve_hass_conf['set_zero_min'],
|
645
|
+
var_replace_zero = var_replace_zero, var_interp = var_interp):
|
633
646
|
return False
|
634
647
|
df = rh.df_final.copy()[[self.var_load_new]]
|
635
648
|
if method == 'naive': # using a naive approach
|
@@ -643,13 +656,14 @@ class Forecast(object):
|
|
643
656
|
# Load model
|
644
657
|
model_type = self.params['passed_data']['model_type']
|
645
658
|
filename = model_type+'_mlf.pkl'
|
646
|
-
filename_path =
|
659
|
+
filename_path = self.emhass_conf['data_path'] / filename
|
647
660
|
if not debug:
|
648
661
|
if filename_path.is_file():
|
649
662
|
with open(filename_path, 'rb') as inp:
|
650
663
|
mlf = pickle.load(inp)
|
651
664
|
else:
|
652
665
|
self.logger.error("The ML forecaster file was not found, please run a model fit method before this predict method")
|
666
|
+
return False
|
653
667
|
# Make predictions
|
654
668
|
if use_last_window:
|
655
669
|
data_last_window = copy.deepcopy(df)
|
@@ -657,8 +671,15 @@ class Forecast(object):
|
|
657
671
|
else:
|
658
672
|
data_last_window = None
|
659
673
|
forecast_out = mlf.predict(data_last_window)
|
660
|
-
# Force
|
661
|
-
|
674
|
+
# Force forecast length to avoid mismatches
|
675
|
+
self.logger.debug("Number of ML predict forcast data generated (lags_opt): " + str(len(forecast_out.index)))
|
676
|
+
self.logger.debug("Number of forcast dates obtained: " + str(len(self.forecast_dates)))
|
677
|
+
if len(self.forecast_dates) < len(forecast_out.index):
|
678
|
+
forecast_out = forecast_out.iloc[0:len(self.forecast_dates)]
|
679
|
+
# To be removed once bug is fixed
|
680
|
+
elif len(self.forecast_dates) > len(forecast_out.index):
|
681
|
+
self.logger.error("Unable to obtain: " + str(len(self.forecast_dates)) + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters")
|
682
|
+
return False
|
662
683
|
# Define DataFrame
|
663
684
|
data_dict = {'ts':self.forecast_dates, 'yhat':forecast_out.values.tolist()}
|
664
685
|
data = pd.DataFrame.from_dict(data_dict)
|
@@ -666,7 +687,7 @@ class Forecast(object):
|
|
666
687
|
data.set_index('ts', inplace=True)
|
667
688
|
forecast_out = data.copy().loc[self.forecast_dates]
|
668
689
|
elif method == 'csv': # reading from a csv file
|
669
|
-
load_csv_file_path =
|
690
|
+
load_csv_file_path = csv_path
|
670
691
|
df_csv = pd.read_csv(load_csv_file_path, header=None, names=['ts', 'yhat'])
|
671
692
|
if len(df_csv) < len(self.forecast_dates):
|
672
693
|
self.logger.error("Passed data from CSV is not long enough")
|
@@ -683,6 +704,7 @@ class Forecast(object):
|
|
683
704
|
# Check if the passed data has the correct length
|
684
705
|
if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
|
685
706
|
self.logger.error("Passed data from passed list is not long enough")
|
707
|
+
return False
|
686
708
|
else:
|
687
709
|
# Ensure correct length
|
688
710
|
data_list = data_list[0:len(self.forecast_dates)]
|
@@ -694,6 +716,7 @@ class Forecast(object):
|
|
694
716
|
forecast_out = data.copy().loc[self.forecast_dates]
|
695
717
|
else:
|
696
718
|
self.logger.error("Passed method is not valid")
|
719
|
+
return False
|
697
720
|
P_Load_forecast = copy.deepcopy(forecast_out['yhat'])
|
698
721
|
if set_mix_forecast:
|
699
722
|
P_Load_forecast = Forecast.get_mix_forecast(
|
@@ -723,6 +746,7 @@ class Forecast(object):
|
|
723
746
|
:rtype: pd.DataFrame
|
724
747
|
|
725
748
|
"""
|
749
|
+
csv_path = self.emhass_conf['data_path'] / csv_path
|
726
750
|
if method == 'hp_hc_periods':
|
727
751
|
df_final[self.var_load_cost] = self.optim_conf['load_cost_hc']
|
728
752
|
list_df_hp = []
|
@@ -742,6 +766,7 @@ class Forecast(object):
|
|
742
766
|
# Check if the passed data has the correct length
|
743
767
|
if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
|
744
768
|
self.logger.error("Passed data from passed list is not long enough")
|
769
|
+
return False
|
745
770
|
else:
|
746
771
|
# Ensure correct length
|
747
772
|
data_list = data_list[0:len(self.forecast_dates)]
|
@@ -753,12 +778,13 @@ class Forecast(object):
|
|
753
778
|
df_final[self.var_load_cost] = forecast_out
|
754
779
|
else:
|
755
780
|
self.logger.error("Passed method is not valid")
|
756
|
-
|
781
|
+
return False
|
757
782
|
return df_final
|
758
783
|
|
759
784
|
def get_prod_price_forecast(self, df_final: pd.DataFrame, method: Optional[str] = 'constant',
|
760
|
-
csv_path: Optional[str] = "
|
785
|
+
csv_path: Optional[str] = "data_prod_price_forecast.csv",
|
761
786
|
list_and_perfect: Optional[bool] = False) -> pd.DataFrame:
|
787
|
+
|
762
788
|
r"""
|
763
789
|
Get the unit power production price for the energy injected to the grid.\
|
764
790
|
This is the price of the energy injected to the utility in a vector \
|
@@ -779,13 +805,13 @@ class Forecast(object):
|
|
779
805
|
:rtype: pd.DataFrame
|
780
806
|
|
781
807
|
"""
|
808
|
+
csv_path = self.emhass_conf['data_path'] / csv_path
|
782
809
|
if method == 'constant':
|
783
810
|
df_final[self.var_prod_price] = self.optim_conf['prod_sell_price']
|
784
811
|
elif method == 'csv':
|
785
812
|
forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
|
786
|
-
forecast_out = self.get_forecast_out_from_csv_or_list(
|
787
|
-
|
788
|
-
csv_path)
|
813
|
+
forecast_out = self.get_forecast_out_from_csv_or_list(
|
814
|
+
df_final, forecast_dates_csv, csv_path)
|
789
815
|
df_final[self.var_prod_price] = forecast_out
|
790
816
|
elif method == 'list': # reading a list of values
|
791
817
|
# Loading data from passed list
|
@@ -793,6 +819,7 @@ class Forecast(object):
|
|
793
819
|
# Check if the passed data has the correct length
|
794
820
|
if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
|
795
821
|
self.logger.error("Passed data from passed list is not long enough")
|
822
|
+
return False
|
796
823
|
else:
|
797
824
|
# Ensure correct length
|
798
825
|
data_list = data_list[0:len(self.forecast_dates)]
|
@@ -804,6 +831,6 @@ class Forecast(object):
|
|
804
831
|
df_final[self.var_prod_price] = forecast_out
|
805
832
|
else:
|
806
833
|
self.logger.error("Passed method is not valid")
|
807
|
-
|
834
|
+
return False
|
808
835
|
return df_final
|
809
836
|
|
@@ -38,7 +38,7 @@ class MLForecaster:
|
|
38
38
|
"""
|
39
39
|
|
40
40
|
def __init__(self, data: pd.DataFrame, model_type: str, var_model: str, sklearn_model: str,
|
41
|
-
num_lags: int,
|
41
|
+
num_lags: int, emhass_conf: dict, logger: logging.Logger) -> None:
|
42
42
|
r"""Define constructor for the forecast class.
|
43
43
|
|
44
44
|
:param data: The data that will be used for train/test
|
@@ -56,8 +56,8 @@ class MLForecaster:
|
|
56
56
|
is to fix this as one day. For example if your time step is 30 minutes, then fix this \
|
57
57
|
to 48, if the time step is 1 hour the fix this to 24 and so on.
|
58
58
|
:type num_lags: int
|
59
|
-
:param
|
60
|
-
:type
|
59
|
+
:param emhass_conf: Dictionary containing the needed emhass paths
|
60
|
+
:type emhass_conf: dict
|
61
61
|
:param logger: The passed logger object
|
62
62
|
:type logger: logging.Logger
|
63
63
|
"""
|
@@ -66,7 +66,7 @@ class MLForecaster:
|
|
66
66
|
self.var_model = var_model
|
67
67
|
self.sklearn_model = sklearn_model
|
68
68
|
self.num_lags = num_lags
|
69
|
-
self.
|
69
|
+
self.emhass_conf = emhass_conf
|
70
70
|
self.logger = logger
|
71
71
|
self.is_tuned = False
|
72
72
|
# A quick data preparation
|
@@ -0,0 +1,251 @@
|
|
1
|
+
"""Machine learning regressor module."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import copy
|
6
|
+
import time
|
7
|
+
import warnings
|
8
|
+
from typing import TYPE_CHECKING
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import pandas as pd
|
12
|
+
from sklearn.ensemble import (
|
13
|
+
AdaBoostRegressor,
|
14
|
+
GradientBoostingRegressor,
|
15
|
+
RandomForestRegressor,
|
16
|
+
)
|
17
|
+
from sklearn.linear_model import Lasso, LinearRegression, Ridge
|
18
|
+
from sklearn.metrics import r2_score
|
19
|
+
from sklearn.model_selection import GridSearchCV, train_test_split
|
20
|
+
from sklearn.pipeline import make_pipeline
|
21
|
+
from sklearn.preprocessing import StandardScaler
|
22
|
+
|
23
|
+
if TYPE_CHECKING:
|
24
|
+
import logging
|
25
|
+
|
26
|
+
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
27
|
+
|
28
|
+
# Registry of the supported regression methods.
#
# Each entry maps the public method name (the value expected in
# ``MLRegressor.regression_model``) to the scikit-learn estimator to train and
# to the hyper-parameter grid explored for it. Grid keys are prefixed with the
# lower-cased estimator class name because the estimator is wrapped in a
# pipeline built with ``make_pipeline`` before the grid search is run.
REGRESSION_METHODS = dict(
    LinearRegression=dict(
        model=LinearRegression(),
        param_grid=dict(
            linearregression__fit_intercept=[True, False],
            linearregression__positive=[True, False],
        ),
    ),
    RidgeRegression=dict(
        model=Ridge(),
        param_grid=dict(ridge__alpha=[0.1, 1.0, 10.0]),
    ),
    LassoRegression=dict(
        model=Lasso(),
        param_grid=dict(lasso__alpha=[0.1, 1.0, 10.0]),
    ),
    RandomForestRegression=dict(
        model=RandomForestRegressor(),
        param_grid=dict(randomforestregressor__n_estimators=[50, 100, 200]),
    ),
    GradientBoostingRegression=dict(
        model=GradientBoostingRegressor(),
        param_grid=dict(
            gradientboostingregressor__n_estimators=[50, 100, 200],
            gradientboostingregressor__learning_rate=[0.01, 0.1, 0.2],
        ),
    ),
    AdaBoostRegression=dict(
        model=AdaBoostRegressor(),
        param_grid=dict(
            adaboostregressor__n_estimators=[50, 100, 200],
            adaboostregressor__learning_rate=[0.01, 0.1, 0.2],
        ),
    ),
)
|
63
|
+
|
64
|
+
|
65
|
+
class MLRegressor:
    r"""A regressor class using machine learning models.

    This class uses the `sklearn` module and the machine learning models are
    from `scikit-learn`.

    It exposes two main methods:

    - `fit`: to train a model with the passed data.

    - `predict`: to obtain a prediction from a pre-trained model.

    """

    def __init__(self: MLRegressor, data: pd.DataFrame, model_type: str, regression_model: str,
                 features: list, target: str, timestamp: str, logger: logging.Logger) -> None:
        r"""Define constructor for the regressor class.

        :param data: The data that will be used for train/test
        :type data: pd.DataFrame
        :param model_type: A unique name defining this model and useful to identify \
            for what it will be used for.
        :type model_type: str
        :param regression_model: The model that will be used. For now only \
            these options are possible: `LinearRegression`, `RidgeRegression`, \
            `LassoRegression`, `RandomForestRegression`, \
            `GradientBoostingRegression` and `AdaBoostRegression`.
        :type regression_model: str
        :param features: A list of features. \
            Example: [`solar_production`, `degree_days`].
        :type features: list
        :param target: The target (to be predicted). \
            Example: `heating_hours`.
        :type target: str
        :param timestamp: If defined, the column key that has to be used as timestamp.
        :type timestamp: str
        :param logger: The passed logger object
        :type logger: logging.Logger
        """
        self.data = data
        self.features = features
        self.target = target
        self.timestamp = timestamp
        self.model_type = model_type
        self.regression_model = regression_model
        self.logger = logger
        # Work on index-sorted data and keep only the first row of any duplicated index.
        self.data = self.data.sort_index()
        self.data = self.data[~self.data.index.duplicated(keep="first")]
        # The attributes below are populated by `fit`.
        self.data_exo = None
        self.steps = None
        self.model = None
        self.grid_search = None

    @staticmethod
    def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame:
        """Add date features from the input DataFrame timestamp.

        The input DataFrame is not modified; a deep copy is returned in which
        the `timestamp` column has been converted with `pd.to_datetime`.

        :param data: The input DataFrame
        :type data: pd.DataFrame
        :param date_features: The date features to add. Supported values are \
            `year`, `month`, `day_of_week`, `day_of_year`, `day` and `hour`; \
            any other value is ignored.
        :type date_features: list
        :param timestamp: The column containing the timestamp
        :type timestamp: str
        :return: The DataFrame with the added features
        :rtype: pd.DataFrame
        """
        df = copy.deepcopy(data)  # noqa: PD901
        # Read the column named by `timestamp`, not a hard-coded "timestamp" key.
        df[timestamp] = pd.to_datetime(df[timestamp])
        stamps = df[timestamp]
        # (feature name, pandas Timestamp attribute), in the order the columns are added.
        feature_attributes = (
            ("year", "year"),
            ("month", "month"),
            ("day_of_week", "dayofweek"),
            ("day_of_year", "dayofyear"),
            ("day", "day"),
            ("hour", "hour"),  # previously filled from `.day` by mistake
        )
        for feature, attribute in feature_attributes:
            if feature in date_features:
                df[feature] = [getattr(i, attribute) for i in stamps]
        return df

    def get_regression_model(self: MLRegressor) -> tuple[object, dict] | None:
        r"""
        Get the base model and parameter grid for the specified regression model.

        The lookup is done in the module-level `REGRESSION_METHODS` registry
        using `self.regression_model` as key.

        :param self: The instance of the MLRegressor class.
        :type self: MLRegressor
        :return: A tuple containing the base model and parameter grid, or None \
            (after logging an error) when the regression model name is unknown.
        :rtype: tuple[object, dict] | None
        """
        method = REGRESSION_METHODS.get(self.regression_model)
        if method is None:
            self.logger.error(
                "Passed model %s is not valid",
                self.regression_model,
            )
            return None
        return method["model"], method["param_grid"]

    def fit(self: MLRegressor, date_features: list | None = None) -> None:
        r"""Fit the model using the provided data.

        The data is reduced to the feature, target and (optional) timestamp
        columns, split 80/20 into train/test sets with a fixed random state,
        and a 5-fold grid search scored with the negative mean squared error is
        run on a `StandardScaler` + regression model pipeline. The best
        estimator is stored in `self.model` and its R2 score on the test set is
        logged. If the configured regression model is not valid the method
        returns early and `self.model` is left untouched.

        :param date_features: A list of 'date_features' to take into account when \
            fitting the model.
        :type date_features: list
        """
        self.logger.info("Performing a MLRegressor fit for %s", self.model_type)
        self.data_exo = pd.DataFrame(self.data)
        self.data_exo[self.features] = self.data[self.features]
        self.data_exo[self.target] = self.data[self.target]
        keep_columns = []
        keep_columns.extend(self.features)
        if self.timestamp is not None:
            keep_columns.append(self.timestamp)
        keep_columns.append(self.target)
        self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)]
        self.data_exo = self.data_exo.reset_index(drop=True)
        if date_features is not None:
            if self.timestamp is not None:
                self.data_exo = MLRegressor.add_date_features(
                    self.data_exo,
                    date_features,
                    self.timestamp,
                )
            else:
                self.logger.error(
                    "If no timestamp provided, you can't use date_features, going "
                    "further without date_features.",
                )
        y = self.data_exo[self.target]
        self.data_exo = self.data_exo.drop(self.target, axis=1)
        # The raw timestamp column is not a model input; only derived date features are.
        if self.timestamp is not None:
            self.data_exo = self.data_exo.drop(self.timestamp, axis=1)
        X = self.data_exo
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.steps = len(X_test)
        regression = self.get_regression_model()
        if regression is None:
            # The invalid model name was already logged by get_regression_model;
            # stop here instead of failing on tuple unpacking.
            return
        base_model, param_grid = regression
        self.model = make_pipeline(StandardScaler(), base_model)
        # Create a grid search object
        self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring="neg_mean_squared_error",
                                        refit=True, verbose=0, n_jobs=-1)
        # Fit the grid search object to the data
        self.logger.info("Training a %s model", self.regression_model)
        start_time = time.time()
        self.grid_search.fit(X_train.values, y_train.values)
        self.logger.info("Elapsed time for model fit: %s", time.time() - start_time)
        self.model = self.grid_search.best_estimator_
        # Make predictions
        predictions = self.model.predict(X_test.values)
        predictions = pd.Series(predictions, index=X_test.index)
        pred_metric = r2_score(y_test, predictions)
        self.logger.info(
            "Prediction R2 score of fitted model on test data: %s",
            pred_metric,
        )

    def predict(self: MLRegressor, new_values: list) -> np.ndarray:
        """Predict a new value.

        The values are wrapped into a single-row array and passed to the
        `predict` method of `self.model`, so `fit` must have been run first.

        :param new_values: The new values for the features \
            (in the same order as the features list). \
            Example: [2.24, 5.68].
        :type new_values: list
        :return: The np.ndarray containing the predicted value.
        :rtype: np.ndarray
        """
        self.logger.info("Performing a prediction for %s", self.model_type)
        new_values = np.array([new_values])
        return self.model.predict(new_values)
|
emhass/optimization.py
CHANGED
@@ -31,7 +31,7 @@ class Optimization:
|
|
31
31
|
|
32
32
|
def __init__(self, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict,
|
33
33
|
var_load_cost: str, var_prod_price: str,
|
34
|
-
costfun: str,
|
34
|
+
costfun: str, emhass_conf: dict, logger: logging.Logger,
|
35
35
|
opt_time_delta: Optional[int] = 24) -> None:
|
36
36
|
r"""
|
37
37
|
Define constructor for Optimization class.
|
@@ -50,8 +50,8 @@ class Optimization:
|
|
50
50
|
:type var_prod_price: str
|
51
51
|
:param costfun: The type of cost function to use for optimization problem
|
52
52
|
:type costfun: str
|
53
|
-
:param
|
54
|
-
:type
|
53
|
+
:param emhass_conf: Dictionary containing the needed emhass paths
|
54
|
+
:type emhass_conf: dict
|
55
55
|
:param logger: The passed logger object
|
56
56
|
:type logger: logging object
|
57
57
|
:param opt_time_delta: The number of hours to optimize. If days_list has \
|
@@ -71,6 +71,7 @@ class Optimization:
|
|
71
71
|
self.var_load = self.retrieve_hass_conf['var_load']
|
72
72
|
self.var_load_new = self.var_load+'_positive'
|
73
73
|
self.costfun = costfun
|
74
|
+
# self.emhass_conf = emhass_conf
|
74
75
|
self.logger = logger
|
75
76
|
self.var_load_cost = var_load_cost
|
76
77
|
self.var_prod_price = var_prod_price
|