emhass 0.10.6__py3-none-any.whl → 0.15.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emhass/command_line.py +1827 -735
- emhass/connection_manager.py +108 -0
- emhass/data/associations.csv +98 -0
- emhass/data/cec_inverters.pbz2 +0 -0
- emhass/data/cec_modules.pbz2 +0 -0
- emhass/data/config_defaults.json +120 -0
- emhass/forecast.py +1482 -622
- emhass/img/emhass_icon.png +0 -0
- emhass/machine_learning_forecaster.py +565 -212
- emhass/machine_learning_regressor.py +162 -122
- emhass/optimization.py +1724 -590
- emhass/retrieve_hass.py +1104 -248
- emhass/static/advanced.html +9 -1
- emhass/static/basic.html +4 -2
- emhass/static/configuration_list.html +48 -0
- emhass/static/configuration_script.js +956 -0
- emhass/static/data/param_definitions.json +592 -0
- emhass/static/script.js +377 -322
- emhass/static/style.css +270 -13
- emhass/templates/configuration.html +77 -0
- emhass/templates/index.html +23 -14
- emhass/templates/template.html +4 -5
- emhass/utils.py +1797 -428
- emhass/web_server.py +850 -448
- emhass/websocket_client.py +224 -0
- emhass-0.15.5.dist-info/METADATA +164 -0
- emhass-0.15.5.dist-info/RECORD +34 -0
- {emhass-0.10.6.dist-info → emhass-0.15.5.dist-info}/WHEEL +1 -2
- emhass-0.15.5.dist-info/entry_points.txt +2 -0
- emhass-0.10.6.dist-info/METADATA +0 -622
- emhass-0.10.6.dist-info/RECORD +0 -26
- emhass-0.10.6.dist-info/entry_points.txt +0 -2
- emhass-0.10.6.dist-info/top_level.txt +0 -1
- {emhass-0.10.6.dist-info → emhass-0.15.5.dist-info/licenses}/LICENSE +0 -0
emhass/forecast.py
CHANGED
@@ -1,110 +1,120 @@
-
-
-
-import pathlib
-import os
-import pickle
+import asyncio
+import bz2
 import copy
 import logging
-import
-
-import bz2
+import os
+import pickle
 import pickle as cPickle
-import
-import numpy as np
+import re
 from datetime import datetime, timedelta
-from
-from
-
-
+from itertools import zip_longest
+from urllib.parse import quote
+
+import aiofiles
+import aiohttp
+import numpy as np
+import orjson
+import pandas as pd
+from pvlib.irradiance import disc
 from pvlib.location import Location
 from pvlib.modelchain import ModelChain
+from pvlib.pvsystem import PVSystem
+from pvlib.solarposition import get_solarposition
 from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
-from
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
 
-from emhass.retrieve_hass import RetrieveHass
 from emhass.machine_learning_forecaster import MLForecaster
-from emhass.
+from emhass.machine_learning_regressor import MLRegressor
+from emhass.retrieve_hass import RetrieveHass
+from emhass.utils import add_date_features, get_days_list, set_df_index_freq
 
+header_accept = "application/json"
+error_msg_list_not_long_enough = "Passed data from passed list is not long enough"
+error_msg_method_not_valid = "Passed method is not valid"
 
-
+
+class Forecast:
     r"""
     Generate weather, load and costs forecasts needed as inputs to the optimization.
-
+
     In EMHASS we have basically 4 forecasts to deal with:
-
+
     - PV power production forecast (internally based on the weather forecast and the
     characteristics of your PV plant). This is given in Watts.
-
+
     - Load power forecast: how much power your house will demand on the next 24h. This
     is given in Watts.
-
+
     - PV production selling price forecast: at what price are you selling your excess
     PV production on the next 24h. This is given in EUR/kWh.
-
+
     - Load cost forecast: the price of the energy from the grid on the next 24h. This
     is given in EUR/kWh.
-
+
     There are methods that are generalized to the 4 forecast needed. For all there
     forecasts it is possible to pass the data either as a passed list of values or by
     reading from a CSV file. With these methods it is then possible to use data from
     external forecast providers.
-
-    Then there are the methods that are specific to each type of forecast and that
+
+    Then there are the methods that are specific to each type of forecast and that
     proposed forecast treated and generated internally by this EMHASS forecast class.
-    For the weather forecast a first method (`
-
-    This method seems stable but as with any scrape method it will fail if any changes
-    are made to the webpage API. Another method (`solcast`) is using the SolCast PV
-    production forecast service. A final method (`solar.forecast`) is using another
-    external service: Solar.Forecast, for which just the nominal PV peak installed
-    power should be provided. Search the forecast section on the documentation for examples
+    For the weather forecast a first method (`open-meteo`) uses a open-meteos API
+    proposing detailed forecasts based on Lat/Lon locations.
+    This method seems stable but as with any scrape method it will fail if any changes
+    are made to the webpage API. Another method (`solcast`) is using the SolCast PV
+    production forecast service. A final method (`solar.forecast`) is using another
+    external service: Solar.Forecast, for which just the nominal PV peak installed
+    power should be provided. Search the forecast section on the documentation for examples
    on how to implement these different methods.
-
+
    The `get_power_from_weather` method is proposed here to convert from irradiance
    data to electrical power. The PVLib module is used to model the PV plant.
-
-    The specific methods for the load forecast are a first method (`naive`) that uses
-    a naive approach, also called persistance. It simply assumes that the forecast for
-    a future period will be equal to the observed values in a past period. The past
+
+    The specific methods for the load forecast are a first method (`naive`) that uses
+    a naive approach, also called persistance. It simply assumes that the forecast for
+    a future period will be equal to the observed values in a past period. The past
    period is controlled using parameter `delta_forecast`. A second method (`mlforecaster`)
    uses an internal custom forecasting model using machine learning. There is a section
    in the documentation explaining how to use this method.
-
+
    .. note:: This custom machine learning model is introduced from v0.4.0. EMHASS \
    proposed this new `mlforecaster` class with `fit`, `predict` and `tune` methods. \
    Only the `predict` method is used here to generate new forecasts, but it is \
    necessary to previously fit a forecaster model and it is a good idea to \
    optimize the model hyperparameters using the `tune` method. See the dedicated \
    section in the documentation for more help.
-
+
    For the PV production selling price and Load cost forecasts the privileged method
    is a direct read from a user provided list of values. The list should be passed
    as a runtime parameter during the `curl` to the EMHASS API.
-
-    I reading from a CSV file, it should contain no header and the timestamped data
+
+    I reading from a CSV file, it should contain no header and the timestamped data
    should have the following format:
-
    2021-04-29 00:00:00+00:00,287.07
-
    2021-04-29 00:30:00+00:00,274.27
-
    2021-04-29 01:00:00+00:00,243.38
-
    ...
-
+
    The data columns in these files will correspond to the data in the units expected
    for each forecasting method.
-
+
    """
 
-    def __init__(
-
-
-
+    def __init__(
+        self,
+        retrieve_hass_conf: dict,
+        optim_conf: dict,
+        plant_conf: dict,
+        params: str,
+        emhass_conf: dict,
+        logger: logging.Logger,
+        opt_time_delta: int | None = 24,
+        get_data_from_file: bool | None = False,
+    ) -> None:
        """
        Define constructor for the forecast class.
-
+
        :param retrieve_hass_conf: Dictionary containing the needed configuration
        data from the configuration file, specific to retrieve data from HASS
        :type retrieve_hass_conf: dict
@@ -120,10 +130,10 @@ class Forecast(object):
        :type emhass_conf: dict
        :param logger: The passed logger object
        :type logger: logging object
-        :param opt_time_delta: The time delta in hours used to generate forecasts,
+        :param opt_time_delta: The time delta in hours used to generate forecasts,
        a value of 24 will generate 24 hours of forecast data, defaults to 24
        :type opt_time_delta: int, optional
-        :param get_data_from_file: Select if data should be retrieved from a
+        :param get_data_from_file: Select if data should be retrieved from a
        previously saved pickle useful for testing or directly from connection to
        hass database
        :type get_data_from_file: bool, optional
@@ -132,279 +142,466 @@ class Forecast(object):
        self.retrieve_hass_conf = retrieve_hass_conf
        self.optim_conf = optim_conf
        self.plant_conf = plant_conf
-        self.freq = self.retrieve_hass_conf[
-        self.time_zone = self.retrieve_hass_conf[
-        self.method_ts_round = self.retrieve_hass_conf[
-        self.timeStep = self.freq.seconds/3600 # in hours
+        self.freq = self.retrieve_hass_conf["optimization_time_step"]
+        self.time_zone = self.retrieve_hass_conf["time_zone"]
+        self.method_ts_round = self.retrieve_hass_conf["method_ts_round"]
        self.time_delta = pd.to_timedelta(opt_time_delta, "hours")
-        self.
-        self.
-        self.
-        self.
-        self.
+        self.var_pv = self.retrieve_hass_conf["sensor_power_photovoltaics"]
+        self.var_pv_forecast = self.retrieve_hass_conf["sensor_power_photovoltaics_forecast"]
+        self.var_load = self.retrieve_hass_conf["sensor_power_load_no_var_loads"]
+        self.var_load_new = self.var_load + "_positive"
+        self.lat = self.retrieve_hass_conf["Latitude"]
+        self.lon = self.retrieve_hass_conf["Longitude"]
        self.emhass_conf = emhass_conf
        self.logger = logger
        self.get_data_from_file = get_data_from_file
-        self.var_load_cost =
-        self.var_prod_price =
-        if params is None:
+        self.var_load_cost = "unit_load_cost"
+        self.var_prod_price = "unit_prod_price"
+        if (params is None) or (params == "null"):
+            self.params = {}
+        elif type(params) is dict:
            self.params = params
        else:
-            self.params =
-
-
-
-
-
-
+            self.params = orjson.loads(params)
+
+        if self.method_ts_round == "nearest":
+            self.start_forecast = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
+        elif self.method_ts_round == "first":
+            self.start_forecast = (
+                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
+            )
+        elif self.method_ts_round == "last":
+            self.start_forecast = (
+                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
+            )
        else:
            self.logger.error("Wrong method_ts_round passed parameter")
-
-        self.
-
-
-
-
-
-
-
-
-
+        # check if weather_forecast_cache, if so get 2x the amount of forecast
+        if self.params["passed_data"].get("weather_forecast_cache", False):
+            self.end_forecast = (
+                self.start_forecast + (self.optim_conf["delta_forecast_daily"] * 2)
+            ).replace(microsecond=0)
+        else:
+            self.end_forecast = (
+                self.start_forecast + self.optim_conf["delta_forecast_daily"]
+            ).replace(microsecond=0)
+        self.forecast_dates = (
+            pd.date_range(
+                start=self.start_forecast,
+                end=self.end_forecast - self.freq,
+                freq=self.freq,
+                tz=self.time_zone,
+            )
+            .tz_convert("utc")
+            .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
+            .tz_convert(self.time_zone)
+        )
+        if (
+            params is not None
+            and "prediction_horizon" in list(self.params["passed_data"].keys())
+            and self.params["passed_data"]["prediction_horizon"] is not None
+        ):
+            self.forecast_dates = self.forecast_dates[
+                0 : self.params["passed_data"]["prediction_horizon"]
+            ]
+
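Note: the new `__init__` builds `self.forecast_dates` by generating a localized range and round-tripping it through UTC to normalize DST edges. A minimal sketch of the same pattern follows; the 30-minute step and the timezone are illustrative assumptions, not values taken from this diff:

    import pandas as pd

    freq = pd.Timedelta(minutes=30)  # illustrative optimization_time_step
    tz = "Europe/Paris"              # illustrative time_zone
    # method_ts_round == "first": floor "now" to the step boundary
    start = pd.Timestamp.now(tz=tz).replace(microsecond=0).floor(freq)
    end = (start + pd.Timedelta(days=1)).replace(microsecond=0)
    forecast_dates = (
        pd.date_range(start=start, end=end - freq, freq=freq, tz=tz)
        .tz_convert("utc")
        .round(freq, ambiguous="infer", nonexistent="shift_forward")
        .tz_convert(tz)
    )
    print(len(forecast_dates))  # 48 half-hour steps for a 24 h horizon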
+    async def get_cached_open_meteo_forecast_json(
+        self, max_age: int | None = 30, forecast_days: int = 3
+    ) -> dict:
        r"""
-        Get and
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Get weather forecast json from Open-Meteo and cache it for re-use.
+        The response json is cached in the local file system and returned
+        on subsequent calls until it is older than max_age, at which point
+        attempts will be made to replace it with a new version.
+        The cached version will not be overwritten until a new version has
+        been successfully fetched from Open-Meteo.
+        In the event of connectivity issues, the cached version will continue
+        to be returned until such time as a new version can be successfully
+        fetched from Open-Meteo.
+        If you want to force reload, pass max_age value of zero.
+
+        :param max_age: The maximum age of the cached json file, in minutes,
+        before it is discarded and a new version fetched from Open-Meteo.
+        Defaults to 30 minutes.
+        :type max_age: int, optional
+        :param forecast_days: The number of days of forecast data required from Open-Meteo.
+        One additional day is always fetched from Open-Meteo so there is an extra data in the cache.
+        Defaults to 2 days (3 days fetched) to match the prior default.
+        :type forecast_days: int, optional
+        :return: The json containing the Open-Meteo forecast data
+        :rtype: dict
+
+        """
+
+        # Ensure at least 3 weather forecast days (and 1 more than requested)
+        if forecast_days is None:
+            self.logger.debug("Open-Meteo forecast_days is missing so defaulting to 3 days")
+            forecast_days = 3
+        elif forecast_days < 3:
+            self.logger.debug(
+                "Open-Meteo forecast_days is low (%s) so defaulting to 3 days",
+                forecast_days,
+            )
+            forecast_days = 3
+        else:
+            forecast_days = forecast_days + 1
+
+        # The addition of -b.json file name suffix is because the time format
+        # has changed, and it avoids any attempt to use the old format file.
+        json_path = os.path.abspath(
+            self.emhass_conf["data_path"] / "cached-open-meteo-forecast-b.json"
+        )
+        # The cached JSON file is always loaded, if it exists, as it is also a fallback
+        # in case the REST API call to Open-Meteo fails - the cached JSON will continue to
+        # be used until it can successfully fetch a new version from Open-Meteo.
+        data = None
+        use_cache = False
+        if os.path.exists(json_path):
+            delta = datetime.now() - datetime.fromtimestamp(os.path.getmtime(json_path))
+            json_age = int(delta / timedelta(seconds=60))
+            use_cache = json_age < max_age
+            self.logger.info("Loading existing cached Open-Meteo JSON file: %s", json_path)
+            async with aiofiles.open(json_path) as json_file:
+                content = await json_file.read()
+                data = orjson.loads(content)
+            if use_cache:
+                self.logger.info(
+                    "The cached Open-Meteo JSON file is recent (age=%.0fm, max_age=%sm)",
+                    json_age,
+                    max_age,
+                )
+            else:
+                self.logger.info(
+                    "The cached Open-Meteo JSON file is old (age=%.0fm, max_age=%sm)",
+                    json_age,
+                    max_age,
+                )
+
+        if not use_cache:
+            self.logger.info("Fetching a new weather forecast from Open-Meteo")
+            headers = {"User-Agent": "EMHASS", "Accept": header_accept}
+            # Open-Meteo has returned non-existent time over DST transitions,
+            # so we now return unix timestamps and convert to date/times locally
+            # instead.
+            url = (
+                "https://api.open-meteo.com/v1/forecast?"
+                + "latitude="
+                + str(round(self.lat, 2))
+                + "&longitude="
+                + str(round(self.lon, 2))
+                + "&minutely_15="
+                + "temperature_2m,"
+                + "relative_humidity_2m,"
+                + "rain,"
+                + "cloud_cover,"
+                + "wind_speed_10m,"
+                + "shortwave_radiation_instant,"
+                + "diffuse_radiation_instant,"
+                + "direct_normal_irradiance_instant"
+                + "&forecast_days="
+                + str(forecast_days)
+                + "&timezone="
+                + quote(str(self.time_zone), safe="")
+                + "&timeformat=unixtime"
+            )
+            try:
+                self.logger.debug("Fetching data from Open-Meteo using URL: %s", url)
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(url, headers=headers) as response:
+                        self.logger.debug("Returned HTTP status code: %s", response.status)
+                        response.raise_for_status()
+                        """import bz2  # Uncomment to save a serialized data for tests
+                        import _pickle as cPickle
+                        with bz2.BZ2File("data/test_response_openmeteo_get_method.pbz2", "w") as f:
+                            cPickle.dump(response, f)"""
+                        data = await response.json()
+                self.logger.info(
+                    "Saving response in Open-Meteo JSON cache file: %s",
+                    json_path,
+                )
+                async with aiofiles.open(json_path, "w") as json_file:
+                    content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
+                    await json_file.write(content)
+            except aiohttp.ClientError:
+                self.logger.error("Failed to fetch weather forecast from Open-Meteo", exc_info=True)
+                if data is not None:
+                    self.logger.warning("Returning old cached data until next Open-Meteo attempt")
+
+        return data
+
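Note: the cache policy above is stale-while-revalidate — return the cached JSON while it is younger than max_age, otherwise try to refresh, and keep serving the stale copy if the network call fails. A small standalone sketch of the same pattern (names and the synchronous I/O are illustrative, not part of EMHASS):

    import json, os, time, urllib.request

    def cached_fetch(url, cache_path, max_age_minutes=30):
        """Return cached JSON if fresh; otherwise try to refresh, falling back to stale data."""
        data = None
        if os.path.exists(cache_path):
            age_min = (time.time() - os.path.getmtime(cache_path)) / 60
            with open(cache_path) as f:
                data = json.load(f)
            if age_min < max_age_minutes:
                return data  # fresh enough, no network call
        try:
            with urllib.request.urlopen(url) as resp:
                data = json.load(resp)
            with open(cache_path, "w") as f:
                json.dump(data, f)  # overwrite only after a successful fetch
        except OSError:
            pass  # connectivity issue: keep returning the stale copy, if any
        return data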
+    async def _get_weather_open_meteo(
+        self, w_forecast_cache_path: str, use_legacy_pvlib: bool
+    ) -> pd.DataFrame:
+        """Helper to retrieve weather data from Open-Meteo or cache."""
+        if not os.path.isfile(w_forecast_cache_path):
+            data_raw = await self.get_cached_open_meteo_forecast_json(
+                self.optim_conf["open_meteo_cache_max_age"],
+                self.optim_conf["delta_forecast_daily"].days,
+            )
+            data_15min = pd.DataFrame.from_dict(data_raw["minutely_15"])
+            # Date/times in the Open-Meteo JSON are unix timestamps
+            data_15min["time"] = pd.to_datetime(data_15min["time"], unit="s", utc=True)
+            data_15min["time"] = data_15min["time"].dt.tz_convert(self.time_zone)
+            data_15min.set_index("time", inplace=True)
+            data_15min = data_15min.rename(
+                columns={
+                    "temperature_2m": "temp_air",
+                    "relative_humidity_2m": "relative_humidity",
+                    "rain": "precipitable_water",
+                    "cloud_cover": "cloud_cover",
+                    "wind_speed_10m": "wind_speed",
+                    "shortwave_radiation_instant": "ghi",
+                    "diffuse_radiation_instant": "dhi",
+                    "direct_normal_irradiance_instant": "dni",
+                }
+            )
+            if self.logger.isEnabledFor(logging.DEBUG):
+                data_15min.to_csv(
+                    self.emhass_conf["data_path"] / "debug-weather-forecast-open-meteo.csv"
+                )
+            data = data_15min.reindex(self.forecast_dates)
+            data.interpolate(
+                method="linear",
+                axis=0,
+                limit=None,
+                limit_direction="both",
+                inplace=True,
+            )
+            data = set_df_index_freq(data)
+            index_utc = data.index.tz_convert("utc")
+            index_tz = index_utc.round(
+                freq=data.index.freq, ambiguous="infer", nonexistent="shift_forward"
+            ).tz_convert(self.time_zone)
+            data.index = index_tz
+            data = set_df_index_freq(data)
+            # Convert mm to cm and clip minimum to 0.1 cm
+            data["precipitable_water"] = (data["precipitable_water"] / 10).clip(lower=0.1)
+            if use_legacy_pvlib:
+                data = data.drop(columns=["ghi", "dhi", "dni"])
+                ghi_est = self.cloud_cover_to_irradiance(data["cloud_cover"])
+                data["ghi"] = ghi_est["ghi"]
+                data["dni"] = ghi_est["dni"]
+                data["dhi"] = ghi_est["dhi"]
+            if self.params["passed_data"].get("weather_forecast_cache", False):
+                data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
+        else:
+            data = await self.get_cached_forecast_data(w_forecast_cache_path)
+        return data
+
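Note: the helper above resamples Open-Meteo's 15-minute series onto `self.forecast_dates` with `reindex` followed by linear interpolation in both directions. A toy illustration of that alignment step, with invented values:

    import pandas as pd

    src = pd.Series(
        [100.0, 200.0],
        index=pd.to_datetime(["2021-04-29 00:00", "2021-04-29 00:30"], utc=True),
    )
    target = pd.date_range("2021-04-29 00:00", periods=4, freq="15min", tz="utc")
    aligned = src.reindex(target)  # inserts NaN at 00:15 and 00:45
    aligned = aligned.interpolate(method="linear", limit_direction="both")
    print(aligned.tolist())  # [100.0, 150.0, 200.0, 200.0]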
+    async def _get_weather_solcast(self, w_forecast_cache_path: str) -> pd.DataFrame:
+        """Helper to retrieve weather data from Solcast or cache."""
+        if os.path.isfile(w_forecast_cache_path):
+            return await self.get_cached_forecast_data(w_forecast_cache_path)
+        if self.params["passed_data"].get("weather_forecast_cache_only", False):
+            self.logger.error("Unable to obtain Solcast cache file.")
+            self.logger.error(
+                "Try running optimization again with 'weather_forecast_cache_only': false"
+            )
+            self.logger.error(
+                "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true."
+            )
+            return False
+        if "solcast_api_key" not in self.retrieve_hass_conf:
+            self.logger.error("The solcast_api_key parameter was not defined")
+            return False
+        if "solcast_rooftop_id" not in self.retrieve_hass_conf:
+            self.logger.error("The solcast_rooftop_id parameter was not defined")
+            return False
+        headers = {
+            "User-Agent": "EMHASS",
+            "Authorization": "Bearer " + self.retrieve_hass_conf["solcast_api_key"],
+            "content-type": header_accept,
+        }
+        days_solcast = int(len(self.forecast_dates) * self.freq.seconds / 3600)
+        roof_ids = re.split(r"[,\s]+", self.retrieve_hass_conf["solcast_rooftop_id"].strip())
+        total_data_list = [0] * len(self.forecast_dates)
+
+        async with aiohttp.ClientSession() as session:
+            for roof_id in roof_ids:
+                url = f"https://api.solcast.com.au/rooftop_sites/{roof_id}/forecasts?hours={days_solcast}"
+                async with session.get(url, headers=headers) as response:
+                    if int(response.status) == 200:
+                        data = await response.json()
+                    elif int(response.status) in [402, 429]:
+                        self.logger.error(
+                            "Solcast error: May have exceeded your subscription limit."
+                        )
                        return False
-
-        self.logger.error(
+                    elif int(response.status) >= 400 or (202 <= int(response.status) <= 299):
+                        self.logger.error(
+                            "Solcast error: Issue with request, check API key and rooftop ID."
+                        )
                        return False
-        headers = {
-            'User-Agent': 'EMHASS',
-            "Authorization": "Bearer " + self.retrieve_hass_conf['solcast_api_key'],
-            "content-type": "application/json",
-        }
-        days_solcast = int(len(self.forecast_dates)*self.freq.seconds/3600)
-        # If weather_forecast_cache, set request days as twice as long to avoid length issues (add a buffer)
-        if self.params["passed_data"]["weather_forecast_cache"]:
-            days_solcast = min((days_solcast * 2), 336)
-        url = "https://api.solcast.com.au/rooftop_sites/"+self.retrieve_hass_conf['solcast_rooftop_id']+"/forecasts?hours="+str(days_solcast)
-        response = get(url, headers=headers)
-        '''import bz2 # Uncomment to save a serialized data for tests
-        import _pickle as cPickle
-        with bz2.BZ2File("data/test_response_solcast_get_method.pbz2", "w") as f:
-            cPickle.dump(response, f)'''
-        # Verify the request passed
-        if int(response.status_code) == 200:
-            data = response.json()
-        elif int(response.status_code) == 402 or int(response.status_code) == 429:
-            self.logger.error("Solcast error: May have exceeded your subscription limit.")
-            return False
-        elif int(response.status_code) >= 400 or int(response.status_code) >= 202:
-            self.logger.error("Solcast error: There was a issue with the solcast request, check solcast API key and rooftop ID.")
-            self.logger.error("Solcast error: Check that your subscription is valid and your network can connect to Solcast.")
-            return False
                    data_list = []
-        for elm in data[
-            data_list.append(elm[
-        # Check if the retrieved data has the correct length
+                    for elm in data["forecasts"]:
+                        data_list.append(elm["pv_estimate"] * 1000)
                    if len(data_list) < len(self.forecast_dates):
-            self.logger.error("Not enough data
-        else:
-            # If runtime weather_forecast_cache is true save forecast result to file as cache
-            if self.params["passed_data"]["weather_forecast_cache"]:
-                # Add x2 forecast periods for cached results. This adds a extra delta_forecast amount of days for a buffer
-                cached_forecast_dates = self.forecast_dates.union(pd.date_range(self.forecast_dates[-1], periods=(len(self.forecast_dates) +1), freq=self.freq)[1:])
-                cache_data_list = data_list[0:len(cached_forecast_dates)]
-                cache_data_dict = {'ts':cached_forecast_dates, 'yhat':cache_data_list}
-                data_cache = pd.DataFrame.from_dict(cache_data_dict)
-                data_cache.set_index('ts', inplace=True)
-                with open(w_forecast_cache_path, "wb") as file:
-                    cPickle.dump(data_cache, file)
-                if not os.path.isfile(w_forecast_cache_path):
-                    self.logger.warning("Solcast forecast data could not be saved to file.")
-                else:
-                    self.logger.info("Saved the Solcast results to cache, for later reference.")
-            # Trim request results to forecast_dates
-            data_list = data_list[0:len(self.forecast_dates)]
-            data_dict = {'ts':self.forecast_dates, 'yhat':data_list}
-            # Define DataFrame
-            data = pd.DataFrame.from_dict(data_dict)
-            # Define index
-            data.set_index('ts', inplace=True)
-        # Else, notify user to update cache
-        else:
-            self.logger.error("Unable to obtain Solcast cache file.")
-            self.logger.error("Try running optimization again with 'weather_forecast_cache_only': false")
-            self.logger.error("Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true in an optimization, or run the `forecast-cache` action, to pull new data from Solcast and cache.")
-            return False
-        # Else, open stored weather_forecast_data.pkl file for previous forecast data (cached data)
-        else:
-            with open(w_forecast_cache_path, "rb") as file:
-                data = cPickle.load(file)
-                if not isinstance(data, pd.DataFrame) or len(data) < len(self.forecast_dates):
-                    self.logger.error("There has been a error obtaining cached Solcast forecast data.")
-                    self.logger.error("Try running optimization again with 'weather_forecast_cache': true, or run action `forecast-cache`, to pull new data from Solcast and cache.")
-                    self.logger.warning("Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true")
-                    os.remove(w_forecast_cache_path)
+                        self.logger.error("Not enough data retrieved from Solcast service.")
                        return False
-
-
-
-
+                    total_data_list = [
+                        total + current
+                        for total, current in zip_longest(total_data_list, data_list, fillvalue=0)
+                    ]
+
+        total_data_list = total_data_list[0 : len(self.forecast_dates)]
+        data_dict = {"ts": self.forecast_dates, "yhat": total_data_list}
+        data = pd.DataFrame.from_dict(data_dict)
+        data.set_index("ts", inplace=True)
+        if self.params["passed_data"].get("weather_forecast_cache", False):
+            data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
+        return data
+
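Note: the rewritten Solcast helper now splits `solcast_rooftop_id` on commas/whitespace and sums per-roof forecasts elementwise with `zip_longest`, so roofs with shorter responses pad with zero. A toy illustration with invented numbers:

    from itertools import zip_longest

    total = [0] * 4
    for roof_forecast in ([1.0, 2.0, 3.0, 4.0], [0.5, 0.5]):  # two roofs, uneven lengths
        total = [t + c for t, c in zip_longest(total, roof_forecast, fillvalue=0)]
    print(total)  # [1.5, 2.5, 3.0, 4.0]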
+    async def _get_weather_solar_forecast(self, w_forecast_cache_path: str) -> pd.DataFrame:
+        """Helper to retrieve weather data from solar.forecast or cache."""
+        if os.path.isfile(w_forecast_cache_path):
+            return await self.get_cached_forecast_data(w_forecast_cache_path)
+        # Validation and Default Setup
+        if "solar_forecast_kwp" not in self.retrieve_hass_conf:
+            self.logger.warning(
+                "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
+            )
+            self.retrieve_hass_conf["solar_forecast_kwp"] = 5
+        if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
+            self.logger.warning(
+                "The solar_forecast_kwp parameter is set to zero, setting to default 5"
+            )
+            self.retrieve_hass_conf["solar_forecast_kwp"] = 5
+        if self.optim_conf["delta_forecast_daily"].days > 1:
+            self.logger.warning(
+                "The free public tier for solar.forecast only provides one day forecasts"
+            )
+        headers = {"Accept": header_accept}
+        data = pd.DataFrame()
+
+        async with aiohttp.ClientSession() as session:
+            for i in range(len(self.plant_conf["pv_module_model"])):
+                url = (
+                    "https://api.forecast.solar/estimate/"
+                    + str(round(self.lat, 2))
+                    + "/"
+                    + str(round(self.lon, 2))
+                    + "/"
+                    + str(self.plant_conf["surface_tilt"][i])
+                    + "/"
+                    + str(self.plant_conf["surface_azimuth"][i] - 180)
+                    + "/"
+                    + str(self.retrieve_hass_conf["solar_forecast_kwp"])
+                )
+                async with session.get(url, headers=headers) as response:
+                    data_raw = await response.json()
+                    data_dict = {
+                        "ts": list(data_raw["result"]["watts"].keys()),
+                        "yhat": list(data_raw["result"]["watts"].values()),
+                    }
+                    data_tmp = pd.DataFrame.from_dict(data_dict)
+                    data_tmp.set_index("ts", inplace=True)
+                    data_tmp.index = pd.to_datetime(data_tmp.index)
+                    data_tmp = data_tmp.tz_localize(
+                        self.forecast_dates.tz,
+                        ambiguous="infer",
+                        nonexistent="shift_forward",
+                    )
+                    data_tmp = data_tmp.reindex(index=self.forecast_dates)
+                    # Gap filling
+                    mask_up = data_tmp.copy(deep=True).fillna(method="ffill").isnull()
+                    mask_down = data_tmp.copy(deep=True).fillna(method="bfill").isnull()
+                    data_tmp.loc[mask_up["yhat"], :] = 0.0
+                    data_tmp.loc[mask_down["yhat"], :] = 0.0
+                    data_tmp.interpolate(inplace=True, limit=1)
+                    data_tmp = data_tmp.fillna(0.0)
+                    if len(data) == 0:
+                        data = copy.deepcopy(data_tmp)
                    else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Ensure correct length
-            data_list = data_list[0:len(self.forecast_dates)]
-            # Define DataFrame
-            data_dict = {'ts':self.forecast_dates, 'yhat':data_list}
-            data = pd.DataFrame.from_dict(data_dict)
-            # Define index
-            data.set_index('ts', inplace=True)
+                        data = data + data_tmp
+
+        if self.params["passed_data"].get("weather_forecast_cache", False):
+            data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
+        return data
+
+    def _get_weather_csv(self, csv_path: str) -> pd.DataFrame:
+        """Helper to retrieve weather data from CSV."""
+        data = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
+        if len(data) < len(self.forecast_dates):
+            self.logger.error("Passed data from CSV is not long enough")
+        else:
+            data = data.loc[data.index[0 : len(self.forecast_dates)], :]
+            data.index = self.forecast_dates
+            data.drop("ts", axis=1, inplace=True)
+            data = data.copy().loc[self.forecast_dates]
+        return data
+
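Note: `_get_weather_csv` expects the headerless two-column file (timestamp, value) described in the class docstring earlier in this diff. For instance, a file with that layout parses into the expected ts/yhat frame; this sketch feeds the same text from memory:

    import io
    import pandas as pd

    csv_text = """2021-04-29 00:00:00+00:00,287.07
    2021-04-29 00:30:00+00:00,274.27
    2021-04-29 01:00:00+00:00,243.38
    """
    data = pd.read_csv(io.StringIO(csv_text), header=None, names=["ts", "yhat"])
    print(data.shape)  # (3, 2)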
+    def _get_weather_list(self) -> pd.DataFrame:
+        """Helper to retrieve weather data from a passed list."""
+        data_list = self.params["passed_data"]["pv_power_forecast"]
+        if (
+            len(data_list) < len(self.forecast_dates)
+            and self.params["passed_data"]["prediction_horizon"] is None
+        ):
+            self.logger.error(error_msg_list_not_long_enough)
+            return None
+        else:
+            data_list = data_list[0 : len(self.forecast_dates)]
+            data_dict = {"ts": self.forecast_dates, "yhat": data_list}
+            data = pd.DataFrame.from_dict(data_dict)
+            data.set_index("ts", inplace=True)
+            return data
+
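Note: for the list method, `pv_power_forecast` arrives as a runtime parameter in the POST body, as the class docstring describes. An illustrative Python equivalent of the usual curl call (the `/action/dayahead-optim` path and port 5000 are the typical EMHASS defaults; adjust to your deployment, and the values here are invented):

    import json
    import urllib.request

    payload = {"pv_power_forecast": [0, 0, 150.5, 842.3, 1960.1, 2450.0]}
    req = urllib.request.Request(
        "http://localhost:5000/action/dayahead-optim",  # default EMHASS endpoint/port
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        print(resp.status)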
+    async def get_weather_forecast(
+        self,
+        method: str | None = "open-meteo",
+        csv_path: str | None = "data_weather_forecast.csv",
+        use_legacy_pvlib: bool | None = False,
+    ) -> pd.DataFrame:
+        r"""
+        Get and generate weather forecast data.
+
+        :param method: The desired method, options are 'open-meteo', 'csv', 'list', 'solcast' and \
+        'solar.forecast'. Defaults to 'open-meteo'.
+        :type method: str, optional
+        :return: The DataFrame containing the forecasted data
+        :rtype: pd.DataFrame
+        """
+        csv_path = self.emhass_conf["data_path"] / csv_path
+        w_forecast_cache_path = os.path.abspath(
+            self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
+        )
+        self.logger.info("Retrieving weather forecast data using method = " + method)
+        if method == "scrapper":
+            self.logger.warning(
+                "The scrapper method has been deprecated and the keyword is accepted just for backward compatibility, please change the PV forecast method to open-meteo"
+            )
+        self.weather_forecast_method = method
+        if method in ["open-meteo", "scrapper"]:
+            data = await self._get_weather_open_meteo(w_forecast_cache_path, use_legacy_pvlib)
+        elif method == "solcast":
+            data = await self._get_weather_solcast(w_forecast_cache_path)
+        elif method == "solar.forecast":
+            data = await self._get_weather_solar_forecast(w_forecast_cache_path)
+        elif method == "csv":
+            data = self._get_weather_csv(csv_path)
+        elif method == "list":
+            data = self._get_weather_list()
        else:
            self.logger.error("Method %r is not valid", method)
            data = None
+        self.logger.debug("get_weather_forecast returning:\n%s", data)
        return data
-
-    def cloud_cover_to_irradiance(
-
+
+    def cloud_cover_to_irradiance(
+        self, cloud_cover: pd.Series, offset: int | None = 35
+    ) -> pd.DataFrame:
        """
        Estimates irradiance from cloud cover in the following steps.
-
+
        1. Determine clear sky GHI using Ineichen model and
        climatological turbidity.
-
+
        2. Estimate cloudy sky GHI using a function of cloud_cover
-
+
        3. Estimate cloudy sky DNI using the DISC model.
-
+
        4. Calculate DHI from DNI and GHI.
-
+
        (This function was copied and modified from PVLib)
 
        :param cloud_cover: Cloud cover in %.
@@ -416,21 +613,26 @@ class Forecast(object):
        """
        location = Location(latitude=self.lat, longitude=self.lon)
        solpos = location.get_solarposition(cloud_cover.index)
-        cs = location.get_clearsky(cloud_cover.index, model=
-                                   solar_position=solpos)
+        cs = location.get_clearsky(cloud_cover.index, model="ineichen", solar_position=solpos)
        # Using only the linear method
-        offset = offset / 100.
-        cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.
-        ghi = (offset + (1 - offset) * (1 - cloud_cover_unit)) * cs[
+        offset = offset / 100.0
+        cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.0
+        ghi = (offset + (1 - offset) * (1 - cloud_cover_unit)) * cs["ghi"]
        # Using disc model
-        dni = disc(ghi, solpos[
-        dhi = ghi - dni * np.cos(np.radians(solpos[
-        irrads = pd.DataFrame({
+        dni = disc(ghi, solpos["zenith"], cloud_cover.index)["dni"]
+        dhi = ghi - dni * np.cos(np.radians(solpos["zenith"]))
+        irrads = pd.DataFrame({"ghi": ghi, "dni": dni, "dhi": dhi}).fillna(0)
        return irrads
-
+
    @staticmethod
-    def get_mix_forecast(
-
+    def get_mix_forecast(
+        df_now: pd.DataFrame,
+        df_forecast: pd.DataFrame,
+        alpha: float,
+        beta: float,
+        col: str,
+        ignore_pv_feedback: bool = False,
+    ) -> pd.DataFrame:
        """A simple correction method for forecasted data using the current real values of a variable.
 
        :param df_now: The DataFrame containing the current/real values
@@ -443,127 +645,521 @@ class Forecast(object):
        :type beta: float
        :param col: The column variable name
        :type col: str
+        :param ignore_pv_feedback: If True, bypass mixing and return original forecast (used during curtailment)
+        :type ignore_pv_feedback: bool
        :return: The output DataFrame with the corrected values
        :rtype: pd.DataFrame
        """
-
-
+        # If ignoring PV feedback (e.g., during curtailment), return original forecast
+        if ignore_pv_feedback:
+            return df_forecast
+
+        first_fcst = alpha * df_forecast.iloc[0] + beta * df_now[col].iloc[-1]
+        df_forecast.iloc[0] = int(round(first_fcst))
        return df_forecast
-
-    def
-
-
+
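Note: the mixing rule is a simple convex-style blend of the first forecast step with the latest measurement, first_fcst = alpha * forecast[0] + beta * now[-1]. Worked with invented numbers:

    # Weighted correction of the first forecast step, as in get_mix_forecast.
    alpha, beta = 0.5, 0.5          # illustrative mixing weights
    forecast_first = 1200.0         # first forecasted value (W)
    now_last = 800.0                # latest measured value (W)
    first_fcst = alpha * forecast_first + beta * now_last
    print(int(round(first_fcst)))   # 1000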
+    def _get_model_power(self, params, device_type):
+        """
+        Helper to extract power rating based on device type and available parameters.
+        """
+        if device_type == "module":
+            if "STC" in params:
+                return params["STC"]
+            if "I_mp_ref" in params and "V_mp_ref" in params:
+                return params["I_mp_ref"] * params["V_mp_ref"]
+        elif device_type == "inverter":
+            if "Paco" in params:
+                return params["Paco"]
+            if "Pdco" in params:
+                return params["Pdco"]
+        return None
+
+    def _find_closest_model(self, target_power, database, device_type):
+        """
+        Find the model in the database that has a power rating closest to the target_power.
+        """
+        closest_model = None
+        min_diff = float("inf")
+        # Handle DataFrame (columns are models) or Dict (keys are models)
+        iterator = database.items() if hasattr(database, "items") else database.iteritems()
+        for _, params in iterator:
+            power = self._get_model_power(params, device_type)
+            if power is not None:
+                diff = abs(power - target_power)
+                if diff < min_diff:
+                    min_diff = diff
+                    closest_model = params
+        if closest_model is not None:
+            # Safely get name if it exists (DataFrame Series usually have a .name attribute)
+            model_name = getattr(closest_model, "name", "unknown")
+            self.logger.info(f"Closest {device_type} model to {target_power}W found: {model_name}")
+        else:
+            self.logger.warning(f"No suitable {device_type} model found close to {target_power}W")
+        return closest_model
+
+    def _get_model(self, model_spec, database, device_type):
+        """
+        Retrieve a model from the database by name or by power rating.
+        """
+        # If it's a string, try to find it by name
+        if isinstance(model_spec, str):
+            if model_spec in database:
+                return database[model_spec]
+            # If not found by name, check if it is a number string (e.g., "300")
+            try:
+                target_power = float(model_spec)
+                return self._find_closest_model(target_power, database, device_type)
+            except ValueError:
+                # Not a number, fallback to original behavior (will likely raise KeyError later)
+                self.logger.warning(f"{device_type} model '{model_spec}' not found in database.")
+                return database[model_spec]
+        # If it's a number (int or float), find closest by power
+        elif isinstance(model_spec, int | float):
+            return self._find_closest_model(model_spec, database, device_type)
+        else:
+            self.logger.error(f"Invalid type for {device_type} model: {type(model_spec)}")
+            return None
+
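Note: the `_get_model_power` / `_find_closest_model` / `_get_model` trio lets plant_conf give either an exact database key or a numeric power rating. A minimal standalone re-implementation of the closest-power search over a toy dict database (names and ratings invented):

    def closest_by_power(target_w, database):
        """Pick the entry whose 'Paco' rating is nearest to target_w (toy inverter database)."""
        best, best_diff = None, float("inf")
        for name, params in database.items():
            diff = abs(params["Paco"] - target_w)
            if diff < best_diff:
                best, best_diff = name, diff
        return best

    inverters = {"inv_a": {"Paco": 3000.0}, "inv_b": {"Paco": 5000.0}}
    print(closest_by_power(4200.0, inverters))  # inv_b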
+    def _calculate_pvlib_power(self, df_weather: pd.DataFrame) -> pd.Series:
+        """
+        Helper to simulate PV power generation using PVLib when no direct forecast is available.
+        """
+        # Setting the main parameters of the PV plant
+        location = Location(latitude=self.lat, longitude=self.lon)
+        temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"]["close_mount_glass_glass"]
+        # Load CEC databases
+        cec_modules_path = self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2"
+        cec_inverters_path = self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2"
+        with bz2.BZ2File(cec_modules_path, "rb") as f:
+            cec_modules = cPickle.load(f)
+        with bz2.BZ2File(cec_inverters_path, "rb") as f:
+            cec_inverters = cPickle.load(f)
+
+        # Inner helper to run a single simulation configuration
+        def run_single_config(mod_spec, inv_spec, tilt, azimuth, mod_per_str, str_per_inv):
+            module = self._get_model(mod_spec, cec_modules, "module")
+            inverter = self._get_model(inv_spec, cec_inverters, "inverter")
+            system = PVSystem(
+                surface_tilt=tilt,
+                surface_azimuth=azimuth,
+                module_parameters=module,
+                inverter_parameters=inverter,
+                temperature_model_parameters=temp_params,
+                modules_per_string=mod_per_str,
+                strings_per_inverter=str_per_inv,
+            )
+            mc = ModelChain(system, location, aoi_model="physical")
+            mc.run_model(df_weather)
+            return mc.results.ac
+
+        # Handle list (mixed orientation) vs single configuration
+        if isinstance(self.plant_conf["pv_module_model"], list):
+            p_pv_forecast = pd.Series(0, index=df_weather.index)
+            for i in range(len(self.plant_conf["pv_module_model"])):
+                result = run_single_config(
+                    self.plant_conf["pv_module_model"][i],
+                    self.plant_conf["pv_inverter_model"][i],
+                    self.plant_conf["surface_tilt"][i],
+                    self.plant_conf["surface_azimuth"][i],
+                    self.plant_conf["modules_per_string"][i],
+                    self.plant_conf["strings_per_inverter"][i],
+                )
+                p_pv_forecast = p_pv_forecast + result
+        else:
+            p_pv_forecast = run_single_config(
+                self.plant_conf["pv_module_model"],
+                self.plant_conf["pv_inverter_model"],
+                self.plant_conf["surface_tilt"],
+                self.plant_conf["surface_azimuth"],
+                self.plant_conf["modules_per_string"],
+                self.plant_conf["strings_per_inverter"],
+            )
+        return p_pv_forecast
+
+    def get_power_from_weather(
+        self,
+        df_weather: pd.DataFrame,
+        set_mix_forecast: bool | None = False,
+        df_now: pd.DataFrame | None = pd.DataFrame(),
+    ) -> pd.Series:
        r"""
-        Convert
-
+        Convert weather forecast data into electrical power.
+
        :param df_weather: The DataFrame containing the weather forecasted data. \
        This DF should be generated by the 'get_weather_forecast' method or at \
        least contain the same columns names filled with proper data.
        :type df_weather: pd.DataFrame
-        :param set_mix_forecast: Use a mixed
+        :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
        :type set_mix_forecast: Bool, optional
        :param df_now: The DataFrame containing the now/current data.
        :type df_now: pd.DataFrame
        :return: The DataFrame containing the electrical power in Watts
        :rtype: pd.DataFrame
-
        """
        # If using csv method we consider that yhat is the PV power in W
-        if
-
+        if (
+            "solar_forecast_kwp" in self.retrieve_hass_conf.keys()
+            and self.retrieve_hass_conf["solar_forecast_kwp"] == 0
+        ):
+            p_pv_forecast = pd.Series(0, index=df_weather.index)
+        elif self.weather_forecast_method in [
+            "solcast",
+            "solar.forecast",
+            "csv",
+            "list",
+        ]:
+            p_pv_forecast = df_weather["yhat"]
+            p_pv_forecast.name = None
        else:
-
-
-            P_PV_forecast = df_weather['yhat']
-            P_PV_forecast.name = None
-        else: # We will transform the weather data into electrical power
-            # Transform to power (Watts)
-            # Setting the main parameters of the PV plant
-            location = Location(latitude=self.lat, longitude=self.lon)
-            temp_params = TEMPERATURE_MODEL_PARAMETERS['sapm']['close_mount_glass_glass']
-            cec_modules = bz2.BZ2File(self.emhass_conf['root_path'] / 'data' / 'cec_modules.pbz2', "rb")
-            cec_modules = cPickle.load(cec_modules)
-            cec_inverters = bz2.BZ2File(self.emhass_conf['root_path'] / 'data' / 'cec_inverters.pbz2', "rb")
-            cec_inverters = cPickle.load(cec_inverters)
-            if type(self.plant_conf['module_model']) == list:
-                P_PV_forecast = pd.Series(0, index=df_weather.index)
-                for i in range(len(self.plant_conf['module_model'])):
-                    # Selecting correct module and inverter
-                    module = cec_modules[self.plant_conf['module_model'][i]]
-                    inverter = cec_inverters[self.plant_conf['inverter_model'][i]]
-                    # Building the PV system in PVLib
-                    system = PVSystem(surface_tilt=self.plant_conf['surface_tilt'][i],
-                                      surface_azimuth=self.plant_conf['surface_azimuth'][i],
-                                      module_parameters=module,
-                                      inverter_parameters=inverter,
-                                      temperature_model_parameters=temp_params,
-                                      modules_per_string=self.plant_conf['modules_per_string'][i],
-                                      strings_per_inverter=self.plant_conf['strings_per_inverter'][i])
-                    mc = ModelChain(system, location, aoi_model="physical")
-                    # Run the model on the weather DF indexes
-                    mc.run_model(df_weather)
-                    # Extracting results for AC power
-                    P_PV_forecast = P_PV_forecast + mc.results.ac
-            else:
-                # Selecting correct module and inverter
-                module = cec_modules[self.plant_conf['module_model']]
-                inverter = cec_inverters[self.plant_conf['inverter_model']]
-                # Building the PV system in PVLib
-                system = PVSystem(surface_tilt=self.plant_conf['surface_tilt'],
-                                  surface_azimuth=self.plant_conf['surface_azimuth'],
-                                  module_parameters=module,
-                                  inverter_parameters=inverter,
-                                  temperature_model_parameters=temp_params,
-                                  modules_per_string=self.plant_conf['modules_per_string'],
-                                  strings_per_inverter=self.plant_conf['strings_per_inverter'])
-                mc = ModelChain(system, location, aoi_model="physical")
-                # Run the model on the weather DF indexes
-                mc.run_model(df_weather)
-                # Extracting results for AC power
-                P_PV_forecast = mc.results.ac
+            # We will transform the weather data into electrical power
+            p_pv_forecast = self._calculate_pvlib_power(df_weather)
        if set_mix_forecast:
-
-
-
-
-
-
+            ignore_pv_feedback = self.params["passed_data"].get(
+                "ignore_pv_feedback_during_curtailment", False
+            )
+            p_pv_forecast = Forecast.get_mix_forecast(
+                df_now,
+                p_pv_forecast,
+                self.params["passed_data"]["alpha"],
+                self.params["passed_data"]["beta"],
+                self.var_pv,
+                ignore_pv_feedback,
+            )
+        p_pv_forecast[p_pv_forecast < 0] = 0  # replace any negative PV values with zero
+        self.logger.debug("get_power_from_weather returning:\n%s", p_pv_forecast)
+        return p_pv_forecast
+
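Note: direct-power methods (solcast, solar.forecast, csv, list) now pass yhat straight through, negatives are clipped, and the curtailment flag simply bypasses the mixing step. A hedged call sketch, assuming an already-configured Forecast instance `fcst` and a current-values frame `df_now` (both hypothetical here):

    import pandas as pd

    async def run_pv_forecast(fcst, df_now: pd.DataFrame):
        # `fcst` is assumed to be a configured Forecast instance (hypothetical driver code)
        df_weather = await fcst.get_weather_forecast(method="open-meteo")
        p_pv = fcst.get_power_from_weather(df_weather, set_mix_forecast=True, df_now=df_now)
        assert (p_pv >= 0).all()  # negative AC estimates are clipped to zero
        return p_pv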
+    @staticmethod
+    def compute_solar_angles(df: pd.DataFrame, latitude: float, longitude: float) -> pd.DataFrame:
+        """
+        Compute solar angles (elevation, azimuth) based on timestamps and location.
+
+        :param df: DataFrame with a DateTime index.
+        :param latitude: Latitude of the PV system.
+        :param longitude: Longitude of the PV system.
+        :return: DataFrame with added solar elevation and azimuth.
+        """
+        df = df.copy()
+        solpos = get_solarposition(df.index, latitude, longitude)
+        df["solar_elevation"] = solpos["elevation"]
+        df["solar_azimuth"] = solpos["azimuth"]
+        return df
+
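Note: compute_solar_angles adds pvlib solar-position columns as regression features for the PV-adjust model. A standalone sketch (the coordinates are illustrative):

    import pandas as pd
    from pvlib.solarposition import get_solarposition

    idx = pd.date_range("2021-04-29", periods=4, freq="6h", tz="UTC")
    solpos = get_solarposition(idx, latitude=45.0, longitude=6.0)  # illustrative coordinates
    print(solpos[["elevation", "azimuth"]])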
+    def adjust_pv_forecast_data_prep(self, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Prepare data for adjusting the photovoltaic (PV) forecast.
+
+        This method aligns the actual PV production data with the forecasted data,
+        adds additional features for analysis, and separates the predictors (X)
+        from the target variable (y).
+
+        :param data: A DataFrame containing the actual PV production data and the
+        forecasted PV production data.
+        :type data: pd.DataFrame
+        :return: DataFrame with data for adjusted PV model train.
+        """
+        # Extract target and predictor
+        self.logger.debug("adjust_pv_forecast_data_prep using data:\n%s", data)
+        if self.logger.isEnabledFor(logging.DEBUG):
+            data.to_csv(
+                self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-input-data.csv"
+            )
+        P_PV = data[self.var_pv]  # Actual PV production
+        p_pv_forecast = data[self.var_pv_forecast]  # Forecasted PV production
+        # Define time ranges
+        last_day = data.index.max().normalize()  # Last available day
+        three_months_ago = last_day - pd.DateOffset(
+            days=self.retrieve_hass_conf["historic_days_to_retrieve"]
+        )
+        # Train/Test: Last historic_days_to_retrieve days (excluding the last day)
+        train_test_mask = (data.index >= three_months_ago) & (data.index < last_day)
+        self.p_pv_train_test = P_PV[train_test_mask]
+        self.p_pv_forecast_train_test = p_pv_forecast[train_test_mask]
+        # Validation: Last day only
+        validation_mask = data.index >= last_day
+        self.p_pv_validation = P_PV[validation_mask]
+        self.p_pv_forecast_validation = p_pv_forecast[validation_mask]
+        # Ensure data is aligned
+        self.data_adjust_pv = pd.concat(
+            [P_PV.rename("actual"), p_pv_forecast.rename("forecast")], axis=1
+        ).dropna()
+        # Add more features
+        self.data_adjust_pv = add_date_features(self.data_adjust_pv)
+        self.data_adjust_pv = Forecast.compute_solar_angles(self.data_adjust_pv, self.lat, self.lon)
+        # Features (X) and target (y)
+        self.x_adjust_pv = self.data_adjust_pv.drop(columns=["actual"])  # Predictors
+        self.y_adjust_pv = self.data_adjust_pv["actual"]  # Target: actual PV production
+        self.logger.debug("adjust_pv_forecast_data_prep output data:\n%s", self.data_adjust_pv)
+        if self.logger.isEnabledFor(logging.DEBUG):
+            self.data_adjust_pv.to_csv(
+                self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-output-data.csv"
+            )
+
async def adjust_pv_forecast_fit(
|
|
899
|
+
self,
|
|
900
|
+
n_splits: int = 5,
|
|
901
|
+
regression_model: str = "LassoRegression",
|
|
902
|
+
debug: bool | None = False,
|
|
903
|
+
) -> pd.DataFrame:
|
|
904
|
+
"""
|
|
905
|
+
Fit a regression model to adjust the photovoltaic (PV) forecast.
|
|
906
|
+
|
|
907
|
+
This method uses historical actual and forecasted PV production data, along with
|
|
908
|
+
additional solar and date features, to train a regression model. The model is
|
|
909
|
+
optimized using a grid search with time-series cross-validation.
|
|
910
|
+
|
|
911
|
+
:param n_splits: The number of splits for time-series cross-validation, defaults to 5.
|
|
912
|
+
:type n_splits: int, optional
|
|
913
|
+
:param regression_model: The type of regression model to use. See REGRESSION_METHODS \
|
|
914
|
+
in machine_learning_regressor.py for the authoritative list of supported models. \
|
|
915
|
+
Currently: 'LinearRegression', 'RidgeRegression', 'LassoRegression', 'ElasticNet', \
|
|
916
|
+
'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'RandomForestRegressor', \
|
|
917
|
+
'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', \
|
|
918
|
+
'MLPRegressor'. Defaults to "LassoRegression".
|
|
919
|
+
:type regression_model: str, optional
|
|
920
|
+
:param debug: If True, the model is not saved to disk, useful for debugging, defaults to False.
|
|
921
|
+
:type debug: bool, optional
|
|
922
|
+
:return: A DataFrame containing the adjusted PV forecast.
|
|
923
|
+
:rtype: pd.DataFrame
|
|
924
|
+
"""
|
|
925
|
+
# Get regression model and hyperparameter grid
|
|
926
|
+
mlr = MLRegressor(
|
|
927
|
+
self.data_adjust_pv,
|
|
928
|
+
"adjusted_pv_forecast",
|
|
929
|
+
regression_model,
|
|
930
|
+
list(self.x_adjust_pv.columns),
|
|
931
|
+
list(self.y_adjust_pv.name),
|
|
932
|
+
None,
|
|
933
|
+
self.logger,
|
|
934
|
+
)
|
|
935
|
+
pipeline, param_grid = mlr._get_model_and_params()
|
|
936
|
+
# Time-series split
|
|
937
|
+
tscv = TimeSeriesSplit(n_splits=n_splits)
|
|
938
|
+
grid_search = GridSearchCV(
|
|
939
|
+
pipeline, param_grid, cv=tscv, scoring="neg_mean_squared_error", verbose=0
|
|
940
|
+
)
|
|
941
|
+
# Train model
|
|
942
|
+
await asyncio.to_thread(grid_search.fit, self.x_adjust_pv, self.y_adjust_pv)
|
|
943
|
+
self.model_adjust_pv = grid_search.best_estimator_
|
|
944
|
+
# Calculate training metrics
|
|
945
|
+
y_pred_train = self.model_adjust_pv.predict(self.x_adjust_pv)
|
|
946
|
+
self.rmse = np.sqrt(mean_squared_error(self.y_adjust_pv, y_pred_train))
|
|
947
|
+
self.r2 = r2_score(self.y_adjust_pv, y_pred_train)
|
|
948
|
+
# Log the metrics
|
|
949
|
+
self.logger.info(f"PV adjust Training metrics: RMSE = {self.rmse}, R2 = {self.r2}")
|
|
950
|
+
# Save model
|
|
951
|
+
if not debug:
|
|
952
|
+
filename = "adjust_pv_regressor.pkl"
|
|
953
|
+
filename_path = self.emhass_conf["data_path"] / filename
|
|
954
|
+
async with aiofiles.open(filename_path, "wb") as outp:
|
|
955
|
+
await outp.write(pickle.dumps(self.model_adjust_pv, pickle.HIGHEST_PROTOCOL))
|
|
956
|
+
|
|
957
|
+
def adjust_pv_forecast_predict(self, forecasted_pv: pd.DataFrame | None = None) -> pd.DataFrame:
|
|
958
|
+
"""
|
|
959
|
+
Predict the adjusted photovoltaic (PV) forecast.
|
|
960
|
+
|
|
961
|
+
This method uses the trained regression model to predict the adjusted PV forecast
|
|
962
|
+
based on either the validation data stored in `self` or a new forecasted PV data
|
|
963
|
+
passed as input. It applies additional features such as date and solar angles to
|
|
964
|
+
the forecasted PV production data before making predictions. The solar elevation
|
|
965
|
+
is used to avoid negative values and to fix values at the beginning and end of the day.
|
|
966
|
+
|
|
967
|
+
:param forecasted_pv: Optional. A DataFrame containing the forecasted PV production data.
|
|
968
|
+
It must have a DateTime index and a column named "forecast".
|
|
969
|
+
If not provided, the method will use `self.p_pv_forecast_validation`.
|
|
970
|
+
:type forecasted_pv: pd.DataFrame, optional
|
|
971
|
+
:return: A DataFrame containing the adjusted PV forecast with additional features.
|
|
972
|
+
:rtype: pd.DataFrame
|
|
973
|
+
"""
|
|
974
|
+
# Use the provided forecasted PV data or fall back to the validation data in `self`
|
|
975
|
+
if forecasted_pv is not None:
|
|
976
|
+
# Ensure the input DataFrame has the required structure
|
|
977
|
+
if "forecast" not in forecasted_pv.columns:
|
|
978
|
+
raise ValueError("The input DataFrame must contain a 'forecast' column.")
|
|
979
|
+
forecast_data = forecasted_pv.copy()
|
|
980
|
+
else:
|
|
981
|
+
# Use the validation data stored in `self`
|
|
982
|
+
forecast_data = self.p_pv_forecast_validation.rename("forecast").to_frame()
|
|
983
|
+
# Prepare the forecasted PV data
|
|
984
|
+
forecast_data = add_date_features(forecast_data)
|
|
985
|
+
forecast_data = Forecast.compute_solar_angles(forecast_data, self.lat, self.lon)
|
|
986
|
+
# Predict the adjusted forecast
|
|
987
|
+
forecast_data["adjusted_forecast"] = self.model_adjust_pv.predict(forecast_data)
|
|
988
|
+
|
|
989
|
+
# Apply solar elevation weighting only for specific cases
|
|
990
|
+
def apply_weighting(row):
|
|
991
|
+
if row["solar_elevation"] <= 0: # Nighttime or negative solar elevation
|
|
992
|
+
return 0
|
|
993
|
+
elif (
|
|
994
|
+
row["solar_elevation"] < self.optim_conf["adjusted_pv_solar_elevation_threshold"]
|
|
995
|
+
): # Early morning or late evening
|
|
996
|
+
return max(
|
|
997
|
+
row["adjusted_forecast"]
|
|
998
|
+
* (
|
|
999
|
+
row["solar_elevation"]
|
|
1000
|
+
/ self.optim_conf["adjusted_pv_solar_elevation_threshold"]
|
|
1001
|
+
),
|
|
1002
|
+
0,
|
|
1003
|
+
)
|
|
1004
|
+
else: # Daytime with sufficient solar elevation
|
|
1005
|
+
return row["adjusted_forecast"]
|
|
1006
|
+
|
|
1007
|
+
forecast_data["adjusted_forecast"] = forecast_data.apply(apply_weighting, axis=1)
|
|
1008
|
+
# If using validation data, calculate validation metrics
|
|
1009
|
+
if forecasted_pv is None:
|
|
1010
|
+
y_true = self.p_pv_validation.values
|
|
1011
|
+
y_pred = forecast_data["adjusted_forecast"].values
|
|
1012
|
+
self.validation_rmse = np.sqrt(mean_squared_error(y_true, y_pred))
|
|
1013
|
+
self.validation_r2 = r2_score(y_true, y_pred)
|
|
1014
|
+
# Log the validation metrics
|
|
1015
|
+
self.logger.info(
|
|
1016
|
+
f"PV adjust Validation metrics: RMSE = {self.validation_rmse}, R2 = {self.validation_r2}"
|
|
1017
|
+
)
|
|
1018
|
+
self.logger.debug("adjust_pv_forecast_predict forecast data:\n%s", forecast_data)
|
|
1019
|
+
if self.logger.isEnabledFor(logging.DEBUG):
|
|
1020
|
+
forecast_data.to_csv(
|
|
1021
|
+
self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-predict-forecast-data.csv"
|
|
1022
|
+
)
|
|
1023
|
+
# Return the DataFrame with the adjusted forecast
|
|
1024
|
+
return forecast_data
|
|
1025
|
+
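A minimal usage sketch of the three new methods above, assuming a fully configured `Forecast` instance `fcst` and an input DataFrame holding both the actual (`fcst.var_pv`) and forecasted (`fcst.var_pv_forecast`) PV columns:

```python
import asyncio

async def run_pv_adjust(fcst, df_input):
    fcst.adjust_pv_forecast_data_prep(df_input)    # build X/y plus train/validation splits
    await fcst.adjust_pv_forecast_fit(n_splits=5)  # grid-search a LassoRegression pipeline
    adjusted = fcst.adjust_pv_forecast_predict()   # predict on the held-out last day
    return adjusted["adjusted_forecast"]

# asyncio.run(run_pv_adjust(fcst, df_input))
```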
+    def get_forecast_days_csv(self, timedelta_days: int | None = 1) -> pd.date_range:
         r"""
         Get the date range vector of forecast dates that will be used when loading a CSV file.
-
+
         :return: The forecast dates vector
         :rtype: pd.date_range

         """
         start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
-        if self.method_ts_round == 'nearest':
-            start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
-        elif self.method_ts_round == 'first':
-            start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
-        elif self.method_ts_round == 'last':
-            start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
+        if self.method_ts_round == "nearest":
+            start_forecast_csv = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
+        elif self.method_ts_round == "first":
+            start_forecast_csv = (
+                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
+            )
+        elif self.method_ts_round == "last":
+            start_forecast_csv = (
+                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
+            )
         else:
             self.logger.error("Wrong method_ts_round passed parameter")
-        end_forecast_csv = (start_forecast_csv + self.optim_conf['delta_forecast']).replace(microsecond=0)
-        [… 7 lines removed; content lost in this rendering …]
+        end_forecast_csv = (start_forecast_csv + self.optim_conf["delta_forecast_daily"]).replace(
+            microsecond=0
+        )
+        forecast_dates_csv = (
+            pd.date_range(
+                start=start_forecast_csv,
+                end=end_forecast_csv + timedelta(days=timedelta_days) - self.freq,
+                freq=self.freq,
+                tz=self.time_zone,
+            )
+            .tz_convert("utc")
+            .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
+            .tz_convert(self.time_zone)
+        )
+        if (
+            self.params is not None
+            and "prediction_horizon" in list(self.params["passed_data"].keys())
+            and self.params["passed_data"]["prediction_horizon"] is not None
+        ):
+            forecast_dates_csv = forecast_dates_csv[
+                0 : self.params["passed_data"]["prediction_horizon"]
+            ]
         return forecast_dates_csv
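To illustrate what the three `method_ts_round` options do to the start of the forecast window, here is a small standalone example (timestamps and timezone are arbitrary):

```python
import pandas as pd

now = pd.Timestamp("2024-06-15 10:37:42", tz="Europe/Paris")

print(now)                 # 'nearest': keep the current time as-is
print(now.floor("30min"))  # 'first':   2024-06-15 10:30:00+02:00
print(now.ceil("30min"))   # 'last':    2024-06-15 11:00:00+02:00
```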
-
-    def get_forecast_out_from_csv_or_list(self, df_final: pd.DataFrame, forecast_dates_csv: pd.date_range,
-    [… 2 removed signature lines lost in this rendering …]
+
+    def _load_forecast_data(
+        self,
+        csv_path: str,
+        data_list: list | None,
+        forecast_dates_csv: pd.date_range,
+    ) -> pd.DataFrame:
+        """
+        Helper to load and format forecast data from a CSV file or a list.
+        """
+        if csv_path is None:
+            data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
+            df_csv = pd.DataFrame.from_dict(data_dict)
+            df_csv.index = forecast_dates_csv
+            df_csv = df_csv.drop(["ts"], axis=1)
+            df_csv = set_df_index_freq(df_csv)
+        else:
+            if not os.path.exists(csv_path):
+                csv_path = self.emhass_conf["data_path"] / csv_path
+            df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
+            # Check if first column is a valid datetime
+            first_col = df_csv.iloc[:, 0]
+            if pd.to_datetime(first_col, errors="coerce").notna().all():
+                df_csv["ts"] = pd.to_datetime(df_csv["ts"], utc=True)
+                df_csv.set_index("ts", inplace=True)
+                df_csv.index = df_csv.index.tz_convert(self.time_zone)
+            else:
+                df_csv.index = forecast_dates_csv
+                df_csv = df_csv.drop(["ts"], axis=1)
+            df_csv = set_df_index_freq(df_csv)
+        return df_csv
+
+    def _extract_daily_forecast(
+        self,
+        day: int,
+        df_timing: pd.DataFrame,
+        df_csv: pd.DataFrame,
+        csv_path: str,
+        list_and_perfect: bool,
+    ) -> pd.DataFrame:
+        """
+        Helper to extract a specific day's forecast data based on timing configuration.
+        """
+        # Find the start and end indices for the specific day in the timing DataFrame
+        day_mask = df_timing.index.day == day
+        day_indices = [i for i, x in enumerate(day_mask) if x]
+        first_elm_index = day_indices[0]
+        last_elm_index = day_indices[-1]
+        # Define the target forecast index based on the timing DataFrame
+        fcst_index = pd.date_range(
+            start=df_timing.index[first_elm_index],
+            end=df_timing.index[last_elm_index],
+            freq=df_timing.index.freq,
+        )
+        first_hour = f"{df_timing.index[first_elm_index].hour:02d}:{df_timing.index[first_elm_index].minute:02d}"
+        last_hour = f"{df_timing.index[last_elm_index].hour:02d}:{df_timing.index[last_elm_index].minute:02d}"
+        # Extract data
+        if csv_path is None:
+            if list_and_perfect:
+                values_array = df_csv.between_time(first_hour, last_hour).values
+                # Adjust index length if necessary
+                fcst_index = fcst_index[0 : len(values_array)]
+                return pd.DataFrame(values_array, index=fcst_index)
+            else:
+                return pd.DataFrame(
+                    df_csv.loc[fcst_index, :].between_time(first_hour, last_hour).values,
+                    index=fcst_index,
+                )
+        else:
+            # For CSV path, filter by date string first
+            df_csv_filtered_date = df_csv.loc[
+                df_csv.index.strftime("%Y-%m-%d") == fcst_index[0].date().strftime("%Y-%m-%d")
+            ]
+            return pd.DataFrame(
+                df_csv_filtered_date.between_time(first_hour, last_hour).values,
+                index=fcst_index,
+            )
+
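The day extraction above leans on pandas' `between_time`, which filters rows by wall-clock time of day. A short self-contained illustration:

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-06-15 00:00", periods=48, freq="30min", tz="utc")
df = pd.DataFrame({"yhat": np.arange(48.0)}, index=idx)

# Keep only the rows whose time of day falls between 06:00 and 12:00 inclusive,
# the same slicing _extract_daily_forecast applies per day.
morning = df.between_time("06:00", "12:00")
```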
+    def get_forecast_out_from_csv_or_list(
+        self,
+        df_final: pd.DataFrame,
+        forecast_dates_csv: pd.date_range,
+        csv_path: str,
+        data_list: list | None = None,
+        list_and_perfect: bool | None = False,
+    ) -> pd.DataFrame:
         r"""
-        Get the forecast data as a DataFrame from a CSV file.
-
-        The data contained in the CSV file should be a 24h forecast with the same frequency as
-        the main 'freq' parameter in the configuration file. The timestamp will not be used and
+        Get the forecast data as a DataFrame from a CSV file.
+
+        The data contained in the CSV file should be a 24h forecast with the same frequency as
+        the main 'optimization_time_step' parameter in the configuration file. The timestamp will not be used and
         a new DateTimeIndex is generated to fit the timestamp index of the input data in 'df_final'.
-
+
         :param df_final: The DataFrame containing the input data.
         :type df_final: pd.DataFrame
         :param forecast_dates_csv: The forecast dates vector
@@ -574,93 +1170,294 @@ class Forecast(object):
         :rtype: pd.DataFrame

         """
-        [… 6 lines removed; content lost in this rendering: old data-list loading block …]
-            if list_and_perfect:
-                days_list = df_final.index.day.unique().tolist()
-            else:
-                days_list = df_csv.index.day.unique().tolist()
-        else:
-            if not os.path.exists(csv_path):
-                csv_path = self.emhass_conf['data_path'] / csv_path
-            load_csv_file_path = csv_path
-            df_csv = pd.read_csv(load_csv_file_path, header=None, names=['ts', 'yhat'])
-            df_csv.index = forecast_dates_csv
-            df_csv.drop(['ts'], axis=1, inplace=True)
-            df_csv = set_df_index_freq(df_csv)
+        # Load the source data (df_csv)
+        df_csv = self._load_forecast_data(csv_path, data_list, forecast_dates_csv)
+        # Configure timing source (df_timing) and iteration list
+        if csv_path is None or list_and_perfect:
+            df_final = set_df_index_freq(df_final)
+            df_timing = copy.deepcopy(df_final)
            days_list = df_final.index.day.unique().tolist()
-
+        else:
+            df_timing = copy.deepcopy(df_csv)
+            days_list = df_csv.index.day.unique().tolist()
+        # Iterate over days and collect forecast parts
+        forecast_parts = []
         for day in days_list:
-            [… 18 lines removed; content lost in this rendering: per-day timing computation and the start of the old day-split branches …]
-                        df_csv.between_time(first_hour, last_hour).values,
-                        index=fcst_index)
-                else:
-                    forecast_out = pd.DataFrame(
-                        df_csv.loc[fcst_index,:].between_time(first_hour, last_hour).values,
-                        index=fcst_index)
-            else:
-                forecast_out = pd.DataFrame(
-                    df_csv.between_time(first_hour, last_hour).values,
-                    index=fcst_index)
-            else:
-                if csv_path is None:
-                    if list_and_perfect:
-                        forecast_tp = pd.DataFrame(
-                            df_csv.between_time(first_hour, last_hour).values,
-                            index=fcst_index)
-                    else:
-                        forecast_tp = pd.DataFrame(
-                            df_csv.loc[fcst_index,:].between_time(first_hour, last_hour).values,
-                            index=fcst_index)
-                else:
-                    forecast_tp = pd.DataFrame(
-                        df_csv.between_time(first_hour, last_hour).values,
-                        index=fcst_index)
-                forecast_out = pd.concat([forecast_out, forecast_tp], axis=0)
+            daily_df = self._extract_daily_forecast(
+                day, df_timing, df_csv, csv_path, list_and_perfect
+            )
+            forecast_parts.append(daily_df)
+        if forecast_parts:
+            forecast_out = pd.concat(forecast_parts, axis=0)
+        else:
+            forecast_out = pd.DataFrame()
+        # Merge with final DataFrame to align indices
+        merged = pd.merge_asof(
+            df_final.sort_index(),
+            forecast_out.sort_index(),
+            left_index=True,
+            right_index=True,
+            direction="nearest",
+        )
+        # Keep only forecast_out columns
+        forecast_out = merged[forecast_out.columns]
         return forecast_out
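The final `merge_asof` step tolerates slightly shifted timestamps between the optimization index and the CSV/list data by snapping each row to its nearest neighbour. A toy example of that alignment:

```python
import pandas as pd

left = pd.DataFrame(
    {"load": [100.0, 110.0, 120.0]},
    index=pd.date_range("2024-06-15 00:00", periods=3, freq="30min", tz="utc"),
)
right = pd.DataFrame(
    {"yhat": [1.0, 2.0, 3.0]},
    index=pd.date_range("2024-06-15 00:01", periods=3, freq="30min", tz="utc"),
)
# Each left timestamp picks the closest right row, despite the 1-minute offset.
merged = pd.merge_asof(
    left.sort_index(), right.sort_index(),
    left_index=True, right_index=True, direction="nearest",
)
```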
-
-    [… 5 more lines removed; content lost in this rendering: old get_load_forecast signature …]
+
+    @staticmethod
+    def resample_data(data, freq, current_freq):
+        r"""
+        Resample a DataFrame with a custom frequency.
+
+        :param data: Original time series data with a DateTimeIndex.
+        :type data: pd.DataFrame
+        :param freq: Desired frequency for resampling (e.g., pd.Timedelta("10min")).
+        :type freq: pd.Timedelta
+        :return: Resampled data at the specified frequency.
+        :rtype: pd.DataFrame
+        """
+        if freq > current_freq:
+            # Downsampling
+            # Use 'mean' to aggregate or choose other options ('sum', 'max', etc.)
+            resampled_data = data.resample(freq).mean()
+        elif freq < current_freq:
+            # Upsampling
+            # Use 'asfreq' to create empty slots, then interpolate
+            resampled_data = data.resample(freq).asfreq()
+            resampled_data = resampled_data.interpolate(method="time")
+        else:
+            # No resampling needed
+            resampled_data = data.copy()
+        return resampled_data
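The down/up-sampling split in `resample_data` maps directly onto pandas resampling; for instance:

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-06-15", periods=8, freq="30min", tz="utc")
data = pd.DataFrame({"load": np.random.rand(8)}, index=idx)

hourly = data.resample(pd.Timedelta("1h")).mean()     # downsample: aggregate with mean
fine = data.resample(pd.Timedelta("10min")).asfreq()  # upsample: create empty slots...
fine = fine.interpolate(method="time")                # ...then fill them by time interpolation
```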
+
+    @staticmethod
+    def get_typical_load_forecast(data, forecast_date):
         r"""
+        Forecast the load profile for the next day based on historic data.
+
+        :param data: A DataFrame with a DateTimeIndex containing the historic load data.
+            Must include a 'load' column.
+        :type data: pd.DataFrame
+        :param forecast_date: The date for which the forecast will be generated.
+        :type forecast_date: pd.Timestamp
+        :return: A Series with the forecasted load profile for the next day and a list of days used
+            to calculate the forecast.
+        :rtype: tuple (pd.Series, list)
+        """
+        # Ensure the 'load' column exists
+        if "load" not in data.columns:
+            raise ValueError("Data must have a 'load' column.")
+        # Filter historic data for the same month and day of the week
+        month = forecast_date.month
+        day_of_week = forecast_date.dayofweek
+        historic_data = data[(data.index.month == month) & (data.index.dayofweek == day_of_week)]
+        used_days = np.unique(historic_data.index.date)
+        # Align all historic data to the forecast day
+        aligned_data = []
+        for day in used_days:
+            daily_data = data[data.index.date == pd.Timestamp(day).date()]
+            aligned_daily_data = daily_data.copy()
+            aligned_daily_data.index = aligned_daily_data.index.map(
+                lambda x: x.replace(
+                    year=forecast_date.year,
+                    month=forecast_date.month,
+                    day=forecast_date.day,
+                )
+            )
+            aligned_data.append(aligned_daily_data)
+        # Combine all aligned historic data into a single DataFrame
+        combined_data = pd.concat(aligned_data)
+        # Compute the mean load for each timestamp
+        forecast = combined_data.groupby(combined_data.index).mean()
+        return forecast, used_days
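In other words, the typical profile is the average of all historic days sharing the forecast day's month and weekday. A simplified standalone version of that selection (grouping by time of day instead of re-dated timestamps, on synthetic data):

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-05-01", "2024-06-30 23:30", freq="30min", tz="utc")
hist = pd.DataFrame({"load": np.random.rand(len(idx)) * 1000.0}, index=idx)

forecast_date = pd.Timestamp("2024-06-17")  # a Monday in June
same_profile = hist[
    (hist.index.month == forecast_date.month)
    & (hist.index.dayofweek == forecast_date.dayofweek)
]
# Average the matching days at each time of day to get the typical curve.
typical = same_profile.groupby(same_profile.index.time).mean()
```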
+
+    async def _prepare_hass_load_data(
+        self, days_min_load_forecast: int, method: str
+    ) -> pd.DataFrame | bool:
+        """Helper to retrieve and prepare load data from Home Assistant."""
+        self.logger.info(f"Retrieving data from hass for load forecast using method = {method}")
+        var_list = [self.var_load]
+        var_replace_zero = None
+        var_interp = [self.var_load]
+        time_zone_load_forecast = None
+        rh = RetrieveHass(
+            self.retrieve_hass_conf["hass_url"],
+            self.retrieve_hass_conf["long_lived_token"],
+            self.freq,
+            time_zone_load_forecast,
+            self.params,
+            self.emhass_conf,
+            self.logger,
+        )
+        if self.get_data_from_file:
+            filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
+            async with aiofiles.open(filename_path, "rb") as inp:
+                content = await inp.read()
+                rh.df_final, days_list, var_list, rh.ha_config = pickle.loads(content)
+            self.var_load = var_list[0]
+            self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = self.var_load
+            var_interp = [var_list[0]]
+            self.var_list = [var_list[0]]
+            rh.var_list = self.var_list
+            self.var_load_new = self.var_load + "_positive"
+        else:
+            days_list = get_days_list(days_min_load_forecast)
+            if not await rh.get_data(days_list, var_list):
+                return False
+        if not rh.prepare_data(
+            self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
+            load_negative=self.retrieve_hass_conf["load_negative"],
+            set_zero_min=self.retrieve_hass_conf["set_zero_min"],
+            var_replace_zero=var_replace_zero,
+            var_interp=var_interp,
+        ):
+            return False
+        return rh.df_final.copy()[[self.var_load_new]]
+
+    async def _get_load_forecast_typical(self) -> pd.DataFrame:
+        """Helper to generate typical load forecast."""
+        model_type = "long_train_data"
+        data_path = self.emhass_conf["data_path"] / str(model_type + ".pkl")
+        async with aiofiles.open(data_path, "rb") as fid:
+            content = await fid.read()
+            data, _, _, _ = pickle.loads(content)
+        # Ensure the data index is timezone-aware
+        data.index = (
+            data.index.tz_localize(
+                self.forecast_dates.tz,
+                ambiguous="infer",
+                nonexistent="shift_forward",
+            )
+            if data.index.tz is None
+            else data.index.tz_convert(self.forecast_dates.tz)
+        )
+        data = data[[self.var_load]]
+        current_freq = pd.Timedelta("30min")
+        if self.freq != current_freq:
+            data = Forecast.resample_data(data, self.freq, current_freq)
+        dates_list = np.unique(self.forecast_dates.date).tolist()
+        forecast = pd.DataFrame()
+        for date in dates_list:
+            forecast_date = pd.Timestamp(date)
+            data.columns = ["load"]
+            forecast_tmp, used_days = Forecast.get_typical_load_forecast(data, forecast_date)
+            self.logger.debug(f"Using {len(used_days)} days of data to generate the forecast.")
+            forecast_tmp = forecast_tmp * self.plant_conf["maximum_power_from_grid"] / 9000
+            if len(forecast) == 0:
+                forecast = forecast_tmp
+            else:
+                forecast = pd.concat([forecast, forecast_tmp], axis=0)
+        forecast_out = forecast.loc[forecast.index.intersection(self.forecast_dates)]
+        forecast_out.index = self.forecast_dates
+        forecast_out.index.name = "ts"
+        return forecast_out.rename(columns={"load": "yhat"})
+
+    def _get_load_forecast_naive(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Helper for naive forecast."""
+        forecast_horizon = len(self.forecast_dates)
+        historical_values = df.iloc[-forecast_horizon:]
+        return pd.DataFrame(historical_values.values, index=self.forecast_dates, columns=["yhat"])
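The naive (persistence) model simply replays the most recent window of observed load over the forecast horizon:

```python
import numpy as np
import pandas as pd

history = pd.Series(np.random.rand(96) * 1000.0)     # past load samples
horizon = 48                                         # forecast steps needed
naive_forecast = history.iloc[-horizon:].to_numpy()  # last `horizon` observations, replayed
```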
+
+    async def _get_load_forecast_ml(
+        self, df: pd.DataFrame, use_last_window: bool, mlf, debug: bool
+    ) -> pd.DataFrame | bool:
+        """Helper for ML forecast."""
+        model_type = self.params["passed_data"]["model_type"]
+        filename = model_type + "_mlf.pkl"
+        filename_path = self.emhass_conf["data_path"] / filename
+        if not debug:
+            if filename_path.is_file():
+                async with aiofiles.open(filename_path, "rb") as inp:
+                    content = await inp.read()
+                    mlf = pickle.loads(content)
+            else:
+                self.logger.error(
+                    "The ML forecaster file was not found, please run a model fit method before this predict method"
+                )
+                return False
+        data_last_window = None
+        if use_last_window:
+            data_last_window = copy.deepcopy(df)
+            data_last_window = data_last_window.rename(columns={self.var_load_new: self.var_load})
+        forecast_out = await mlf.predict(data_last_window)
+        self.logger.debug(
+            "Number of ML predict forcast data generated (lags_opt): "
+            + str(len(forecast_out.index))
+        )
+        self.logger.debug(
+            "Number of forcast dates obtained (prediction_horizon): "
+            + str(len(self.forecast_dates))
+        )
+        if len(self.forecast_dates) < len(forecast_out.index):
+            forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
+        elif len(self.forecast_dates) > len(forecast_out.index):
+            self.logger.error(
+                "Unable to obtain: "
+                + str(len(self.forecast_dates))
+                + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
+            )
+            return False
+        data_dict = {
+            "ts": self.forecast_dates,
+            "yhat": forecast_out.values.tolist(),
+        }
+        data = pd.DataFrame.from_dict(data_dict)
+        data.set_index("ts", inplace=True)
+        return data.copy().loc[self.forecast_dates]
+
+    def _get_load_forecast_csv(self, csv_path: str) -> pd.DataFrame:
+        """Helper to retrieve load data from CSV."""
+        df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
+        if len(df_csv) < len(self.forecast_dates):
+            self.logger.error("Passed data from CSV is not long enough")
+            return None
+        df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
+        df_csv.index = self.forecast_dates
+        df_csv = df_csv.drop(["ts"], axis=1)
+        return df_csv.copy().loc[self.forecast_dates]
+
+    def _get_load_forecast_list(self) -> pd.DataFrame:
+        """Helper to retrieve load data from a passed list."""
+        data_list = self.params["passed_data"]["load_power_forecast"]
+        if (
+            len(data_list) < len(self.forecast_dates)
+            and self.params["passed_data"]["prediction_horizon"] is None
+        ):
+            self.logger.error(error_msg_list_not_long_enough)
+            return False
+        data_list = data_list[0 : len(self.forecast_dates)]
+        data_dict = {"ts": self.forecast_dates, "yhat": data_list}
+        data = pd.DataFrame.from_dict(data_dict)
+        data.set_index("ts", inplace=True)
+        return data.copy().loc[self.forecast_dates]
+
+    async def get_load_forecast(
+        self,
+        days_min_load_forecast: int | None = 3,
+        method: str | None = "typical",
+        csv_path: str | None = "data_load_forecast.csv",
+        set_mix_forecast: bool | None = False,
+        df_now: pd.DataFrame | None = pd.DataFrame(),
+        use_last_window: bool | None = True,
+        mlf: MLForecaster | None = None,
+        debug: bool | None = False,
+    ) -> pd.Series:
+        """
         Get and generate the load forecast data.
-
+
         :param days_min_load_forecast: The number of last days to retrieve that \
             will be used to generate a naive forecast, defaults to 3
         :type days_min_load_forecast: int, optional
         :param method: The method to be used to generate load forecast, the options \
-            are 'naive' for a persistence model, 'mlforecaster' for using a custom \
+            are 'typical' for a typical household load consumption curve, \
+            are 'naive' for a persistence model, 'mlforecaster' for using a custom \
             previously fitted machine learning model, 'csv' to read the forecast from \
             a CSV file and 'list' to use data directly passed at runtime as a list of \
-            values. Defaults to 'naive'.
+            values. Defaults to 'typical'.
         :type method: str, optional
         :param csv_path: The path to the CSV file used when method = 'csv', \
             defaults to "/data/data_load_forecast.csv"
         :type csv_path: str, optional
-        :param set_mix_forecast: Use a mixed …
+        :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
         :type set_mix_forecast: Bool, optional
         :param df_now: The DataFrame containing the now/current data.
         :type df_now: pd.DataFrame, optional
@@ -679,123 +1476,60 @@ class Forecast(object):
         :rtype: pd.DataFrame

         """
-        csv_path = self.emhass_conf['data_path'] / csv_path
-        [… 5 lines removed; content lost in this rendering …]
-        var_interp = [self.var_load]
-        time_zone_load_foreacast = None
-        # We will need to retrieve a new set of load data according to the days_min_load_forecast parameter
-        rh = RetrieveHass(self.retrieve_hass_conf['hass_url'], self.retrieve_hass_conf['long_lived_token'],
-                          self.freq, time_zone_load_foreacast, self.params, self.emhass_conf, self.logger)
-        if self.get_data_from_file:
-            filename_path = self.emhass_conf['data_path'] / 'test_df_final.pkl'
-            with open(filename_path, 'rb') as inp:
-                rh.df_final, days_list, var_list = pickle.load(inp)
-            self.var_load = var_list[0]
-            self.retrieve_hass_conf['var_load'] = self.var_load
-            var_interp = [var_list[0]]
-            self.var_list = [var_list[0]]
-            self.var_load_new = self.var_load+'_positive'
-        else:
-            days_list = get_days_list(days_min_load_forecast)
-            if not rh.get_data(days_list, var_list):
-                return False
-            if not rh.prepare_data(
-                self.retrieve_hass_conf['var_load'], load_negative = self.retrieve_hass_conf['load_negative'],
-                set_zero_min = self.retrieve_hass_conf['set_zero_min'],
-                var_replace_zero = var_replace_zero, var_interp = var_interp):
+        csv_path = self.emhass_conf["data_path"] / csv_path
+        # Retrieve Data from Home Assistant if needed
+        df = None
+        if method in ["naive", "mlforecaster"]:
+            df = await self._prepare_hass_load_data(days_min_load_forecast, method)
+            if df is False:
                 return False
-
-        if method == 'naive':
-            [… 2 lines removed; content lost in this rendering …]
-            forecast_out = …
-            [… 1 line removed; content lost in this rendering …]
-            forecast_out = …
-            forecast_out …
-        elif method == 'mlforecaster': # using a custom forecast model with machine learning
-            # Load model
-            model_type = self.params['passed_data']['model_type']
-            filename = model_type+'_mlf.pkl'
-            filename_path = self.emhass_conf['data_path'] / filename
-            if not debug:
-                if filename_path.is_file():
-                    with open(filename_path, 'rb') as inp:
-                        mlf = pickle.load(inp)
-                else:
-                    self.logger.error("The ML forecaster file was not found, please run a model fit method before this predict method")
-                    return False
-            # Make predictions
-            if use_last_window:
-                data_last_window = copy.deepcopy(df)
-                data_last_window = data_last_window.rename(columns={self.var_load_new: self.var_load})
-            else:
-                data_last_window = None
-            forecast_out = mlf.predict(data_last_window)
-            # Force forecast length to avoid mismatches
-            self.logger.debug("Number of ML predict forcast data generated (lags_opt): " + str(len(forecast_out.index)))
-            self.logger.debug("Number of forcast dates obtained: " + str(len(self.forecast_dates)))
-            if len(self.forecast_dates) < len(forecast_out.index):
-                forecast_out = forecast_out.iloc[0:len(self.forecast_dates)]
-            # To be removed once bug is fixed
-            elif len(self.forecast_dates) > len(forecast_out.index):
-                self.logger.error("Unable to obtain: " + str(len(self.forecast_dates)) + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters")
+        # Generate Forecast based on Method
+        if method == "typical":
+            forecast_out = await self._get_load_forecast_typical()
+        elif method == "naive":
+            forecast_out = self._get_load_forecast_naive(df)
+        elif method == "mlforecaster":
+            forecast_out = await self._get_load_forecast_ml(df, use_last_window, mlf, debug)
+            if forecast_out is False:
                 return False
-            [… 5 lines removed; content lost in this rendering …]
-            forecast_out = …
-            [… 1 line removed; content lost in this rendering …]
-            load_csv_file_path = csv_path
-            df_csv = pd.read_csv(load_csv_file_path, header=None, names=['ts', 'yhat'])
-            if len(df_csv) < len(self.forecast_dates):
-                self.logger.error("Passed data from CSV is not long enough")
-            else:
-                # Ensure correct length
-                df_csv = df_csv.loc[df_csv.index[0:len(self.forecast_dates)],:]
-                # Define index
-                df_csv.index = self.forecast_dates
-                df_csv.drop(['ts'], axis=1, inplace=True)
-                forecast_out = df_csv.copy().loc[self.forecast_dates]
-        elif method == 'list': # reading a list of values
-            # Loading data from passed list
-            data_list = self.params['passed_data']['load_power_forecast']
-            # Check if the passed data has the correct length
-            if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
-                self.logger.error("Passed data from passed list is not long enough")
+        elif method == "csv":
+            forecast_out = self._get_load_forecast_csv(csv_path)
+            if forecast_out is None:
+                return False
+        elif method == "list":
+            forecast_out = self._get_load_forecast_list()
+            if forecast_out is False:
                 return False
-            else:
-                # Ensure correct length
-                data_list = data_list[0:len(self.forecast_dates)]
-                # Define DataFrame
-                data_dict = {'ts':self.forecast_dates, 'yhat':data_list}
-                data = pd.DataFrame.from_dict(data_dict)
-                # Define index
-                data.set_index('ts', inplace=True)
-                forecast_out = data.copy().loc[self.forecast_dates]
         else:
-            self.logger.error(…
+            self.logger.error(error_msg_method_not_valid)
             return False
-        [… 1 line removed; content lost in this rendering …]
+        # Post-processing (Mix Forecast)
+        p_load_forecast = copy.deepcopy(forecast_out["yhat"])
         if set_mix_forecast:
-            [… 8 lines removed; content lost in this rendering: old get_mix_forecast call and return …]
+            # Load forecasts don't need curtailment protection - always use feedback
+            p_load_forecast = Forecast.get_mix_forecast(
+                df_now,
+                p_load_forecast,
+                self.params["passed_data"]["alpha"],
+                self.params["passed_data"]["beta"],
+                self.var_load_new,
+                False,  # Never ignore feedback for load forecasts
+            )
+        self.logger.debug("get_load_forecast returning:\n%s", p_load_forecast)
+        return p_load_forecast
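A hypothetical runtime call exercising the 'list' branch of the dispatch above (assumes a configured `Forecast` instance `fcst`; parameter names follow the code shown in this diff):

```python
import asyncio

async def demo_list_forecast(fcst):
    fcst.params["passed_data"]["load_power_forecast"] = [500.0] * len(fcst.forecast_dates)
    fcst.params["passed_data"]["prediction_horizon"] = None
    return await fcst.get_load_forecast(method="list")

# asyncio.run(demo_list_forecast(fcst))
```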
+
+    def get_load_cost_forecast(
+        self,
+        df_final: pd.DataFrame,
+        method: str | None = "hp_hc_periods",
+        csv_path: str | None = "data_load_cost_forecast.csv",
+        list_and_perfect: bool | None = False,
+    ) -> pd.DataFrame:
         r"""
         Get the unit cost for the load consumption based on multiple tariff \
         periods. This is the cost of the energy from the utility in a vector \
         sampled at the fixed freq value.
-
+
         :param df_final: The DataFrame containing the input data.
         :type df_final: pd.DataFrame
         :param method: The method to be used to generate load cost forecast, \
@@ -810,50 +1544,77 @@ class Forecast(object):
         :rtype: pd.DataFrame

         """
-        csv_path = self.emhass_conf['data_path'] / csv_path
-        if method == 'hp_hc_periods':
-            df_final[self.var_load_cost] = self.optim_conf[…
+        csv_path = self.emhass_conf["data_path"] / csv_path
+        if method == "hp_hc_periods":
+            df_final[self.var_load_cost] = self.optim_conf["load_offpeak_hours_cost"]
             list_df_hp = []
-            for key, period_hp in self.optim_conf['list_hp_periods'].items():
-                list_df_hp.append(…
-                [… 1 line removed; content lost in this rendering …]
+            for _key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
+                list_df_hp.append(
+                    df_final[self.var_load_cost].between_time(
+                        period_hp[0]["start"], period_hp[1]["end"]
+                    )
+                )
             for df_hp in list_df_hp:
-                df_final.loc[df_hp.index, self.var_load_cost] = self.optim_conf[…
-                [… 1 line removed; content lost in this rendering …]
+                df_final.loc[df_hp.index, self.var_load_cost] = self.optim_conf[
+                    "load_peak_hours_cost"
+                ]
+        elif method == "csv":
             forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
             forecast_out = self.get_forecast_out_from_csv_or_list(
-                df_final, forecast_dates_csv, csv_path)
-            [… 2 lines removed; content lost in this rendering …]
+                df_final, forecast_dates_csv, csv_path
+            )
+            # Ensure correct length
+            if not list_and_perfect:
+                forecast_out = forecast_out[0 : len(self.forecast_dates)]
+                df_final = df_final[0 : len(self.forecast_dates)].copy()
+            # Convert to Series if needed and align index
+            if not isinstance(forecast_out, pd.Series):
+                forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
+            df_final.loc[:, self.var_load_cost] = forecast_out
+        elif method == "list":  # reading a list of values
             # Loading data from passed list
-            data_list = self.params['passed_data']['load_cost_forecast']
+            data_list = self.params["passed_data"]["load_cost_forecast"]
             # Check if the passed data has the correct length
-            if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
-                [… 1 line removed; content lost in this rendering …]
+            if (
+                len(data_list) < len(self.forecast_dates)
+                and self.params["passed_data"]["prediction_horizon"] is None
+            ):
+                self.logger.error(error_msg_list_not_long_enough)
                return False
            else:
                # Ensure correct length
-                data_list = data_list[0:len(self.forecast_dates)]
+                data_list = data_list[0 : len(self.forecast_dates)]
+                if not list_and_perfect:
+                    df_final = df_final.iloc[0 : len(self.forecast_dates)]
                # Define the correct dates
                forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
                forecast_out = self.get_forecast_out_from_csv_or_list(
-                    df_final, forecast_dates_csv, None,
-                    data_list=data_list, list_and_perfect=list_and_perfect)
+                    df_final,
+                    forecast_dates_csv,
+                    None,
+                    data_list=data_list,
+                    list_and_perfect=list_and_perfect,
+                )
+                df_final = df_final.copy()
                df_final[self.var_load_cost] = forecast_out
        else:
-            self.logger.error(…
+            self.logger.error(error_msg_method_not_valid)
            return False
+        self.logger.debug("get_load_cost_forecast returning:\n%s", df_final)
        return df_final
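The 'hp_hc_periods' branch is a fill-then-overwrite pattern: start from the off-peak price everywhere, then stamp the peak price onto each configured window. A minimal sketch with one hypothetical peak window:

```python
import pandas as pd

idx = pd.date_range("2024-06-15", periods=48, freq="30min", tz="utc")
cost = pd.Series(0.12, index=idx)           # off-peak price as the default
peak = cost.between_time("07:00", "22:00")  # one hypothetical peak-hour window
cost.loc[peak.index] = 0.20                 # overwrite the window with the peak price
```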
-
-    def get_prod_price_forecast(self, df_final: pd.DataFrame, method: Optional[str] = 'constant',
-                                csv_path: Optional[str] = "data_prod_price_forecast.csv",
-                                list_and_perfect: Optional[bool] = False) -> pd.DataFrame:

+    def get_prod_price_forecast(
+        self,
+        df_final: pd.DataFrame,
+        method: str | None = "constant",
+        csv_path: str | None = "data_prod_price_forecast.csv",
+        list_and_perfect: bool | None = False,
+    ) -> pd.DataFrame:
         r"""
         Get the unit power production price for the energy injected to the grid.\
         This is the price of the energy injected to the utility in a vector \
         sampled at the fixed freq value.
-
+
         :param df_input_data: The DataFrame containing all the input data retrieved
             from hass
         :type df_input_data: pd.DataFrame
@@ -869,31 +1630,130 @@ class Forecast(object):
         :rtype: pd.DataFrame

         """
-        csv_path = self.emhass_conf['data_path'] / csv_path
-        if method == 'constant':
-            df_final[self.var_prod_price] = self.optim_conf[…
-        elif method == 'csv':
+        csv_path = self.emhass_conf["data_path"] / csv_path
+        if method == "constant":
+            df_final[self.var_prod_price] = self.optim_conf["photovoltaic_production_sell_price"]
+        elif method == "csv":
             forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
             forecast_out = self.get_forecast_out_from_csv_or_list(
-                df_final, forecast_dates_csv, csv_path)
-            [… 2 lines removed; content lost in this rendering …]
+                df_final, forecast_dates_csv, csv_path
+            )
+            # Ensure correct length
+            if not list_and_perfect:
+                forecast_out = forecast_out[0 : len(self.forecast_dates)]
+                df_final = df_final[0 : len(self.forecast_dates)].copy()
+            # Convert to Series if needed and align index
+            if not isinstance(forecast_out, pd.Series):
+                forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
+            df_final.loc[:, self.var_prod_price] = forecast_out
+        elif method == "list":  # reading a list of values
             # Loading data from passed list
-            data_list = self.params['passed_data']['prod_price_forecast']
+            data_list = self.params["passed_data"]["prod_price_forecast"]
             # Check if the passed data has the correct length
-            if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
-                [… 1 line removed; content lost in this rendering …]
+            if (
+                len(data_list) < len(self.forecast_dates)
+                and self.params["passed_data"]["prediction_horizon"] is None
+            ):
+                self.logger.error(error_msg_list_not_long_enough)
                return False
            else:
                # Ensure correct length
-                data_list = data_list[0:len(self.forecast_dates)]
+                data_list = data_list[0 : len(self.forecast_dates)]
+                if not list_and_perfect:
+                    df_final = df_final.iloc[0 : len(self.forecast_dates)]
                # Define the correct dates
                forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
                forecast_out = self.get_forecast_out_from_csv_or_list(
-                    df_final, forecast_dates_csv, None,
-                    data_list=data_list, list_and_perfect=list_and_perfect)
+                    df_final,
+                    forecast_dates_csv,
+                    None,
+                    data_list=data_list,
+                    list_and_perfect=list_and_perfect,
+                )
+                df_final = df_final.copy()
                df_final[self.var_prod_price] = forecast_out
        else:
-            self.logger.error(…
+            self.logger.error(error_msg_method_not_valid)
            return False
-        return df_final
+        self.logger.debug("get_prod_price_forecast returning:\n%s", df_final)
+        return df_final
+
+    async def get_cached_forecast_data(self, w_forecast_cache_path) -> pd.DataFrame:
+        r"""
+        Get cached weather forecast data from file.
+
+        :param w_forecast_cache_path: the path to file.
+        :type method: Any
+        :return: The DataFrame containing the forecasted data
+        :rtype: pd.DataFrame
+
+        """
+        async with aiofiles.open(w_forecast_cache_path, "rb") as file:
+            content = await file.read()
+            data = pickle.loads(content)
+        if not isinstance(data, pd.DataFrame) or len(data) < len(self.forecast_dates):
+            self.logger.error("There has been a error obtaining cached forecast data.")
+            self.logger.error(
+                "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from forecast API and cache."
+            )
+            self.logger.warning(
+                "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
+            )
+            os.remove(w_forecast_cache_path)
+            return False
+        # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast Dataframe size to appropriate length)
+        if self.forecast_dates[0] in data.index and self.forecast_dates[-1] in data.index:
+            data = data.loc[self.forecast_dates[0] : self.forecast_dates[-1]]
+            self.logger.info("Retrieved forecast data from the previously saved cache.")
+        else:
+            self.logger.error(
+                "Unable to obtain cached forecast data within the requested timeframe range."
+            )
+            self.logger.error(
+                "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from forecast API and cache."
+            )
+            self.logger.warning(
+                "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
+            )
+            os.remove(w_forecast_cache_path)
+            return False
+        return data
+
+    async def set_cached_forecast_data(self, w_forecast_cache_path, data) -> pd.DataFrame:
+        r"""
+        Set generated weather forecast data to file.
+        Trim data to match the original requested forecast dates
+
+        :param w_forecast_cache_path: the path to file.
+        :type method: Any
+        :param: The DataFrame containing the forecasted data
+        :type: pd.DataFrame
+        :return: The DataFrame containing the forecasted data
+        :rtype: pd.DataFrame
+
+        """
+        async with aiofiles.open(w_forecast_cache_path, "wb") as file:
+            content = pickle.dumps(data)
+            await file.write(content)
+        if not os.path.isfile(w_forecast_cache_path):
+            self.logger.warning("forecast data could not be saved to file.")
+        else:
+            self.logger.info("Saved the forecast results to cache, for later reference.")
+
+        # Trim cached data to match requested dates
+        end_forecast = (self.start_forecast + self.optim_conf["delta_forecast_daily"]).replace(
+            microsecond=0
+        )
+        forecast_dates = (
+            pd.date_range(
+                start=self.start_forecast,
+                end=end_forecast - self.freq,
+                freq=self.freq,
+                tz=self.time_zone,
+            )
+            .tz_convert("utc")
+            .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
+            .tz_convert(self.time_zone)
+        )
+        data = data.loc[forecast_dates[0] : forecast_dates[-1]]
+        return data
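The cache is a plain pickle written and read through aiofiles; a minimal round-trip sketch of the same pattern:

```python
import pickle

import aiofiles

async def cache_roundtrip(path, df):
    # Write the DataFrame as pickled bytes, then read it back and unpickle,
    # mirroring set_cached_forecast_data / get_cached_forecast_data above.
    async with aiofiles.open(path, "wb") as f:
        await f.write(pickle.dumps(df))
    async with aiofiles.open(path, "rb") as f:
        return pickle.loads(await f.read())
```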
|