emhass 0.11.4__py3-none-any.whl → 0.15.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emhass/command_line.py +1481 -811
- emhass/connection_manager.py +108 -0
- emhass/data/associations.csv +37 -2
- emhass/data/cec_inverters.pbz2 +0 -0
- emhass/data/cec_modules.pbz2 +0 -0
- emhass/data/config_defaults.json +53 -49
- emhass/forecast.py +1264 -731
- emhass/img/emhass_icon.png +0 -0
- emhass/machine_learning_forecaster.py +534 -281
- emhass/machine_learning_regressor.py +141 -125
- emhass/optimization.py +1173 -585
- emhass/retrieve_hass.py +958 -263
- emhass/static/advanced.html +7 -0
- emhass/static/configuration_list.html +5 -1
- emhass/static/configuration_script.js +146 -62
- emhass/static/data/param_definitions.json +215 -48
- emhass/static/script.js +58 -26
- emhass/static/style.css +6 -8
- emhass/templates/configuration.html +5 -3
- emhass/templates/index.html +8 -6
- emhass/templates/template.html +4 -5
- emhass/utils.py +1152 -403
- emhass/web_server.py +565 -379
- emhass/websocket_client.py +224 -0
- emhass-0.15.5.dist-info/METADATA +164 -0
- emhass-0.15.5.dist-info/RECORD +34 -0
- {emhass-0.11.4.dist-info → emhass-0.15.5.dist-info}/WHEEL +1 -2
- emhass-0.15.5.dist-info/entry_points.txt +2 -0
- emhass-0.11.4.dist-info/METADATA +0 -666
- emhass-0.11.4.dist-info/RECORD +0 -32
- emhass-0.11.4.dist-info/entry_points.txt +0 -2
- emhass-0.11.4.dist-info/top_level.txt +0 -1
- {emhass-0.11.4.dist-info → emhass-0.15.5.dist-info/licenses}/LICENSE +0 -0
emhass/forecast.py
CHANGED
```diff
@@ -1,101 +1,104 @@
-
-# -*- coding: utf-8 -*-
-
+import asyncio
 import bz2
 import copy
-import json
 import logging
 import os
 import pickle
 import pickle as cPickle
+import re
 from datetime import datetime, timedelta
-from typing import Optional
+from itertools import zip_longest
+from urllib.parse import quote
 
+import aiofiles
+import aiohttp
 import numpy as np
+import orjson
 import pandas as pd
-import pvlib
-from bs4 import BeautifulSoup
 from pvlib.irradiance import disc
 from pvlib.location import Location
 from pvlib.modelchain import ModelChain
 from pvlib.pvsystem import PVSystem
+from pvlib.solarposition import get_solarposition
 from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
-from requests import get
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
 
 from emhass.machine_learning_forecaster import MLForecaster
+from emhass.machine_learning_regressor import MLRegressor
 from emhass.retrieve_hass import RetrieveHass
-from emhass.utils import get_days_list, set_df_index_freq
+from emhass.utils import add_date_features, get_days_list, set_df_index_freq
+
+header_accept = "application/json"
+error_msg_list_not_long_enough = "Passed data from passed list is not long enough"
+error_msg_method_not_valid = "Passed method is not valid"
 
 
-class Forecast(object):
+class Forecast:
     r"""
     Generate weather, load and costs forecasts needed as inputs to the optimization.
-
+
     In EMHASS we have basically 4 forecasts to deal with:
-
+
     - PV power production forecast (internally based on the weather forecast and the
       characteristics of your PV plant). This is given in Watts.
-
+
     - Load power forecast: how much power your house will demand on the next 24h. This
       is given in Watts.
-
+
     - PV production selling price forecast: at what price are you selling your excess
       PV production on the next 24h. This is given in EUR/kWh.
-
+
     - Load cost forecast: the price of the energy from the grid on the next 24h. This
       is given in EUR/kWh.
-
+
     There are methods that are generalized to the 4 forecast needed. For all there
     forecasts it is possible to pass the data either as a passed list of values or by
     reading from a CSV file. With these methods it is then possible to use data from
     external forecast providers.
-
-    Then there are the methods that are specific to each type of forecast and that
+
+    Then there are the methods that are specific to each type of forecast and that
     proposed forecast treated and generated internally by this EMHASS forecast class.
-    For the weather forecast a first method (`
-
-    This method seems stable but as with any scrape method it will fail if any changes
-    are made to the webpage API. Another method (`solcast`) is using the SolCast PV
-    production forecast service. A final method (`solar.forecast`) is using another
-    external service: Solar.Forecast, for which just the nominal PV peak installed
-    power should be provided. Search the forecast section on the documentation for examples
+    For the weather forecast a first method (`open-meteo`) uses a open-meteos API
+    proposing detailed forecasts based on Lat/Lon locations.
+    This method seems stable but as with any scrape method it will fail if any changes
+    are made to the webpage API. Another method (`solcast`) is using the SolCast PV
+    production forecast service. A final method (`solar.forecast`) is using another
+    external service: Solar.Forecast, for which just the nominal PV peak installed
+    power should be provided. Search the forecast section on the documentation for examples
     on how to implement these different methods.
-
+
     The `get_power_from_weather` method is proposed here to convert from irradiance
     data to electrical power. The PVLib module is used to model the PV plant.
-
-    The specific methods for the load forecast are a first method (`naive`) that uses
-    a naive approach, also called persistance. It simply assumes that the forecast for
-    a future period will be equal to the observed values in a past period. The past
+
+    The specific methods for the load forecast are a first method (`naive`) that uses
+    a naive approach, also called persistance. It simply assumes that the forecast for
+    a future period will be equal to the observed values in a past period. The past
     period is controlled using parameter `delta_forecast`. A second method (`mlforecaster`)
     uses an internal custom forecasting model using machine learning. There is a section
     in the documentation explaining how to use this method.
-
+
     .. note:: This custom machine learning model is introduced from v0.4.0. EMHASS \
         proposed this new `mlforecaster` class with `fit`, `predict` and `tune` methods. \
         Only the `predict` method is used here to generate new forecasts, but it is \
         necessary to previously fit a forecaster model and it is a good idea to \
         optimize the model hyperparameters using the `tune` method. See the dedicated \
         section in the documentation for more help.
-
+
     For the PV production selling price and Load cost forecasts the privileged method
     is a direct read from a user provided list of values. The list should be passed
     as a runtime parameter during the `curl` to the EMHASS API.
-
-    I reading from a CSV file, it should contain no header and the timestamped data
+
+    I reading from a CSV file, it should contain no header and the timestamped data
     should have the following format:
-
     2021-04-29 00:00:00+00:00,287.07
-
     2021-04-29 00:30:00+00:00,274.27
-
     2021-04-29 01:00:00+00:00,243.38
-
     ...
-
+
     The data columns in these files will correspond to the data in the units expected
     for each forecasting method.
-
+
     """
 
     def __init__(
```
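Note on the CSV convention kept by this docstring: the file has no header and one `timestamp,value` pair per line. A minimal sketch of how such data parses with pandas (the sample rows are copied from the docstring itself; the new `_get_weather_csv` helper further down in this diff uses the same `header=None, names=["ts", "yhat"]` pattern):

```python
import io

import pandas as pd

# Sample rows copied from the docstring above
csv_text = (
    "2021-04-29 00:00:00+00:00,287.07\n"
    "2021-04-29 00:30:00+00:00,274.27\n"
    "2021-04-29 01:00:00+00:00,243.38\n"
)
data = pd.read_csv(io.StringIO(csv_text), header=None, names=["ts", "yhat"])
data["ts"] = pd.to_datetime(data["ts"])
data = data.set_index("ts")
print(data)
```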
```diff
@@ -106,8 +109,8 @@ class Forecast(object):
         params: str,
         emhass_conf: dict,
         logger: logging.Logger,
-        opt_time_delta: Optional[int] = 24,
-        get_data_from_file: Optional[bool] = False,
+        opt_time_delta: int | None = 24,
+        get_data_from_file: bool | None = False,
     ) -> None:
         """
         Define constructor for the forecast class.
@@ -142,9 +145,9 @@ class Forecast(object):
         self.freq = self.retrieve_hass_conf["optimization_time_step"]
         self.time_zone = self.retrieve_hass_conf["time_zone"]
         self.method_ts_round = self.retrieve_hass_conf["method_ts_round"]
-        self.timeStep = self.freq.seconds / 3600  # in hours
         self.time_delta = pd.to_timedelta(opt_time_delta, "hours")
-        self.var_PV = self.retrieve_hass_conf["sensor_power_photovoltaics"]
+        self.var_pv = self.retrieve_hass_conf["sensor_power_photovoltaics"]
+        self.var_pv_forecast = self.retrieve_hass_conf["sensor_power_photovoltaics_forecast"]
         self.var_load = self.retrieve_hass_conf["sensor_power_load_no_var_loads"]
         self.var_load_new = self.var_load + "_positive"
         self.lat = self.retrieve_hass_conf["Latitude"]
@@ -154,33 +157,34 @@ class Forecast(object):
         self.get_data_from_file = get_data_from_file
         self.var_load_cost = "unit_load_cost"
         self.var_prod_price = "unit_prod_price"
-        if (params == None) or (params == "null"):
+        if (params is None) or (params == "null"):
             self.params = {}
         elif type(params) is dict:
             self.params = params
         else:
-            self.params = json.loads(params)
+            self.params = orjson.loads(params)
+
         if self.method_ts_round == "nearest":
-            self.start_forecast = pd.Timestamp(
-                datetime.now(), tz=self.time_zone
-            ).replace(microsecond=0)
+            self.start_forecast = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
         elif self.method_ts_round == "first":
             self.start_forecast = (
-                pd.Timestamp(datetime.now(), tz=self.time_zone)
-                .replace(microsecond=0)
-                .floor(freq=self.freq)
+                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
             )
         elif self.method_ts_round == "last":
             self.start_forecast = (
-                pd.Timestamp(datetime.now(), tz=self.time_zone)
-                .replace(microsecond=0)
-                .ceil(freq=self.freq)
+                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
             )
         else:
             self.logger.error("Wrong method_ts_round passed parameter")
-
-
-
+        # check if weather_forecast_cache, if so get 2x the amount of forecast
+        if self.params["passed_data"].get("weather_forecast_cache", False):
+            self.end_forecast = (
+                self.start_forecast + (self.optim_conf["delta_forecast_daily"] * 2)
+            ).replace(microsecond=0)
+        else:
+            self.end_forecast = (
+                self.start_forecast + self.optim_conf["delta_forecast_daily"]
+            ).replace(microsecond=0)
         self.forecast_dates = (
             pd.date_range(
                 start=self.start_forecast,
```
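The constructor refactor above replaces the `pd.Timestamp(datetime.now(), tz=...)` chains with the single-call `pd.Timestamp.now(tz=...)`. A small sketch of the `method_ts_round` semantics, assuming a hypothetical 30-minute `optimization_time_step` and timezone:

```python
import pandas as pd

freq = "30min"  # hypothetical optimization_time_step
now = pd.Timestamp.now(tz="Europe/Paris").replace(microsecond=0)

start_first = now.floor(freq=freq)  # method_ts_round == "first"
start_last = now.ceil(freq=freq)    # method_ts_round == "last"
print(now, start_first, start_last)
```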
```diff
@@ -192,288 +196,284 @@ class Forecast(object):
             .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
             .tz_convert(self.time_zone)
         )
-        if
-
-
-
-
-
+        if (
+            params is not None
+            and "prediction_horizon" in list(self.params["passed_data"].keys())
+            and self.params["passed_data"]["prediction_horizon"] is not None
+        ):
+            self.forecast_dates = self.forecast_dates[
+                0 : self.params["passed_data"]["prediction_horizon"]
+            ]
 
-    def get_weather_forecast(
-        self,
-        method: Optional[str] = "scrapper",
-        csv_path: Optional[str] = "data_weather_forecast.csv",
-    ) -> pd.DataFrame:
+    async def get_cached_open_meteo_forecast_json(
+        self, max_age: int | None = 30, forecast_days: int = 3
+    ) -> dict:
         r"""
-        Get and generate weather forecast data.
-
-
-
-
-
-
-
-
+        Get weather forecast json from Open-Meteo and cache it for re-use.
+        The response json is cached in the local file system and returned
+        on subsequent calls until it is older than max_age, at which point
+        attempts will be made to replace it with a new version.
+        The cached version will not be overwritten until a new version has
+        been successfully fetched from Open-Meteo.
+        In the event of connectivity issues, the cached version will continue
+        to be returned until such time as a new version can be successfully
+        fetched from Open-Meteo.
+        If you want to force reload, pass max_age value of zero.
+
+        :param max_age: The maximum age of the cached json file, in minutes,
+            before it is discarded and a new version fetched from Open-Meteo.
+            Defaults to 30 minutes.
+        :type max_age: int, optional
+        :param forecast_days: The number of days of forecast data required from Open-Meteo.
+            One additional day is always fetched from Open-Meteo so there is an extra data in the cache.
+            Defaults to 2 days (3 days fetched) to match the prior default.
+        :type forecast_days: int, optional
+        :return: The json containing the Open-Meteo forecast data
+        :rtype: dict
+
         """
-        csv_path = self.emhass_conf["data_path"] / csv_path
-        w_forecast_cache_path = os.path.abspath(
-            self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
-        )
 
-
-
-
+        # Ensure at least 3 weather forecast days (and 1 more than requested)
+        if forecast_days is None:
+            self.logger.debug("Open-Meteo forecast_days is missing so defaulting to 3 days")
+            forecast_days = 3
+        elif forecast_days < 3:
+            self.logger.debug(
+                "Open-Meteo forecast_days is low (%s) so defaulting to 3 days",
+                forecast_days,
+            )
+            forecast_days = 3
+        else:
+            forecast_days = forecast_days + 1
+
+        # The addition of -b.json file name suffix is because the time format
+        # has changed, and it avoids any attempt to use the old format file.
+        json_path = os.path.abspath(
+            self.emhass_conf["data_path"] / "cached-open-meteo-forecast-b.json"
         )
-        if
-
-
-
-
-
-
-
-
-
+        # The cached JSON file is always loaded, if it exists, as it is also a fallback
+        # in case the REST API call to Open-Meteo fails - the cached JSON will continue to
+        # be used until it can successfully fetch a new version from Open-Meteo.
+        data = None
+        use_cache = False
+        if os.path.exists(json_path):
+            delta = datetime.now() - datetime.fromtimestamp(os.path.getmtime(json_path))
+            json_age = int(delta / timedelta(seconds=60))
+            use_cache = json_age < max_age
+            self.logger.info("Loading existing cached Open-Meteo JSON file: %s", json_path)
+            async with aiofiles.open(json_path) as json_file:
+                content = await json_file.read()
+                data = orjson.loads(content)
+            if use_cache:
+                self.logger.info(
+                    "The cached Open-Meteo JSON file is recent (age=%.0fm, max_age=%sm)",
+                    json_age,
+                    max_age,
                 )
-
-
-
-
-
-
-
+            else:
+                self.logger.info(
+                    "The cached Open-Meteo JSON file is old (age=%.0fm, max_age=%sm)",
+                    json_age,
+                    max_age,
+                )
+
+        if not use_cache:
+            self.logger.info("Fetching a new weather forecast from Open-Meteo")
+            headers = {"User-Agent": "EMHASS", "Accept": header_accept}
+            # Open-Meteo has returned non-existent time over DST transitions,
+            # so we now return unix timestamps and convert to date/times locally
+            # instead.
+            url = (
+                "https://api.open-meteo.com/v1/forecast?"
+                + "latitude="
                 + str(round(self.lat, 2))
-                + "
+                + "&longitude="
                 + str(round(self.lon, 2))
-                + "
+                + "&minutely_15="
+                + "temperature_2m,"
+                + "relative_humidity_2m,"
+                + "rain,"
+                + "cloud_cover,"
+                + "wind_speed_10m,"
+                + "shortwave_radiation_instant,"
+                + "diffuse_radiation_instant,"
+                + "direct_normal_irradiance_instant"
+                + "&forecast_days="
+                + str(forecast_days)
+                + "&timezone="
+                + quote(str(self.time_zone), safe="")
+                + "&timeformat=unixtime"
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                self.logger.debug("Fetching data from Open-Meteo using URL: %s", url)
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(url, headers=headers) as response:
+                        self.logger.debug("Returned HTTP status code: %s", response.status)
+                        response.raise_for_status()
+                        """import bz2  # Uncomment to save a serialized data for tests
+                        import _pickle as cPickle
+                        with bz2.BZ2File("data/test_response_openmeteo_get_method.pbz2", "w") as f:
+                        cPickle.dump(response, f)"""
+                        data = await response.json()
+                        self.logger.info(
+                            "Saving response in Open-Meteo JSON cache file: %s",
+                            json_path,
+                        )
+                        async with aiofiles.open(json_path, "w") as json_file:
+                            content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
+                            await json_file.write(content)
+            except aiohttp.ClientError:
+                self.logger.error("Failed to fetch weather forecast from Open-Meteo", exc_info=True)
+                if data is not None:
+                    self.logger.warning("Returning old cached data until next Open-Meteo attempt")
+
+        return data
+
+    async def _get_weather_open_meteo(
+        self, w_forecast_cache_path: str, use_legacy_pvlib: bool
+    ) -> pd.DataFrame:
+        """Helper to retrieve weather data from Open-Meteo or cache."""
+        if not os.path.isfile(w_forecast_cache_path):
+            data_raw = await self.get_cached_open_meteo_forecast_json(
+                self.optim_conf["open_meteo_cache_max_age"],
+                self.optim_conf["delta_forecast_daily"].days,
             )
-
-
-
-
-
-
-
-
-
+            data_15min = pd.DataFrame.from_dict(data_raw["minutely_15"])
+            # Date/times in the Open-Meteo JSON are unix timestamps
+            data_15min["time"] = pd.to_datetime(data_15min["time"], unit="s", utc=True)
+            data_15min["time"] = data_15min["time"].dt.tz_convert(self.time_zone)
+            data_15min.set_index("time", inplace=True)
+            data_15min = data_15min.rename(
+                columns={
+                    "temperature_2m": "temp_air",
+                    "relative_humidity_2m": "relative_humidity",
+                    "rain": "precipitable_water",
+                    "cloud_cover": "cloud_cover",
+                    "wind_speed_10m": "wind_speed",
+                    "shortwave_radiation_instant": "ghi",
+                    "diffuse_radiation_instant": "dhi",
+                    "direct_normal_irradiance_instant": "dni",
+                }
+            )
+            if self.logger.isEnabledFor(logging.DEBUG):
+                data_15min.to_csv(
+                    self.emhass_conf["data_path"] / "debug-weather-forecast-open-meteo.csv"
+                )
+            data = data_15min.reindex(self.forecast_dates)
+            data.interpolate(
                 method="linear",
                 axis=0,
                 limit=None,
                 limit_direction="both",
                 inplace=True,
             )
-
-
-
+            data = set_df_index_freq(data)
+            index_utc = data.index.tz_convert("utc")
+            index_tz = index_utc.round(
+                freq=data.index.freq, ambiguous="infer", nonexistent="shift_forward"
+            ).tz_convert(self.time_zone)
+            data.index = index_tz
+            data = set_df_index_freq(data)
+            # Convert mm to cm and clip minimum to 0.1 cm
+            data["precipitable_water"] = (data["precipitable_water"] / 10).clip(lower=0.1)
+            if use_legacy_pvlib:
+                data = data.drop(columns=["ghi", "dhi", "dni"])
+                ghi_est = self.cloud_cover_to_irradiance(data["cloud_cover"])
+                data["ghi"] = ghi_est["ghi"]
+                data["dni"] = ghi_est["dni"]
+                data["dhi"] = ghi_est["dhi"]
+            if self.params["passed_data"].get("weather_forecast_cache", False):
+                data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
+        else:
+            data = await self.get_cached_forecast_data(w_forecast_cache_path)
+        return data
+
+    async def _get_weather_solcast(self, w_forecast_cache_path: str) -> pd.DataFrame:
+        """Helper to retrieve weather data from Solcast or cache."""
+        if os.path.isfile(w_forecast_cache_path):
+            return await self.get_cached_forecast_data(w_forecast_cache_path)
+        if self.params["passed_data"].get("weather_forecast_cache_only", False):
+            self.logger.error("Unable to obtain Solcast cache file.")
+            self.logger.error(
+                "Try running optimization again with 'weather_forecast_cache_only': false"
             )
-
-
-        data["wind_speed"] = (
-            raw_data["Wind Speed/Direction (mph)"] * 1.60934
-        )  # conversion to km/h
-        data["relative_humidity"] = raw_data["Relative Humidity (%)"]
-        data["precipitable_water"] = pvlib.atmosphere.gueymard94_pw(
-            data["temp_air"], data["relative_humidity"]
+            self.logger.error(
+                "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true."
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-        days_solcast = int(
-            len(self.forecast_dates) * self.freq.seconds / 3600
-        )
-        # If weather_forecast_cache, set request days as twice as long to avoid length issues (add a buffer)
-        if self.params["passed_data"].get("weather_forecast_cache", False):
-            days_solcast = min((days_solcast * 2), 336)
-        url = (
-            "https://api.solcast.com.au/rooftop_sites/"
-            + self.retrieve_hass_conf["solcast_rooftop_id"]
-            + "/forecasts?hours="
-            + str(days_solcast)
-        )
-        response = get(url, headers=headers)
-        """import bz2  # Uncomment to save a serialized data for tests
-        import _pickle as cPickle
-        with bz2.BZ2File("data/test_response_solcast_get_method.pbz2", "w") as f:
-        cPickle.dump(response, f)"""
-        # Verify the request passed
-        if int(response.status_code) == 200:
-            data = response.json()
-        elif (
-            int(response.status_code) == 402
-            or int(response.status_code) == 429
-        ):
+            return False
+        if "solcast_api_key" not in self.retrieve_hass_conf:
+            self.logger.error("The solcast_api_key parameter was not defined")
+            return False
+        if "solcast_rooftop_id" not in self.retrieve_hass_conf:
+            self.logger.error("The solcast_rooftop_id parameter was not defined")
+            return False
+        headers = {
+            "User-Agent": "EMHASS",
+            "Authorization": "Bearer " + self.retrieve_hass_conf["solcast_api_key"],
+            "content-type": header_accept,
+        }
+        days_solcast = int(len(self.forecast_dates) * self.freq.seconds / 3600)
+        roof_ids = re.split(r"[,\s]+", self.retrieve_hass_conf["solcast_rooftop_id"].strip())
+        total_data_list = [0] * len(self.forecast_dates)
+
+        async with aiohttp.ClientSession() as session:
+            for roof_id in roof_ids:
+                url = f"https://api.solcast.com.au/rooftop_sites/{roof_id}/forecasts?hours={days_solcast}"
+                async with session.get(url, headers=headers) as response:
+                    if int(response.status) == 200:
+                        data = await response.json()
+                    elif int(response.status) in [402, 429]:
                         self.logger.error(
                             "Solcast error: May have exceeded your subscription limit."
                         )
                         return False
-        elif (
-            int(response.status_code) >= 400
-            or int(response.status_code) >= 202
-        ):
+                    elif int(response.status) >= 400 or (202 <= int(response.status) <= 299):
                         self.logger.error(
-                "Solcast error:
-            )
-            self.logger.error(
-                "Solcast error: Check that your subscription is valid and your network can connect to Solcast."
+                            "Solcast error: Issue with request, check API key and rooftop ID."
                         )
                         return False
                     data_list = []
                     for elm in data["forecasts"]:
-            data_list.append(
-                elm["pv_estimate"] * 1000
-            )  # Converting kW to W
-        # Check if the retrieved data has the correct length
+                        data_list.append(elm["pv_estimate"] * 1000)
                     if len(data_list) < len(self.forecast_dates):
-            self.logger.error(
-                "Not enough data retried from Solcast service, try increasing the time step or use MPC."
-            )
-        else:
-            # If runtime weather_forecast_cache is true save forecast result to file as cache
-            if self.params["passed_data"].get(
-                "weather_forecast_cache", False
-            ):
-                # Add x2 forecast periods for cached results. This adds a extra delta_forecast amount of days for a buffer
-                cached_forecast_dates = self.forecast_dates.union(
-                    pd.date_range(
-                        self.forecast_dates[-1],
-                        periods=(len(self.forecast_dates) + 1),
-                        freq=self.freq,
-                    )[1:]
-                )
-                cache_data_list = data_list[0 : len(cached_forecast_dates)]
-                cache_data_dict = {
-                    "ts": cached_forecast_dates,
-                    "yhat": cache_data_list,
-                }
-                data_cache = pd.DataFrame.from_dict(cache_data_dict)
-                data_cache.set_index("ts", inplace=True)
-                with open(w_forecast_cache_path, "wb") as file:
-                    cPickle.dump(data_cache, file)
-                if not os.path.isfile(w_forecast_cache_path):
-                    self.logger.warning(
-                        "Solcast forecast data could not be saved to file."
-                    )
-                else:
-                    self.logger.info(
-                        "Saved the Solcast results to cache, for later reference."
-                    )
-            # Trim request results to forecast_dates
-            data_list = data_list[0 : len(self.forecast_dates)]
-            data_dict = {"ts": self.forecast_dates, "yhat": data_list}
-            # Define DataFrame
-            data = pd.DataFrame.from_dict(data_dict)
-            # Define index
-            data.set_index("ts", inplace=True)
-        # Else, notify user to update cache
-        else:
-            self.logger.error("Unable to obtain Solcast cache file.")
-            self.logger.error(
-                "Try running optimization again with 'weather_forecast_cache_only': false"
-            )
-            self.logger.error(
-                "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true in an optimization, or run the `weather-forecast-cache` action, to pull new data from Solcast and cache."
-            )
-            return False
-        # Else, open stored weather_forecast_data.pkl file for previous forecast data (cached data)
-        else:
-            with open(w_forecast_cache_path, "rb") as file:
-                data = cPickle.load(file)
-            if not isinstance(data, pd.DataFrame) or len(data) < len(
-                self.forecast_dates
-            ):
-                self.logger.error(
-                    "There has been a error obtaining cached Solcast forecast data."
-                )
-                self.logger.error(
-                    "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from Solcast and cache."
-                )
-                self.logger.warning(
-                    "Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true"
-                )
-                os.remove(w_forecast_cache_path)
-                return False
-            # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast Dataframe size to appropriate length)
-            if (
-                self.forecast_dates[0] in data.index
-                and self.forecast_dates[-1] in data.index
-            ):
-                data = data.loc[
-                    self.forecast_dates[0] : self.forecast_dates[-1]
-                ]
-                self.logger.info(
-                    "Retrieved Solcast data from the previously saved cache."
-                )
-            else:
-                self.logger.error(
-                    "Unable to obtain cached Solcast forecast data within the requested timeframe range."
-                )
-                self.logger.error(
-                    "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from Solcast and cache."
-                )
-                self.logger.warning(
-                    "Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true"
-                )
-                os.remove(w_forecast_cache_path)
+                        self.logger.error("Not enough data retrieved from Solcast service.")
                         return False
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    total_data_list = [
+                        total + current
+                        for total, current in zip_longest(total_data_list, data_list, fillvalue=0)
+                    ]
+
+        total_data_list = total_data_list[0 : len(self.forecast_dates)]
+        data_dict = {"ts": self.forecast_dates, "yhat": total_data_list}
+        data = pd.DataFrame.from_dict(data_dict)
+        data.set_index("ts", inplace=True)
+        if self.params["passed_data"].get("weather_forecast_cache", False):
+            data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
+        return data
+
+    async def _get_weather_solar_forecast(self, w_forecast_cache_path: str) -> pd.DataFrame:
+        """Helper to retrieve weather data from solar.forecast or cache."""
+        if os.path.isfile(w_forecast_cache_path):
+            return await self.get_cached_forecast_data(w_forecast_cache_path)
+        # Validation and Default Setup
+        if "solar_forecast_kwp" not in self.retrieve_hass_conf:
+            self.logger.warning(
+                "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
+            )
+            self.retrieve_hass_conf["solar_forecast_kwp"] = 5
+        if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
+            self.logger.warning(
+                "The solar_forecast_kwp parameter is set to zero, setting to default 5"
+            )
+            self.retrieve_hass_conf["solar_forecast_kwp"] = 5
+        if self.optim_conf["delta_forecast_daily"].days > 1:
+            self.logger.warning(
+                "The free public tier for solar.forecast only provides one day forecasts"
+            )
+        headers = {"Accept": header_accept}
+        data = pd.DataFrame()
+
+        async with aiohttp.ClientSession() as session:
             for i in range(len(self.plant_conf["pv_module_model"])):
                 url = (
                     "https://api.forecast.solar/estimate/"
```
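The rewritten Solcast helper now splits `solcast_rooftop_id` on commas or whitespace and sums the per-rooftop forecasts elementwise with `zip_longest`, so a shorter response from one roof pads with zeros instead of truncating the total. A toy reproduction of that accumulation with made-up wattage lists:

```python
from itertools import zip_longest

# Two hypothetical rooftops returning lists of different lengths (W)
roof_a = [1000, 1200, 900]
roof_b = [500, 450]

total_data_list = [0] * 3
for data_list in (roof_a, roof_b):
    total_data_list = [
        total + current
        for total, current in zip_longest(total_data_list, data_list, fillvalue=0)
    ]
print(total_data_list)  # [1500, 1650, 900]
```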
```diff
@@ -487,74 +487,108 @@ class Forecast(object):
                     + "/"
                     + str(self.retrieve_hass_conf["solar_forecast_kwp"])
                 )
-
-
-
-
-
-
-
-            "ts"
-
-
-
-
-
-
-
-
-
-            data_tmp.copy(deep=True).fillna(method="
-
-
-            data_tmp.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                async with session.get(url, headers=headers) as response:
+                    data_raw = await response.json()
+                    data_dict = {
+                        "ts": list(data_raw["result"]["watts"].keys()),
+                        "yhat": list(data_raw["result"]["watts"].values()),
+                    }
+                    data_tmp = pd.DataFrame.from_dict(data_dict)
+                    data_tmp.set_index("ts", inplace=True)
+                    data_tmp.index = pd.to_datetime(data_tmp.index)
+                    data_tmp = data_tmp.tz_localize(
+                        self.forecast_dates.tz,
+                        ambiguous="infer",
+                        nonexistent="shift_forward",
+                    )
+                    data_tmp = data_tmp.reindex(index=self.forecast_dates)
+                    # Gap filling
+                    mask_up = data_tmp.copy(deep=True).fillna(method="ffill").isnull()
+                    mask_down = data_tmp.copy(deep=True).fillna(method="bfill").isnull()
+                    data_tmp.loc[mask_up["yhat"], :] = 0.0
+                    data_tmp.loc[mask_down["yhat"], :] = 0.0
+                    data_tmp.interpolate(inplace=True, limit=1)
+                    data_tmp = data_tmp.fillna(0.0)
+                    if len(data) == 0:
+                        data = copy.deepcopy(data_tmp)
+                    else:
+                        data = data + data_tmp
+
+        if self.params["passed_data"].get("weather_forecast_cache", False):
+            data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
+        return data
+
+    def _get_weather_csv(self, csv_path: str) -> pd.DataFrame:
+        """Helper to retrieve weather data from CSV."""
+        data = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
+        if len(data) < len(self.forecast_dates):
+            self.logger.error("Passed data from CSV is not long enough")
+        else:
+            data = data.loc[data.index[0 : len(self.forecast_dates)], :]
+            data.index = self.forecast_dates
+            data.drop("ts", axis=1, inplace=True)
+            data = data.copy().loc[self.forecast_dates]
+        return data
+
+    def _get_weather_list(self) -> pd.DataFrame:
+        """Helper to retrieve weather data from a passed list."""
+        data_list = self.params["passed_data"]["pv_power_forecast"]
+        if (
+            len(data_list) < len(self.forecast_dates)
+            and self.params["passed_data"]["prediction_horizon"] is None
+        ):
+            self.logger.error(error_msg_list_not_long_enough)
+            return None
+        else:
+            data_list = data_list[0 : len(self.forecast_dates)]
+            data_dict = {"ts": self.forecast_dates, "yhat": data_list}
+            data = pd.DataFrame.from_dict(data_dict)
+            data.set_index("ts", inplace=True)
+            return data
+
+    async def get_weather_forecast(
+        self,
+        method: str | None = "open-meteo",
+        csv_path: str | None = "data_weather_forecast.csv",
+        use_legacy_pvlib: bool | None = False,
+    ) -> pd.DataFrame:
+        r"""
+        Get and generate weather forecast data.
+
+        :param method: The desired method, options are 'open-meteo', 'csv', 'list', 'solcast' and \
+            'solar.forecast'. Defaults to 'open-meteo'.
+        :type method: str, optional
+        :return: The DataFrame containing the forecasted data
+        :rtype: pd.DataFrame
+        """
+        csv_path = self.emhass_conf["data_path"] / csv_path
+        w_forecast_cache_path = os.path.abspath(
+            self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
+        )
+        self.logger.info("Retrieving weather forecast data using method = " + method)
+        if method == "scrapper":
+            self.logger.warning(
+                "The scrapper method has been deprecated and the keyword is accepted just for backward compatibility, please change the PV forecast method to open-meteo"
+            )
+        self.weather_forecast_method = method
+        if method in ["open-meteo", "scrapper"]:
+            data = await self._get_weather_open_meteo(w_forecast_cache_path, use_legacy_pvlib)
+        elif method == "solcast":
+            data = await self._get_weather_solcast(w_forecast_cache_path)
+        elif method == "solar.forecast":
+            data = await self._get_weather_solar_forecast(w_forecast_cache_path)
+        elif method == "csv":
+            data = self._get_weather_csv(csv_path)
+        elif method == "list":
+            data = self._get_weather_list()
         else:
             self.logger.error("Method %r is not valid", method)
             data = None
+        self.logger.debug("get_weather_forecast returning:\n%s", data)
         return data
 
     def cloud_cover_to_irradiance(
-        self, cloud_cover: pd.Series, offset: Optional[int] = 35
+        self, cloud_cover: pd.Series, offset: int | None = 35
     ) -> pd.DataFrame:
         """
         Estimates irradiance from cloud cover in the following steps.
```
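The solar.forecast gap filling above zeroes leading and trailing NaNs (rows still null after a forward or backward fill) and interpolates single interior gaps. A standalone sketch of that trick on a toy series, using the modern `.ffill()`/`.bfill()` spellings rather than the deprecated `fillna(method=...)` the diff still carries:

```python
import numpy as np
import pandas as pd

s = pd.Series([np.nan, np.nan, 10.0, np.nan, 20.0, np.nan])
mask_up = s.ffill().isnull()    # still NaN after ffill: before first valid value
mask_down = s.bfill().isnull()  # still NaN after bfill: after last valid value
s[mask_up] = 0.0
s[mask_down] = 0.0
s = s.interpolate(limit=1).fillna(0.0)
print(s.tolist())  # [0.0, 0.0, 10.0, 15.0, 20.0, 0.0]
```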
```diff
@@ -579,9 +613,7 @@ class Forecast(object):
         """
         location = Location(latitude=self.lat, longitude=self.lon)
         solpos = location.get_solarposition(cloud_cover.index)
-        cs = location.get_clearsky(
-            cloud_cover.index, model="ineichen", solar_position=solpos
-        )
+        cs = location.get_clearsky(cloud_cover.index, model="ineichen", solar_position=solpos)
         # Using only the linear method
         offset = offset / 100.0
         cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.0
```
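For context on the `# Using only the linear method` comment above: the linear cloud-cover-to-GHI relation popularized by pvlib's old forecast module scales clear-sky GHI by a cloud-cover-dependent factor with a floor at `offset`. This is a sketch under that assumption (the full method also derives DNI/DHI, via the imported `disc` model, which is not shown here):

```python
# offset and cloud_cover as fractions in [0, 1], matching the /100.0 above
def linear_ghi(ghi_clear: float, cloud_cover: float, offset: float = 0.35) -> float:
    return ghi_clear * (offset + (1 - offset) * (1 - cloud_cover))

print(linear_ghi(800.0, 0.0))  # clear sky: 800.0
print(linear_ghi(800.0, 1.0))  # fully overcast: 800 * 0.35 = 280.0
```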
```diff
@@ -599,6 +631,7 @@ class Forecast(object):
         alpha: float,
         beta: float,
         col: str,
+        ignore_pv_feedback: bool = False,
     ) -> pd.DataFrame:
         """A simple correction method for forecasted data using the current real values of a variable.
 
```
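The mixing itself (visible in the hunk below) only touches the first forecast step: it blends the forecast with the latest sensor reading as `alpha * forecast[0] + beta * last_observed`, and the new code additionally rounds the blend to an integer. A worked sketch with made-up values:

```python
alpha, beta = 0.5, 0.5   # runtime parameters passed_data["alpha"]/["beta"]
forecast_0 = 3000.0      # first forecasted PV value (W), made up
last_observed = 2000.0   # latest sensor reading (W), made up
corrected = int(round(alpha * forecast_0 + beta * last_observed))
print(corrected)  # 2500
```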
@@ -612,119 +645,385 @@ class Forecast(object):
|
|
|
612
645
|
:type beta: float
|
|
613
646
|
:param col: The column variable name
|
|
614
647
|
:type col: str
|
|
648
|
+
:param ignore_pv_feedback: If True, bypass mixing and return original forecast (used during curtailment)
|
|
649
|
+
:type ignore_pv_feedback: bool
|
|
615
650
|
:return: The output DataFrame with the corrected values
|
|
616
651
|
:rtype: pd.DataFrame
|
|
617
652
|
"""
|
|
653
|
+
# If ignoring PV feedback (e.g., during curtailment), return original forecast
|
|
654
|
+
if ignore_pv_feedback:
|
|
655
|
+
return df_forecast
|
|
656
|
+
|
|
618
657
|
first_fcst = alpha * df_forecast.iloc[0] + beta * df_now[col].iloc[-1]
|
|
619
|
-
df_forecast.iloc[0] = first_fcst
|
|
658
|
+
df_forecast.iloc[0] = int(round(first_fcst))
|
|
620
659
|
return df_forecast
|
|
621
660
|
|
|
661
|
+
def _get_model_power(self, params, device_type):
|
|
662
|
+
"""
|
|
663
|
+
Helper to extract power rating based on device type and available parameters.
|
|
664
|
+
"""
|
|
665
|
+
if device_type == "module":
|
|
666
|
+
if "STC" in params:
|
|
667
|
+
return params["STC"]
|
|
668
|
+
if "I_mp_ref" in params and "V_mp_ref" in params:
|
|
669
|
+
return params["I_mp_ref"] * params["V_mp_ref"]
|
|
670
|
+
elif device_type == "inverter":
|
|
671
|
+
if "Paco" in params:
|
|
672
|
+
return params["Paco"]
|
|
673
|
+
if "Pdco" in params:
|
|
674
|
+
return params["Pdco"]
|
|
675
|
+
return None
|
|
676
|
+
|
|
677
|
+
def _find_closest_model(self, target_power, database, device_type):
|
|
678
|
+
"""
|
|
679
|
+
Find the model in the database that has a power rating closest to the target_power.
|
|
680
|
+
"""
|
|
681
|
+
closest_model = None
|
|
682
|
+
min_diff = float("inf")
|
|
683
|
+
# Handle DataFrame (columns are models) or Dict (keys are models)
|
|
684
|
+
iterator = database.items() if hasattr(database, "items") else database.iteritems()
|
|
685
|
+
for _, params in iterator:
|
|
686
|
+
power = self._get_model_power(params, device_type)
|
|
687
|
+
if power is not None:
|
|
688
|
+
diff = abs(power - target_power)
|
|
689
|
+
if diff < min_diff:
|
|
690
|
+
min_diff = diff
|
|
691
|
+
closest_model = params
|
|
692
|
+
if closest_model is not None:
|
|
693
|
+
# Safely get name if it exists (DataFrame Series usually have a .name attribute)
|
|
694
|
+
model_name = getattr(closest_model, "name", "unknown")
|
|
695
|
+
self.logger.info(f"Closest {device_type} model to {target_power}W found: {model_name}")
|
|
696
|
+
else:
|
|
697
|
+
self.logger.warning(f"No suitable {device_type} model found close to {target_power}W")
|
|
698
|
+
return closest_model
|
|
699
|
+
|
|
700
|
+
def _get_model(self, model_spec, database, device_type):
|
|
701
|
+
"""
|
|
702
|
+
Retrieve a model from the database by name or by power rating.
|
|
703
|
+
"""
|
|
704
|
+
# If it's a string, try to find it by name
|
|
705
|
+
if isinstance(model_spec, str):
|
|
706
|
+
if model_spec in database:
|
|
707
|
+
return database[model_spec]
|
|
708
|
+
# If not found by name, check if it is a number string (e.g., "300")
|
|
709
|
+
try:
|
|
710
|
+
target_power = float(model_spec)
|
|
711
|
+
return self._find_closest_model(target_power, database, device_type)
|
|
712
|
+
except ValueError:
|
|
713
|
+
# Not a number, fallback to original behavior (will likely raise KeyError later)
|
|
714
|
+
self.logger.warning(f"{device_type} model '{model_spec}' not found in database.")
|
|
715
|
+
return database[model_spec]
|
|
716
|
+
# If it's a number (int or float), find closest by power
|
|
717
|
+
elif isinstance(model_spec, int | float):
|
|
718
|
+
return self._find_closest_model(model_spec, database, device_type)
|
|
719
|
+
else:
|
|
720
|
+
self.logger.error(f"Invalid type for {device_type} model: {type(model_spec)}")
|
|
721
|
+
return None
|
|
722
|
+
|
|
723
|
+
def _calculate_pvlib_power(self, df_weather: pd.DataFrame) -> pd.Series:
|
|
724
|
+
"""
|
|
725
|
+
Helper to simulate PV power generation using PVLib when no direct forecast is available.
|
|
726
|
+
"""
|
|
727
|
+
# Setting the main parameters of the PV plant
|
|
728
|
+
location = Location(latitude=self.lat, longitude=self.lon)
|
|
729
|
+
temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"]["close_mount_glass_glass"]
|
|
730
|
+
# Load CEC databases
|
|
731
|
+
cec_modules_path = self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2"
|
|
732
|
+
cec_inverters_path = self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2"
|
|
733
|
+
with bz2.BZ2File(cec_modules_path, "rb") as f:
|
|
734
|
+
cec_modules = cPickle.load(f)
|
|
735
|
+
with bz2.BZ2File(cec_inverters_path, "rb") as f:
|
|
736
|
+
cec_inverters = cPickle.load(f)
|
|
737
|
+
|
|
738
|
+
# Inner helper to run a single simulation configuration
|
|
739
|
+
def run_single_config(mod_spec, inv_spec, tilt, azimuth, mod_per_str, str_per_inv):
|
|
740
|
+
module = self._get_model(mod_spec, cec_modules, "module")
|
|
741
|
+
inverter = self._get_model(inv_spec, cec_inverters, "inverter")
|
|
742
|
+
system = PVSystem(
|
|
743
|
+
surface_tilt=tilt,
|
|
744
|
+
surface_azimuth=azimuth,
|
|
745
|
+
module_parameters=module,
|
|
746
|
+
inverter_parameters=inverter,
|
|
747
|
+
temperature_model_parameters=temp_params,
|
|
748
|
+
modules_per_string=mod_per_str,
|
|
749
|
+
strings_per_inverter=str_per_inv,
|
|
750
|
+
)
|
|
751
|
+
mc = ModelChain(system, location, aoi_model="physical")
|
|
752
|
+
mc.run_model(df_weather)
|
|
753
|
+
return mc.results.ac
|
|
754
|
+
|
|
755
|
+
# Handle list (mixed orientation) vs single configuration
|
|
756
|
+
if isinstance(self.plant_conf["pv_module_model"], list):
|
|
757
|
+
p_pv_forecast = pd.Series(0, index=df_weather.index)
|
|
758
|
+
for i in range(len(self.plant_conf["pv_module_model"])):
|
|
759
|
+
result = run_single_config(
|
|
760
|
+
self.plant_conf["pv_module_model"][i],
|
|
761
|
+
self.plant_conf["pv_inverter_model"][i],
|
|
762
|
+
self.plant_conf["surface_tilt"][i],
|
|
763
|
+
self.plant_conf["surface_azimuth"][i],
|
|
764
|
+
self.plant_conf["modules_per_string"][i],
|
|
765
|
+
self.plant_conf["strings_per_inverter"][i],
|
|
766
|
+
)
|
|
767
|
+
p_pv_forecast = p_pv_forecast + result
|
|
768
|
+
else:
|
|
769
|
+
p_pv_forecast = run_single_config(
|
|
770
|
+
self.plant_conf["pv_module_model"],
|
|
771
|
+
self.plant_conf["pv_inverter_model"],
|
|
772
|
+
self.plant_conf["surface_tilt"],
|
|
773
|
+
self.plant_conf["surface_azimuth"],
|
|
774
|
+
self.plant_conf["modules_per_string"],
|
|
775
|
+
self.plant_conf["strings_per_inverter"],
|
|
776
|
+
)
|
|
777
|
+
return p_pv_forecast
|
|
778
|
+
|
|
622
779
|
def get_power_from_weather(
|
|
623
780
|
self,
|
|
624
781
|
df_weather: pd.DataFrame,
|
|
625
|
-
set_mix_forecast:
|
|
626
|
-
df_now:
|
|
782
|
+
set_mix_forecast: bool | None = False,
|
|
783
|
+
df_now: pd.DataFrame | None = pd.DataFrame(),
|
|
627
784
|
) -> pd.Series:
|
|
628
785
|
r"""
|
|
629
|
-
Convert
|
|
630
|
-
|
|
786
|
+
Convert weather forecast data into electrical power.
|
|
787
|
+
|
|
631
788
|
:param df_weather: The DataFrame containing the weather forecasted data. \
|
|
632
789
|
This DF should be generated by the 'get_weather_forecast' method or at \
|
|
633
790
|
least contain the same columns names filled with proper data.
|
|
634
791
|
:type df_weather: pd.DataFrame
|
|
635
|
-
:param set_mix_forecast: Use a mixed
|
|
792
|
+
:param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
|
|
636
793
|
:type set_mix_forecast: Bool, optional
|
|
637
794
|
:param df_now: The DataFrame containing the now/current data.
|
|
638
795
|
:type df_now: pd.DataFrame
|
|
639
796
|
:return: The DataFrame containing the electrical power in Watts
|
|
640
797
|
:rtype: pd.DataFrame
|
|
641
|
-
|
|
642
798
|
"""
|
|
643
799
|
# If using csv method we consider that yhat is the PV power in W
|
|
644
800
|
if (
|
|
645
801
|
"solar_forecast_kwp" in self.retrieve_hass_conf.keys()
|
|
646
802
|
and self.retrieve_hass_conf["solar_forecast_kwp"] == 0
|
|
647
803
|
):
|
|
648
|
-
|
|
804
|
+
p_pv_forecast = pd.Series(0, index=df_weather.index)
|
|
805
|
+
elif self.weather_forecast_method in [
|
|
806
|
+
"solcast",
|
|
807
|
+
"solar.forecast",
|
|
808
|
+
"csv",
|
|
809
|
+
"list",
|
|
810
|
+
]:
|
|
811
|
+
p_pv_forecast = df_weather["yhat"]
|
|
812
|
+
p_pv_forecast.name = None
|
|
649
813
|
else:
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
or self.weather_forecast_method == "solar.forecast"
|
|
653
|
-
or self.weather_forecast_method == "csv"
|
|
654
|
-
or self.weather_forecast_method == "list"
|
|
655
|
-
):
|
|
656
|
-
P_PV_forecast = df_weather["yhat"]
|
|
657
|
-
P_PV_forecast.name = None
|
|
658
|
-
else: # We will transform the weather data into electrical power
|
|
659
|
-
# Transform to power (Watts)
|
|
660
|
-
# Setting the main parameters of the PV plant
|
|
661
|
-
location = Location(latitude=self.lat, longitude=self.lon)
|
|
662
|
-
temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"][
|
|
663
|
-
"close_mount_glass_glass"
|
|
664
|
-
]
|
|
665
|
-
cec_modules = bz2.BZ2File(
|
|
666
|
-
self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2", "rb"
|
|
667
|
-
)
|
|
668
|
-
cec_modules = cPickle.load(cec_modules)
|
|
669
|
-
cec_inverters = bz2.BZ2File(
|
|
670
|
-
self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2", "rb"
|
|
671
|
-
)
|
|
672
|
-
cec_inverters = cPickle.load(cec_inverters)
|
|
673
|
-
if type(self.plant_conf["pv_module_model"]) == list:
|
|
674
|
-
P_PV_forecast = pd.Series(0, index=df_weather.index)
|
|
675
|
-
for i in range(len(self.plant_conf["pv_module_model"])):
|
|
676
|
-
# Selecting correct module and inverter
|
|
677
|
-
module = cec_modules[self.plant_conf["pv_module_model"][i]]
|
|
678
|
-
inverter = cec_inverters[
|
|
679
|
-
self.plant_conf["pv_inverter_model"][i]
|
|
680
|
-
]
|
|
681
|
-
# Building the PV system in PVLib
|
|
682
|
-
system = PVSystem(
|
|
683
|
-
surface_tilt=self.plant_conf["surface_tilt"][i],
|
|
684
|
-
surface_azimuth=self.plant_conf["surface_azimuth"][i],
|
|
685
|
-
module_parameters=module,
|
|
686
|
-
inverter_parameters=inverter,
|
|
687
|
-
temperature_model_parameters=temp_params,
|
|
688
|
-
modules_per_string=self.plant_conf["modules_per_string"][i],
|
|
689
|
-
strings_per_inverter=self.plant_conf[
|
|
690
|
-
"strings_per_inverter"
|
|
691
|
-
][i],
|
|
692
|
-
)
|
|
693
|
-
mc = ModelChain(system, location, aoi_model="physical")
|
|
694
|
-
# Run the model on the weather DF indexes
|
|
695
|
-
mc.run_model(df_weather)
|
|
696
|
-
# Extracting results for AC power
|
|
697
|
-
P_PV_forecast = P_PV_forecast + mc.results.ac
|
|
698
|
-
else:
|
|
699
|
-
# Selecting correct module and inverter
|
|
700
|
-
module = cec_modules[self.plant_conf["pv_module_model"]]
|
|
701
|
-
inverter = cec_inverters[self.plant_conf["pv_inverter_model"]]
|
|
702
|
-
# Building the PV system in PVLib
|
|
703
|
-
system = PVSystem(
|
|
704
|
-
surface_tilt=self.plant_conf["surface_tilt"],
|
|
705
|
-
surface_azimuth=self.plant_conf["surface_azimuth"],
|
|
706
|
-
module_parameters=module,
|
|
707
|
-
inverter_parameters=inverter,
|
|
708
|
-
temperature_model_parameters=temp_params,
|
|
709
|
-
modules_per_string=self.plant_conf["modules_per_string"],
|
|
710
|
-
strings_per_inverter=self.plant_conf["strings_per_inverter"],
|
|
711
|
-
)
|
|
712
|
-
mc = ModelChain(system, location, aoi_model="physical")
|
|
713
|
-
# Run the model on the weather DF indexes
|
|
714
|
-
mc.run_model(df_weather)
|
|
715
|
-
# Extracting results for AC power
|
|
716
|
-
P_PV_forecast = mc.results.ac
|
|
814
|
+
# We will transform the weather data into electrical power
|
|
815
|
+
p_pv_forecast = self._calculate_pvlib_power(df_weather)
|
|
717
816
|
if set_mix_forecast:
|
|
718
|
-
|
|
817
|
+
ignore_pv_feedback = self.params["passed_data"].get(
|
|
818
|
+
"ignore_pv_feedback_during_curtailment", False
|
|
819
|
+
)
|
|
820
|
+
p_pv_forecast = Forecast.get_mix_forecast(
|
|
719
821
|
df_now,
|
|
720
|
-
|
|
822
|
+
p_pv_forecast,
|
|
721
823
|
self.params["passed_data"]["alpha"],
|
|
722
824
|
self.params["passed_data"]["beta"],
|
|
723
|
-
self.
|
|
825
|
+
self.var_pv,
|
|
826
|
+
ignore_pv_feedback,
|
|
827
|
+
)
|
|
828
|
+
p_pv_forecast[p_pv_forecast < 0] = 0 # replace any negative PV values with zero
|
|
829
|
+
self.logger.debug("get_power_from_weather returning:\n%s", p_pv_forecast)
|
|
830
|
+
return p_pv_forecast
|
|
831
|
+
|
|
832
|
+
@staticmethod
|
|
833
|
+
def compute_solar_angles(df: pd.DataFrame, latitude: float, longitude: float) -> pd.DataFrame:
|
|
834
|
+
"""
|
|
835
|
+
Compute solar angles (elevation, azimuth) based on timestamps and location.
|
|
836
|
+
|
|
837
|
+
:param df: DataFrame with a DateTime index.
|
|
838
|
+
:param latitude: Latitude of the PV system.
|
|
839
|
+
:param longitude: Longitude of the PV system.
|
|
840
|
+
:return: DataFrame with added solar elevation and azimuth.
|
|
841
|
+
"""
|
|
842
|
+
df = df.copy()
|
|
843
|
+
solpos = get_solarposition(df.index, latitude, longitude)
|
|
844
|
+
df["solar_elevation"] = solpos["elevation"]
|
|
845
|
+
df["solar_azimuth"] = solpos["azimuth"]
|
|
846
|
+
return df
|
|
847
|
+
|
|
848
|
+
    def adjust_pv_forecast_data_prep(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Prepare data for adjusting the photovoltaic (PV) forecast.

        This method aligns the actual PV production data with the forecasted data,
        adds additional features for analysis, and separates the predictors (X)
        from the target variable (y).

        :param data: A DataFrame containing the actual PV production data and the
            forecasted PV production data.
        :type data: pd.DataFrame
        :return: DataFrame with data for adjusted PV model train.
        """
        # Extract target and predictor
        self.logger.debug("adjust_pv_forecast_data_prep using data:\n%s", data)
        if self.logger.isEnabledFor(logging.DEBUG):
            data.to_csv(
                self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-input-data.csv"
            )
        P_PV = data[self.var_pv]  # Actual PV production
        p_pv_forecast = data[self.var_pv_forecast]  # Forecasted PV production
        # Define time ranges
        last_day = data.index.max().normalize()  # Last available day
        three_months_ago = last_day - pd.DateOffset(
            days=self.retrieve_hass_conf["historic_days_to_retrieve"]
        )
        # Train/Test: Last historic_days_to_retrieve days (excluding the last day)
        train_test_mask = (data.index >= three_months_ago) & (data.index < last_day)
        self.p_pv_train_test = P_PV[train_test_mask]
        self.p_pv_forecast_train_test = p_pv_forecast[train_test_mask]
        # Validation: Last day only
        validation_mask = data.index >= last_day
        self.p_pv_validation = P_PV[validation_mask]
        self.p_pv_forecast_validation = p_pv_forecast[validation_mask]
        # Ensure data is aligned
        self.data_adjust_pv = pd.concat(
            [P_PV.rename("actual"), p_pv_forecast.rename("forecast")], axis=1
        ).dropna()
        # Add more features
        self.data_adjust_pv = add_date_features(self.data_adjust_pv)
        self.data_adjust_pv = Forecast.compute_solar_angles(self.data_adjust_pv, self.lat, self.lon)
        # Features (X) and target (y)
        self.x_adjust_pv = self.data_adjust_pv.drop(columns=["actual"])  # Predictors
        self.y_adjust_pv = self.data_adjust_pv["actual"]  # Target: actual PV production
        self.logger.debug("adjust_pv_forecast_data_prep output data:\n%s", self.data_adjust_pv)
        if self.logger.isEnabledFor(logging.DEBUG):
            self.data_adjust_pv.to_csv(
                self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-output-data.csv"
            )

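Taken together with the fit and predict methods defined just below, this forms a small train/validate pipeline. A hypothetical end-to-end call sequence (inside an async context; `fcst` and `df_hist` are assumptions, with `df_hist` holding the actual and forecasted PV columns named by `fcst.var_pv` and `fcst.var_pv_forecast`):

    fcst.adjust_pv_forecast_data_prep(df_hist)  # build features; last day held out for validation
    await fcst.adjust_pv_forecast_fit(n_splits=5, regression_model="LassoRegression")
    adjusted = fcst.adjust_pv_forecast_predict()  # predicts on the held-out day, logs RMSE/R2
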
    async def adjust_pv_forecast_fit(
        self,
        n_splits: int = 5,
        regression_model: str = "LassoRegression",
        debug: bool | None = False,
    ) -> pd.DataFrame:
        """
        Fit a regression model to adjust the photovoltaic (PV) forecast.

        This method uses historical actual and forecasted PV production data, along with
        additional solar and date features, to train a regression model. The model is
        optimized using a grid search with time-series cross-validation.

        :param n_splits: The number of splits for time-series cross-validation, defaults to 5.
        :type n_splits: int, optional
        :param regression_model: The type of regression model to use. See REGRESSION_METHODS \
            in machine_learning_regressor.py for the authoritative list of supported models. \
            Currently: 'LinearRegression', 'RidgeRegression', 'LassoRegression', 'ElasticNet', \
            'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'RandomForestRegressor', \
            'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', \
            'MLPRegressor'. Defaults to "LassoRegression".
        :type regression_model: str, optional
        :param debug: If True, the model is not saved to disk, useful for debugging, defaults to False.
        :type debug: bool, optional
        :return: A DataFrame containing the adjusted PV forecast.
        :rtype: pd.DataFrame
        """
        # Get regression model and hyperparameter grid
        mlr = MLRegressor(
            self.data_adjust_pv,
            "adjusted_pv_forecast",
            regression_model,
            list(self.x_adjust_pv.columns),
            list(self.y_adjust_pv.name),
            None,
            self.logger,
        )
        pipeline, param_grid = mlr._get_model_and_params()
        # Time-series split
        tscv = TimeSeriesSplit(n_splits=n_splits)
        grid_search = GridSearchCV(
            pipeline, param_grid, cv=tscv, scoring="neg_mean_squared_error", verbose=0
        )
        # Train model
        await asyncio.to_thread(grid_search.fit, self.x_adjust_pv, self.y_adjust_pv)
        self.model_adjust_pv = grid_search.best_estimator_
        # Calculate training metrics
        y_pred_train = self.model_adjust_pv.predict(self.x_adjust_pv)
        self.rmse = np.sqrt(mean_squared_error(self.y_adjust_pv, y_pred_train))
        self.r2 = r2_score(self.y_adjust_pv, y_pred_train)
        # Log the metrics
        self.logger.info(f"PV adjust Training metrics: RMSE = {self.rmse}, R2 = {self.r2}")
        # Save model
        if not debug:
            filename = "adjust_pv_regressor.pkl"
            filename_path = self.emhass_conf["data_path"] / filename
            async with aiofiles.open(filename_path, "wb") as outp:
                await outp.write(pickle.dumps(self.model_adjust_pv, pickle.HIGHEST_PROTOCOL))

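The fit wraps scikit-learn's grid search in `asyncio.to_thread` so the event loop stays responsive. The cross-validation pattern in isolation (a minimal sketch with a stand-in pipeline, not the MLRegressor internals):

    import numpy as np
    from sklearn.linear_model import Lasso
    from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    X = np.arange(100, dtype=float).reshape(-1, 1)
    y = 0.5 * X.ravel() + np.random.default_rng(0).normal(size=100)
    pipe = Pipeline([("scaler", StandardScaler()), ("model", Lasso())])
    grid = GridSearchCV(
        pipe,
        {"model__alpha": [0.01, 0.1, 1.0]},
        cv=TimeSeriesSplit(n_splits=5),  # folds preserve temporal order, no shuffling
        scoring="neg_mean_squared_error",
    )
    grid.fit(X, y)
    print(grid.best_params_)
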
    def adjust_pv_forecast_predict(self, forecasted_pv: pd.DataFrame | None = None) -> pd.DataFrame:
        """
        Predict the adjusted photovoltaic (PV) forecast.

        This method uses the trained regression model to predict the adjusted PV forecast
        based on either the validation data stored in `self` or a new forecasted PV data
        passed as input. It applies additional features such as date and solar angles to
        the forecasted PV production data before making predictions. The solar elevation
        is used to avoid negative values and to fix values at the beginning and end of the day.

        :param forecasted_pv: Optional. A DataFrame containing the forecasted PV production data.
            It must have a DateTime index and a column named "forecast".
            If not provided, the method will use `self.p_pv_forecast_validation`.
        :type forecasted_pv: pd.DataFrame, optional
        :return: A DataFrame containing the adjusted PV forecast with additional features.
        :rtype: pd.DataFrame
        """
        # Use the provided forecasted PV data or fall back to the validation data in `self`
        if forecasted_pv is not None:
            # Ensure the input DataFrame has the required structure
            if "forecast" not in forecasted_pv.columns:
                raise ValueError("The input DataFrame must contain a 'forecast' column.")
            forecast_data = forecasted_pv.copy()
        else:
            # Use the validation data stored in `self`
            forecast_data = self.p_pv_forecast_validation.rename("forecast").to_frame()
        # Prepare the forecasted PV data
        forecast_data = add_date_features(forecast_data)
        forecast_data = Forecast.compute_solar_angles(forecast_data, self.lat, self.lon)
        # Predict the adjusted forecast
        forecast_data["adjusted_forecast"] = self.model_adjust_pv.predict(forecast_data)

        # Apply solar elevation weighting only for specific cases
        def apply_weighting(row):
            if row["solar_elevation"] <= 0:  # Nighttime or negative solar elevation
                return 0
            elif (
                row["solar_elevation"] < self.optim_conf["adjusted_pv_solar_elevation_threshold"]
            ):  # Early morning or late evening
                return max(
                    row["adjusted_forecast"]
                    * (
                        row["solar_elevation"]
                        / self.optim_conf["adjusted_pv_solar_elevation_threshold"]
                    ),
                    0,
                )
            else:  # Daytime with sufficient solar elevation
                return row["adjusted_forecast"]

        forecast_data["adjusted_forecast"] = forecast_data.apply(apply_weighting, axis=1)
        # If using validation data, calculate validation metrics
        if forecasted_pv is None:
            y_true = self.p_pv_validation.values
            y_pred = forecast_data["adjusted_forecast"].values
            self.validation_rmse = np.sqrt(mean_squared_error(y_true, y_pred))
            self.validation_r2 = r2_score(y_true, y_pred)
            # Log the validation metrics
            self.logger.info(
                f"PV adjust Validation metrics: RMSE = {self.validation_rmse}, R2 = {self.validation_r2}"
            )
        self.logger.debug("adjust_pv_forecast_predict forecast data:\n%s", forecast_data)
        if self.logger.isEnabledFor(logging.DEBUG):
            forecast_data.to_csv(
                self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-predict-forecast-data.csv"
            )
        # Return the DataFrame with the adjusted forecast
        return forecast_data

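The weighting in `apply_weighting` is a linear low-sun ramp: predictions are zeroed at or below the horizon and scaled by `elevation / threshold` until the configured elevation threshold is reached. A quick numeric check (the 10 degree threshold is a made-up example value):

    threshold = 10.0  # hypothetical adjusted_pv_solar_elevation_threshold
    for elevation, pred in [(-5.0, 0.8), (2.5, 0.8), (25.0, 0.8)]:
        if elevation <= 0:
            adjusted = 0.0
        elif elevation < threshold:
            adjusted = max(pred * (elevation / threshold), 0.0)
        else:
            adjusted = pred
        print(elevation, adjusted)  # -> 0.0, 0.2 and 0.8 respectively
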
    def get_forecast_days_csv(self, timedelta_days: int | None = 1) -> pd.date_range:
        r"""
        Get the date range vector of forecast dates that will be used when loading a CSV file.

@@ -732,30 +1031,22 @@ class Forecast(object):
        :rtype: pd.date_range

        """
        start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
        if self.method_ts_round == "nearest":
            start_forecast_csv = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
        elif self.method_ts_round == "first":
            start_forecast_csv = (
                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
            )
        elif self.method_ts_round == "last":
            start_forecast_csv = (
                pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
            )
        else:
            self.logger.error("Wrong method_ts_round passed parameter")
        end_forecast_csv = (start_forecast_csv + self.optim_conf["delta_forecast_daily"]).replace(
            microsecond=0
        )
        forecast_dates_csv = (
            pd.date_range(
                start=start_forecast_csv,
@@ -767,21 +1058,100 @@ class Forecast(object):
            .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
            .tz_convert(self.time_zone)
        )
        if (
            self.params is not None
            and "prediction_horizon" in list(self.params["passed_data"].keys())
            and self.params["passed_data"]["prediction_horizon"] is not None
        ):
            forecast_dates_csv = forecast_dates_csv[
                0 : self.params["passed_data"]["prediction_horizon"]
            ]
        return forecast_dates_csv

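A standalone sketch of the "first" rounding strategy used above (time zone and step are example values):

    import pandas as pd

    freq = pd.Timedelta("30min")
    start = pd.Timestamp.now(tz="Europe/Paris").replace(microsecond=0).floor(freq)
    end = start + pd.Timedelta(days=1)
    dates = pd.date_range(start=start, end=end - freq, freq=freq)
    print(len(dates))  # 48 half-hour steps on a regular (non-DST-transition) day
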
    def _load_forecast_data(
        self,
        csv_path: str,
        data_list: list | None,
        forecast_dates_csv: pd.date_range,
    ) -> pd.DataFrame:
        """
        Helper to load and format forecast data from a CSV file or a list.
        """
        if csv_path is None:
            data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
            df_csv = pd.DataFrame.from_dict(data_dict)
            df_csv.index = forecast_dates_csv
            df_csv = df_csv.drop(["ts"], axis=1)
            df_csv = set_df_index_freq(df_csv)
        else:
            if not os.path.exists(csv_path):
                csv_path = self.emhass_conf["data_path"] / csv_path
            df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
            # Check if first column is a valid datetime
            first_col = df_csv.iloc[:, 0]
            if pd.to_datetime(first_col, errors="coerce").notna().all():
                df_csv["ts"] = pd.to_datetime(df_csv["ts"], utc=True)
                df_csv.set_index("ts", inplace=True)
                df_csv.index = df_csv.index.tz_convert(self.time_zone)
            else:
                df_csv.index = forecast_dates_csv
                df_csv = df_csv.drop(["ts"], axis=1)
            df_csv = set_df_index_freq(df_csv)
        return df_csv

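The helper accepts a headerless two-column CSV of timestamp and value; if the first column does not parse as datetimes, the generated forecast dates are used as the index instead. An illustrative file and read (the file name is an example):

    # data_load_forecast.csv (no header row):
    # 2024-06-01T00:00:00+00:00,1200.5
    # 2024-06-01T00:30:00+00:00,1150.0
    import pandas as pd

    df = pd.read_csv("data_load_forecast.csv", header=None, names=["ts", "yhat"])
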
    def _extract_daily_forecast(
        self,
        day: int,
        df_timing: pd.DataFrame,
        df_csv: pd.DataFrame,
        csv_path: str,
        list_and_perfect: bool,
    ) -> pd.DataFrame:
        """
        Helper to extract a specific day's forecast data based on timing configuration.
        """
        # Find the start and end indices for the specific day in the timing DataFrame
        day_mask = df_timing.index.day == day
        day_indices = [i for i, x in enumerate(day_mask) if x]
        first_elm_index = day_indices[0]
        last_elm_index = day_indices[-1]
        # Define the target forecast index based on the timing DataFrame
        fcst_index = pd.date_range(
            start=df_timing.index[first_elm_index],
            end=df_timing.index[last_elm_index],
            freq=df_timing.index.freq,
        )
        first_hour = f"{df_timing.index[first_elm_index].hour:02d}:{df_timing.index[first_elm_index].minute:02d}"
        last_hour = f"{df_timing.index[last_elm_index].hour:02d}:{df_timing.index[last_elm_index].minute:02d}"
        # Extract data
        if csv_path is None:
            if list_and_perfect:
                values_array = df_csv.between_time(first_hour, last_hour).values
                # Adjust index length if necessary
                fcst_index = fcst_index[0 : len(values_array)]
                return pd.DataFrame(values_array, index=fcst_index)
            else:
                return pd.DataFrame(
                    df_csv.loc[fcst_index, :].between_time(first_hour, last_hour).values,
                    index=fcst_index,
                )
        else:
            # For CSV path, filter by date string first
            df_csv_filtered_date = df_csv.loc[
                df_csv.index.strftime("%Y-%m-%d") == fcst_index[0].date().strftime("%Y-%m-%d")
            ]
            return pd.DataFrame(
                df_csv_filtered_date.between_time(first_hour, last_hour).values,
                index=fcst_index,
            )

    def get_forecast_out_from_csv_or_list(
        self,
        df_final: pd.DataFrame,
        forecast_dates_csv: pd.date_range,
        csv_path: str,
        data_list: list | None = None,
        list_and_perfect: bool | None = False,
    ) -> pd.DataFrame:
        r"""
        Get the forecast data as a DataFrame from a CSV file.

@@ -800,119 +1170,294 @@ class Forecast(object):
        :rtype: pd.DataFrame

        """
        # Load the source data (df_csv)
        df_csv = self._load_forecast_data(csv_path, data_list, forecast_dates_csv)
        # Configure timing source (df_timing) and iteration list
        if csv_path is None or list_and_perfect:
            df_final = set_df_index_freq(df_final)
            df_timing = copy.deepcopy(df_final)
            days_list = df_final.index.day.unique().tolist()
        else:
            df_timing = copy.deepcopy(df_csv)
            days_list = df_csv.index.day.unique().tolist()
        # Iterate over days and collect forecast parts
        forecast_parts = []
        for day in days_list:
            daily_df = self._extract_daily_forecast(
                day, df_timing, df_csv, csv_path, list_and_perfect
            )
            forecast_parts.append(daily_df)
        if forecast_parts:
            forecast_out = pd.concat(forecast_parts, axis=0)
        else:
            forecast_out = pd.DataFrame()
        # Merge with final DataFrame to align indices
        merged = pd.merge_asof(
            df_final.sort_index(),
            forecast_out.sort_index(),
            left_index=True,
            right_index=True,
            direction="nearest",
        )
        # Keep only forecast_out columns
        forecast_out = merged[forecast_out.columns]
        return forecast_out

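The final alignment relies on `pd.merge_asof` with `direction="nearest"`, which pairs every target timestamp with the closest forecast sample instead of requiring exact index equality. A minimal standalone example:

    import pandas as pd

    left = pd.DataFrame(index=pd.date_range("2024-06-01 00:00", periods=4, freq="30min"))
    right = pd.DataFrame(
        {"yhat": [1.0, 2.0]},
        index=pd.date_range("2024-06-01 00:10", periods=2, freq="1h"),
    )
    aligned = pd.merge_asof(
        left.sort_index(),
        right.sort_index(),
        left_index=True,
        right_index=True,
        direction="nearest",
    )
    print(aligned["yhat"].tolist())  # [1.0, 1.0, 2.0, 2.0]
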
    @staticmethod
    def resample_data(data, freq, current_freq):
        r"""
        Resample a DataFrame with a custom frequency.

        :param data: Original time series data with a DateTimeIndex.
        :type data: pd.DataFrame
        :param freq: Desired frequency for resampling (e.g., pd.Timedelta("10min")).
        :type freq: pd.Timedelta
        :return: Resampled data at the specified frequency.
        :rtype: pd.DataFrame
        """
        if freq > current_freq:
            # Downsampling
            # Use 'mean' to aggregate or choose other options ('sum', 'max', etc.)
            resampled_data = data.resample(freq).mean()
        elif freq < current_freq:
            # Upsampling
            # Use 'asfreq' to create empty slots, then interpolate
            resampled_data = data.resample(freq).asfreq()
            resampled_data = resampled_data.interpolate(method="time")
        else:
            # No resampling needed
            resampled_data = data.copy()
        return resampled_data

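Illustrative use of the resampling helper (10-minute target from 30-minute data; upsampling interpolates in time):

    import pandas as pd
    from emhass.forecast import Forecast

    idx = pd.date_range("2024-06-01", periods=4, freq="30min")
    df = pd.DataFrame({"load": [100.0, 200.0, 150.0, 250.0]}, index=idx)
    out = Forecast.resample_data(df, pd.Timedelta("10min"), pd.Timedelta("30min"))
    print(out)  # 10 rows, with intermediate points linearly interpolated in time
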
    @staticmethod
    def get_typical_load_forecast(data, forecast_date):
        r"""
        Forecast the load profile for the next day based on historic data.

        :param data: A DataFrame with a DateTimeIndex containing the historic load data.
            Must include a 'load' column.
        :type data: pd.DataFrame
        :param forecast_date: The date for which the forecast will be generated.
        :type forecast_date: pd.Timestamp
        :return: A Series with the forecasted load profile for the next day and a list of days used
            to calculate the forecast.
        :rtype: tuple (pd.Series, list)
        """
        # Ensure the 'load' column exists
        if "load" not in data.columns:
            raise ValueError("Data must have a 'load' column.")
        # Filter historic data for the same month and day of the week
        month = forecast_date.month
        day_of_week = forecast_date.dayofweek
        historic_data = data[(data.index.month == month) & (data.index.dayofweek == day_of_week)]
        used_days = np.unique(historic_data.index.date)
        # Align all historic data to the forecast day
        aligned_data = []
        for day in used_days:
            daily_data = data[data.index.date == pd.Timestamp(day).date()]
            aligned_daily_data = daily_data.copy()
            aligned_daily_data.index = aligned_daily_data.index.map(
                lambda x: x.replace(
                    year=forecast_date.year,
                    month=forecast_date.month,
                    day=forecast_date.day,
                )
            )
            aligned_data.append(aligned_daily_data)
        # Combine all aligned historic data into a single DataFrame
        combined_data = pd.concat(aligned_data)
        # Compute the mean load for each timestamp
        forecast = combined_data.groupby(combined_data.index).mean()
        return forecast, used_days

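A self-contained sketch of the typical-profile averaging (synthetic constant load; the forecast date must fall in the same month as the history for matching days to exist):

    import pandas as pd
    from emhass.forecast import Forecast

    idx = pd.date_range("2024-03-01", "2024-03-31 23:30", freq="30min")
    data = pd.DataFrame({"load": 500.0}, index=idx)
    profile, used_days = Forecast.get_typical_load_forecast(data, pd.Timestamp("2024-03-25"))
    print(len(used_days))  # 4: the Mondays of March 2024 contribute to the averaged profile
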
    async def _prepare_hass_load_data(
        self, days_min_load_forecast: int, method: str
    ) -> pd.DataFrame | bool:
        """Helper to retrieve and prepare load data from Home Assistant."""
        self.logger.info(f"Retrieving data from hass for load forecast using method = {method}")
        var_list = [self.var_load]
        var_replace_zero = None
        var_interp = [self.var_load]
        time_zone_load_forecast = None
        rh = RetrieveHass(
            self.retrieve_hass_conf["hass_url"],
            self.retrieve_hass_conf["long_lived_token"],
            self.freq,
            time_zone_load_forecast,
            self.params,
            self.emhass_conf,
            self.logger,
        )
        if self.get_data_from_file:
            filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
            async with aiofiles.open(filename_path, "rb") as inp:
                content = await inp.read()
                rh.df_final, days_list, var_list, rh.ha_config = pickle.loads(content)
            self.var_load = var_list[0]
            self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = self.var_load
            var_interp = [var_list[0]]
            self.var_list = [var_list[0]]
            rh.var_list = self.var_list
            self.var_load_new = self.var_load + "_positive"
        else:
            days_list = get_days_list(days_min_load_forecast)
            if not await rh.get_data(days_list, var_list):
                return False
            if not rh.prepare_data(
                self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
                load_negative=self.retrieve_hass_conf["load_negative"],
                set_zero_min=self.retrieve_hass_conf["set_zero_min"],
                var_replace_zero=var_replace_zero,
                var_interp=var_interp,
            ):
                return False
        return rh.df_final.copy()[[self.var_load_new]]

    async def _get_load_forecast_typical(self) -> pd.DataFrame:
        """Helper to generate typical load forecast."""
        model_type = "long_train_data"
        data_path = self.emhass_conf["data_path"] / str(model_type + ".pkl")
        async with aiofiles.open(data_path, "rb") as fid:
            content = await fid.read()
            data, _, _, _ = pickle.loads(content)
        # Ensure the data index is timezone-aware
        data.index = (
            data.index.tz_localize(
                self.forecast_dates.tz,
                ambiguous="infer",
                nonexistent="shift_forward",
            )
            if data.index.tz is None
            else data.index.tz_convert(self.forecast_dates.tz)
        )
        data = data[[self.var_load]]
        current_freq = pd.Timedelta("30min")
        if self.freq != current_freq:
            data = Forecast.resample_data(data, self.freq, current_freq)
        dates_list = np.unique(self.forecast_dates.date).tolist()
        forecast = pd.DataFrame()
        for date in dates_list:
            forecast_date = pd.Timestamp(date)
            data.columns = ["load"]
            forecast_tmp, used_days = Forecast.get_typical_load_forecast(data, forecast_date)
            self.logger.debug(f"Using {len(used_days)} days of data to generate the forecast.")
            forecast_tmp = forecast_tmp * self.plant_conf["maximum_power_from_grid"] / 9000
            if len(forecast) == 0:
                forecast = forecast_tmp
            else:
                forecast = pd.concat([forecast, forecast_tmp], axis=0)
        forecast_out = forecast.loc[forecast.index.intersection(self.forecast_dates)]
        forecast_out.index = self.forecast_dates
        forecast_out.index.name = "ts"
        return forecast_out.rename(columns={"load": "yhat"})

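The typical profile above is rescaled by `maximum_power_from_grid / 9000`, i.e. the stored curve appears to be normalized to a 9 kW reference plant. A quick numeric check (the 6 kW figure is a made-up example):

    maximum_power_from_grid = 6000  # hypothetical plant configuration value, in W
    scale = maximum_power_from_grid / 9000
    print(round(scale, 3))  # 0.667: the typical curve is scaled to two thirds
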
    def _get_load_forecast_naive(self, df: pd.DataFrame) -> pd.DataFrame:
        """Helper for naive forecast."""
        forecast_horizon = len(self.forecast_dates)
        historical_values = df.iloc[-forecast_horizon:]
        return pd.DataFrame(historical_values.values, index=self.forecast_dates, columns=["yhat"])

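The persistence idea in `_get_load_forecast_naive` reduces to reusing the most recent window of history as the forecast; a minimal standalone sketch:

    import pandas as pd

    history = pd.Series([310.0, 280.0, 400.0, 520.0, 390.0, 350.0])
    horizon = 3
    naive_forecast = history.iloc[-horizon:].to_numpy()
    print(naive_forecast)  # the last 3 observed values become the next 3 predictions
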
    async def _get_load_forecast_ml(
        self, df: pd.DataFrame, use_last_window: bool, mlf, debug: bool
    ) -> pd.DataFrame | bool:
        """Helper for ML forecast."""
        model_type = self.params["passed_data"]["model_type"]
        filename = model_type + "_mlf.pkl"
        filename_path = self.emhass_conf["data_path"] / filename
        if not debug:
            if filename_path.is_file():
                async with aiofiles.open(filename_path, "rb") as inp:
                    content = await inp.read()
                    mlf = pickle.loads(content)
            else:
                self.logger.error(
                    "The ML forecaster file was not found, please run a model fit method before this predict method"
                )
                return False
        data_last_window = None
        if use_last_window:
            data_last_window = copy.deepcopy(df)
            data_last_window = data_last_window.rename(columns={self.var_load_new: self.var_load})
        forecast_out = await mlf.predict(data_last_window)
        self.logger.debug(
            "Number of ML predict forcast data generated (lags_opt): "
            + str(len(forecast_out.index))
        )
        self.logger.debug(
            "Number of forcast dates obtained (prediction_horizon): "
            + str(len(self.forecast_dates))
        )
        if len(self.forecast_dates) < len(forecast_out.index):
            forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
        elif len(self.forecast_dates) > len(forecast_out.index):
            self.logger.error(
                "Unable to obtain: "
                + str(len(self.forecast_dates))
                + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
            )
            return False
        data_dict = {
            "ts": self.forecast_dates,
            "yhat": forecast_out.values.tolist(),
        }
        data = pd.DataFrame.from_dict(data_dict)
        data.set_index("ts", inplace=True)
        return data.copy().loc[self.forecast_dates]

    def _get_load_forecast_csv(self, csv_path: str) -> pd.DataFrame:
        """Helper to retrieve load data from CSV."""
        df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
        if len(df_csv) < len(self.forecast_dates):
            self.logger.error("Passed data from CSV is not long enough")
            return None
        df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
        df_csv.index = self.forecast_dates
        df_csv = df_csv.drop(["ts"], axis=1)
        return df_csv.copy().loc[self.forecast_dates]

    def _get_load_forecast_list(self) -> pd.DataFrame:
        """Helper to retrieve load data from a passed list."""
        data_list = self.params["passed_data"]["load_power_forecast"]
        if (
            len(data_list) < len(self.forecast_dates)
            and self.params["passed_data"]["prediction_horizon"] is None
        ):
            self.logger.error(error_msg_list_not_long_enough)
            return False
        data_list = data_list[0 : len(self.forecast_dates)]
        data_dict = {"ts": self.forecast_dates, "yhat": data_list}
        data = pd.DataFrame.from_dict(data_dict)
        data.set_index("ts", inplace=True)
        return data.copy().loc[self.forecast_dates]

    async def get_load_forecast(
        self,
        days_min_load_forecast: int | None = 3,
        method: str | None = "typical",
        csv_path: str | None = "data_load_forecast.csv",
        set_mix_forecast: bool | None = False,
        df_now: pd.DataFrame | None = pd.DataFrame(),
        use_last_window: bool | None = True,
        mlf: MLForecaster | None = None,
        debug: bool | None = False,
    ) -> pd.Series:
        """
        Get and generate the load forecast data.

        :param days_min_load_forecast: The number of last days to retrieve that \
            will be used to generate a naive forecast, defaults to 3
        :type days_min_load_forecast: int, optional
        :param method: The method to be used to generate load forecast, the options \
            are 'typical' for a typical household load consumption curve, \
            are 'naive' for a persistence model, 'mlforecaster' for using a custom \
            previously fitted machine learning model, 'csv' to read the forecast from \
            a CSV file and 'list' to use data directly passed at runtime as a list of \
            values. Defaults to 'typical'.
        :type method: str, optional
        :param csv_path: The path to the CSV file used when method = 'csv', \
            defaults to "/data/data_load_forecast.csv"
        :type csv_path: str, optional
        :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
        :type set_mix_forecast: Bool, optional
        :param df_now: The DataFrame containing the now/current data.
        :type df_now: pd.DataFrame, optional
@@ -932,169 +1477,59 @@ class Forecast(object):

        """
        csv_path = self.emhass_conf["data_path"] / csv_path
        # Retrieve Data from Home Assistant if needed
        df = None
        if method in ["naive", "mlforecaster"]:
            df = await self._prepare_hass_load_data(days_min_load_forecast, method)
            if df is False:
                return False
        # Generate Forecast based on Method
        if method == "typical":
            forecast_out = await self._get_load_forecast_typical()
        elif method == "naive":
            forecast_out = self._get_load_forecast_naive(df)
        elif method == "mlforecaster":
            forecast_out = await self._get_load_forecast_ml(df, use_last_window, mlf, debug)
            if forecast_out is False:
                return False
        elif method == "csv":
            forecast_out = self._get_load_forecast_csv(csv_path)
            if forecast_out is None:
                return False
        elif method == "list":
            forecast_out = self._get_load_forecast_list()
            if forecast_out is False:
                return False
        else:
            self.logger.error(error_msg_method_not_valid)
            return False
        # Post-processing (Mix Forecast)
        p_load_forecast = copy.deepcopy(forecast_out["yhat"])
        if set_mix_forecast:
            # Load forecasts don't need curtailment protection - always use feedback
            p_load_forecast = Forecast.get_mix_forecast(
                df_now,
                p_load_forecast,
                self.params["passed_data"]["alpha"],
                self.params["passed_data"]["beta"],
                self.var_load_new,
                False,  # Never ignore feedback for load forecasts
            )
        self.logger.debug("get_load_forecast returning:\n%s", p_load_forecast)
        return p_load_forecast

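A usage sketch for the reworked coroutine (illustrative; `fcst` is assumed to be an already-configured Forecast instance, and the calls must run inside an event loop):

    import asyncio

    async def main():
        p_load = await fcst.get_load_forecast(method="typical")
        p_load_naive = await fcst.get_load_forecast(method="naive", days_min_load_forecast=3)
        print(p_load.head())

    asyncio.run(main())
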
    def get_load_cost_forecast(
        self,
        df_final: pd.DataFrame,
        method: str | None = "hp_hc_periods",
        csv_path: str | None = "data_load_cost_forecast.csv",
        list_and_perfect: bool | None = False,
    ) -> pd.DataFrame:
        r"""
        Get the unit cost for the load consumption based on multiple tariff \
        periods. This is the cost of the energy from the utility in a vector \
        sampled at the fixed freq value.

        :param df_final: The DataFrame containing the input data.
        :type df_final: pd.DataFrame
        :param method: The method to be used to generate load cost forecast, \
@@ -1113,7 +1548,7 @@ class Forecast(object):
        if method == "hp_hc_periods":
            df_final[self.var_load_cost] = self.optim_conf["load_offpeak_hours_cost"]
            list_df_hp = []
            for _key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
                list_df_hp.append(
                    df_final[self.var_load_cost].between_time(
                        period_hp[0]["start"], period_hp[1]["end"]
@@ -1128,7 +1563,14 @@ class Forecast(object):
            forecast_out = self.get_forecast_out_from_csv_or_list(
                df_final, forecast_dates_csv, csv_path
            )
            # Ensure correct length
            if not list_and_perfect:
                forecast_out = forecast_out[0 : len(self.forecast_dates)]
                df_final = df_final[0 : len(self.forecast_dates)].copy()
            # Convert to Series if needed and align index
            if not isinstance(forecast_out, pd.Series):
                forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
            df_final.loc[:, self.var_load_cost] = forecast_out
        elif method == "list":  # reading a list of values
            # Loading data from passed list
            data_list = self.params["passed_data"]["load_cost_forecast"]
@@ -1137,11 +1579,13 @@ class Forecast(object):
                len(data_list) < len(self.forecast_dates)
                and self.params["passed_data"]["prediction_horizon"] is None
            ):
                self.logger.error(error_msg_list_not_long_enough)
                return False
            else:
                # Ensure correct length
                data_list = data_list[0 : len(self.forecast_dates)]
                if not list_and_perfect:
                    df_final = df_final.iloc[0 : len(self.forecast_dates)]
                # Define the correct dates
                forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
                forecast_out = self.get_forecast_out_from_csv_or_list(
@@ -1151,25 +1595,26 @@ class Forecast(object):
                    data_list=data_list,
                    list_and_perfect=list_and_perfect,
                )
                df_final = df_final.copy()
                df_final[self.var_load_cost] = forecast_out
        else:
            self.logger.error(error_msg_method_not_valid)
            return False
        self.logger.debug("get_load_cost_forecast returning:\n%s", df_final)
        return df_final

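For reference, the peak-hour loop above consumes a mapping of named periods to [start, end] pairs; an illustrative shape (the times are examples, not defaults):

    load_peak_hour_periods = {
        "period_hp_1": [{"start": "02:54"}, {"end": "15:24"}],
        "period_hp_2": [{"start": "17:24"}, {"end": "20:24"}],
    }
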
    def get_prod_price_forecast(
        self,
        df_final: pd.DataFrame,
        method: str | None = "constant",
        csv_path: str | None = "data_prod_price_forecast.csv",
        list_and_perfect: bool | None = False,
    ) -> pd.DataFrame:
        r"""
        Get the unit power production price for the energy injected to the grid.\
        This is the price of the energy injected to the utility in a vector \
        sampled at the fixed freq value.

        :param df_input_data: The DataFrame containing all the input data retrieved
            from hass
        :type df_input_data: pd.DataFrame
@@ -1187,15 +1632,20 @@ class Forecast(object):
        """
        csv_path = self.emhass_conf["data_path"] / csv_path
        if method == "constant":
            df_final[self.var_prod_price] = self.optim_conf["photovoltaic_production_sell_price"]
        elif method == "csv":
            forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
            forecast_out = self.get_forecast_out_from_csv_or_list(
                df_final, forecast_dates_csv, csv_path
            )
            # Ensure correct length
            if not list_and_perfect:
                forecast_out = forecast_out[0 : len(self.forecast_dates)]
                df_final = df_final[0 : len(self.forecast_dates)].copy()
            # Convert to Series if needed and align index
            if not isinstance(forecast_out, pd.Series):
                forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
            df_final.loc[:, self.var_prod_price] = forecast_out
        elif method == "list":  # reading a list of values
            # Loading data from passed list
            data_list = self.params["passed_data"]["prod_price_forecast"]
@@ -1204,11 +1654,13 @@ class Forecast(object):
                len(data_list) < len(self.forecast_dates)
                and self.params["passed_data"]["prediction_horizon"] is None
            ):
                self.logger.error(error_msg_list_not_long_enough)
                return False
            else:
                # Ensure correct length
                data_list = data_list[0 : len(self.forecast_dates)]
                if not list_and_perfect:
                    df_final = df_final.iloc[0 : len(self.forecast_dates)]
                # Define the correct dates
                forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
                forecast_out = self.get_forecast_out_from_csv_or_list(
@@ -1218,9 +1670,90 @@ class Forecast(object):
                    data_list=data_list,
                    list_and_perfect=list_and_perfect,
                )
                df_final = df_final.copy()
                df_final[self.var_prod_price] = forecast_out
        else:
            self.logger.error(error_msg_method_not_valid)
            return False
        self.logger.debug("get_prod_price_forecast returning:\n%s", df_final)
        return df_final

    async def get_cached_forecast_data(self, w_forecast_cache_path) -> pd.DataFrame:
        r"""
        Get cached weather forecast data from file.

        :param w_forecast_cache_path: the path to file.
        :type method: Any
        :return: The DataFrame containing the forecasted data
        :rtype: pd.DataFrame

        """
        async with aiofiles.open(w_forecast_cache_path, "rb") as file:
            content = await file.read()
            data = pickle.loads(content)
        if not isinstance(data, pd.DataFrame) or len(data) < len(self.forecast_dates):
            self.logger.error("There has been a error obtaining cached forecast data.")
            self.logger.error(
                "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from forecast API and cache."
            )
            self.logger.warning(
                "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
            )
            os.remove(w_forecast_cache_path)
            return False
        # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast Dataframe size to appropriate length)
        if self.forecast_dates[0] in data.index and self.forecast_dates[-1] in data.index:
            data = data.loc[self.forecast_dates[0] : self.forecast_dates[-1]]
            self.logger.info("Retrieved forecast data from the previously saved cache.")
        else:
            self.logger.error(
                "Unable to obtain cached forecast data within the requested timeframe range."
            )
            self.logger.error(
                "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from forecast API and cache."
            )
            self.logger.warning(
                "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
            )
            os.remove(w_forecast_cache_path)
            return False
        return data

    async def set_cached_forecast_data(self, w_forecast_cache_path, data) -> pd.DataFrame:
        r"""
        Set generated weather forecast data to file.
        Trim data to match the original requested forecast dates

        :param w_forecast_cache_path: the path to file.
        :type method: Any
        :param: The DataFrame containing the forecasted data
        :type: pd.DataFrame
        :return: The DataFrame containing the forecasted data
        :rtype: pd.DataFrame

        """
        async with aiofiles.open(w_forecast_cache_path, "wb") as file:
            content = pickle.dumps(data)
            await file.write(content)
        if not os.path.isfile(w_forecast_cache_path):
            self.logger.warning("forecast data could not be saved to file.")
        else:
            self.logger.info("Saved the forecast results to cache, for later reference.")

        # Trim cached data to match requested dates
        end_forecast = (self.start_forecast + self.optim_conf["delta_forecast_daily"]).replace(
            microsecond=0
        )
        forecast_dates = (
            pd.date_range(
                start=self.start_forecast,
                end=end_forecast - self.freq,
                freq=self.freq,
                tz=self.time_zone,
            )
            .tz_convert("utc")
            .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
            .tz_convert(self.time_zone)
        )
        data = data.loc[forecast_dates[0] : forecast_dates[-1]]
        return data
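
A hypothetical round trip through the two cache helpers (the path and the `fcst` instance are assumptions; both calls are coroutines and must be awaited inside an event loop):

    cache_path = fcst.emhass_conf["data_path"] / "weather_forecast_data.pkl"
    data = await fcst.set_cached_forecast_data(cache_path, data)  # write, then trim to the horizon
    cached = await fcst.get_cached_forecast_data(cache_path)      # returns False when stale or too short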