emhass 0.10.6__py3-none-any.whl → 0.15.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
emhass/forecast.py CHANGED
@@ -1,110 +1,120 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- import pathlib
5
- import os
6
- import pickle
1
+ import asyncio
2
+ import bz2
7
3
  import copy
8
4
  import logging
9
- import json
10
- from typing import Optional
11
- import bz2
5
+ import os
6
+ import pickle
12
7
  import pickle as cPickle
13
- import pandas as pd
14
- import numpy as np
8
+ import re
15
9
  from datetime import datetime, timedelta
16
- from requests import get
17
- from bs4 import BeautifulSoup
18
- import pvlib
19
- from pvlib.pvsystem import PVSystem
10
+ from itertools import zip_longest
11
+ from urllib.parse import quote
12
+
13
+ import aiofiles
14
+ import aiohttp
15
+ import numpy as np
16
+ import orjson
17
+ import pandas as pd
18
+ from pvlib.irradiance import disc
20
19
  from pvlib.location import Location
21
20
  from pvlib.modelchain import ModelChain
21
+ from pvlib.pvsystem import PVSystem
22
+ from pvlib.solarposition import get_solarposition
22
23
  from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
23
- from pvlib.irradiance import disc
24
+ from sklearn.metrics import mean_squared_error, r2_score
25
+ from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
24
26
 
25
- from emhass.retrieve_hass import RetrieveHass
26
27
  from emhass.machine_learning_forecaster import MLForecaster
27
- from emhass.utils import get_days_list, set_df_index_freq
28
+ from emhass.machine_learning_regressor import MLRegressor
29
+ from emhass.retrieve_hass import RetrieveHass
30
+ from emhass.utils import add_date_features, get_days_list, set_df_index_freq
28
31
 
32
+ header_accept = "application/json"
33
+ error_msg_list_not_long_enough = "Passed data from passed list is not long enough"
34
+ error_msg_method_not_valid = "Passed method is not valid"
29
35
 
30
- class Forecast(object):
36
+
37
+ class Forecast:
31
38
  r"""
32
39
  Generate weather, load and cost forecasts needed as inputs to the optimization.
33
-
40
+
34
41
  In EMHASS we have basically 4 forecasts to deal with:
35
-
42
+
36
43
  - PV power production forecast (internally based on the weather forecast and the
37
44
  characteristics of your PV plant). This is given in Watts.
38
-
45
+
39
46
  - Load power forecast: how much power your house will demand on the next 24h. This
40
47
  is given in Watts.
41
-
48
+
42
49
  - PV production selling price forecast: at what price are you selling your excess
43
50
  PV production on the next 24h. This is given in EUR/kWh.
44
-
51
+
45
52
  - Load cost forecast: the price of the energy from the grid on the next 24h. This
46
53
  is given in EUR/kWh.
47
-
54
+
48
55
  There are methods that are generalized to the 4 forecasts needed. For all these
49
56
  forecasts it is possible to pass the data either as a passed list of values or by
50
57
  reading from a CSV file. With these methods it is then possible to use data from
51
58
  external forecast providers.
52
-
53
- Then there are the methods that are specific to each type of forecast and that
59
+
60
+ Then there are the methods that are specific to each type of forecast and that
54
61
  propose a forecast treated and generated internally by this EMHASS forecast class.
55
- For the weather forecast a first method (`scrapper`) uses a scrapping to the
56
- ClearOutside webpage which proposes detailed forecasts based on Lat/Lon locations.
57
- This method seems stable but as with any scrape method it will fail if any changes
58
- are made to the webpage API. Another method (`solcast`) is using the SolCast PV
59
- production forecast service. A final method (`solar.forecast`) is using another
60
- external service: Solar.Forecast, for which just the nominal PV peak installed
61
- power should be provided. Search the forecast section on the documentation for examples
62
+ For the weather forecast a first method (`open-meteo`) uses the Open-Meteo API,
63
+ which proposes detailed forecasts based on Lat/Lon locations.
64
+ This method seems stable, but as with any external API it will fail if breaking
65
+ changes are made to the API. Another method (`solcast`) uses the SolCast PV
66
+ production forecast service. A final method (`solar.forecast`) uses another
67
+ external service: Solar.Forecast, for which just the nominal PV peak installed
68
+ power should be provided. See the forecast section of the documentation for examples
62
69
  on how to implement these different methods.
63
-
70
+
64
71
  The `get_power_from_weather` method is proposed here to convert from irradiance
65
72
  data to electrical power. The PVLib module is used to model the PV plant.
66
-
67
- The specific methods for the load forecast are a first method (`naive`) that uses
68
- a naive approach, also called persistance. It simply assumes that the forecast for
69
- a future period will be equal to the observed values in a past period. The past
73
+
74
+ The specific methods for the load forecast are a first method (`naive`) that uses
75
+ a naive approach, also called persistence. It simply assumes that the forecast for
76
+ a future period will be equal to the observed values in a past period. The past
70
77
  period is controlled using parameter `delta_forecast`. A second method (`mlforecaster`)
71
78
  uses an internal custom forecasting model using machine learning. There is a section
72
79
  in the documentation explaining how to use this method.
73
-
80
+
74
81
  .. note:: This custom machine learning model is introduced from v0.4.0. EMHASS \
75
82
  proposed this new `mlforecaster` class with `fit`, `predict` and `tune` methods. \
76
83
  Only the `predict` method is used here to generate new forecasts, but it is \
77
84
  necessary to previously fit a forecaster model and it is a good idea to \
78
85
  optimize the model hyperparameters using the `tune` method. See the dedicated \
79
86
  section in the documentation for more help.
80
-
87
+
81
88
  For the PV production selling price and Load cost forecasts the preferred method
82
89
  is a direct read from a user provided list of values. The list should be passed
83
90
  as a runtime parameter during the `curl` to the EMHASS API.
84
-
85
- I reading from a CSV file, it should contain no header and the timestamped data
91
+
92
+ If reading from a CSV file, it should contain no header and the timestamped data
86
93
  should have the following format:
87
-
88
94
  2021-04-29 00:00:00+00:00,287.07
89
-
90
95
  2021-04-29 00:30:00+00:00,274.27
91
-
92
96
  2021-04-29 01:00:00+00:00,243.38
93
-
94
97
  ...
95
-
98
+
96
99
  The data columns in these files will correspond to the data in the units expected
97
100
  for each forecasting method.
98
-
101
+
99
102
  """
100
103
 
101
- def __init__(self, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict,
102
- params: str, emhass_conf: dict, logger: logging.Logger,
103
- opt_time_delta: Optional[int] = 24,
104
- get_data_from_file: Optional[bool] = False) -> None:
104
+ def __init__(
105
+ self,
106
+ retrieve_hass_conf: dict,
107
+ optim_conf: dict,
108
+ plant_conf: dict,
109
+ params: str,
110
+ emhass_conf: dict,
111
+ logger: logging.Logger,
112
+ opt_time_delta: int | None = 24,
113
+ get_data_from_file: bool | None = False,
114
+ ) -> None:
105
115
  """
106
116
  Define constructor for the forecast class.
107
-
117
+
108
118
  :param retrieve_hass_conf: Dictionary containing the needed configuration
109
119
  data from the configuration file, specific to retrieve data from HASS
110
120
  :type retrieve_hass_conf: dict
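
For reference, the header-less CSV format described in the docstring above can be loaded with pandas the same way the `csv` method later in this diff does (`header=None`, `names=["ts", "yhat"]`). A minimal sketch, with an illustrative file name:

import pandas as pd

# Parse the header-less "timestamp,value" rows documented above.
# The file name is illustrative only.
data = pd.read_csv("data_weather_forecast.csv", header=None, names=["ts", "yhat"])
data["ts"] = pd.to_datetime(data["ts"], utc=True)  # rows carry UTC offsets
data = data.set_index("ts")
print(data.head())
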
@@ -120,10 +130,10 @@ class Forecast(object):
120
130
  :type emhass_conf: dict
121
131
  :param logger: The passed logger object
122
132
  :type logger: logging object
123
- :param opt_time_delta: The time delta in hours used to generate forecasts,
133
+ :param opt_time_delta: The time delta in hours used to generate forecasts,
124
134
  a value of 24 will generate 24 hours of forecast data, defaults to 24
125
135
  :type opt_time_delta: int, optional
126
- :param get_data_from_file: Select if data should be retrieved from a
136
+ :param get_data_from_file: Select if data should be retrieved from a
127
137
  previously saved pickle useful for testing or directly from connection to
128
138
  hass database
129
139
  :type get_data_from_file: bool, optional
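
To make the new constructor signature concrete, here is a minimal sketch of instantiating the class. Only configuration keys visible in this diff are used; the sensor names, coordinates and paths are placeholders, and a real EMHASS configuration carries many more entries:

import logging
import pathlib

import pandas as pd

from emhass.forecast import Forecast

retrieve_hass_conf = {
    "optimization_time_step": pd.to_timedelta(30, "minutes"),
    "time_zone": "Europe/Paris",                                    # placeholder
    "method_ts_round": "nearest",
    "sensor_power_photovoltaics": "sensor.power_pv",                # placeholder
    "sensor_power_photovoltaics_forecast": "sensor.p_pv_forecast",  # placeholder
    "sensor_power_load_no_var_loads": "sensor.power_load",          # placeholder
    "Latitude": 45.83,                                              # placeholder
    "Longitude": 6.86,                                              # placeholder
}
optim_conf = {"delta_forecast_daily": pd.Timedelta(days=1)}
plant_conf = {}
emhass_conf = {
    "data_path": pathlib.Path("/tmp/emhass-data"),                  # placeholder
    "root_path": pathlib.Path("/usr/src/emhass/src/emhass"),        # placeholder
}
params = {"passed_data": {"prediction_horizon": None}}
fcst = Forecast(
    retrieve_hass_conf, optim_conf, plant_conf,
    params, emhass_conf, logging.getLogger(__name__),
)
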
@@ -132,279 +142,466 @@ class Forecast(object):
132
142
  self.retrieve_hass_conf = retrieve_hass_conf
133
143
  self.optim_conf = optim_conf
134
144
  self.plant_conf = plant_conf
135
- self.freq = self.retrieve_hass_conf['freq']
136
- self.time_zone = self.retrieve_hass_conf['time_zone']
137
- self.method_ts_round = self.retrieve_hass_conf['method_ts_round']
138
- self.timeStep = self.freq.seconds/3600 # in hours
145
+ self.freq = self.retrieve_hass_conf["optimization_time_step"]
146
+ self.time_zone = self.retrieve_hass_conf["time_zone"]
147
+ self.method_ts_round = self.retrieve_hass_conf["method_ts_round"]
139
148
  self.time_delta = pd.to_timedelta(opt_time_delta, "hours")
140
- self.var_PV = self.retrieve_hass_conf['var_PV']
141
- self.var_load = self.retrieve_hass_conf['var_load']
142
- self.var_load_new = self.var_load+'_positive'
143
- self.lat = self.retrieve_hass_conf['lat']
144
- self.lon = self.retrieve_hass_conf['lon']
149
+ self.var_pv = self.retrieve_hass_conf["sensor_power_photovoltaics"]
150
+ self.var_pv_forecast = self.retrieve_hass_conf["sensor_power_photovoltaics_forecast"]
151
+ self.var_load = self.retrieve_hass_conf["sensor_power_load_no_var_loads"]
152
+ self.var_load_new = self.var_load + "_positive"
153
+ self.lat = self.retrieve_hass_conf["Latitude"]
154
+ self.lon = self.retrieve_hass_conf["Longitude"]
145
155
  self.emhass_conf = emhass_conf
146
156
  self.logger = logger
147
157
  self.get_data_from_file = get_data_from_file
148
- self.var_load_cost = 'unit_load_cost'
149
- self.var_prod_price = 'unit_prod_price'
150
- if params is None:
158
+ self.var_load_cost = "unit_load_cost"
159
+ self.var_prod_price = "unit_prod_price"
160
+ if (params is None) or (params == "null"):
161
+ self.params = {}
162
+ elif type(params) is dict:
151
163
  self.params = params
152
164
  else:
153
- self.params = json.loads(params)
154
- if self.method_ts_round == 'nearest':
155
- self.start_forecast = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
156
- elif self.method_ts_round == 'first':
157
- self.start_forecast = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
158
- elif self.method_ts_round == 'last':
159
- self.start_forecast = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
165
+ self.params = orjson.loads(params)
166
+
167
+ if self.method_ts_round == "nearest":
168
+ self.start_forecast = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
169
+ elif self.method_ts_round == "first":
170
+ self.start_forecast = (
171
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
172
+ )
173
+ elif self.method_ts_round == "last":
174
+ self.start_forecast = (
175
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
176
+ )
160
177
  else:
161
178
  self.logger.error("Wrong method_ts_round passed parameter")
162
- self.end_forecast = (self.start_forecast + self.optim_conf['delta_forecast']).replace(microsecond=0)
163
- self.forecast_dates = pd.date_range(start=self.start_forecast,
164
- end=self.end_forecast-self.freq,
165
- freq=self.freq).round(self.freq, ambiguous='infer', nonexistent='shift_forward')
166
- if params is not None:
167
- if 'prediction_horizon' in list(self.params['passed_data'].keys()):
168
- if self.params['passed_data']['prediction_horizon'] is not None:
169
- self.forecast_dates = self.forecast_dates[0:self.params['passed_data']['prediction_horizon']]
170
-
171
-
172
- def get_weather_forecast(self, method: Optional[str] = 'scrapper',
173
- csv_path: Optional[str] = "data_weather_forecast.csv") -> pd.DataFrame:
179
+ # check if weather_forecast_cache, if so get 2x the amount of forecast
180
+ if self.params["passed_data"].get("weather_forecast_cache", False):
181
+ self.end_forecast = (
182
+ self.start_forecast + (self.optim_conf["delta_forecast_daily"] * 2)
183
+ ).replace(microsecond=0)
184
+ else:
185
+ self.end_forecast = (
186
+ self.start_forecast + self.optim_conf["delta_forecast_daily"]
187
+ ).replace(microsecond=0)
188
+ self.forecast_dates = (
189
+ pd.date_range(
190
+ start=self.start_forecast,
191
+ end=self.end_forecast - self.freq,
192
+ freq=self.freq,
193
+ tz=self.time_zone,
194
+ )
195
+ .tz_convert("utc")
196
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
197
+ .tz_convert(self.time_zone)
198
+ )
199
+ if (
200
+ params is not None
201
+ and "prediction_horizon" in list(self.params["passed_data"].keys())
202
+ and self.params["passed_data"]["prediction_horizon"] is not None
203
+ ):
204
+ self.forecast_dates = self.forecast_dates[
205
+ 0 : self.params["passed_data"]["prediction_horizon"]
206
+ ]
207
+
208
+ async def get_cached_open_meteo_forecast_json(
209
+ self, max_age: int | None = 30, forecast_days: int = 3
210
+ ) -> dict:
174
211
  r"""
175
- Get and generate weather forecast data.
176
-
177
- :param method: The desired method, options are 'scrapper', 'csv', 'list', 'solcast' and \
178
- 'solar.forecast'. Defaults to 'scrapper'.
179
- :type method: str, optional
180
- :return: The DataFrame containing the forecasted data
181
- :rtype: pd.DataFrame
182
-
183
- """
184
- csv_path = self.emhass_conf['data_path'] / csv_path
185
- w_forecast_cache_path = os.path.abspath(self.emhass_conf['data_path'] / "weather_forecast_data.pkl")
186
-
187
- self.logger.info("Retrieving weather forecast data using method = "+method)
188
- self.weather_forecast_method = method # Saving this attribute for later use to identify csv method usage
189
- if method == 'scrapper':
190
- freq_scrap = pd.to_timedelta(60, "minutes") # The scrapping time step is 60min on clearoutside
191
- forecast_dates_scrap = pd.date_range(start=self.start_forecast,
192
- end=self.end_forecast-freq_scrap,
193
- freq=freq_scrap).round(freq_scrap, ambiguous='infer', nonexistent='shift_forward')
194
- # Using the clearoutside webpage
195
- response = get("https://clearoutside.com/forecast/"+str(round(self.lat, 2))+"/"+str(round(self.lon, 2))+"?desktop=true")
196
- '''import bz2 # Uncomment to save a serialized data for tests
197
- import _pickle as cPickle
198
- with bz2.BZ2File("data/test_response_scrapper_get_method.pbz2", "w") as f:
199
- cPickle.dump(response.content, f)'''
200
- soup = BeautifulSoup(response.content, 'html.parser')
201
- table = soup.find_all(id='day_0')[0]
202
- list_names = table.find_all(class_='fc_detail_label')
203
- list_tables = table.find_all('ul')[1:]
204
- selected_cols = [0, 1, 2, 3, 10, 12, 15] # Selected variables
205
- col_names = [list_names[i].get_text() for i in selected_cols]
206
- list_tables = [list_tables[i] for i in selected_cols]
207
- # Building the raw DF container
208
- raw_data = pd.DataFrame(index=range(len(forecast_dates_scrap)), columns=col_names, dtype=float)
209
- for count_col, col in enumerate(col_names):
210
- list_rows = list_tables[count_col].find_all('li')
211
- for count_row, row in enumerate(list_rows):
212
- raw_data.loc[count_row, col] = float(row.get_text())
213
- # Treating index
214
- raw_data.set_index(forecast_dates_scrap, inplace=True)
215
- raw_data = raw_data[~raw_data.index.duplicated(keep='first')]
216
- raw_data = raw_data.reindex(self.forecast_dates)
217
- raw_data.interpolate(method='linear', axis=0, limit=None,
218
- limit_direction='both', inplace=True)
219
- # Converting the cloud cover into Global Horizontal Irradiance with a PVLib method
220
- ghi_est = self.cloud_cover_to_irradiance(raw_data['Total Clouds (% Sky Obscured)'])
221
- data = ghi_est
222
- data['temp_air'] = raw_data['Temperature (°C)']
223
- data['wind_speed'] = raw_data['Wind Speed/Direction (mph)']*1.60934 # conversion to km/h
224
- data['relative_humidity'] = raw_data['Relative Humidity (%)']
225
- data['precipitable_water'] = pvlib.atmosphere.gueymard94_pw(
226
- data['temp_air'], data['relative_humidity'])
227
- elif method == 'solcast': # using Solcast API
228
- # Check if weather_forecast_cache is true or if forecast_data file does not exist
229
- if self.params["passed_data"]["weather_forecast_cache"] or not os.path.isfile(w_forecast_cache_path):
230
- # Check if weather_forecast_cache_only is true, if so produce error for not finding cache file
231
- if not self.params["passed_data"]["weather_forecast_cache_only"]:
232
- # Retrieve data from the Solcast API
233
- if 'solcast_api_key' not in self.retrieve_hass_conf:
234
- self.logger.error("The solcast_api_key parameter was not defined")
212
+ Get weather forecast json from Open-Meteo and cache it for re-use.
213
+ The response json is cached in the local file system and returned
214
+ on subsequent calls until it is older than max_age, at which point
215
+ attempts will be made to replace it with a new version.
216
+ The cached version will not be overwritten until a new version has
217
+ been successfully fetched from Open-Meteo.
218
+ In the event of connectivity issues, the cached version will continue
219
+ to be returned until such time as a new version can be successfully
220
+ fetched from Open-Meteo.
221
+ If you want to force a reload, pass a max_age value of zero.
222
+
223
+ :param max_age: The maximum age of the cached json file, in minutes,
224
+ before it is discarded and a new version fetched from Open-Meteo.
225
+ Defaults to 30 minutes.
226
+ :type max_age: int, optional
227
+ :param forecast_days: The number of days of forecast data required from Open-Meteo.
228
+ One additional day is always fetched from Open-Meteo so there is an extra day of data in the cache.
229
+ Defaults to 2 days (3 days fetched) to match the prior default.
230
+ :type forecast_days: int, optional
231
+ :return: The json containing the Open-Meteo forecast data
232
+ :rtype: dict
233
+
234
+ """
235
+
236
+ # Ensure at least 3 weather forecast days (and 1 more than requested)
237
+ if forecast_days is None:
238
+ self.logger.debug("Open-Meteo forecast_days is missing so defaulting to 3 days")
239
+ forecast_days = 3
240
+ elif forecast_days < 3:
241
+ self.logger.debug(
242
+ "Open-Meteo forecast_days is low (%s) so defaulting to 3 days",
243
+ forecast_days,
244
+ )
245
+ forecast_days = 3
246
+ else:
247
+ forecast_days = forecast_days + 1
248
+
249
+ # The addition of -b.json file name suffix is because the time format
250
+ # has changed, and it avoids any attempt to use the old format file.
251
+ json_path = os.path.abspath(
252
+ self.emhass_conf["data_path"] / "cached-open-meteo-forecast-b.json"
253
+ )
254
+ # The cached JSON file is always loaded, if it exists, as it is also a fallback
255
+ # in case the REST API call to Open-Meteo fails - the cached JSON will continue to
256
+ # be used until it can successfully fetch a new version from Open-Meteo.
257
+ data = None
258
+ use_cache = False
259
+ if os.path.exists(json_path):
260
+ delta = datetime.now() - datetime.fromtimestamp(os.path.getmtime(json_path))
261
+ json_age = int(delta / timedelta(seconds=60))
262
+ use_cache = json_age < max_age
263
+ self.logger.info("Loading existing cached Open-Meteo JSON file: %s", json_path)
264
+ async with aiofiles.open(json_path) as json_file:
265
+ content = await json_file.read()
266
+ data = orjson.loads(content)
267
+ if use_cache:
268
+ self.logger.info(
269
+ "The cached Open-Meteo JSON file is recent (age=%.0fm, max_age=%sm)",
270
+ json_age,
271
+ max_age,
272
+ )
273
+ else:
274
+ self.logger.info(
275
+ "The cached Open-Meteo JSON file is old (age=%.0fm, max_age=%sm)",
276
+ json_age,
277
+ max_age,
278
+ )
279
+
280
+ if not use_cache:
281
+ self.logger.info("Fetching a new weather forecast from Open-Meteo")
282
+ headers = {"User-Agent": "EMHASS", "Accept": header_accept}
283
+ # Open-Meteo has returned non-existent time over DST transitions,
284
+ # so we now return unix timestamps and convert to date/times locally
285
+ # instead.
286
+ url = (
287
+ "https://api.open-meteo.com/v1/forecast?"
288
+ + "latitude="
289
+ + str(round(self.lat, 2))
290
+ + "&longitude="
291
+ + str(round(self.lon, 2))
292
+ + "&minutely_15="
293
+ + "temperature_2m,"
294
+ + "relative_humidity_2m,"
295
+ + "rain,"
296
+ + "cloud_cover,"
297
+ + "wind_speed_10m,"
298
+ + "shortwave_radiation_instant,"
299
+ + "diffuse_radiation_instant,"
300
+ + "direct_normal_irradiance_instant"
301
+ + "&forecast_days="
302
+ + str(forecast_days)
303
+ + "&timezone="
304
+ + quote(str(self.time_zone), safe="")
305
+ + "&timeformat=unixtime"
306
+ )
307
+ try:
308
+ self.logger.debug("Fetching data from Open-Meteo using URL: %s", url)
309
+ async with aiohttp.ClientSession() as session:
310
+ async with session.get(url, headers=headers) as response:
311
+ self.logger.debug("Returned HTTP status code: %s", response.status)
312
+ response.raise_for_status()
313
+ """import bz2 # Uncomment to save a serialized data for tests
314
+ import _pickle as cPickle
315
+ with bz2.BZ2File("data/test_response_openmeteo_get_method.pbz2", "w") as f:
316
+ cPickle.dump(response, f)"""
317
+ data = await response.json()
318
+ self.logger.info(
319
+ "Saving response in Open-Meteo JSON cache file: %s",
320
+ json_path,
321
+ )
322
+ async with aiofiles.open(json_path, "w") as json_file:
323
+ content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
324
+ await json_file.write(content)
325
+ except aiohttp.ClientError:
326
+ self.logger.error("Failed to fetch weather forecast from Open-Meteo", exc_info=True)
327
+ if data is not None:
328
+ self.logger.warning("Returning old cached data until next Open-Meteo attempt")
329
+
330
+ return data
331
+
332
+ async def _get_weather_open_meteo(
333
+ self, w_forecast_cache_path: str, use_legacy_pvlib: bool
334
+ ) -> pd.DataFrame:
335
+ """Helper to retrieve weather data from Open-Meteo or cache."""
336
+ if not os.path.isfile(w_forecast_cache_path):
337
+ data_raw = await self.get_cached_open_meteo_forecast_json(
338
+ self.optim_conf["open_meteo_cache_max_age"],
339
+ self.optim_conf["delta_forecast_daily"].days,
340
+ )
341
+ data_15min = pd.DataFrame.from_dict(data_raw["minutely_15"])
342
+ # Date/times in the Open-Meteo JSON are unix timestamps
343
+ data_15min["time"] = pd.to_datetime(data_15min["time"], unit="s", utc=True)
344
+ data_15min["time"] = data_15min["time"].dt.tz_convert(self.time_zone)
345
+ data_15min.set_index("time", inplace=True)
346
+ data_15min = data_15min.rename(
347
+ columns={
348
+ "temperature_2m": "temp_air",
349
+ "relative_humidity_2m": "relative_humidity",
350
+ "rain": "precipitable_water",
351
+ "cloud_cover": "cloud_cover",
352
+ "wind_speed_10m": "wind_speed",
353
+ "shortwave_radiation_instant": "ghi",
354
+ "diffuse_radiation_instant": "dhi",
355
+ "direct_normal_irradiance_instant": "dni",
356
+ }
357
+ )
358
+ if self.logger.isEnabledFor(logging.DEBUG):
359
+ data_15min.to_csv(
360
+ self.emhass_conf["data_path"] / "debug-weather-forecast-open-meteo.csv"
361
+ )
362
+ data = data_15min.reindex(self.forecast_dates)
363
+ data.interpolate(
364
+ method="linear",
365
+ axis=0,
366
+ limit=None,
367
+ limit_direction="both",
368
+ inplace=True,
369
+ )
370
+ data = set_df_index_freq(data)
371
+ index_utc = data.index.tz_convert("utc")
372
+ index_tz = index_utc.round(
373
+ freq=data.index.freq, ambiguous="infer", nonexistent="shift_forward"
374
+ ).tz_convert(self.time_zone)
375
+ data.index = index_tz
376
+ data = set_df_index_freq(data)
377
+ # Convert mm to cm and clip minimum to 0.1 cm
378
+ data["precipitable_water"] = (data["precipitable_water"] / 10).clip(lower=0.1)
379
+ if use_legacy_pvlib:
380
+ data = data.drop(columns=["ghi", "dhi", "dni"])
381
+ ghi_est = self.cloud_cover_to_irradiance(data["cloud_cover"])
382
+ data["ghi"] = ghi_est["ghi"]
383
+ data["dni"] = ghi_est["dni"]
384
+ data["dhi"] = ghi_est["dhi"]
385
+ if self.params["passed_data"].get("weather_forecast_cache", False):
386
+ data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
387
+ else:
388
+ data = await self.get_cached_forecast_data(w_forecast_cache_path)
389
+ return data
390
+
391
+ async def _get_weather_solcast(self, w_forecast_cache_path: str) -> pd.DataFrame:
392
+ """Helper to retrieve weather data from Solcast or cache."""
393
+ if os.path.isfile(w_forecast_cache_path):
394
+ return await self.get_cached_forecast_data(w_forecast_cache_path)
395
+ if self.params["passed_data"].get("weather_forecast_cache_only", False):
396
+ self.logger.error("Unable to obtain Solcast cache file.")
397
+ self.logger.error(
398
+ "Try running optimization again with 'weather_forecast_cache_only': false"
399
+ )
400
+ self.logger.error(
401
+ "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true."
402
+ )
403
+ return False
404
+ if "solcast_api_key" not in self.retrieve_hass_conf:
405
+ self.logger.error("The solcast_api_key parameter was not defined")
406
+ return False
407
+ if "solcast_rooftop_id" not in self.retrieve_hass_conf:
408
+ self.logger.error("The solcast_rooftop_id parameter was not defined")
409
+ return False
410
+ headers = {
411
+ "User-Agent": "EMHASS",
412
+ "Authorization": "Bearer " + self.retrieve_hass_conf["solcast_api_key"],
413
+ "content-type": header_accept,
414
+ }
415
+ days_solcast = int(len(self.forecast_dates) * self.freq.seconds / 3600)
416
+ roof_ids = re.split(r"[,\s]+", self.retrieve_hass_conf["solcast_rooftop_id"].strip())
417
+ total_data_list = [0] * len(self.forecast_dates)
418
+
419
+ async with aiohttp.ClientSession() as session:
420
+ for roof_id in roof_ids:
421
+ url = f"https://api.solcast.com.au/rooftop_sites/{roof_id}/forecasts?hours={days_solcast}"
422
+ async with session.get(url, headers=headers) as response:
423
+ if int(response.status) == 200:
424
+ data = await response.json()
425
+ elif int(response.status) in [402, 429]:
426
+ self.logger.error(
427
+ "Solcast error: May have exceeded your subscription limit."
428
+ )
235
429
  return False
236
- if 'solcast_rooftop_id' not in self.retrieve_hass_conf:
237
- self.logger.error("The solcast_rooftop_id parameter was not defined")
430
+ elif int(response.status) >= 400 or (202 <= int(response.status) <= 299):
431
+ self.logger.error(
432
+ "Solcast error: Issue with request, check API key and rooftop ID."
433
+ )
238
434
  return False
239
- headers = {
240
- 'User-Agent': 'EMHASS',
241
- "Authorization": "Bearer " + self.retrieve_hass_conf['solcast_api_key'],
242
- "content-type": "application/json",
243
- }
244
- days_solcast = int(len(self.forecast_dates)*self.freq.seconds/3600)
245
- # If weather_forecast_cache, set request days as twice as long to avoid length issues (add a buffer)
246
- if self.params["passed_data"]["weather_forecast_cache"]:
247
- days_solcast = min((days_solcast * 2), 336)
248
- url = "https://api.solcast.com.au/rooftop_sites/"+self.retrieve_hass_conf['solcast_rooftop_id']+"/forecasts?hours="+str(days_solcast)
249
- response = get(url, headers=headers)
250
- '''import bz2 # Uncomment to save a serialized data for tests
251
- import _pickle as cPickle
252
- with bz2.BZ2File("data/test_response_solcast_get_method.pbz2", "w") as f:
253
- cPickle.dump(response, f)'''
254
- # Verify the request passed
255
- if int(response.status_code) == 200:
256
- data = response.json()
257
- elif int(response.status_code) == 402 or int(response.status_code) == 429:
258
- self.logger.error("Solcast error: May have exceeded your subscription limit.")
259
- return False
260
- elif int(response.status_code) >= 400 or int(response.status_code) >= 202:
261
- self.logger.error("Solcast error: There was a issue with the solcast request, check solcast API key and rooftop ID.")
262
- self.logger.error("Solcast error: Check that your subscription is valid and your network can connect to Solcast.")
263
- return False
264
435
  data_list = []
265
- for elm in data['forecasts']:
266
- data_list.append(elm['pv_estimate']*1000) # Converting kW to W
267
- # Check if the retrieved data has the correct length
436
+ for elm in data["forecasts"]:
437
+ data_list.append(elm["pv_estimate"] * 1000)
268
438
  if len(data_list) < len(self.forecast_dates):
269
- self.logger.error("Not enough data retried from Solcast service, try increasing the time step or use MPC.")
270
- else:
271
- # If runtime weather_forecast_cache is true save forecast result to file as cache
272
- if self.params["passed_data"]["weather_forecast_cache"]:
273
- # Add x2 forecast periods for cached results. This adds a extra delta_forecast amount of days for a buffer
274
- cached_forecast_dates = self.forecast_dates.union(pd.date_range(self.forecast_dates[-1], periods=(len(self.forecast_dates) +1), freq=self.freq)[1:])
275
- cache_data_list = data_list[0:len(cached_forecast_dates)]
276
- cache_data_dict = {'ts':cached_forecast_dates, 'yhat':cache_data_list}
277
- data_cache = pd.DataFrame.from_dict(cache_data_dict)
278
- data_cache.set_index('ts', inplace=True)
279
- with open(w_forecast_cache_path, "wb") as file:
280
- cPickle.dump(data_cache, file)
281
- if not os.path.isfile(w_forecast_cache_path):
282
- self.logger.warning("Solcast forecast data could not be saved to file.")
283
- else:
284
- self.logger.info("Saved the Solcast results to cache, for later reference.")
285
- # Trim request results to forecast_dates
286
- data_list = data_list[0:len(self.forecast_dates)]
287
- data_dict = {'ts':self.forecast_dates, 'yhat':data_list}
288
- # Define DataFrame
289
- data = pd.DataFrame.from_dict(data_dict)
290
- # Define index
291
- data.set_index('ts', inplace=True)
292
- # Else, notify user to update cache
293
- else:
294
- self.logger.error("Unable to obtain Solcast cache file.")
295
- self.logger.error("Try running optimization again with 'weather_forecast_cache_only': false")
296
- self.logger.error("Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true in an optimization, or run the `forecast-cache` action, to pull new data from Solcast and cache.")
297
- return False
298
- # Else, open stored weather_forecast_data.pkl file for previous forecast data (cached data)
299
- else:
300
- with open(w_forecast_cache_path, "rb") as file:
301
- data = cPickle.load(file)
302
- if not isinstance(data, pd.DataFrame) or len(data) < len(self.forecast_dates):
303
- self.logger.error("There has been a error obtaining cached Solcast forecast data.")
304
- self.logger.error("Try running optimization again with 'weather_forecast_cache': true, or run action `forecast-cache`, to pull new data from Solcast and cache.")
305
- self.logger.warning("Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true")
306
- os.remove(w_forecast_cache_path)
439
+ self.logger.error("Not enough data retrieved from Solcast service.")
307
440
  return False
308
- # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast Dataframe size to appropriate length)
309
- if self.forecast_dates[0] in data.index and self.forecast_dates[-1] in data.index:
310
- data = data.loc[self.forecast_dates[0]:self.forecast_dates[-1]]
311
- self.logger.info("Retrieved Solcast data from the previously saved cache.")
441
+ total_data_list = [
442
+ total + current
443
+ for total, current in zip_longest(total_data_list, data_list, fillvalue=0)
444
+ ]
445
+
446
+ total_data_list = total_data_list[0 : len(self.forecast_dates)]
447
+ data_dict = {"ts": self.forecast_dates, "yhat": total_data_list}
448
+ data = pd.DataFrame.from_dict(data_dict)
449
+ data.set_index("ts", inplace=True)
450
+ if self.params["passed_data"].get("weather_forecast_cache", False):
451
+ data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
452
+ return data
453
+
454
+ async def _get_weather_solar_forecast(self, w_forecast_cache_path: str) -> pd.DataFrame:
455
+ """Helper to retrieve weather data from solar.forecast or cache."""
456
+ if os.path.isfile(w_forecast_cache_path):
457
+ return await self.get_cached_forecast_data(w_forecast_cache_path)
458
+ # Validation and Default Setup
459
+ if "solar_forecast_kwp" not in self.retrieve_hass_conf:
460
+ self.logger.warning(
461
+ "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
462
+ )
463
+ self.retrieve_hass_conf["solar_forecast_kwp"] = 5
464
+ if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
465
+ self.logger.warning(
466
+ "The solar_forecast_kwp parameter is set to zero, setting to default 5"
467
+ )
468
+ self.retrieve_hass_conf["solar_forecast_kwp"] = 5
469
+ if self.optim_conf["delta_forecast_daily"].days > 1:
470
+ self.logger.warning(
471
+ "The free public tier for solar.forecast only provides one day forecasts"
472
+ )
473
+ headers = {"Accept": header_accept}
474
+ data = pd.DataFrame()
475
+
476
+ async with aiohttp.ClientSession() as session:
477
+ for i in range(len(self.plant_conf["pv_module_model"])):
478
+ url = (
479
+ "https://api.forecast.solar/estimate/"
480
+ + str(round(self.lat, 2))
481
+ + "/"
482
+ + str(round(self.lon, 2))
483
+ + "/"
484
+ + str(self.plant_conf["surface_tilt"][i])
485
+ + "/"
486
+ + str(self.plant_conf["surface_azimuth"][i] - 180)
487
+ + "/"
488
+ + str(self.retrieve_hass_conf["solar_forecast_kwp"])
489
+ )
490
+ async with session.get(url, headers=headers) as response:
491
+ data_raw = await response.json()
492
+ data_dict = {
493
+ "ts": list(data_raw["result"]["watts"].keys()),
494
+ "yhat": list(data_raw["result"]["watts"].values()),
495
+ }
496
+ data_tmp = pd.DataFrame.from_dict(data_dict)
497
+ data_tmp.set_index("ts", inplace=True)
498
+ data_tmp.index = pd.to_datetime(data_tmp.index)
499
+ data_tmp = data_tmp.tz_localize(
500
+ self.forecast_dates.tz,
501
+ ambiguous="infer",
502
+ nonexistent="shift_forward",
503
+ )
504
+ data_tmp = data_tmp.reindex(index=self.forecast_dates)
505
+ # Gap filling
506
+ mask_up = data_tmp.copy(deep=True).fillna(method="ffill").isnull()
507
+ mask_down = data_tmp.copy(deep=True).fillna(method="bfill").isnull()
508
+ data_tmp.loc[mask_up["yhat"], :] = 0.0
509
+ data_tmp.loc[mask_down["yhat"], :] = 0.0
510
+ data_tmp.interpolate(inplace=True, limit=1)
511
+ data_tmp = data_tmp.fillna(0.0)
512
+ if len(data) == 0:
513
+ data = copy.deepcopy(data_tmp)
312
514
  else:
313
- self.logger.error("Unable to obtain cached Solcast forecast data within the requested timeframe range.")
314
- self.logger.error("Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from Solcast and cache.")
315
- self.logger.warning("Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true")
316
- os.remove(w_forecast_cache_path)
317
- return False
318
- elif method == 'solar.forecast': # using the solar.forecast API
319
- # Retrieve data from the solar.forecast API
320
- if 'solar_forecast_kwp' not in self.retrieve_hass_conf:
321
- self.logger.warning("The solar_forecast_kwp parameter was not defined, using dummy values for testing")
322
- self.retrieve_hass_conf['solar_forecast_kwp'] = 5
323
- if self.retrieve_hass_conf['solar_forecast_kwp'] == 0:
324
- self.logger.warning("The solar_forecast_kwp parameter is set to zero, setting to default 5")
325
- self.retrieve_hass_conf['solar_forecast_kwp'] = 5
326
- if self.optim_conf['delta_forecast'].days > 1:
327
- self.logger.warning("The free public tier for solar.forecast only provides one day forecasts")
328
- self.logger.warning("Continuing with just the first day of data, the other days are filled with 0.0.")
329
- self.logger.warning("Use the other available methods for delta_forecast > 1")
330
- headers = {
331
- "Accept": "application/json"
332
- }
333
- data = pd.DataFrame()
334
- for i in range(len(self.plant_conf['module_model'])):
335
- url = "https://api.forecast.solar/estimate/"+str(round(self.lat, 2))+"/"+str(round(self.lon, 2))+\
336
- "/"+str(self.plant_conf["surface_tilt"][i])+"/"+str(self.plant_conf["surface_azimuth"][i]-180)+\
337
- "/"+str(self.retrieve_hass_conf["solar_forecast_kwp"])
338
- response = get(url, headers=headers)
339
- '''import bz2 # Uncomment to save a serialized data for tests
340
- import _pickle as cPickle
341
- with bz2.BZ2File("data/test_response_solarforecast_get_method.pbz2", "w") as f:
342
- cPickle.dump(response.json(), f)'''
343
- data_raw = response.json()
344
- data_dict = {'ts':list(data_raw['result']['watts'].keys()), 'yhat':list(data_raw['result']['watts'].values())}
345
- # Form the final DataFrame
346
- data_tmp = pd.DataFrame.from_dict(data_dict)
347
- data_tmp.set_index('ts', inplace=True)
348
- data_tmp.index = pd.to_datetime(data_tmp.index)
349
- data_tmp = data_tmp.tz_localize(self.forecast_dates.tz)
350
- data_tmp = data_tmp.reindex(index=self.forecast_dates)
351
- mask_up_data_df = data_tmp.copy(deep=True).fillna(method = "ffill").isnull()
352
- mask_down_data_df = data_tmp.copy(deep=True).fillna(method = "bfill").isnull()
353
- data_tmp.loc[data_tmp.index[mask_up_data_df['yhat']==True],:] = 0.0
354
- data_tmp.loc[data_tmp.index[mask_down_data_df['yhat']==True],:] = 0.0
355
- data_tmp.interpolate(inplace=True, limit=1)
356
- data_tmp = data_tmp.fillna(0.0)
357
- if len(data) == 0:
358
- data = copy.deepcopy(data_tmp)
359
- else:
360
- data = data + data_tmp
361
- elif method == 'csv': # reading from a csv file
362
- weather_csv_file_path = csv_path
363
- # Loading the csv file, we will consider that this is the PV power in W
364
- data = pd.read_csv(weather_csv_file_path, header=None, names=['ts', 'yhat'])
365
- # Check if the passed data has the correct length
366
- if len(data) < len(self.forecast_dates):
367
- self.logger.error("Passed data from CSV is not long enough")
368
- else:
369
- # Ensure correct length
370
- data = data.loc[data.index[0:len(self.forecast_dates)],:]
371
- # Define index
372
- data.index = self.forecast_dates
373
- data.drop('ts', axis=1, inplace=True)
374
- data = data.copy().loc[self.forecast_dates]
375
- elif method == 'list': # reading a list of values
376
- # Loading data from passed list
377
- data_list = self.params['passed_data']['pv_power_forecast']
378
- # Check if the passed data has the correct length
379
- if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
380
- self.logger.error("Passed data from passed list is not long enough")
381
- else:
382
- # Ensure correct length
383
- data_list = data_list[0:len(self.forecast_dates)]
384
- # Define DataFrame
385
- data_dict = {'ts':self.forecast_dates, 'yhat':data_list}
386
- data = pd.DataFrame.from_dict(data_dict)
387
- # Define index
388
- data.set_index('ts', inplace=True)
515
+ data = data + data_tmp
516
+
517
+ if self.params["passed_data"].get("weather_forecast_cache", False):
518
+ data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
519
+ return data
520
+
521
+ def _get_weather_csv(self, csv_path: str) -> pd.DataFrame:
522
+ """Helper to retrieve weather data from CSV."""
523
+ data = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
524
+ if len(data) < len(self.forecast_dates):
525
+ self.logger.error("Passed data from CSV is not long enough")
526
+ else:
527
+ data = data.loc[data.index[0 : len(self.forecast_dates)], :]
528
+ data.index = self.forecast_dates
529
+ data.drop("ts", axis=1, inplace=True)
530
+ data = data.copy().loc[self.forecast_dates]
531
+ return data
532
+
533
+ def _get_weather_list(self) -> pd.DataFrame:
534
+ """Helper to retrieve weather data from a passed list."""
535
+ data_list = self.params["passed_data"]["pv_power_forecast"]
536
+ if (
537
+ len(data_list) < len(self.forecast_dates)
538
+ and self.params["passed_data"]["prediction_horizon"] is None
539
+ ):
540
+ self.logger.error(error_msg_list_not_long_enough)
541
+ return None
542
+ else:
543
+ data_list = data_list[0 : len(self.forecast_dates)]
544
+ data_dict = {"ts": self.forecast_dates, "yhat": data_list}
545
+ data = pd.DataFrame.from_dict(data_dict)
546
+ data.set_index("ts", inplace=True)
547
+ return data
548
+
549
+ async def get_weather_forecast(
550
+ self,
551
+ method: str | None = "open-meteo",
552
+ csv_path: str | None = "data_weather_forecast.csv",
553
+ use_legacy_pvlib: bool | None = False,
554
+ ) -> pd.DataFrame:
555
+ r"""
556
+ Get and generate weather forecast data.
557
+
558
+ :param method: The desired method, options are 'open-meteo', 'csv', 'list', 'solcast' and \
559
+ 'solar.forecast'. Defaults to 'open-meteo'.
560
+ :type method: str, optional
561
+ :return: The DataFrame containing the forecasted data
562
+ :rtype: pd.DataFrame
563
+ """
564
+ csv_path = self.emhass_conf["data_path"] / csv_path
565
+ w_forecast_cache_path = os.path.abspath(
566
+ self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
567
+ )
568
+ self.logger.info("Retrieving weather forecast data using method = " + method)
569
+ if method == "scrapper":
570
+ self.logger.warning(
571
+ "The scrapper method has been deprecated and the keyword is accepted just for backward compatibility, please change the PV forecast method to open-meteo"
572
+ )
573
+ self.weather_forecast_method = method
574
+ if method in ["open-meteo", "scrapper"]:
575
+ data = await self._get_weather_open_meteo(w_forecast_cache_path, use_legacy_pvlib)
576
+ elif method == "solcast":
577
+ data = await self._get_weather_solcast(w_forecast_cache_path)
578
+ elif method == "solar.forecast":
579
+ data = await self._get_weather_solar_forecast(w_forecast_cache_path)
580
+ elif method == "csv":
581
+ data = self._get_weather_csv(csv_path)
582
+ elif method == "list":
583
+ data = self._get_weather_list()
389
584
  else:
390
585
  self.logger.error("Method %r is not valid", method)
391
586
  data = None
587
+ self.logger.debug("get_weather_forecast returning:\n%s", data)
392
588
  return data
393
-
394
- def cloud_cover_to_irradiance(self, cloud_cover: pd.Series,
395
- offset:Optional[int] = 35) -> pd.DataFrame:
589
+
590
+ def cloud_cover_to_irradiance(
591
+ self, cloud_cover: pd.Series, offset: int | None = 35
592
+ ) -> pd.DataFrame:
396
593
  """
397
594
  Estimates irradiance from cloud cover in the following steps.
398
-
595
+
399
596
  1. Determine clear sky GHI using Ineichen model and
400
597
  climatological turbidity.
401
-
598
+
402
599
  2. Estimate cloudy sky GHI using a function of cloud_cover
403
-
600
+
404
601
  3. Estimate cloudy sky DNI using the DISC model.
405
-
602
+
406
603
  4. Calculate DHI from DNI and GHI.
407
-
604
+
408
605
  (This function was copied and modified from PVLib)
409
606
 
410
607
  :param cloud_cover: Cloud cover in %.
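
Since `get_weather_forecast` is now a coroutine (`async def`), callers must await it. A minimal sketch, reusing the `fcst` instance from the constructor sketch above:

import asyncio

async def main():
    # 'open-meteo' is the new default; 'scrapper' is accepted only as a
    # deprecated alias for backward compatibility.
    df_weather = await fcst.get_weather_forecast(method="open-meteo")
    print(df_weather.head())

asyncio.run(main())
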
@@ -416,21 +613,26 @@ class Forecast(object):
416
613
  """
417
614
  location = Location(latitude=self.lat, longitude=self.lon)
418
615
  solpos = location.get_solarposition(cloud_cover.index)
419
- cs = location.get_clearsky(cloud_cover.index, model='ineichen',
420
- solar_position=solpos)
616
+ cs = location.get_clearsky(cloud_cover.index, model="ineichen", solar_position=solpos)
421
617
  # Using only the linear method
422
- offset = offset / 100.
423
- cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.
424
- ghi = (offset + (1 - offset) * (1 - cloud_cover_unit)) * cs['ghi']
618
+ offset = offset / 100.0
619
+ cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.0
620
+ ghi = (offset + (1 - offset) * (1 - cloud_cover_unit)) * cs["ghi"]
425
621
  # Using disc model
426
- dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni']
427
- dhi = ghi - dni * np.cos(np.radians(solpos['zenith']))
428
- irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0)
622
+ dni = disc(ghi, solpos["zenith"], cloud_cover.index)["dni"]
623
+ dhi = ghi - dni * np.cos(np.radians(solpos["zenith"]))
624
+ irrads = pd.DataFrame({"ghi": ghi, "dni": dni, "dhi": dhi}).fillna(0)
429
625
  return irrads
430
-
626
+
431
627
  @staticmethod
432
- def get_mix_forecast(df_now: pd.DataFrame, df_forecast: pd.DataFrame,
433
- alpha:float, beta:float, col:str) -> pd.DataFrame:
628
+ def get_mix_forecast(
629
+ df_now: pd.DataFrame,
630
+ df_forecast: pd.DataFrame,
631
+ alpha: float,
632
+ beta: float,
633
+ col: str,
634
+ ignore_pv_feedback: bool = False,
635
+ ) -> pd.DataFrame:
434
636
  """A simple correction method for forecasted data using the current real values of a variable.
435
637
 
436
638
  :param df_now: The DataFrame containing the current/real values
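
The linear step of `cloud_cover_to_irradiance` scales clear-sky GHI by `offset + (1 - offset) * (1 - cloud_cover)`. A worked sketch with the default 35% offset and an illustrative clear-sky value:

# At 50% cloud cover the clear-sky GHI is scaled by
# 0.35 + (1 - 0.35) * (1 - 0.5) = 0.675.
offset = 35 / 100.0
cloud_cover_fraction = 0.5
ghi_clear_sky = 800.0  # W/m^2, illustrative clear-sky value
ghi = (offset + (1 - offset) * (1 - cloud_cover_fraction)) * ghi_clear_sky
print(ghi)  # 540.0
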
@@ -443,127 +645,521 @@ class Forecast(object):
443
645
  :type beta: float
444
646
  :param col: The column variable name
445
647
  :type col: str
648
+ :param ignore_pv_feedback: If True, bypass mixing and return original forecast (used during curtailment)
649
+ :type ignore_pv_feedback: bool
446
650
  :return: The output DataFrame with the corrected values
447
651
  :rtype: pd.DataFrame
448
652
  """
449
- first_fcst = alpha*df_forecast.iloc[0] + beta*df_now[col].iloc[-1]
450
- df_forecast.iloc[0] = first_fcst
653
+ # If ignoring PV feedback (e.g., during curtailment), return original forecast
654
+ if ignore_pv_feedback:
655
+ return df_forecast
656
+
657
+ first_fcst = alpha * df_forecast.iloc[0] + beta * df_now[col].iloc[-1]
658
+ df_forecast.iloc[0] = int(round(first_fcst))
451
659
  return df_forecast
452
-
453
- def get_power_from_weather(self, df_weather: pd.DataFrame,
454
- set_mix_forecast:Optional[bool] = False,
455
- df_now:Optional[pd.DataFrame] = pd.DataFrame()) -> pd.Series:
660
+
661
+ def _get_model_power(self, params, device_type):
662
+ """
663
+ Helper to extract power rating based on device type and available parameters.
664
+ """
665
+ if device_type == "module":
666
+ if "STC" in params:
667
+ return params["STC"]
668
+ if "I_mp_ref" in params and "V_mp_ref" in params:
669
+ return params["I_mp_ref"] * params["V_mp_ref"]
670
+ elif device_type == "inverter":
671
+ if "Paco" in params:
672
+ return params["Paco"]
673
+ if "Pdco" in params:
674
+ return params["Pdco"]
675
+ return None
676
+
677
+ def _find_closest_model(self, target_power, database, device_type):
678
+ """
679
+ Find the model in the database that has a power rating closest to the target_power.
680
+ """
681
+ closest_model = None
682
+ min_diff = float("inf")
683
+ # Handle DataFrame (columns are models) or Dict (keys are models)
684
+ iterator = database.items() if hasattr(database, "items") else database.iteritems()
685
+ for _, params in iterator:
686
+ power = self._get_model_power(params, device_type)
687
+ if power is not None:
688
+ diff = abs(power - target_power)
689
+ if diff < min_diff:
690
+ min_diff = diff
691
+ closest_model = params
692
+ if closest_model is not None:
693
+ # Safely get name if it exists (DataFrame Series usually have a .name attribute)
694
+ model_name = getattr(closest_model, "name", "unknown")
695
+ self.logger.info(f"Closest {device_type} model to {target_power}W found: {model_name}")
696
+ else:
697
+ self.logger.warning(f"No suitable {device_type} model found close to {target_power}W")
698
+ return closest_model
699
+
700
+ def _get_model(self, model_spec, database, device_type):
701
+ """
702
+ Retrieve a model from the database by name or by power rating.
703
+ """
704
+ # If it's a string, try to find it by name
705
+ if isinstance(model_spec, str):
706
+ if model_spec in database:
707
+ return database[model_spec]
708
+ # If not found by name, check if it is a number string (e.g., "300")
709
+ try:
710
+ target_power = float(model_spec)
711
+ return self._find_closest_model(target_power, database, device_type)
712
+ except ValueError:
713
+ # Not a number, fallback to original behavior (will likely raise KeyError later)
714
+ self.logger.warning(f"{device_type} model '{model_spec}' not found in database.")
715
+ return database[model_spec]
716
+ # If it's a number (int or float), find closest by power
717
+ elif isinstance(model_spec, int | float):
718
+ return self._find_closest_model(model_spec, database, device_type)
719
+ else:
720
+ self.logger.error(f"Invalid type for {device_type} model: {type(model_spec)}")
721
+ return None
722
+
723
+ def _calculate_pvlib_power(self, df_weather: pd.DataFrame) -> pd.Series:
724
+ """
725
+ Helper to simulate PV power generation using PVLib when no direct forecast is available.
726
+ """
727
+ # Setting the main parameters of the PV plant
728
+ location = Location(latitude=self.lat, longitude=self.lon)
729
+ temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"]["close_mount_glass_glass"]
730
+ # Load CEC databases
731
+ cec_modules_path = self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2"
732
+ cec_inverters_path = self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2"
733
+ with bz2.BZ2File(cec_modules_path, "rb") as f:
734
+ cec_modules = cPickle.load(f)
735
+ with bz2.BZ2File(cec_inverters_path, "rb") as f:
736
+ cec_inverters = cPickle.load(f)
737
+
738
+ # Inner helper to run a single simulation configuration
739
+ def run_single_config(mod_spec, inv_spec, tilt, azimuth, mod_per_str, str_per_inv):
740
+ module = self._get_model(mod_spec, cec_modules, "module")
741
+ inverter = self._get_model(inv_spec, cec_inverters, "inverter")
742
+ system = PVSystem(
743
+ surface_tilt=tilt,
744
+ surface_azimuth=azimuth,
745
+ module_parameters=module,
746
+ inverter_parameters=inverter,
747
+ temperature_model_parameters=temp_params,
748
+ modules_per_string=mod_per_str,
749
+ strings_per_inverter=str_per_inv,
750
+ )
751
+ mc = ModelChain(system, location, aoi_model="physical")
752
+ mc.run_model(df_weather)
753
+ return mc.results.ac
754
+
755
+ # Handle list (mixed orientation) vs single configuration
756
+ if isinstance(self.plant_conf["pv_module_model"], list):
757
+ p_pv_forecast = pd.Series(0, index=df_weather.index)
758
+ for i in range(len(self.plant_conf["pv_module_model"])):
759
+ result = run_single_config(
760
+ self.plant_conf["pv_module_model"][i],
761
+ self.plant_conf["pv_inverter_model"][i],
762
+ self.plant_conf["surface_tilt"][i],
763
+ self.plant_conf["surface_azimuth"][i],
764
+ self.plant_conf["modules_per_string"][i],
765
+ self.plant_conf["strings_per_inverter"][i],
766
+ )
767
+ p_pv_forecast = p_pv_forecast + result
768
+ else:
769
+ p_pv_forecast = run_single_config(
770
+ self.plant_conf["pv_module_model"],
771
+ self.plant_conf["pv_inverter_model"],
772
+ self.plant_conf["surface_tilt"],
773
+ self.plant_conf["surface_azimuth"],
774
+ self.plant_conf["modules_per_string"],
775
+ self.plant_conf["strings_per_inverter"],
776
+ )
777
+ return p_pv_forecast
778
+
779
+ def get_power_from_weather(
780
+ self,
781
+ df_weather: pd.DataFrame,
782
+ set_mix_forecast: bool | None = False,
783
+ df_now: pd.DataFrame | None = pd.DataFrame(),
784
+ ) -> pd.Series:
456
785
  r"""
457
- Convert wheater forecast data into electrical power.
458
-
786
+ Convert weather forecast data into electrical power.
787
+
459
788
  :param df_weather: The DataFrame containing the weather forecasted data. \
460
789
  This DF should be generated by the 'get_weather_forecast' method or at \
461
790
  least contain the same columns names filled with proper data.
462
791
  :type df_weather: pd.DataFrame
463
- :param set_mix_forecast: Use a mixed forcast strategy to integra now/current values.
792
+ :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
464
793
  :type set_mix_forecast: Bool, optional
465
794
  :param df_now: The DataFrame containing the now/current data.
466
795
  :type df_now: pd.DataFrame
467
796
  :return: The DataFrame containing the electrical power in Watts
468
797
  :rtype: pd.DataFrame
469
-
470
798
  """
471
799
  # If using csv method we consider that yhat is the PV power in W
472
- if "solar_forecast_kwp" in self.retrieve_hass_conf.keys() and self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
473
- P_PV_forecast = pd.Series(0, index=df_weather.index)
800
+ if (
801
+ "solar_forecast_kwp" in self.retrieve_hass_conf.keys()
802
+ and self.retrieve_hass_conf["solar_forecast_kwp"] == 0
803
+ ):
804
+ p_pv_forecast = pd.Series(0, index=df_weather.index)
805
+ elif self.weather_forecast_method in [
806
+ "solcast",
807
+ "solar.forecast",
808
+ "csv",
809
+ "list",
810
+ ]:
811
+ p_pv_forecast = df_weather["yhat"]
812
+ p_pv_forecast.name = None
474
813
  else:
475
- if self.weather_forecast_method == 'solcast' or self.weather_forecast_method == 'solar.forecast' or \
476
- self.weather_forecast_method == 'csv' or self.weather_forecast_method == 'list':
477
- P_PV_forecast = df_weather['yhat']
478
- P_PV_forecast.name = None
479
- else: # We will transform the weather data into electrical power
480
- # Transform to power (Watts)
481
- # Setting the main parameters of the PV plant
482
- location = Location(latitude=self.lat, longitude=self.lon)
483
- temp_params = TEMPERATURE_MODEL_PARAMETERS['sapm']['close_mount_glass_glass']
484
- cec_modules = bz2.BZ2File(self.emhass_conf['root_path'] / 'data' / 'cec_modules.pbz2', "rb")
485
- cec_modules = cPickle.load(cec_modules)
486
- cec_inverters = bz2.BZ2File(self.emhass_conf['root_path'] / 'data' / 'cec_inverters.pbz2', "rb")
487
- cec_inverters = cPickle.load(cec_inverters)
488
- if type(self.plant_conf['module_model']) == list:
489
- P_PV_forecast = pd.Series(0, index=df_weather.index)
490
- for i in range(len(self.plant_conf['module_model'])):
491
- # Selecting correct module and inverter
492
- module = cec_modules[self.plant_conf['module_model'][i]]
493
- inverter = cec_inverters[self.plant_conf['inverter_model'][i]]
494
- # Building the PV system in PVLib
495
- system = PVSystem(surface_tilt=self.plant_conf['surface_tilt'][i],
496
- surface_azimuth=self.plant_conf['surface_azimuth'][i],
497
- module_parameters=module,
498
- inverter_parameters=inverter,
499
- temperature_model_parameters=temp_params,
500
- modules_per_string=self.plant_conf['modules_per_string'][i],
501
- strings_per_inverter=self.plant_conf['strings_per_inverter'][i])
502
- mc = ModelChain(system, location, aoi_model="physical")
503
- # Run the model on the weather DF indexes
504
- mc.run_model(df_weather)
505
- # Extracting results for AC power
506
- P_PV_forecast = P_PV_forecast + mc.results.ac
507
- else:
508
- # Selecting correct module and inverter
509
- module = cec_modules[self.plant_conf['module_model']]
510
- inverter = cec_inverters[self.plant_conf['inverter_model']]
511
- # Building the PV system in PVLib
512
- system = PVSystem(surface_tilt=self.plant_conf['surface_tilt'],
513
- surface_azimuth=self.plant_conf['surface_azimuth'],
514
- module_parameters=module,
515
- inverter_parameters=inverter,
516
- temperature_model_parameters=temp_params,
517
- modules_per_string=self.plant_conf['modules_per_string'],
518
- strings_per_inverter=self.plant_conf['strings_per_inverter'])
519
- mc = ModelChain(system, location, aoi_model="physical")
520
- # Run the model on the weather DF indexes
521
- mc.run_model(df_weather)
522
- # Extracting results for AC power
523
- P_PV_forecast = mc.results.ac
814
+ # We will transform the weather data into electrical power
815
+ p_pv_forecast = self._calculate_pvlib_power(df_weather)
524
816
  if set_mix_forecast:
525
- P_PV_forecast = Forecast.get_mix_forecast(
526
- df_now, P_PV_forecast,
527
- self.params['passed_data']['alpha'], self.params['passed_data']['beta'], self.var_PV)
528
- return P_PV_forecast
529
-
530
- def get_forecast_days_csv(self, timedelta_days: Optional[int] = 1) -> pd.date_range:
817
+ ignore_pv_feedback = self.params["passed_data"].get(
818
+ "ignore_pv_feedback_during_curtailment", False
819
+ )
820
+ p_pv_forecast = Forecast.get_mix_forecast(
821
+ df_now,
822
+ p_pv_forecast,
823
+ self.params["passed_data"]["alpha"],
824
+ self.params["passed_data"]["beta"],
825
+ self.var_pv,
826
+ ignore_pv_feedback,
827
+ )
828
+ p_pv_forecast[p_pv_forecast < 0] = 0 # replace any negative PV values with zero
829
+ self.logger.debug("get_power_from_weather returning:\n%s", p_pv_forecast)
830
+ return p_pv_forecast
831
+
832
+ @staticmethod
833
+ def compute_solar_angles(df: pd.DataFrame, latitude: float, longitude: float) -> pd.DataFrame:
834
+ """
835
+ Compute solar angles (elevation, azimuth) based on timestamps and location.
836
+
837
+ :param df: DataFrame with a DateTime index.
838
+ :param latitude: Latitude of the PV system.
839
+ :param longitude: Longitude of the PV system.
840
+ :return: DataFrame with added solar elevation and azimuth.
841
+ """
842
+ df = df.copy()
843
+ solpos = get_solarposition(df.index, latitude, longitude)
844
+ df["solar_elevation"] = solpos["elevation"]
845
+ df["solar_azimuth"] = solpos["azimuth"]
846
+ return df
847
+
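A minimal standalone sketch of the feature computation above, using pvlib directly; the index, latitude and longitude are illustrative values only.

    import pandas as pd
    from pvlib.solarposition import get_solarposition

    idx = pd.date_range("2024-06-21 00:00", periods=48, freq="30min", tz="Europe/Paris")
    df = pd.DataFrame(index=idx)
    # get_solarposition returns, among others, 'elevation' and 'azimuth' columns
    solpos = get_solarposition(idx, latitude=45.83, longitude=6.86)
    df["solar_elevation"] = solpos["elevation"]
    df["solar_azimuth"] = solpos["azimuth"]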
848
+ def adjust_pv_forecast_data_prep(self, data: pd.DataFrame) -> pd.DataFrame:
849
+ """
850
+ Prepare data for adjusting the photovoltaic (PV) forecast.
851
+
852
+ This method aligns the actual PV production data with the forecasted data,
853
+ adds additional features for analysis, and separates the predictors (X)
854
+ from the target variable (y).
855
+
856
+ :param data: A DataFrame containing the actual PV production data and the
857
+ forecasted PV production data.
858
+ :type data: pd.DataFrame
859
+ :return: DataFrame with data for adjusted PV model train.
860
+ """
861
+ # Extract target and predictor
862
+ self.logger.debug("adjust_pv_forecast_data_prep using data:\n%s", data)
863
+ if self.logger.isEnabledFor(logging.DEBUG):
864
+ data.to_csv(
865
+ self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-input-data.csv"
866
+ )
867
+ P_PV = data[self.var_pv] # Actual PV production
868
+ p_pv_forecast = data[self.var_pv_forecast] # Forecasted PV production
869
+ # Define time ranges
870
+ last_day = data.index.max().normalize() # Last available day
871
+ three_months_ago = last_day - pd.DateOffset(
872
+ days=self.retrieve_hass_conf["historic_days_to_retrieve"]
873
+ )
874
+ # Train/Test: Last historic_days_to_retrieve days (excluding the last day)
875
+ train_test_mask = (data.index >= three_months_ago) & (data.index < last_day)
876
+ self.p_pv_train_test = P_PV[train_test_mask]
877
+ self.p_pv_forecast_train_test = p_pv_forecast[train_test_mask]
878
+ # Validation: Last day only
879
+ validation_mask = data.index >= last_day
880
+ self.p_pv_validation = P_PV[validation_mask]
881
+ self.p_pv_forecast_validation = p_pv_forecast[validation_mask]
882
+ # Ensure data is aligned
883
+ self.data_adjust_pv = pd.concat(
884
+ [P_PV.rename("actual"), p_pv_forecast.rename("forecast")], axis=1
885
+ ).dropna()
886
+ # Add more features
887
+ self.data_adjust_pv = add_date_features(self.data_adjust_pv)
888
+ self.data_adjust_pv = Forecast.compute_solar_angles(self.data_adjust_pv, self.lat, self.lon)
889
+ # Features (X) and target (y)
890
+ self.x_adjust_pv = self.data_adjust_pv.drop(columns=["actual"]) # Predictors
891
+ self.y_adjust_pv = self.data_adjust_pv["actual"] # Target: actual PV production
892
+ self.logger.debug("adjust_pv_forecast_data_prep output data:\n%s", self.data_adjust_pv)
893
+ if self.logger.isEnabledFor(logging.DEBUG):
894
+ self.data_adjust_pv.to_csv(
895
+ self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-output-data.csv"
896
+ )
897
+
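A toy illustration of the split performed above, assuming a hypothetical 10-day history at 30-minute resolution: the last available day is held out for validation, while the preceding days (the historic_days_to_retrieve window) form the train/test set.

    import pandas as pd

    idx = pd.date_range("2024-01-01", periods=10 * 48, freq="30min", tz="UTC")
    data = pd.DataFrame({"power": range(len(idx))}, index=idx)
    last_day = data.index.max().normalize()           # last available day
    history_start = last_day - pd.DateOffset(days=9)  # hypothetical retrieve window
    train_test_mask = (data.index >= history_start) & (data.index < last_day)
    validation_mask = data.index >= last_day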
898
+ async def adjust_pv_forecast_fit(
899
+ self,
900
+ n_splits: int = 5,
901
+ regression_model: str = "LassoRegression",
902
+ debug: bool | None = False,
903
+ ) -> pd.DataFrame:
904
+ """
905
+ Fit a regression model to adjust the photovoltaic (PV) forecast.
906
+
907
+ This method uses historical actual and forecasted PV production data, along with
908
+ additional solar and date features, to train a regression model. The model is
909
+ optimized using a grid search with time-series cross-validation.
910
+
911
+ :param n_splits: The number of splits for time-series cross-validation, defaults to 5.
912
+ :type n_splits: int, optional
913
+ :param regression_model: The type of regression model to use. See REGRESSION_METHODS \
914
+ in machine_learning_regressor.py for the authoritative list of supported models. \
915
+ Currently: 'LinearRegression', 'RidgeRegression', 'LassoRegression', 'ElasticNet', \
916
+ 'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'RandomForestRegressor', \
917
+ 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', \
918
+ 'MLPRegressor'. Defaults to "LassoRegression".
919
+ :type regression_model: str, optional
920
+ :param debug: If True, the model is not saved to disk, useful for debugging, defaults to False.
921
+ :type debug: bool, optional
922
+ :return: A DataFrame containing the adjusted PV forecast.
923
+ :rtype: pd.DataFrame
924
+ """
925
+ # Get regression model and hyperparameter grid
926
+ mlr = MLRegressor(
927
+ self.data_adjust_pv,
928
+ "adjusted_pv_forecast",
929
+ regression_model,
930
+ list(self.x_adjust_pv.columns),
931
+ [self.y_adjust_pv.name],  # the target name wrapped in a list (list() on a string would split it into characters)
932
+ None,
933
+ self.logger,
934
+ )
935
+ pipeline, param_grid = mlr._get_model_and_params()
936
+ # Time-series split
937
+ tscv = TimeSeriesSplit(n_splits=n_splits)
938
+ grid_search = GridSearchCV(
939
+ pipeline, param_grid, cv=tscv, scoring="neg_mean_squared_error", verbose=0
940
+ )
941
+ # Train model
942
+ await asyncio.to_thread(grid_search.fit, self.x_adjust_pv, self.y_adjust_pv)
943
+ self.model_adjust_pv = grid_search.best_estimator_
944
+ # Calculate training metrics
945
+ y_pred_train = self.model_adjust_pv.predict(self.x_adjust_pv)
946
+ self.rmse = np.sqrt(mean_squared_error(self.y_adjust_pv, y_pred_train))
947
+ self.r2 = r2_score(self.y_adjust_pv, y_pred_train)
948
+ # Log the metrics
949
+ self.logger.info(f"PV adjust Training metrics: RMSE = {self.rmse}, R2 = {self.r2}")
950
+ # Save model
951
+ if not debug:
952
+ filename = "adjust_pv_regressor.pkl"
953
+ filename_path = self.emhass_conf["data_path"] / filename
954
+ async with aiofiles.open(filename_path, "wb") as outp:
955
+ await outp.write(pickle.dumps(self.model_adjust_pv, pickle.HIGHEST_PROTOCOL))
956
+
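The grid search itself is plain scikit-learn; a self-contained sketch with a stand-in pipeline and parameter grid (the real ones come from MLRegressor._get_model_and_params()):

    import numpy as np
    from sklearn.linear_model import Lasso
    from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X = np.random.rand(200, 4)
    y = X @ np.array([3.0, 0.0, -2.0, 0.5]) + 0.1 * np.random.randn(200)
    pipeline = make_pipeline(StandardScaler(), Lasso(max_iter=10000))
    param_grid = {"lasso__alpha": [0.001, 0.01, 0.1, 1.0]}
    grid_search = GridSearchCV(pipeline, param_grid, cv=TimeSeriesSplit(n_splits=5),
                               scoring="neg_mean_squared_error", verbose=0)
    grid_search.fit(X, y)  # time-ordered folds: each split trains on the past only
    print(grid_search.best_params_)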
957
+ def adjust_pv_forecast_predict(self, forecasted_pv: pd.DataFrame | None = None) -> pd.DataFrame:
958
+ """
959
+ Predict the adjusted photovoltaic (PV) forecast.
960
+
961
+ This method uses the trained regression model to predict the adjusted PV forecast
962
+ based on either the validation data stored in `self` or a new forecasted PV data
963
+ passed as input. It applies additional features such as date and solar angles to
964
+ the forecasted PV production data before making predictions. The solar elevation
965
+ is used to zero out nighttime values and to taper the forecast at the beginning and end of the day.
966
+
967
+ :param forecasted_pv: Optional. A DataFrame containing the forecasted PV production data.
968
+ It must have a DateTime index and a column named "forecast".
969
+ If not provided, the method will use `self.p_pv_forecast_validation`.
970
+ :type forecasted_pv: pd.DataFrame, optional
971
+ :return: A DataFrame containing the adjusted PV forecast with additional features.
972
+ :rtype: pd.DataFrame
973
+ """
974
+ # Use the provided forecasted PV data or fall back to the validation data in `self`
975
+ if forecasted_pv is not None:
976
+ # Ensure the input DataFrame has the required structure
977
+ if "forecast" not in forecasted_pv.columns:
978
+ raise ValueError("The input DataFrame must contain a 'forecast' column.")
979
+ forecast_data = forecasted_pv.copy()
980
+ else:
981
+ # Use the validation data stored in `self`
982
+ forecast_data = self.p_pv_forecast_validation.rename("forecast").to_frame()
983
+ # Prepare the forecasted PV data
984
+ forecast_data = add_date_features(forecast_data)
985
+ forecast_data = Forecast.compute_solar_angles(forecast_data, self.lat, self.lon)
986
+ # Predict the adjusted forecast
987
+ forecast_data["adjusted_forecast"] = self.model_adjust_pv.predict(forecast_data)
988
+
989
+ # Apply solar elevation weighting only for specific cases
990
+ def apply_weighting(row):
991
+ if row["solar_elevation"] <= 0: # Nighttime or negative solar elevation
992
+ return 0
993
+ elif (
994
+ row["solar_elevation"] < self.optim_conf["adjusted_pv_solar_elevation_threshold"]
995
+ ): # Early morning or late evening
996
+ return max(
997
+ row["adjusted_forecast"]
998
+ * (
999
+ row["solar_elevation"]
1000
+ / self.optim_conf["adjusted_pv_solar_elevation_threshold"]
1001
+ ),
1002
+ 0,
1003
+ )
1004
+ else: # Daytime with sufficient solar elevation
1005
+ return row["adjusted_forecast"]
1006
+
1007
+ forecast_data["adjusted_forecast"] = forecast_data.apply(apply_weighting, axis=1)
1008
+ # If using validation data, calculate validation metrics
1009
+ if forecasted_pv is None:
1010
+ y_true = self.p_pv_validation.values
1011
+ y_pred = forecast_data["adjusted_forecast"].values
1012
+ self.validation_rmse = np.sqrt(mean_squared_error(y_true, y_pred))
1013
+ self.validation_r2 = r2_score(y_true, y_pred)
1014
+ # Log the validation metrics
1015
+ self.logger.info(
1016
+ f"PV adjust Validation metrics: RMSE = {self.validation_rmse}, R2 = {self.validation_r2}"
1017
+ )
1018
+ self.logger.debug("adjust_pv_forecast_predict forecast data:\n%s", forecast_data)
1019
+ if self.logger.isEnabledFor(logging.DEBUG):
1020
+ forecast_data.to_csv(
1021
+ self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-predict-forecast-data.csv"
1022
+ )
1023
+ # Return the DataFrame with the adjusted forecast
1024
+ return forecast_data
1025
+
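A worked example of the solar-elevation weighting rule applied above, with a hypothetical 10-degree threshold (the actual value comes from adjusted_pv_solar_elevation_threshold):

    def apply_weighting_example(adjusted_forecast, solar_elevation, threshold=10.0):
        if solar_elevation <= 0:         # nighttime: force zero
            return 0.0
        if solar_elevation < threshold:  # dawn/dusk: scale linearly with elevation
            return max(adjusted_forecast * (solar_elevation / threshold), 0.0)
        return adjusted_forecast         # full daylight: keep the model output

    # 600 W predicted at 4 degrees elevation is tapered to 600 * 4/10 = 240 W
    print(apply_weighting_example(600.0, 4.0))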
1026
+ def get_forecast_days_csv(self, timedelta_days: int | None = 1) -> pd.date_range:
531
1027
  r"""
532
1028
  Get the date range vector of forecast dates that will be used when loading a CSV file.
533
-
1029
+
534
1030
  :return: The forecast dates vector
535
1031
  :rtype: pd.date_range
536
1032
 
537
1033
  """
538
1034
  start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
539
- if self.method_ts_round == 'nearest':
540
- start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
541
- elif self.method_ts_round == 'first':
542
- start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
543
- elif self.method_ts_round == 'last':
544
- start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
1035
+ if self.method_ts_round == "nearest":
1036
+ start_forecast_csv = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
1037
+ elif self.method_ts_round == "first":
1038
+ start_forecast_csv = (
1039
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
1040
+ )
1041
+ elif self.method_ts_round == "last":
1042
+ start_forecast_csv = (
1043
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
1044
+ )
545
1045
  else:
546
1046
  self.logger.error("Wrong method_ts_round passed parameter")
547
- end_forecast_csv = (start_forecast_csv + self.optim_conf['delta_forecast']).replace(microsecond=0)
548
- forecast_dates_csv = pd.date_range(start=start_forecast_csv,
549
- end=end_forecast_csv+timedelta(days=timedelta_days)-self.freq,
550
- freq=self.freq).round(self.freq, ambiguous='infer', nonexistent='shift_forward')
551
- if self.params is not None:
552
- if 'prediction_horizon' in list(self.params['passed_data'].keys()):
553
- if self.params['passed_data']['prediction_horizon'] is not None:
554
- forecast_dates_csv = forecast_dates_csv[0:self.params['passed_data']['prediction_horizon']]
1047
+ end_forecast_csv = (start_forecast_csv + self.optim_conf["delta_forecast_daily"]).replace(
1048
+ microsecond=0
1049
+ )
1050
+ forecast_dates_csv = (
1051
+ pd.date_range(
1052
+ start=start_forecast_csv,
1053
+ end=end_forecast_csv + timedelta(days=timedelta_days) - self.freq,
1054
+ freq=self.freq,
1055
+ tz=self.time_zone,
1056
+ )
1057
+ .tz_convert("utc")
1058
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
1059
+ .tz_convert(self.time_zone)
1060
+ )
1061
+ if (
1062
+ self.params is not None
1063
+ and "prediction_horizon" in list(self.params["passed_data"].keys())
1064
+ and self.params["passed_data"]["prediction_horizon"] is not None
1065
+ ):
1066
+ forecast_dates_csv = forecast_dates_csv[
1067
+ 0 : self.params["passed_data"]["prediction_horizon"]
1068
+ ]
555
1069
  return forecast_dates_csv
556
-
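The three rounding modes differ only in how the first timestamp is snapped to the optimization grid; a quick sketch with a 30-minute step:

    import pandas as pd

    now = pd.Timestamp.now(tz="Europe/Paris").replace(microsecond=0)
    print(now)                  # method_ts_round == "nearest": raw timestamp
    print(now.floor("30min"))   # method_ts_round == "first": snap down
    print(now.ceil("30min"))    # method_ts_round == "last": snap up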
557
- def get_forecast_out_from_csv_or_list(self, df_final: pd.DataFrame, forecast_dates_csv: pd.date_range,
558
- csv_path: str, data_list: Optional[list] = None,
559
- list_and_perfect: Optional[bool] = False) -> pd.DataFrame:
1070
+
1071
+ def _load_forecast_data(
1072
+ self,
1073
+ csv_path: str,
1074
+ data_list: list | None,
1075
+ forecast_dates_csv: pd.date_range,
1076
+ ) -> pd.DataFrame:
1077
+ """
1078
+ Helper to load and format forecast data from a CSV file or a list.
1079
+ """
1080
+ if csv_path is None:
1081
+ data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
1082
+ df_csv = pd.DataFrame.from_dict(data_dict)
1083
+ df_csv.index = forecast_dates_csv
1084
+ df_csv = df_csv.drop(["ts"], axis=1)
1085
+ df_csv = set_df_index_freq(df_csv)
1086
+ else:
1087
+ if not os.path.exists(csv_path):
1088
+ csv_path = self.emhass_conf["data_path"] / csv_path
1089
+ df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
1090
+ # Check if first column is a valid datetime
1091
+ first_col = df_csv.iloc[:, 0]
1092
+ if pd.to_datetime(first_col, errors="coerce").notna().all():
1093
+ df_csv["ts"] = pd.to_datetime(df_csv["ts"], utc=True)
1094
+ df_csv.set_index("ts", inplace=True)
1095
+ df_csv.index = df_csv.index.tz_convert(self.time_zone)
1096
+ else:
1097
+ df_csv.index = forecast_dates_csv
1098
+ df_csv = df_csv.drop(["ts"], axis=1)
1099
+ df_csv = set_df_index_freq(df_csv)
1100
+ return df_csv
1101
+
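The CSV branch expects a headerless two-column file (timestamp, value); a hypothetical file and the timestamp-detection logic in isolation:

    import pandas as pd

    # data_load_cost_forecast.csv (hypothetical contents):
    # 2024-06-21T00:00:00+00:00,0.19
    # 2024-06-21T00:30:00+00:00,0.19
    df_csv = pd.read_csv("data_load_cost_forecast.csv", header=None, names=["ts", "yhat"])
    if pd.to_datetime(df_csv["ts"], errors="coerce").notna().all():
        # valid timestamps: use them as a timezone-aware index
        df_csv["ts"] = pd.to_datetime(df_csv["ts"], utc=True)
        df_csv = df_csv.set_index("ts")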
1102
+ def _extract_daily_forecast(
1103
+ self,
1104
+ day: int,
1105
+ df_timing: pd.DataFrame,
1106
+ df_csv: pd.DataFrame,
1107
+ csv_path: str,
1108
+ list_and_perfect: bool,
1109
+ ) -> pd.DataFrame:
1110
+ """
1111
+ Helper to extract a specific day's forecast data based on timing configuration.
1112
+ """
1113
+ # Find the start and end indices for the specific day in the timing DataFrame
1114
+ day_mask = df_timing.index.day == day
1115
+ day_indices = [i for i, x in enumerate(day_mask) if x]
1116
+ first_elm_index = day_indices[0]
1117
+ last_elm_index = day_indices[-1]
1118
+ # Define the target forecast index based on the timing DataFrame
1119
+ fcst_index = pd.date_range(
1120
+ start=df_timing.index[first_elm_index],
1121
+ end=df_timing.index[last_elm_index],
1122
+ freq=df_timing.index.freq,
1123
+ )
1124
+ first_hour = f"{df_timing.index[first_elm_index].hour:02d}:{df_timing.index[first_elm_index].minute:02d}"
1125
+ last_hour = f"{df_timing.index[last_elm_index].hour:02d}:{df_timing.index[last_elm_index].minute:02d}"
1126
+ # Extract data
1127
+ if csv_path is None:
1128
+ if list_and_perfect:
1129
+ values_array = df_csv.between_time(first_hour, last_hour).values
1130
+ # Adjust index length if necessary
1131
+ fcst_index = fcst_index[0 : len(values_array)]
1132
+ return pd.DataFrame(values_array, index=fcst_index)
1133
+ else:
1134
+ return pd.DataFrame(
1135
+ df_csv.loc[fcst_index, :].between_time(first_hour, last_hour).values,
1136
+ index=fcst_index,
1137
+ )
1138
+ else:
1139
+ # For CSV path, filter by date string first
1140
+ df_csv_filtered_date = df_csv.loc[
1141
+ df_csv.index.strftime("%Y-%m-%d") == fcst_index[0].date().strftime("%Y-%m-%d")
1142
+ ]
1143
+ return pd.DataFrame(
1144
+ df_csv_filtered_date.between_time(first_hour, last_hour).values,
1145
+ index=fcst_index,
1146
+ )
1147
+
1148
+ def get_forecast_out_from_csv_or_list(
1149
+ self,
1150
+ df_final: pd.DataFrame,
1151
+ forecast_dates_csv: pd.date_range,
1152
+ csv_path: str,
1153
+ data_list: list | None = None,
1154
+ list_and_perfect: bool | None = False,
1155
+ ) -> pd.DataFrame:
560
1156
  r"""
561
- Get the forecast data as a DataFrame from a CSV file.
562
-
563
- The data contained in the CSV file should be a 24h forecast with the same frequency as
564
- the main 'freq' parameter in the configuration file. The timestamp will not be used and
1157
+ Get the forecast data as a DataFrame from a CSV file.
1158
+
1159
+ The data contained in the CSV file should be a 24h forecast with the same frequency as
1160
+ the main 'optimization_time_step' parameter in the configuration file. The timestamp will not be used and
565
1161
  a new DateTimeIndex is generated to fit the timestamp index of the input data in 'df_final'.
566
-
1162
+
567
1163
  :param df_final: The DataFrame containing the input data.
568
1164
  :type df_final: pd.DataFrame
569
1165
  :param forecast_dates_csv: The forecast dates vector
@@ -574,93 +1170,294 @@ class Forecast(object):
574
1170
  :rtype: pd.DataFrame
575
1171
 
576
1172
  """
577
- if csv_path is None:
578
- data_dict = {'ts':forecast_dates_csv, 'yhat':data_list}
579
- df_csv = pd.DataFrame.from_dict(data_dict)
580
- df_csv.index = forecast_dates_csv
581
- df_csv.drop(['ts'], axis=1, inplace=True)
582
- df_csv = set_df_index_freq(df_csv)
583
- if list_and_perfect:
584
- days_list = df_final.index.day.unique().tolist()
585
- else:
586
- days_list = df_csv.index.day.unique().tolist()
587
- else:
588
- if not os.path.exists(csv_path):
589
- csv_path = self.emhass_conf['data_path'] / csv_path
590
- load_csv_file_path = csv_path
591
- df_csv = pd.read_csv(load_csv_file_path, header=None, names=['ts', 'yhat'])
592
- df_csv.index = forecast_dates_csv
593
- df_csv.drop(['ts'], axis=1, inplace=True)
594
- df_csv = set_df_index_freq(df_csv)
1173
+ # Load the source data (df_csv)
1174
+ df_csv = self._load_forecast_data(csv_path, data_list, forecast_dates_csv)
1175
+ # Configure timing source (df_timing) and iteration list
1176
+ if csv_path is None or list_and_perfect:
1177
+ df_final = set_df_index_freq(df_final)
1178
+ df_timing = copy.deepcopy(df_final)
595
1179
  days_list = df_final.index.day.unique().tolist()
596
- forecast_out = pd.DataFrame()
1180
+ else:
1181
+ df_timing = copy.deepcopy(df_csv)
1182
+ days_list = df_csv.index.day.unique().tolist()
1183
+ # Iterate over days and collect forecast parts
1184
+ forecast_parts = []
597
1185
  for day in days_list:
598
- if csv_path is None:
599
- if list_and_perfect:
600
- df_tmp = copy.deepcopy(df_final)
601
- else:
602
- df_tmp = copy.deepcopy(df_csv)
603
- else:
604
- df_tmp = copy.deepcopy(df_final)
605
- first_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][0]
606
- last_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][-1]
607
- fcst_index = pd.date_range(start=df_tmp.index[first_elm_index],
608
- end=df_tmp.index[last_elm_index],
609
- freq=df_tmp.index.freq)
610
- first_hour = str(df_tmp.index[first_elm_index].hour)+":"+str(df_tmp.index[first_elm_index].minute)
611
- last_hour = str(df_tmp.index[last_elm_index].hour)+":"+str(df_tmp.index[last_elm_index].minute)
612
- if len(forecast_out) == 0:
613
- if csv_path is None:
614
- if list_and_perfect:
615
- forecast_out = pd.DataFrame(
616
- df_csv.between_time(first_hour, last_hour).values,
617
- index=fcst_index)
618
- else:
619
- forecast_out = pd.DataFrame(
620
- df_csv.loc[fcst_index,:].between_time(first_hour, last_hour).values,
621
- index=fcst_index)
622
- else:
623
- forecast_out = pd.DataFrame(
624
- df_csv.between_time(first_hour, last_hour).values,
625
- index=fcst_index)
626
- else:
627
- if csv_path is None:
628
- if list_and_perfect:
629
- forecast_tp = pd.DataFrame(
630
- df_csv.between_time(first_hour, last_hour).values,
631
- index=fcst_index)
632
- else:
633
- forecast_tp = pd.DataFrame(
634
- df_csv.loc[fcst_index,:].between_time(first_hour, last_hour).values,
635
- index=fcst_index)
636
- else:
637
- forecast_tp = pd.DataFrame(
638
- df_csv.between_time(first_hour, last_hour).values,
639
- index=fcst_index)
640
- forecast_out = pd.concat([forecast_out, forecast_tp], axis=0)
1186
+ daily_df = self._extract_daily_forecast(
1187
+ day, df_timing, df_csv, csv_path, list_and_perfect
1188
+ )
1189
+ forecast_parts.append(daily_df)
1190
+ if forecast_parts:
1191
+ forecast_out = pd.concat(forecast_parts, axis=0)
1192
+ else:
1193
+ forecast_out = pd.DataFrame()
1194
+ # Merge with final DataFrame to align indices
1195
+ merged = pd.merge_asof(
1196
+ df_final.sort_index(),
1197
+ forecast_out.sort_index(),
1198
+ left_index=True,
1199
+ right_index=True,
1200
+ direction="nearest",
1201
+ )
1202
+ # Keep only forecast_out columns
1203
+ forecast_out = merged[forecast_out.columns]
641
1204
  return forecast_out
642
-
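A toy illustration of the merge_asof alignment used above: forecast values are snapped onto the optimization index by nearest-timestamp matching, which tolerates small offsets between the two time grids.

    import pandas as pd

    left = pd.DataFrame(index=pd.date_range("2024-06-21 00:00", periods=4, freq="30min"))
    right = pd.DataFrame({"yhat": [1.0, 2.0, 3.0, 4.0]},
                         index=pd.date_range("2024-06-21 00:01", periods=4, freq="30min"))
    merged = pd.merge_asof(left.sort_index(), right.sort_index(),
                           left_index=True, right_index=True, direction="nearest")
    print(merged)  # each left timestamp picks the nearest right value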
643
- def get_load_forecast(self, days_min_load_forecast: Optional[int] = 3, method: Optional[str] = 'naive',
644
- csv_path: Optional[str] = "data_load_forecast.csv",
645
- set_mix_forecast:Optional[bool] = False, df_now:Optional[pd.DataFrame] = pd.DataFrame(),
646
- use_last_window: Optional[bool] = True, mlf: Optional[MLForecaster] = None,
647
- debug: Optional[bool] = False) -> pd.Series:
1205
+
1206
+ @staticmethod
1207
+ def resample_data(data, freq, current_freq):
1208
+ r"""
1209
+ Resample a DataFrame with a custom frequency.
1210
+
1211
+ :param data: Original time series data with a DateTimeIndex.
1212
+ :type data: pd.DataFrame
1213
+ :param freq: Desired frequency for resampling (e.g., pd.Timedelta("10min")).
1214
+ :type freq: pd.Timedelta
1215
+ :return: Resampled data at the specified frequency.
1216
+ :rtype: pd.DataFrame
1217
+ """
1218
+ if freq > current_freq:
1219
+ # Downsampling
1220
+ # Use 'mean' to aggregate or choose other options ('sum', 'max', etc.)
1221
+ resampled_data = data.resample(freq).mean()
1222
+ elif freq < current_freq:
1223
+ # Upsampling
1224
+ # Use 'asfreq' to create empty slots, then interpolate
1225
+ resampled_data = data.resample(freq).asfreq()
1226
+ resampled_data = resampled_data.interpolate(method="time")
1227
+ else:
1228
+ # No resampling needed
1229
+ resampled_data = data.copy()
1230
+ return resampled_data
1231
+
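A short sketch of both resampling directions on synthetic half-hourly data:

    import pandas as pd

    idx = pd.date_range("2024-06-21", periods=48, freq="30min")
    data = pd.DataFrame({"load": range(48)}, index=idx)
    # Downsampling: aggregate to hourly means
    hourly = data.resample(pd.Timedelta("60min")).mean()
    # Upsampling: create 10-minute slots, then interpolate over time
    ten_min = data.resample(pd.Timedelta("10min")).asfreq().interpolate(method="time")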
1232
+ @staticmethod
1233
+ def get_typical_load_forecast(data, forecast_date):
648
1234
  r"""
1235
+ Forecast the load profile for the next day based on historic data.
1236
+
1237
+ :param data: A DataFrame with a DateTimeIndex containing the historic load data.
1238
+ Must include a 'load' column.
1239
+ :type data: pd.DataFrame
1240
+ :param forecast_date: The date for which the forecast will be generated.
1241
+ :type forecast_date: pd.Timestamp
1242
+ :return: A Series with the forecasted load profile for the next day and a list of days used
1243
+ to calculate the forecast.
1244
+ :rtype: tuple (pd.Series, list)
1245
+ """
1246
+ # Ensure the 'load' column exists
1247
+ if "load" not in data.columns:
1248
+ raise ValueError("Data must have a 'load' column.")
1249
+ # Filter historic data for the same month and day of the week
1250
+ month = forecast_date.month
1251
+ day_of_week = forecast_date.dayofweek
1252
+ historic_data = data[(data.index.month == month) & (data.index.dayofweek == day_of_week)]
1253
+ used_days = np.unique(historic_data.index.date)
1254
+ # Align all historic data to the forecast day
1255
+ aligned_data = []
1256
+ for day in used_days:
1257
+ daily_data = data[data.index.date == pd.Timestamp(day).date()]
1258
+ aligned_daily_data = daily_data.copy()
1259
+ aligned_daily_data.index = aligned_daily_data.index.map(
1260
+ lambda x: x.replace(
1261
+ year=forecast_date.year,
1262
+ month=forecast_date.month,
1263
+ day=forecast_date.day,
1264
+ )
1265
+ )
1266
+ aligned_data.append(aligned_daily_data)
1267
+ # Combine all aligned historic data into a single DataFrame
1268
+ combined_data = pd.concat(aligned_data)
1269
+ # Compute the mean load for each timestamp
1270
+ forecast = combined_data.groupby(combined_data.index).mean()
1271
+ return forecast, used_days
1272
+
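Usage sketch with synthetic history, assuming the Forecast class defined above is importable; two weeks of half-hourly data yield two matching weekdays (the Saturdays) to average for a Saturday forecast:

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2024-06-01", periods=14 * 48, freq="30min")
    data = pd.DataFrame({"load": np.random.rand(len(idx)) * 1000.0}, index=idx)
    forecast, used_days = Forecast.get_typical_load_forecast(data, pd.Timestamp("2024-06-15"))
    print(len(used_days), forecast.head())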
1273
+ async def _prepare_hass_load_data(
1274
+ self, days_min_load_forecast: int, method: str
1275
+ ) -> pd.DataFrame | bool:
1276
+ """Helper to retrieve and prepare load data from Home Assistant."""
1277
+ self.logger.info(f"Retrieving data from hass for load forecast using method = {method}")
1278
+ var_list = [self.var_load]
1279
+ var_replace_zero = None
1280
+ var_interp = [self.var_load]
1281
+ time_zone_load_forecast = None
1282
+ rh = RetrieveHass(
1283
+ self.retrieve_hass_conf["hass_url"],
1284
+ self.retrieve_hass_conf["long_lived_token"],
1285
+ self.freq,
1286
+ time_zone_load_forecast,
1287
+ self.params,
1288
+ self.emhass_conf,
1289
+ self.logger,
1290
+ )
1291
+ if self.get_data_from_file:
1292
+ filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
1293
+ async with aiofiles.open(filename_path, "rb") as inp:
1294
+ content = await inp.read()
1295
+ rh.df_final, days_list, var_list, rh.ha_config = pickle.loads(content)
1296
+ self.var_load = var_list[0]
1297
+ self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = self.var_load
1298
+ var_interp = [var_list[0]]
1299
+ self.var_list = [var_list[0]]
1300
+ rh.var_list = self.var_list
1301
+ self.var_load_new = self.var_load + "_positive"
1302
+ else:
1303
+ days_list = get_days_list(days_min_load_forecast)
1304
+ if not await rh.get_data(days_list, var_list):
1305
+ return False
1306
+ if not rh.prepare_data(
1307
+ self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
1308
+ load_negative=self.retrieve_hass_conf["load_negative"],
1309
+ set_zero_min=self.retrieve_hass_conf["set_zero_min"],
1310
+ var_replace_zero=var_replace_zero,
1311
+ var_interp=var_interp,
1312
+ ):
1313
+ return False
1314
+ return rh.df_final.copy()[[self.var_load_new]]
1315
+
1316
+ async def _get_load_forecast_typical(self) -> pd.DataFrame:
1317
+ """Helper to generate typical load forecast."""
1318
+ model_type = "long_train_data"
1319
+ data_path = self.emhass_conf["data_path"] / str(model_type + ".pkl")
1320
+ async with aiofiles.open(data_path, "rb") as fid:
1321
+ content = await fid.read()
1322
+ data, _, _, _ = pickle.loads(content)
1323
+ # Ensure the data index is timezone-aware
1324
+ data.index = (
1325
+ data.index.tz_localize(
1326
+ self.forecast_dates.tz,
1327
+ ambiguous="infer",
1328
+ nonexistent="shift_forward",
1329
+ )
1330
+ if data.index.tz is None
1331
+ else data.index.tz_convert(self.forecast_dates.tz)
1332
+ )
1333
+ data = data[[self.var_load]]
1334
+ current_freq = pd.Timedelta("30min")
1335
+ if self.freq != current_freq:
1336
+ data = Forecast.resample_data(data, self.freq, current_freq)
1337
+ dates_list = np.unique(self.forecast_dates.date).tolist()
1338
+ forecast = pd.DataFrame()
1339
+ for date in dates_list:
1340
+ forecast_date = pd.Timestamp(date)
1341
+ data.columns = ["load"]
1342
+ forecast_tmp, used_days = Forecast.get_typical_load_forecast(data, forecast_date)
1343
+ self.logger.debug(f"Using {len(used_days)} days of data to generate the forecast.")
1344
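(The division by 9000 presumably normalizes the bundled typical profile, which appears to be scaled to a 9 kW reference, before multiplying by the plant's maximum_power_from_grid.)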
+ forecast_tmp = forecast_tmp * self.plant_conf["maximum_power_from_grid"] / 9000
1345
+ if len(forecast) == 0:
1346
+ forecast = forecast_tmp
1347
+ else:
1348
+ forecast = pd.concat([forecast, forecast_tmp], axis=0)
1349
+ forecast_out = forecast.loc[forecast.index.intersection(self.forecast_dates)]
1350
+ forecast_out.index = self.forecast_dates
1351
+ forecast_out.index.name = "ts"
1352
+ return forecast_out.rename(columns={"load": "yhat"})
1353
+
1354
+ def _get_load_forecast_naive(self, df: pd.DataFrame) -> pd.DataFrame:
1355
+ """Helper for naive forecast."""
1356
+ forecast_horizon = len(self.forecast_dates)
1357
+ historical_values = df.iloc[-forecast_horizon:]
1358
+ return pd.DataFrame(historical_values.values, index=self.forecast_dates, columns=["yhat"])
1359
+
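The naive (persistence) rule simply re-dates the most recent observed window: the last forecast-horizon samples of history become the forecast. A sketch with hypothetical names:

    import pandas as pd

    history = pd.Series(range(100),
                        index=pd.date_range("2024-06-19", periods=100, freq="30min"))
    forecast_dates = pd.date_range("2024-06-21 02:00", periods=48, freq="30min")
    horizon = len(forecast_dates)
    yhat = pd.DataFrame(history.iloc[-horizon:].values,
                        index=forecast_dates, columns=["yhat"])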
1360
+ async def _get_load_forecast_ml(
1361
+ self, df: pd.DataFrame, use_last_window: bool, mlf, debug: bool
1362
+ ) -> pd.DataFrame | bool:
1363
+ """Helper for ML forecast."""
1364
+ model_type = self.params["passed_data"]["model_type"]
1365
+ filename = model_type + "_mlf.pkl"
1366
+ filename_path = self.emhass_conf["data_path"] / filename
1367
+ if not debug:
1368
+ if filename_path.is_file():
1369
+ async with aiofiles.open(filename_path, "rb") as inp:
1370
+ content = await inp.read()
1371
+ mlf = pickle.loads(content)
1372
+ else:
1373
+ self.logger.error(
1374
+ "The ML forecaster file was not found, please run a model fit method before this predict method"
1375
+ )
1376
+ return False
1377
+ data_last_window = None
1378
+ if use_last_window:
1379
+ data_last_window = copy.deepcopy(df)
1380
+ data_last_window = data_last_window.rename(columns={self.var_load_new: self.var_load})
1381
+ forecast_out = await mlf.predict(data_last_window)
1382
+ self.logger.debug(
1383
+ "Number of ML predict forcast data generated (lags_opt): "
1384
+ + str(len(forecast_out.index))
1385
+ )
1386
+ self.logger.debug(
1387
+ "Number of forcast dates obtained (prediction_horizon): "
1388
+ + str(len(self.forecast_dates))
1389
+ )
1390
+ if len(self.forecast_dates) < len(forecast_out.index):
1391
+ forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1392
+ elif len(self.forecast_dates) > len(forecast_out.index):
1393
+ self.logger.error(
1394
+ "Unable to obtain: "
1395
+ + str(len(self.forecast_dates))
1396
+ + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
1397
+ )
1398
+ return False
1399
+ data_dict = {
1400
+ "ts": self.forecast_dates,
1401
+ "yhat": forecast_out.values.tolist(),
1402
+ }
1403
+ data = pd.DataFrame.from_dict(data_dict)
1404
+ data.set_index("ts", inplace=True)
1405
+ return data.copy().loc[self.forecast_dates]
1406
+
1407
+ def _get_load_forecast_csv(self, csv_path: str) -> pd.DataFrame:
1408
+ """Helper to retrieve load data from CSV."""
1409
+ df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
1410
+ if len(df_csv) < len(self.forecast_dates):
1411
+ self.logger.error("Passed data from CSV is not long enough")
1412
+ return None
1413
+ df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
1414
+ df_csv.index = self.forecast_dates
1415
+ df_csv = df_csv.drop(["ts"], axis=1)
1416
+ return df_csv.copy().loc[self.forecast_dates]
1417
+
1418
+ def _get_load_forecast_list(self) -> pd.DataFrame:
1419
+ """Helper to retrieve load data from a passed list."""
1420
+ data_list = self.params["passed_data"]["load_power_forecast"]
1421
+ if (
1422
+ len(data_list) < len(self.forecast_dates)
1423
+ and self.params["passed_data"]["prediction_horizon"] is None
1424
+ ):
1425
+ self.logger.error(error_msg_list_not_long_enough)
1426
+ return False
1427
+ data_list = data_list[0 : len(self.forecast_dates)]
1428
+ data_dict = {"ts": self.forecast_dates, "yhat": data_list}
1429
+ data = pd.DataFrame.from_dict(data_dict)
1430
+ data.set_index("ts", inplace=True)
1431
+ return data.copy().loc[self.forecast_dates]
1432
+
1433
+ async def get_load_forecast(
1434
+ self,
1435
+ days_min_load_forecast: int | None = 3,
1436
+ method: str | None = "typical",
1437
+ csv_path: str | None = "data_load_forecast.csv",
1438
+ set_mix_forecast: bool | None = False,
1439
+ df_now: pd.DataFrame | None = pd.DataFrame(),
1440
+ use_last_window: bool | None = True,
1441
+ mlf: MLForecaster | None = None,
1442
+ debug: bool | None = False,
1443
+ ) -> pd.Series:
1444
+ """
649
1445
  Get and generate the load forecast data.
650
-
1446
+
651
1447
  :param days_min_load_forecast: The number of last days to retrieve that \
652
1448
  will be used to generate a naive forecast, defaults to 3
653
1449
  :type days_min_load_forecast: int, optional
654
1450
  :param method: The method to be used to generate load forecast, the options \
655
- are 'naive' for a persistance model, 'mlforecaster' for using a custom \
1451
+ are 'typical' for a typical household load consumption curve, \
1452
+ 'naive' for a persistence model, 'mlforecaster' for using a custom \
656
1453
  previously fitted machine learning model, 'csv' to read the forecast from \
657
1454
  a CSV file and 'list' to use data directly passed at runtime as a list of \
658
- values. Defaults to 'naive'.
1455
+ values. Defaults to 'typical'.
659
1456
  :type method: str, optional
660
1457
  :param csv_path: The path to the CSV file used when method = 'csv', \
661
1458
  defaults to "/data/data_load_forecast.csv"
662
1459
  :type csv_path: str, optional
663
- :param set_mix_forecast: Use a mixed forcast strategy to integra now/current values.
1460
+ :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
664
1461
  :type set_mix_forecast: Bool, optional
665
1462
  :param df_now: The DataFrame containing the now/current data.
666
1463
  :type df_now: pd.DataFrame, optional
@@ -679,123 +1476,60 @@ class Forecast(object):
679
1476
  :rtype: pd.DataFrame
680
1477
 
681
1478
  """
682
- csv_path = self.emhass_conf['data_path'] / csv_path
683
-
684
- if method == 'naive' or method == 'mlforecaster': # retrieving needed data for these methods
685
- self.logger.info("Retrieving data from hass for load forecast using method = "+method)
686
- var_list = [self.var_load]
687
- var_replace_zero = None
688
- var_interp = [self.var_load]
689
- time_zone_load_foreacast = None
690
- # We will need to retrieve a new set of load data according to the days_min_load_forecast parameter
691
- rh = RetrieveHass(self.retrieve_hass_conf['hass_url'], self.retrieve_hass_conf['long_lived_token'],
692
- self.freq, time_zone_load_foreacast, self.params, self.emhass_conf, self.logger)
693
- if self.get_data_from_file:
694
- filename_path = self.emhass_conf['data_path'] / 'test_df_final.pkl'
695
- with open(filename_path, 'rb') as inp:
696
- rh.df_final, days_list, var_list = pickle.load(inp)
697
- self.var_load = var_list[0]
698
- self.retrieve_hass_conf['var_load'] = self.var_load
699
- var_interp = [var_list[0]]
700
- self.var_list = [var_list[0]]
701
- self.var_load_new = self.var_load+'_positive'
702
- else:
703
- days_list = get_days_list(days_min_load_forecast)
704
- if not rh.get_data(days_list, var_list):
705
- return False
706
- if not rh.prepare_data(
707
- self.retrieve_hass_conf['var_load'], load_negative = self.retrieve_hass_conf['load_negative'],
708
- set_zero_min = self.retrieve_hass_conf['set_zero_min'],
709
- var_replace_zero = var_replace_zero, var_interp = var_interp):
1479
+ csv_path = self.emhass_conf["data_path"] / csv_path
1480
+ # Retrieve Data from Home Assistant if needed
1481
+ df = None
1482
+ if method in ["naive", "mlforecaster"]:
1483
+ df = await self._prepare_hass_load_data(days_min_load_forecast, method)
1484
+ if df is False:
710
1485
  return False
711
- df = rh.df_final.copy()[[self.var_load_new]]
712
- if method == 'naive': # using a naive approach
713
- mask_forecast_out = (df.index > days_list[-1] - self.optim_conf['delta_forecast'])
714
- forecast_out = df.copy().loc[mask_forecast_out]
715
- forecast_out = forecast_out.rename(columns={self.var_load_new: 'yhat'})
716
- # Force forecast_out length to avoid mismatches
717
- forecast_out = forecast_out.iloc[0:len(self.forecast_dates)]
718
- forecast_out.index = self.forecast_dates
719
- elif method == 'mlforecaster': # using a custom forecast model with machine learning
720
- # Load model
721
- model_type = self.params['passed_data']['model_type']
722
- filename = model_type+'_mlf.pkl'
723
- filename_path = self.emhass_conf['data_path'] / filename
724
- if not debug:
725
- if filename_path.is_file():
726
- with open(filename_path, 'rb') as inp:
727
- mlf = pickle.load(inp)
728
- else:
729
- self.logger.error("The ML forecaster file was not found, please run a model fit method before this predict method")
730
- return False
731
- # Make predictions
732
- if use_last_window:
733
- data_last_window = copy.deepcopy(df)
734
- data_last_window = data_last_window.rename(columns={self.var_load_new: self.var_load})
735
- else:
736
- data_last_window = None
737
- forecast_out = mlf.predict(data_last_window)
738
- # Force forecast length to avoid mismatches
739
- self.logger.debug("Number of ML predict forcast data generated (lags_opt): " + str(len(forecast_out.index)))
740
- self.logger.debug("Number of forcast dates obtained: " + str(len(self.forecast_dates)))
741
- if len(self.forecast_dates) < len(forecast_out.index):
742
- forecast_out = forecast_out.iloc[0:len(self.forecast_dates)]
743
- # To be removed once bug is fixed
744
- elif len(self.forecast_dates) > len(forecast_out.index):
745
- self.logger.error("Unable to obtain: " + str(len(self.forecast_dates)) + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters")
1486
+ # Generate Forecast based on Method
1487
+ if method == "typical":
1488
+ forecast_out = await self._get_load_forecast_typical()
1489
+ elif method == "naive":
1490
+ forecast_out = self._get_load_forecast_naive(df)
1491
+ elif method == "mlforecaster":
1492
+ forecast_out = await self._get_load_forecast_ml(df, use_last_window, mlf, debug)
1493
+ if forecast_out is False:
746
1494
  return False
747
- # Define DataFrame
748
- data_dict = {'ts':self.forecast_dates, 'yhat':forecast_out.values.tolist()}
749
- data = pd.DataFrame.from_dict(data_dict)
750
- # Define index
751
- data.set_index('ts', inplace=True)
752
- forecast_out = data.copy().loc[self.forecast_dates]
753
- elif method == 'csv': # reading from a csv file
754
- load_csv_file_path = csv_path
755
- df_csv = pd.read_csv(load_csv_file_path, header=None, names=['ts', 'yhat'])
756
- if len(df_csv) < len(self.forecast_dates):
757
- self.logger.error("Passed data from CSV is not long enough")
758
- else:
759
- # Ensure correct length
760
- df_csv = df_csv.loc[df_csv.index[0:len(self.forecast_dates)],:]
761
- # Define index
762
- df_csv.index = self.forecast_dates
763
- df_csv.drop(['ts'], axis=1, inplace=True)
764
- forecast_out = df_csv.copy().loc[self.forecast_dates]
765
- elif method == 'list': # reading a list of values
766
- # Loading data from passed list
767
- data_list = self.params['passed_data']['load_power_forecast']
768
- # Check if the passed data has the correct length
769
- if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
770
- self.logger.error("Passed data from passed list is not long enough")
1495
+ elif method == "csv":
1496
+ forecast_out = self._get_load_forecast_csv(csv_path)
1497
+ if forecast_out is None:
1498
+ return False
1499
+ elif method == "list":
1500
+ forecast_out = self._get_load_forecast_list()
1501
+ if forecast_out is False:
771
1502
  return False
772
- else:
773
- # Ensure correct length
774
- data_list = data_list[0:len(self.forecast_dates)]
775
- # Define DataFrame
776
- data_dict = {'ts':self.forecast_dates, 'yhat':data_list}
777
- data = pd.DataFrame.from_dict(data_dict)
778
- # Define index
779
- data.set_index('ts', inplace=True)
780
- forecast_out = data.copy().loc[self.forecast_dates]
781
1503
  else:
782
- self.logger.error("Passed method is not valid")
1504
+ self.logger.error(error_msg_method_not_valid)
783
1505
  return False
784
- P_Load_forecast = copy.deepcopy(forecast_out['yhat'])
1506
+ # Post-processing (Mix Forecast)
1507
+ p_load_forecast = copy.deepcopy(forecast_out["yhat"])
785
1508
  if set_mix_forecast:
786
- P_Load_forecast = Forecast.get_mix_forecast(
787
- df_now, P_Load_forecast,
788
- self.params['passed_data']['alpha'], self.params['passed_data']['beta'], self.var_load_new)
789
- return P_Load_forecast
790
-
791
- def get_load_cost_forecast(self, df_final: pd.DataFrame, method: Optional[str] = 'hp_hc_periods',
792
- csv_path: Optional[str] = "data_load_cost_forecast.csv",
793
- list_and_perfect: Optional[bool] = False) -> pd.DataFrame:
1509
+ # Load forecasts don't need curtailment protection - always use feedback
1510
+ p_load_forecast = Forecast.get_mix_forecast(
1511
+ df_now,
1512
+ p_load_forecast,
1513
+ self.params["passed_data"]["alpha"],
1514
+ self.params["passed_data"]["beta"],
1515
+ self.var_load_new,
1516
+ False, # Never ignore feedback for load forecasts
1517
+ )
1518
+ self.logger.debug("get_load_forecast returning:\n%s", p_load_forecast)
1519
+ return p_load_forecast
1520
+
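get_load_forecast is now a coroutine and must be awaited; a hypothetical driver, assuming fcst is an already-constructed Forecast instance:

    import asyncio

    async def main(fcst):
        p_load = await fcst.get_load_forecast(method="typical")
        if p_load is False:
            raise RuntimeError("load forecast failed")
        print(p_load.head())

    # asyncio.run(main(fcst))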
1521
+ def get_load_cost_forecast(
1522
+ self,
1523
+ df_final: pd.DataFrame,
1524
+ method: str | None = "hp_hc_periods",
1525
+ csv_path: str | None = "data_load_cost_forecast.csv",
1526
+ list_and_perfect: bool | None = False,
1527
+ ) -> pd.DataFrame:
794
1528
  r"""
795
1529
  Get the unit cost for the load consumption based on multiple tariff \
796
1530
  periods. This is the cost of the energy from the utility in a vector \
797
1531
  sampled at the fixed freq value.
798
-
1532
+
799
1533
  :param df_final: The DataFrame containing the input data.
800
1534
  :type df_final: pd.DataFrame
801
1535
  :param method: The method to be used to generate load cost forecast, \
@@ -810,50 +1544,77 @@ class Forecast(object):
810
1544
  :rtype: pd.DataFrame
811
1545
 
812
1546
  """
813
- csv_path = self.emhass_conf['data_path'] / csv_path
814
- if method == 'hp_hc_periods':
815
- df_final[self.var_load_cost] = self.optim_conf['load_cost_hc']
1547
+ csv_path = self.emhass_conf["data_path"] / csv_path
1548
+ if method == "hp_hc_periods":
1549
+ df_final[self.var_load_cost] = self.optim_conf["load_offpeak_hours_cost"]
816
1550
  list_df_hp = []
817
- for key, period_hp in self.optim_conf['list_hp_periods'].items():
818
- list_df_hp.append(df_final[self.var_load_cost].between_time(
819
- period_hp[0]['start'], period_hp[1]['end']))
1551
+ for _key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
1552
+ list_df_hp.append(
1553
+ df_final[self.var_load_cost].between_time(
1554
+ period_hp[0]["start"], period_hp[1]["end"]
1555
+ )
1556
+ )
820
1557
  for df_hp in list_df_hp:
821
- df_final.loc[df_hp.index, self.var_load_cost] = self.optim_conf['load_cost_hp']
822
- elif method == 'csv':
1558
+ df_final.loc[df_hp.index, self.var_load_cost] = self.optim_conf[
1559
+ "load_peak_hours_cost"
1560
+ ]
1561
+ elif method == "csv":
823
1562
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
824
1563
  forecast_out = self.get_forecast_out_from_csv_or_list(
825
- df_final, forecast_dates_csv, csv_path)
826
- df_final[self.var_load_cost] = forecast_out
827
- elif method == 'list': # reading a list of values
1564
+ df_final, forecast_dates_csv, csv_path
1565
+ )
1566
+ # Ensure correct length
1567
+ if not list_and_perfect:
1568
+ forecast_out = forecast_out[0 : len(self.forecast_dates)]
1569
+ df_final = df_final[0 : len(self.forecast_dates)].copy()
1570
+ # Convert to Series if needed and align index
1571
+ if not isinstance(forecast_out, pd.Series):
1572
+ forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
1573
+ df_final.loc[:, self.var_load_cost] = forecast_out
1574
+ elif method == "list": # reading a list of values
828
1575
  # Loading data from passed list
829
- data_list = self.params['passed_data']['load_cost_forecast']
1576
+ data_list = self.params["passed_data"]["load_cost_forecast"]
830
1577
  # Check if the passed data has the correct length
831
- if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
832
- self.logger.error("Passed data from passed list is not long enough")
1578
+ if (
1579
+ len(data_list) < len(self.forecast_dates)
1580
+ and self.params["passed_data"]["prediction_horizon"] is None
1581
+ ):
1582
+ self.logger.error(error_msg_list_not_long_enough)
833
1583
  return False
834
1584
  else:
835
1585
  # Ensure correct length
836
- data_list = data_list[0:len(self.forecast_dates)]
1586
+ data_list = data_list[0 : len(self.forecast_dates)]
1587
+ if not list_and_perfect:
1588
+ df_final = df_final.iloc[0 : len(self.forecast_dates)]
837
1589
  # Define the correct dates
838
1590
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
839
1591
  forecast_out = self.get_forecast_out_from_csv_or_list(
840
- df_final, forecast_dates_csv, None, data_list=data_list, list_and_perfect=list_and_perfect)
841
- # Fill the final DF
1592
+ df_final,
1593
+ forecast_dates_csv,
1594
+ None,
1595
+ data_list=data_list,
1596
+ list_and_perfect=list_and_perfect,
1597
+ )
1598
+ df_final = df_final.copy()
842
1599
  df_final[self.var_load_cost] = forecast_out
843
1600
  else:
844
- self.logger.error("Passed method is not valid")
1601
+ self.logger.error(error_msg_method_not_valid)
845
1602
  return False
1603
+ self.logger.debug("get_load_cost_forecast returning:\n%s", df_final)
846
1604
  return df_final
847
-
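For reference, the hp_hc_periods branch above reads a configuration shaped like the following sketch (the keys follow the names used in the code; the numeric values are illustrative only):

    optim_conf = {
        "load_offpeak_hours_cost": 0.1419,   # EUR/kWh outside peak periods
        "load_peak_hours_cost": 0.1907,      # EUR/kWh inside peak periods
        "load_peak_hour_periods": {
            # each period is a [{'start': ...}, {'end': ...}] pair
            "period_hp_1": [{"start": "02:54"}, {"end": "15:24"}],
            "period_hp_2": [{"start": "17:24"}, {"end": "20:24"}],
        },
    }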
848
- def get_prod_price_forecast(self, df_final: pd.DataFrame, method: Optional[str] = 'constant',
849
- csv_path: Optional[str] = "data_prod_price_forecast.csv",
850
- list_and_perfect: Optional[bool] = False) -> pd.DataFrame:
851
1605
 
1606
+ def get_prod_price_forecast(
1607
+ self,
1608
+ df_final: pd.DataFrame,
1609
+ method: str | None = "constant",
1610
+ csv_path: str | None = "data_prod_price_forecast.csv",
1611
+ list_and_perfect: bool | None = False,
1612
+ ) -> pd.DataFrame:
852
1613
  r"""
853
1614
  Get the unit power production price for the energy injected to the grid.\
854
1615
  This is the price of the energy injected to the utility in a vector \
855
1616
  sampled at the fixed freq value.
856
-
1617
+
857
1618
  :param df_input_data: The DataFrame containing all the input data retrieved
858
1619
  from hass
859
1620
  :type df_input_data: pd.DataFrame
@@ -869,31 +1630,130 @@ class Forecast(object):
869
1630
  :rtype: pd.DataFrame
870
1631
 
871
1632
  """
872
- csv_path = self.emhass_conf['data_path'] / csv_path
873
- if method == 'constant':
874
- df_final[self.var_prod_price] = self.optim_conf['prod_sell_price']
875
- elif method == 'csv':
1633
+ csv_path = self.emhass_conf["data_path"] / csv_path
1634
+ if method == "constant":
1635
+ df_final[self.var_prod_price] = self.optim_conf["photovoltaic_production_sell_price"]
1636
+ elif method == "csv":
876
1637
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
877
1638
  forecast_out = self.get_forecast_out_from_csv_or_list(
878
- df_final, forecast_dates_csv, csv_path)
879
- df_final[self.var_prod_price] = forecast_out
880
- elif method == 'list': # reading a list of values
1639
+ df_final, forecast_dates_csv, csv_path
1640
+ )
1641
+ # Ensure correct length
1642
+ if not list_and_perfect:
1643
+ forecast_out = forecast_out[0 : len(self.forecast_dates)]
1644
+ df_final = df_final[0 : len(self.forecast_dates)].copy()
1645
+ # Convert to Series if needed and align index
1646
+ if not isinstance(forecast_out, pd.Series):
1647
+ forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
1648
+ df_final.loc[:, self.var_prod_price] = forecast_out
1649
+ elif method == "list": # reading a list of values
881
1650
  # Loading data from passed list
882
- data_list = self.params['passed_data']['prod_price_forecast']
1651
+ data_list = self.params["passed_data"]["prod_price_forecast"]
883
1652
  # Check if the passed data has the correct length
884
- if len(data_list) < len(self.forecast_dates) and self.params['passed_data']['prediction_horizon'] is None:
885
- self.logger.error("Passed data from passed list is not long enough")
1653
+ if (
1654
+ len(data_list) < len(self.forecast_dates)
1655
+ and self.params["passed_data"]["prediction_horizon"] is None
1656
+ ):
1657
+ self.logger.error(error_msg_list_not_long_enough)
886
1658
  return False
887
1659
  else:
888
1660
  # Ensure correct length
889
- data_list = data_list[0:len(self.forecast_dates)]
1661
+ data_list = data_list[0 : len(self.forecast_dates)]
1662
+ if not list_and_perfect:
1663
+ df_final = df_final.iloc[0 : len(self.forecast_dates)]
890
1664
  # Define the correct dates
891
1665
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
892
1666
  forecast_out = self.get_forecast_out_from_csv_or_list(
893
- df_final, forecast_dates_csv, None, data_list=data_list, list_and_perfect=list_and_perfect)
894
- # Fill the final DF
1667
+ df_final,
1668
+ forecast_dates_csv,
1669
+ None,
1670
+ data_list=data_list,
1671
+ list_and_perfect=list_and_perfect,
1672
+ )
1673
+ df_final = df_final.copy()
895
1674
  df_final[self.var_prod_price] = forecast_out
896
1675
  else:
897
- self.logger.error("Passed method is not valid")
1676
+ self.logger.error(error_msg_method_not_valid)
898
1677
  return False
899
- return df_final
1678
+ self.logger.debug("get_prod_price_forecast returning:\n%s", df_final)
1679
+ return df_final
1680
+
1681
+ async def get_cached_forecast_data(self, w_forecast_cache_path) -> pd.DataFrame:
1682
+ r"""
1683
+ Get cached weather forecast data from file.
1684
+
1685
+ :param w_forecast_cache_path: The path to the cache file.
1686
+ :type w_forecast_cache_path: str | pathlib.Path
1687
+ :return: The DataFrame containing the forecasted data
1688
+ :rtype: pd.DataFrame
1689
+
1690
+ """
1691
+ async with aiofiles.open(w_forecast_cache_path, "rb") as file:
1692
+ content = await file.read()
1693
+ data = pickle.loads(content)
1694
+ if not isinstance(data, pd.DataFrame) or len(data) < len(self.forecast_dates):
1695
+ self.logger.error("There has been a error obtaining cached forecast data.")
1696
+ self.logger.error(
1697
+ "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from forecast API and cache."
1698
+ )
1699
+ self.logger.warning(
1700
+ "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
1701
+ )
1702
+ os.remove(w_forecast_cache_path)
1703
+ return False
1704
+ # Filter cached forecast data to the current forecast_dates start-end range (trims the forecast DataFrame to the appropriate length)
1705
+ if self.forecast_dates[0] in data.index and self.forecast_dates[-1] in data.index:
1706
+ data = data.loc[self.forecast_dates[0] : self.forecast_dates[-1]]
1707
+ self.logger.info("Retrieved forecast data from the previously saved cache.")
1708
+ else:
1709
+ self.logger.error(
1710
+ "Unable to obtain cached forecast data within the requested timeframe range."
1711
+ )
1712
+ self.logger.error(
1713
+ "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from forecast API and cache."
1714
+ )
1715
+ self.logger.warning(
1716
+ "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
1717
+ )
1718
+ os.remove(w_forecast_cache_path)
1719
+ return False
1720
+ return data
1721
+
1722
+ async def set_cached_forecast_data(self, w_forecast_cache_path, data) -> pd.DataFrame:
1723
+ r"""
1724
+ Set generated weather forecast data to file.
1725
+ Trim data to match the original requested forecast dates
1726
+
1727
+ :param w_forecast_cache_path: the path to file.
1728
+ :type method: Any
1729
+ :param: The DataFrame containing the forecasted data
1730
+ :type: pd.DataFrame
1731
+ :return: The DataFrame containing the forecasted data
1732
+ :rtype: pd.DataFrame
1733
+
1734
+ """
1735
+ async with aiofiles.open(w_forecast_cache_path, "wb") as file:
1736
+ content = pickle.dumps(data)
1737
+ await file.write(content)
1738
+ if not os.path.isfile(w_forecast_cache_path):
1739
+ self.logger.warning("forecast data could not be saved to file.")
1740
+ else:
1741
+ self.logger.info("Saved the forecast results to cache, for later reference.")
1742
+
1743
+ # Trim cached data to match requested dates
1744
+ end_forecast = (self.start_forecast + self.optim_conf["delta_forecast_daily"]).replace(
1745
+ microsecond=0
1746
+ )
1747
+ forecast_dates = (
1748
+ pd.date_range(
1749
+ start=self.start_forecast,
1750
+ end=end_forecast - self.freq,
1751
+ freq=self.freq,
1752
+ tz=self.time_zone,
1753
+ )
1754
+ .tz_convert("utc")
1755
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
1756
+ .tz_convert(self.time_zone)
1757
+ )
1758
+ data = data.loc[forecast_dates[0] : forecast_dates[-1]]
1759
+ return data
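A minimal sketch of the pickle-based cache round trip used by the two methods above, with a hypothetical file name; aiofiles mirrors the async file I/O used here:

    import pickle
    import aiofiles

    async def cache_roundtrip(path, df):
        async with aiofiles.open(path, "wb") as f:
            await f.write(pickle.dumps(df))
        async with aiofiles.open(path, "rb") as f:
            return pickle.loads(await f.read())

    # e.g. data = await cache_roundtrip("weather_forecast_data.pkl", data)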