emhass 0.11.4__py3-none-any.whl → 0.15.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
emhass/forecast.py CHANGED
@@ -1,101 +1,104 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
1
+ import asyncio
4
2
  import bz2
5
3
  import copy
6
- import json
7
4
  import logging
8
5
  import os
9
6
  import pickle
10
7
  import pickle as cPickle
8
+ import re
11
9
  from datetime import datetime, timedelta
12
- from typing import Optional
10
+ from itertools import zip_longest
11
+ from urllib.parse import quote
13
12
 
13
+ import aiofiles
14
+ import aiohttp
14
15
  import numpy as np
16
+ import orjson
15
17
  import pandas as pd
16
- import pvlib
17
- from bs4 import BeautifulSoup
18
18
  from pvlib.irradiance import disc
19
19
  from pvlib.location import Location
20
20
  from pvlib.modelchain import ModelChain
21
21
  from pvlib.pvsystem import PVSystem
22
+ from pvlib.solarposition import get_solarposition
22
23
  from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
23
- from requests import get
24
+ from sklearn.metrics import mean_squared_error, r2_score
25
+ from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
24
26
 
25
27
  from emhass.machine_learning_forecaster import MLForecaster
28
+ from emhass.machine_learning_regressor import MLRegressor
26
29
  from emhass.retrieve_hass import RetrieveHass
27
- from emhass.utils import get_days_list, set_df_index_freq
30
+ from emhass.utils import add_date_features, get_days_list, set_df_index_freq
31
+
32
+ header_accept = "application/json"
33
+ error_msg_list_not_long_enough = "Passed data from passed list is not long enough"
34
+ error_msg_method_not_valid = "Passed method is not valid"
28
35
 
29
36
 
30
- class Forecast(object):
37
+ class Forecast:
31
38
  r"""
32
39
  Generate weather, load and cost forecasts needed as inputs to the optimization.
33
-
40
+
34
41
  In EMHASS we have basically 4 forecasts to deal with:
35
-
42
+
36
43
  - PV power production forecast (internally based on the weather forecast and the
37
44
  characteristics of your PV plant). This is given in Watts.
38
-
45
+
39
46
  - Load power forecast: how much power your house will demand on the next 24h. This
40
47
  is given in Watts.
41
-
48
+
42
49
  - PV production selling price forecast: at what price are you selling your excess
43
50
  PV production on the next 24h. This is given in EUR/kWh.
44
-
51
+
45
52
  - Load cost forecast: the price of the energy from the grid on the next 24h. This
46
53
  is given in EUR/kWh.
47
-
54
+
48
55
  There are methods that are generalized to the 4 forecasts needed. For all these
49
56
  forecasts it is possible to pass the data either as a list of values or by
50
57
  reading from a CSV file. With these methods it is then possible to use data from
51
58
  external forecast providers.
52
-
53
- Then there are the methods that are specific to each type of forecast and that
59
+
60
+ Then there are the methods that are specific to each type of forecast, with the
54
61
  proposed forecast treated and generated internally by this EMHASS forecast class.
55
- For the weather forecast a first method (`scrapper`) uses a scrapping to the
56
- ClearOutside webpage which proposes detailed forecasts based on Lat/Lon locations.
57
- This method seems stable but as with any scrape method it will fail if any changes
58
- are made to the webpage API. Another method (`solcast`) is using the SolCast PV
59
- production forecast service. A final method (`solar.forecast`) is using another
60
- external service: Solar.Forecast, for which just the nominal PV peak installed
61
- power should be provided. Search the forecast section on the documentation for examples
62
+ For the weather forecast a first method (`open-meteo`) uses the Open-Meteo API,
63
+ which provides detailed forecasts based on Lat/Lon locations.
64
+ This method is stable, but as with any external API it will fail if breaking
65
+ changes are made to the API. Another method (`solcast`) uses the Solcast PV
66
+ production forecast service. A final method (`solar.forecast`) uses another
67
+ external service: Solar.Forecast, for which just the nominal installed PV peak
68
+ power should be provided. Search the forecast section of the documentation for examples
62
69
  on how to implement these different methods.
63
-
70
+
64
71
  The `get_power_from_weather` method is proposed here to convert from irradiance
65
72
  data to electrical power. The PVLib module is used to model the PV plant.
66
-
67
- The specific methods for the load forecast are a first method (`naive`) that uses
68
- a naive approach, also called persistance. It simply assumes that the forecast for
69
- a future period will be equal to the observed values in a past period. The past
73
+
74
+ The specific methods for the load forecast are a first method (`naive`) that uses
75
+ a naive approach, also called persistence. It simply assumes that the forecast for
76
+ a future period will be equal to the observed values in a past period. The past
70
77
  period is controlled using parameter `delta_forecast`. A second method (`mlforecaster`)
71
78
  uses an internal custom forecasting model using machine learning. There is a section
72
79
  in the documentation explaining how to use this method.
73
-
80
+
74
81
  .. note:: This custom machine learning model is introduced from v0.4.0. EMHASS \
75
82
  proposed this new `mlforecaster` class with `fit`, `predict` and `tune` methods. \
76
83
  Only the `predict` method is used here to generate new forecasts, but it is \
77
84
  necessary to previously fit a forecaster model and it is a good idea to \
78
85
  optimize the model hyperparameters using the `tune` method. See the dedicated \
79
86
  section in the documentation for more help.
80
-
87
+
81
88
  For the PV production selling price and Load cost forecasts the preferred method
82
89
  is a direct read from a user provided list of values. The list should be passed
83
90
  as a runtime parameter during the `curl` to the EMHASS API.
84
-
85
- I reading from a CSV file, it should contain no header and the timestamped data
91
+
92
+ If reading from a CSV file, it should contain no header and the timestamped data
86
93
  should have the following format:
87
-
88
94
  2021-04-29 00:00:00+00:00,287.07
89
-
90
95
  2021-04-29 00:30:00+00:00,274.27
91
-
92
96
  2021-04-29 01:00:00+00:00,243.38
93
-
94
97
  ...
95
-
98
+
96
99
  The data columns in these files will correspond to the data in the units expected
97
100
  for each forecasting method.
98
-
101
+
99
102
  """
100
103
 
101
104
  def __init__(
@@ -106,8 +109,8 @@ class Forecast(object):
106
109
  params: str,
107
110
  emhass_conf: dict,
108
111
  logger: logging.Logger,
109
- opt_time_delta: Optional[int] = 24,
110
- get_data_from_file: Optional[bool] = False,
112
+ opt_time_delta: int | None = 24,
113
+ get_data_from_file: bool | None = False,
111
114
  ) -> None:
112
115
  """
113
116
  Define constructor for the forecast class.
@@ -142,9 +145,9 @@ class Forecast(object):
142
145
  self.freq = self.retrieve_hass_conf["optimization_time_step"]
143
146
  self.time_zone = self.retrieve_hass_conf["time_zone"]
144
147
  self.method_ts_round = self.retrieve_hass_conf["method_ts_round"]
145
- self.timeStep = self.freq.seconds / 3600 # in hours
146
148
  self.time_delta = pd.to_timedelta(opt_time_delta, "hours")
147
- self.var_PV = self.retrieve_hass_conf["sensor_power_photovoltaics"]
149
+ self.var_pv = self.retrieve_hass_conf["sensor_power_photovoltaics"]
150
+ self.var_pv_forecast = self.retrieve_hass_conf["sensor_power_photovoltaics_forecast"]
148
151
  self.var_load = self.retrieve_hass_conf["sensor_power_load_no_var_loads"]
149
152
  self.var_load_new = self.var_load + "_positive"
150
153
  self.lat = self.retrieve_hass_conf["Latitude"]
@@ -154,33 +157,34 @@ class Forecast(object):
154
157
  self.get_data_from_file = get_data_from_file
155
158
  self.var_load_cost = "unit_load_cost"
156
159
  self.var_prod_price = "unit_prod_price"
157
- if (params == None) or (params == "null"):
160
+ if (params is None) or (params == "null"):
158
161
  self.params = {}
159
162
  elif type(params) is dict:
160
163
  self.params = params
161
164
  else:
162
- self.params = json.loads(params)
165
+ self.params = orjson.loads(params)
166
+
163
167
  if self.method_ts_round == "nearest":
164
- self.start_forecast = pd.Timestamp(
165
- datetime.now(), tz=self.time_zone
166
- ).replace(microsecond=0)
168
+ self.start_forecast = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
167
169
  elif self.method_ts_round == "first":
168
170
  self.start_forecast = (
169
- pd.Timestamp(datetime.now(), tz=self.time_zone)
170
- .replace(microsecond=0)
171
- .floor(freq=self.freq)
171
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
172
172
  )
173
173
  elif self.method_ts_round == "last":
174
174
  self.start_forecast = (
175
- pd.Timestamp(datetime.now(), tz=self.time_zone)
176
- .replace(microsecond=0)
177
- .ceil(freq=self.freq)
175
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
178
176
  )
179
177
  else:
180
178
  self.logger.error("Wrong method_ts_round passed parameter")
181
- self.end_forecast = (
182
- self.start_forecast + self.optim_conf["delta_forecast_daily"]
183
- ).replace(microsecond=0)
179
+ # If weather_forecast_cache is set, request twice the forecast horizon as a buffer
180
+ if self.params["passed_data"].get("weather_forecast_cache", False):
181
+ self.end_forecast = (
182
+ self.start_forecast + (self.optim_conf["delta_forecast_daily"] * 2)
183
+ ).replace(microsecond=0)
184
+ else:
185
+ self.end_forecast = (
186
+ self.start_forecast + self.optim_conf["delta_forecast_daily"]
187
+ ).replace(microsecond=0)
184
188
  self.forecast_dates = (
185
189
  pd.date_range(
186
190
  start=self.start_forecast,
@@ -192,288 +196,284 @@ class Forecast(object):
192
196
  .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
193
197
  .tz_convert(self.time_zone)
194
198
  )
195
- if params is not None:
196
- if "prediction_horizon" in list(self.params["passed_data"].keys()):
197
- if self.params["passed_data"]["prediction_horizon"] is not None:
198
- self.forecast_dates = self.forecast_dates[
199
- 0 : self.params["passed_data"]["prediction_horizon"]
200
- ]
199
+ if (
200
+ params is not None
201
+ and "prediction_horizon" in list(self.params["passed_data"].keys())
202
+ and self.params["passed_data"]["prediction_horizon"] is not None
203
+ ):
204
+ self.forecast_dates = self.forecast_dates[
205
+ 0 : self.params["passed_data"]["prediction_horizon"]
206
+ ]
201
207
 
202
- def get_weather_forecast(
203
- self,
204
- method: Optional[str] = "scrapper",
205
- csv_path: Optional[str] = "data_weather_forecast.csv",
206
- ) -> pd.DataFrame:
208
+ async def get_cached_open_meteo_forecast_json(
209
+ self, max_age: int | None = 30, forecast_days: int = 3
210
+ ) -> dict:
207
211
  r"""
208
- Get and generate weather forecast data.
209
-
210
- :param method: The desired method, options are 'scrapper', 'csv', 'list', 'solcast' and \
211
- 'solar.forecast'. Defaults to 'scrapper'.
212
- :type method: str, optional
213
- :return: The DataFrame containing the forecasted data
214
- :rtype: pd.DataFrame
215
-
212
+ Get weather forecast JSON from Open-Meteo and cache it for re-use.
213
+ The response json is cached in the local file system and returned
214
+ on subsequent calls until it is older than max_age, at which point
215
+ attempts will be made to replace it with a new version.
216
+ The cached version will not be overwritten until a new version has
217
+ been successfully fetched from Open-Meteo.
218
+ In the event of connectivity issues, the cached version will continue
219
+ to be returned until such time as a new version can be successfully
220
+ fetched from Open-Meteo.
221
+ If you want to force reload, pass max_age value of zero.
222
+
223
+ :param max_age: The maximum age of the cached JSON file, in minutes,
224
+ before it is discarded and a new version fetched from Open-Meteo.
225
+ Defaults to 30 minutes.
226
+ :type max_age: int, optional
227
+ :param forecast_days: The number of days of forecast data required from Open-Meteo.
228
+ One additional day is always fetched from Open-Meteo so there is an extra day of data in the cache.
229
+ Defaults to 3 days, and at least 3 days are always fetched.
230
+ :type forecast_days: int, optional
231
+ :return: The json containing the Open-Meteo forecast data
232
+ :rtype: dict
233
+
216
234
  """
217
- csv_path = self.emhass_conf["data_path"] / csv_path
218
- w_forecast_cache_path = os.path.abspath(
219
- self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
220
- )
221
235
 
222
- self.logger.info("Retrieving weather forecast data using method = " + method)
223
- self.weather_forecast_method = (
224
- method # Saving this attribute for later use to identify csv method usage
236
+ # Ensure at least 3 weather forecast days (and 1 more than requested)
237
+ if forecast_days is None:
238
+ self.logger.debug("Open-Meteo forecast_days is missing so defaulting to 3 days")
239
+ forecast_days = 3
240
+ elif forecast_days < 3:
241
+ self.logger.debug(
242
+ "Open-Meteo forecast_days is low (%s) so defaulting to 3 days",
243
+ forecast_days,
244
+ )
245
+ forecast_days = 3
246
+ else:
247
+ forecast_days = forecast_days + 1
248
+
249
+ # The addition of -b.json file name suffix is because the time format
250
+ # has changed, and it avoids any attempt to use the old format file.
251
+ json_path = os.path.abspath(
252
+ self.emhass_conf["data_path"] / "cached-open-meteo-forecast-b.json"
225
253
  )
226
- if method == "scrapper":
227
- freq_scrap = pd.to_timedelta(
228
- 60, "minutes"
229
- ) # The scrapping time step is 60min on clearoutside
230
- forecast_dates_scrap = (
231
- pd.date_range(
232
- start=self.start_forecast,
233
- end=self.end_forecast - freq_scrap,
234
- freq=freq_scrap,
235
- tz=self.time_zone,
254
+ # The cached JSON file is always loaded, if it exists, as it is also a fallback
255
+ # in case the REST API call to Open-Meteo fails - the cached JSON will continue to
256
+ # be used until it can successfully fetch a new version from Open-Meteo.
257
+ data = None
258
+ use_cache = False
259
+ if os.path.exists(json_path):
260
+ delta = datetime.now() - datetime.fromtimestamp(os.path.getmtime(json_path))
261
+ json_age = int(delta / timedelta(seconds=60))
262
+ use_cache = json_age < max_age
263
+ self.logger.info("Loading existing cached Open-Meteo JSON file: %s", json_path)
264
+ async with aiofiles.open(json_path) as json_file:
265
+ content = await json_file.read()
266
+ data = orjson.loads(content)
267
+ if use_cache:
268
+ self.logger.info(
269
+ "The cached Open-Meteo JSON file is recent (age=%.0fm, max_age=%sm)",
270
+ json_age,
271
+ max_age,
236
272
  )
237
- .tz_convert("utc")
238
- .round(freq_scrap, ambiguous="infer", nonexistent="shift_forward")
239
- .tz_convert(self.time_zone)
240
- )
241
- # Using the clearoutside webpage
242
- response = get(
243
- "https://clearoutside.com/forecast/"
273
+ else:
274
+ self.logger.info(
275
+ "The cached Open-Meteo JSON file is old (age=%.0fm, max_age=%sm)",
276
+ json_age,
277
+ max_age,
278
+ )
279
+
280
+ if not use_cache:
281
+ self.logger.info("Fetching a new weather forecast from Open-Meteo")
282
+ headers = {"User-Agent": "EMHASS", "Accept": header_accept}
283
+ # Open-Meteo has returned non-existent time over DST transitions,
284
+ # so we now return unix timestamps and convert to date/times locally
285
+ # instead.
286
+ url = (
287
+ "https://api.open-meteo.com/v1/forecast?"
288
+ + "latitude="
244
289
  + str(round(self.lat, 2))
245
- + "/"
290
+ + "&longitude="
246
291
  + str(round(self.lon, 2))
247
- + "?desktop=true"
292
+ + "&minutely_15="
293
+ + "temperature_2m,"
294
+ + "relative_humidity_2m,"
295
+ + "rain,"
296
+ + "cloud_cover,"
297
+ + "wind_speed_10m,"
298
+ + "shortwave_radiation_instant,"
299
+ + "diffuse_radiation_instant,"
300
+ + "direct_normal_irradiance_instant"
301
+ + "&forecast_days="
302
+ + str(forecast_days)
303
+ + "&timezone="
304
+ + quote(str(self.time_zone), safe="")
305
+ + "&timeformat=unixtime"
248
306
  )
249
- """import bz2 # Uncomment to save a serialized data for tests
250
- import _pickle as cPickle
251
- with bz2.BZ2File("data/test_response_scrapper_get_method.pbz2", "w") as f:
252
- cPickle.dump(response.content, f)"""
253
- soup = BeautifulSoup(response.content, "html.parser")
254
- table = soup.find_all(id="day_0")[0]
255
- list_names = table.find_all(class_="fc_detail_label")
256
- list_tables = table.find_all("ul")[1:]
257
- selected_cols = [0, 1, 2, 3, 10, 12, 15] # Selected variables
258
- col_names = [list_names[i].get_text() for i in selected_cols]
259
- list_tables = [list_tables[i] for i in selected_cols]
260
- # Building the raw DF container
261
- raw_data = pd.DataFrame(
262
- index=range(len(forecast_dates_scrap)), columns=col_names, dtype=float
307
+ try:
308
+ self.logger.debug("Fetching data from Open-Meteo using URL: %s", url)
309
+ async with aiohttp.ClientSession() as session:
310
+ async with session.get(url, headers=headers) as response:
311
+ self.logger.debug("Returned HTTP status code: %s", response.status)
312
+ response.raise_for_status()
313
+ """import bz2 # Uncomment to save a serialized data for tests
314
+ import _pickle as cPickle
315
+ with bz2.BZ2File("data/test_response_openmeteo_get_method.pbz2", "w") as f:
316
+ cPickle.dump(response, f)"""
317
+ data = await response.json()
318
+ self.logger.info(
319
+ "Saving response in Open-Meteo JSON cache file: %s",
320
+ json_path,
321
+ )
322
+ async with aiofiles.open(json_path, "w") as json_file:
323
+ content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
324
+ await json_file.write(content)
325
+ except aiohttp.ClientError:
326
+ self.logger.error("Failed to fetch weather forecast from Open-Meteo", exc_info=True)
327
+ if data is not None:
328
+ self.logger.warning("Returning old cached data until next Open-Meteo attempt")
329
+
330
+ return data
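A usage sketch of the force-reload behaviour described in the docstring above, assuming an already constructed Forecast instance named fcst:

    import asyncio

    # max_age=0 marks any cached JSON as stale, so a fresh forecast is always
    # fetched from Open-Meteo (the cache still serves as a connectivity fallback)
    data = asyncio.run(fcst.get_cached_open_meteo_forecast_json(max_age=0, forecast_days=3))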
331
+
332
+ async def _get_weather_open_meteo(
333
+ self, w_forecast_cache_path: str, use_legacy_pvlib: bool
334
+ ) -> pd.DataFrame:
335
+ """Helper to retrieve weather data from Open-Meteo or cache."""
336
+ if not os.path.isfile(w_forecast_cache_path):
337
+ data_raw = await self.get_cached_open_meteo_forecast_json(
338
+ self.optim_conf["open_meteo_cache_max_age"],
339
+ self.optim_conf["delta_forecast_daily"].days,
263
340
  )
264
- for count_col, col in enumerate(col_names):
265
- list_rows = list_tables[count_col].find_all("li")
266
- for count_row, row in enumerate(list_rows):
267
- raw_data.loc[count_row, col] = float(row.get_text())
268
- # Treating index
269
- raw_data.set_index(forecast_dates_scrap, inplace=True)
270
- raw_data = raw_data[~raw_data.index.duplicated(keep="first")]
271
- raw_data = raw_data.reindex(self.forecast_dates)
272
- raw_data.interpolate(
341
+ data_15min = pd.DataFrame.from_dict(data_raw["minutely_15"])
342
+ # Date/times in the Open-Meteo JSON are unix timestamps
343
+ data_15min["time"] = pd.to_datetime(data_15min["time"], unit="s", utc=True)
344
+ data_15min["time"] = data_15min["time"].dt.tz_convert(self.time_zone)
345
+ data_15min.set_index("time", inplace=True)
346
+ data_15min = data_15min.rename(
347
+ columns={
348
+ "temperature_2m": "temp_air",
349
+ "relative_humidity_2m": "relative_humidity",
350
+ "rain": "precipitable_water",
351
+ "cloud_cover": "cloud_cover",
352
+ "wind_speed_10m": "wind_speed",
353
+ "shortwave_radiation_instant": "ghi",
354
+ "diffuse_radiation_instant": "dhi",
355
+ "direct_normal_irradiance_instant": "dni",
356
+ }
357
+ )
358
+ if self.logger.isEnabledFor(logging.DEBUG):
359
+ data_15min.to_csv(
360
+ self.emhass_conf["data_path"] / "debug-weather-forecast-open-meteo.csv"
361
+ )
362
+ data = data_15min.reindex(self.forecast_dates)
363
+ data.interpolate(
273
364
  method="linear",
274
365
  axis=0,
275
366
  limit=None,
276
367
  limit_direction="both",
277
368
  inplace=True,
278
369
  )
279
- # Converting the cloud cover into Global Horizontal Irradiance with a PVLib method
280
- ghi_est = self.cloud_cover_to_irradiance(
281
- raw_data["Total Clouds (% Sky Obscured)"]
370
+ data = set_df_index_freq(data)
371
+ index_utc = data.index.tz_convert("utc")
372
+ index_tz = index_utc.round(
373
+ freq=data.index.freq, ambiguous="infer", nonexistent="shift_forward"
374
+ ).tz_convert(self.time_zone)
375
+ data.index = index_tz
376
+ data = set_df_index_freq(data)
377
+ # Convert mm to cm and clip minimum to 0.1 cm
378
+ data["precipitable_water"] = (data["precipitable_water"] / 10).clip(lower=0.1)
379
+ if use_legacy_pvlib:
380
+ data = data.drop(columns=["ghi", "dhi", "dni"])
381
+ ghi_est = self.cloud_cover_to_irradiance(data["cloud_cover"])
382
+ data["ghi"] = ghi_est["ghi"]
383
+ data["dni"] = ghi_est["dni"]
384
+ data["dhi"] = ghi_est["dhi"]
385
+ if self.params["passed_data"].get("weather_forecast_cache", False):
386
+ data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
387
+ else:
388
+ data = await self.get_cached_forecast_data(w_forecast_cache_path)
389
+ return data
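The unixtime handling noted in the comments above (requesting timeformat=unixtime and converting locally to sidestep DST artefacts) reduces to two pandas calls; a small sketch with an illustrative time zone:

    import pandas as pd

    # Open-Meteo returns epoch seconds; parse as UTC, then convert to the local zone
    ts = pd.to_datetime([1735689600, 1735690500], unit="s", utc=True)
    ts_local = ts.tz_convert("Europe/Paris")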
390
+
391
+ async def _get_weather_solcast(self, w_forecast_cache_path: str) -> pd.DataFrame:
392
+ """Helper to retrieve weather data from Solcast or cache."""
393
+ if os.path.isfile(w_forecast_cache_path):
394
+ return await self.get_cached_forecast_data(w_forecast_cache_path)
395
+ if self.params["passed_data"].get("weather_forecast_cache_only", False):
396
+ self.logger.error("Unable to obtain Solcast cache file.")
397
+ self.logger.error(
398
+ "Try running optimization again with 'weather_forecast_cache_only': false"
282
399
  )
283
- data = ghi_est
284
- data["temp_air"] = raw_data["Temperature (°C)"]
285
- data["wind_speed"] = (
286
- raw_data["Wind Speed/Direction (mph)"] * 1.60934
287
- ) # conversion to km/h
288
- data["relative_humidity"] = raw_data["Relative Humidity (%)"]
289
- data["precipitable_water"] = pvlib.atmosphere.gueymard94_pw(
290
- data["temp_air"], data["relative_humidity"]
400
+ self.logger.error(
401
+ "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true."
291
402
  )
292
- elif method == "solcast": # using Solcast API
293
- # Check if weather_forecast_cache is true or if forecast_data file does not exist
294
- if not os.path.isfile(w_forecast_cache_path):
295
- # Check if weather_forecast_cache_only is true, if so produce error for not finding cache file
296
- if not self.params["passed_data"].get(
297
- "weather_forecast_cache_only", False
298
- ):
299
- # Retrieve data from the Solcast API
300
- if "solcast_api_key" not in self.retrieve_hass_conf:
301
- self.logger.error(
302
- "The solcast_api_key parameter was not defined"
303
- )
304
- return False
305
- if "solcast_rooftop_id" not in self.retrieve_hass_conf:
306
- self.logger.error(
307
- "The solcast_rooftop_id parameter was not defined"
308
- )
309
- return False
310
- headers = {
311
- "User-Agent": "EMHASS",
312
- "Authorization": "Bearer "
313
- + self.retrieve_hass_conf["solcast_api_key"],
314
- "content-type": "application/json",
315
- }
316
- days_solcast = int(
317
- len(self.forecast_dates) * self.freq.seconds / 3600
318
- )
319
- # If weather_forecast_cache, set request days as twice as long to avoid length issues (add a buffer)
320
- if self.params["passed_data"].get("weather_forecast_cache", False):
321
- days_solcast = min((days_solcast * 2), 336)
322
- url = (
323
- "https://api.solcast.com.au/rooftop_sites/"
324
- + self.retrieve_hass_conf["solcast_rooftop_id"]
325
- + "/forecasts?hours="
326
- + str(days_solcast)
327
- )
328
- response = get(url, headers=headers)
329
- """import bz2 # Uncomment to save a serialized data for tests
330
- import _pickle as cPickle
331
- with bz2.BZ2File("data/test_response_solcast_get_method.pbz2", "w") as f:
332
- cPickle.dump(response, f)"""
333
- # Verify the request passed
334
- if int(response.status_code) == 200:
335
- data = response.json()
336
- elif (
337
- int(response.status_code) == 402
338
- or int(response.status_code) == 429
339
- ):
403
+ return False
404
+ if "solcast_api_key" not in self.retrieve_hass_conf:
405
+ self.logger.error("The solcast_api_key parameter was not defined")
406
+ return False
407
+ if "solcast_rooftop_id" not in self.retrieve_hass_conf:
408
+ self.logger.error("The solcast_rooftop_id parameter was not defined")
409
+ return False
410
+ headers = {
411
+ "User-Agent": "EMHASS",
412
+ "Authorization": "Bearer " + self.retrieve_hass_conf["solcast_api_key"],
413
+ "content-type": header_accept,
414
+ }
415
+ days_solcast = int(len(self.forecast_dates) * self.freq.seconds / 3600)
416
+ roof_ids = re.split(r"[,\s]+", self.retrieve_hass_conf["solcast_rooftop_id"].strip())
417
+ total_data_list = [0] * len(self.forecast_dates)
418
+
419
+ async with aiohttp.ClientSession() as session:
420
+ for roof_id in roof_ids:
421
+ url = f"https://api.solcast.com.au/rooftop_sites/{roof_id}/forecasts?hours={days_solcast}"
422
+ async with session.get(url, headers=headers) as response:
423
+ if int(response.status) == 200:
424
+ data = await response.json()
425
+ elif int(response.status) in [402, 429]:
340
426
  self.logger.error(
341
427
  "Solcast error: May have exceeded your subscription limit."
342
428
  )
343
429
  return False
344
- elif (
345
- int(response.status_code) >= 400
346
- or int(response.status_code) >= 202
347
- ):
430
+ elif int(response.status) >= 400 or (202 <= int(response.status) <= 299):
348
431
  self.logger.error(
349
- "Solcast error: There was a issue with the solcast request, check solcast API key and rooftop ID."
350
- )
351
- self.logger.error(
352
- "Solcast error: Check that your subscription is valid and your network can connect to Solcast."
432
+ "Solcast error: Issue with request, check API key and rooftop ID."
353
433
  )
354
434
  return False
355
435
  data_list = []
356
436
  for elm in data["forecasts"]:
357
- data_list.append(
358
- elm["pv_estimate"] * 1000
359
- ) # Converting kW to W
360
- # Check if the retrieved data has the correct length
437
+ data_list.append(elm["pv_estimate"] * 1000)
361
438
  if len(data_list) < len(self.forecast_dates):
362
- self.logger.error(
363
- "Not enough data retried from Solcast service, try increasing the time step or use MPC."
364
- )
365
- else:
366
- # If runtime weather_forecast_cache is true save forecast result to file as cache
367
- if self.params["passed_data"].get(
368
- "weather_forecast_cache", False
369
- ):
370
- # Add x2 forecast periods for cached results. This adds a extra delta_forecast amount of days for a buffer
371
- cached_forecast_dates = self.forecast_dates.union(
372
- pd.date_range(
373
- self.forecast_dates[-1],
374
- periods=(len(self.forecast_dates) + 1),
375
- freq=self.freq,
376
- )[1:]
377
- )
378
- cache_data_list = data_list[0 : len(cached_forecast_dates)]
379
- cache_data_dict = {
380
- "ts": cached_forecast_dates,
381
- "yhat": cache_data_list,
382
- }
383
- data_cache = pd.DataFrame.from_dict(cache_data_dict)
384
- data_cache.set_index("ts", inplace=True)
385
- with open(w_forecast_cache_path, "wb") as file:
386
- cPickle.dump(data_cache, file)
387
- if not os.path.isfile(w_forecast_cache_path):
388
- self.logger.warning(
389
- "Solcast forecast data could not be saved to file."
390
- )
391
- else:
392
- self.logger.info(
393
- "Saved the Solcast results to cache, for later reference."
394
- )
395
- # Trim request results to forecast_dates
396
- data_list = data_list[0 : len(self.forecast_dates)]
397
- data_dict = {"ts": self.forecast_dates, "yhat": data_list}
398
- # Define DataFrame
399
- data = pd.DataFrame.from_dict(data_dict)
400
- # Define index
401
- data.set_index("ts", inplace=True)
402
- # Else, notify user to update cache
403
- else:
404
- self.logger.error("Unable to obtain Solcast cache file.")
405
- self.logger.error(
406
- "Try running optimization again with 'weather_forecast_cache_only': false"
407
- )
408
- self.logger.error(
409
- "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true in an optimization, or run the `weather-forecast-cache` action, to pull new data from Solcast and cache."
410
- )
411
- return False
412
- # Else, open stored weather_forecast_data.pkl file for previous forecast data (cached data)
413
- else:
414
- with open(w_forecast_cache_path, "rb") as file:
415
- data = cPickle.load(file)
416
- if not isinstance(data, pd.DataFrame) or len(data) < len(
417
- self.forecast_dates
418
- ):
419
- self.logger.error(
420
- "There has been a error obtaining cached Solcast forecast data."
421
- )
422
- self.logger.error(
423
- "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from Solcast and cache."
424
- )
425
- self.logger.warning(
426
- "Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true"
427
- )
428
- os.remove(w_forecast_cache_path)
429
- return False
430
- # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast Dataframe size to appropriate length)
431
- if (
432
- self.forecast_dates[0] in data.index
433
- and self.forecast_dates[-1] in data.index
434
- ):
435
- data = data.loc[
436
- self.forecast_dates[0] : self.forecast_dates[-1]
437
- ]
438
- self.logger.info(
439
- "Retrieved Solcast data from the previously saved cache."
440
- )
441
- else:
442
- self.logger.error(
443
- "Unable to obtain cached Solcast forecast data within the requested timeframe range."
444
- )
445
- self.logger.error(
446
- "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from Solcast and cache."
447
- )
448
- self.logger.warning(
449
- "Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true"
450
- )
451
- os.remove(w_forecast_cache_path)
439
+ self.logger.error("Not enough data retrieved from Solcast service.")
452
440
  return False
453
- elif method == "solar.forecast": # using the solar.forecast API
454
- # Retrieve data from the solar.forecast API
455
- if "solar_forecast_kwp" not in self.retrieve_hass_conf:
456
- self.logger.warning(
457
- "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
458
- )
459
- self.retrieve_hass_conf["solar_forecast_kwp"] = 5
460
- if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
461
- self.logger.warning(
462
- "The solar_forecast_kwp parameter is set to zero, setting to default 5"
463
- )
464
- self.retrieve_hass_conf["solar_forecast_kwp"] = 5
465
- if self.optim_conf["delta_forecast_daily"].days > 1:
466
- self.logger.warning(
467
- "The free public tier for solar.forecast only provides one day forecasts"
468
- )
469
- self.logger.warning(
470
- "Continuing with just the first day of data, the other days are filled with 0.0."
471
- )
472
- self.logger.warning(
473
- "Use the other available methods for delta_forecast_daily > 1"
474
- )
475
- headers = {"Accept": "application/json"}
476
- data = pd.DataFrame()
441
+ total_data_list = [
442
+ total + current
443
+ for total, current in zip_longest(total_data_list, data_list, fillvalue=0)
444
+ ]
445
+
446
+ total_data_list = total_data_list[0 : len(self.forecast_dates)]
447
+ data_dict = {"ts": self.forecast_dates, "yhat": total_data_list}
448
+ data = pd.DataFrame.from_dict(data_dict)
449
+ data.set_index("ts", inplace=True)
450
+ if self.params["passed_data"].get("weather_forecast_cache", False):
451
+ data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
452
+ return data
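The multi-rooftop support above splits solcast_rooftop_id on commas or whitespace and sums the per-site forecasts element-wise; a sketch of the two building blocks, with hypothetical IDs and values:

    import re
    from itertools import zip_longest

    roof_ids = re.split(r"[,\s]+", "roof-a, roof-b roof-c".strip())
    # -> ['roof-a', 'roof-b', 'roof-c']
    total = [t + c for t, c in zip_longest([0, 0, 0], [100.0, 250.0, 300.0], fillvalue=0)]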
453
+
454
+ async def _get_weather_solar_forecast(self, w_forecast_cache_path: str) -> pd.DataFrame:
455
+ """Helper to retrieve weather data from solar.forecast or cache."""
456
+ if os.path.isfile(w_forecast_cache_path):
457
+ return await self.get_cached_forecast_data(w_forecast_cache_path)
458
+ # Validation and Default Setup
459
+ if "solar_forecast_kwp" not in self.retrieve_hass_conf:
460
+ self.logger.warning(
461
+ "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
462
+ )
463
+ self.retrieve_hass_conf["solar_forecast_kwp"] = 5
464
+ if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
465
+ self.logger.warning(
466
+ "The solar_forecast_kwp parameter is set to zero, setting to default 5"
467
+ )
468
+ self.retrieve_hass_conf["solar_forecast_kwp"] = 5
469
+ if self.optim_conf["delta_forecast_daily"].days > 1:
470
+ self.logger.warning(
471
+ "The free public tier for solar.forecast only provides one day forecasts"
472
+ )
473
+ headers = {"Accept": header_accept}
474
+ data = pd.DataFrame()
475
+
476
+ async with aiohttp.ClientSession() as session:
477
477
  for i in range(len(self.plant_conf["pv_module_model"])):
478
478
  url = (
479
479
  "https://api.forecast.solar/estimate/"
@@ -487,74 +487,108 @@ class Forecast(object):
487
487
  + "/"
488
488
  + str(self.retrieve_hass_conf["solar_forecast_kwp"])
489
489
  )
490
- response = get(url, headers=headers)
491
- """import bz2 # Uncomment to save a serialized data for tests
492
- import _pickle as cPickle
493
- with bz2.BZ2File("data/test_response_solarforecast_get_method.pbz2", "w") as f:
494
- cPickle.dump(response.json(), f)"""
495
- data_raw = response.json()
496
- data_dict = {
497
- "ts": list(data_raw["result"]["watts"].keys()),
498
- "yhat": list(data_raw["result"]["watts"].values()),
499
- }
500
- # Form the final DataFrame
501
- data_tmp = pd.DataFrame.from_dict(data_dict)
502
- data_tmp.set_index("ts", inplace=True)
503
- data_tmp.index = pd.to_datetime(data_tmp.index)
504
- data_tmp = data_tmp.tz_localize(self.forecast_dates.tz)
505
- data_tmp = data_tmp.reindex(index=self.forecast_dates)
506
- mask_up_data_df = (
507
- data_tmp.copy(deep=True).fillna(method="ffill").isnull()
508
- )
509
- mask_down_data_df = (
510
- data_tmp.copy(deep=True).fillna(method="bfill").isnull()
511
- )
512
- data_tmp.loc[data_tmp.index[mask_up_data_df["yhat"] == True], :] = 0.0
513
- data_tmp.loc[data_tmp.index[mask_down_data_df["yhat"] == True], :] = 0.0
514
- data_tmp.interpolate(inplace=True, limit=1)
515
- data_tmp = data_tmp.fillna(0.0)
516
- if len(data) == 0:
517
- data = copy.deepcopy(data_tmp)
518
- else:
519
- data = data + data_tmp
520
- elif method == "csv": # reading from a csv file
521
- weather_csv_file_path = csv_path
522
- # Loading the csv file, we will consider that this is the PV power in W
523
- data = pd.read_csv(weather_csv_file_path, header=None, names=["ts", "yhat"])
524
- # Check if the passed data has the correct length
525
- if len(data) < len(self.forecast_dates):
526
- self.logger.error("Passed data from CSV is not long enough")
527
- else:
528
- # Ensure correct length
529
- data = data.loc[data.index[0 : len(self.forecast_dates)], :]
530
- # Define index
531
- data.index = self.forecast_dates
532
- data.drop("ts", axis=1, inplace=True)
533
- data = data.copy().loc[self.forecast_dates]
534
- elif method == "list": # reading a list of values
535
- # Loading data from passed list
536
- data_list = self.params["passed_data"]["pv_power_forecast"]
537
- # Check if the passed data has the correct length
538
- if (
539
- len(data_list) < len(self.forecast_dates)
540
- and self.params["passed_data"]["prediction_horizon"] is None
541
- ):
542
- self.logger.error("Passed data from passed list is not long enough")
543
- else:
544
- # Ensure correct length
545
- data_list = data_list[0 : len(self.forecast_dates)]
546
- # Define DataFrame
547
- data_dict = {"ts": self.forecast_dates, "yhat": data_list}
548
- data = pd.DataFrame.from_dict(data_dict)
549
- # Define index
550
- data.set_index("ts", inplace=True)
490
+ async with session.get(url, headers=headers) as response:
491
+ data_raw = await response.json()
492
+ data_dict = {
493
+ "ts": list(data_raw["result"]["watts"].keys()),
494
+ "yhat": list(data_raw["result"]["watts"].values()),
495
+ }
496
+ data_tmp = pd.DataFrame.from_dict(data_dict)
497
+ data_tmp.set_index("ts", inplace=True)
498
+ data_tmp.index = pd.to_datetime(data_tmp.index)
499
+ data_tmp = data_tmp.tz_localize(
500
+ self.forecast_dates.tz,
501
+ ambiguous="infer",
502
+ nonexistent="shift_forward",
503
+ )
504
+ data_tmp = data_tmp.reindex(index=self.forecast_dates)
505
+ # Gap filling
506
+ mask_up = data_tmp.copy(deep=True).fillna(method="ffill").isnull()
507
+ mask_down = data_tmp.copy(deep=True).fillna(method="bfill").isnull()
508
+ data_tmp.loc[mask_up["yhat"], :] = 0.0
509
+ data_tmp.loc[mask_down["yhat"], :] = 0.0
510
+ data_tmp.interpolate(inplace=True, limit=1)
511
+ data_tmp = data_tmp.fillna(0.0)
512
+ if len(data) == 0:
513
+ data = copy.deepcopy(data_tmp)
514
+ else:
515
+ data = data + data_tmp
516
+
517
+ if self.params["passed_data"].get("weather_forecast_cache", False):
518
+ data = await self.set_cached_forecast_data(w_forecast_cache_path, data)
519
+ return data
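The gap filling above relies on a forward-fill/backward-fill trick: rows still NaN after a forward fill are leading gaps, rows still NaN after a backward fill are trailing gaps, and both are zeroed before short interior gaps are interpolated. A toy illustration:

    import pandas as pd

    s = pd.DataFrame({"yhat": [None, 10.0, None, 20.0, None]})
    lead_mask = s.ffill().isnull()   # True only on the leading NaN
    trail_mask = s.bfill().isnull()  # True only on the trailing NaN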
520
+
521
+ def _get_weather_csv(self, csv_path: str) -> pd.DataFrame:
522
+ """Helper to retrieve weather data from CSV."""
523
+ data = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
524
+ if len(data) < len(self.forecast_dates):
525
+ self.logger.error("Passed data from CSV is not long enough")
526
+ else:
527
+ data = data.loc[data.index[0 : len(self.forecast_dates)], :]
528
+ data.index = self.forecast_dates
529
+ data.drop("ts", axis=1, inplace=True)
530
+ data = data.copy().loc[self.forecast_dates]
531
+ return data
532
+
533
+ def _get_weather_list(self) -> pd.DataFrame:
534
+ """Helper to retrieve weather data from a passed list."""
535
+ data_list = self.params["passed_data"]["pv_power_forecast"]
536
+ if (
537
+ len(data_list) < len(self.forecast_dates)
538
+ and self.params["passed_data"]["prediction_horizon"] is None
539
+ ):
540
+ self.logger.error(error_msg_list_not_long_enough)
541
+ return None
542
+ else:
543
+ data_list = data_list[0 : len(self.forecast_dates)]
544
+ data_dict = {"ts": self.forecast_dates, "yhat": data_list}
545
+ data = pd.DataFrame.from_dict(data_dict)
546
+ data.set_index("ts", inplace=True)
547
+ return data
548
+
549
+ async def get_weather_forecast(
550
+ self,
551
+ method: str | None = "open-meteo",
552
+ csv_path: str | None = "data_weather_forecast.csv",
553
+ use_legacy_pvlib: bool | None = False,
554
+ ) -> pd.DataFrame:
555
+ r"""
556
+ Get and generate weather forecast data.
557
+
558
+ :param method: The desired method, options are 'open-meteo', 'csv', 'list', 'solcast' and \
559
+ 'solar.forecast'. Defaults to 'open-meteo'.
560
+ :type method: str, optional
561
+ :return: The DataFrame containing the forecasted data
562
+ :rtype: pd.DataFrame
563
+ """
564
+ csv_path = self.emhass_conf["data_path"] / csv_path
565
+ w_forecast_cache_path = os.path.abspath(
566
+ self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
567
+ )
568
+ self.logger.info("Retrieving weather forecast data using method = " + method)
569
+ if method == "scrapper":
570
+ self.logger.warning(
571
+ "The scrapper method has been deprecated and the keyword is accepted just for backward compatibility, please change the PV forecast method to open-meteo"
572
+ )
573
+ self.weather_forecast_method = method
574
+ if method in ["open-meteo", "scrapper"]:
575
+ data = await self._get_weather_open_meteo(w_forecast_cache_path, use_legacy_pvlib)
576
+ elif method == "solcast":
577
+ data = await self._get_weather_solcast(w_forecast_cache_path)
578
+ elif method == "solar.forecast":
579
+ data = await self._get_weather_solar_forecast(w_forecast_cache_path)
580
+ elif method == "csv":
581
+ data = self._get_weather_csv(csv_path)
582
+ elif method == "list":
583
+ data = self._get_weather_list()
551
584
  else:
552
585
  self.logger.error("Method %r is not valid", method)
553
586
  data = None
587
+ self.logger.debug("get_weather_forecast returning:\n%s", data)
554
588
  return data
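A usage sketch for the now-async dispatcher above, assuming a configured Forecast instance named fcst:

    import asyncio

    # 'open-meteo' is the default; 'scrapper' is still accepted but deprecated
    df_weather = asyncio.run(fcst.get_weather_forecast(method="open-meteo"))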
555
589
 
556
590
  def cloud_cover_to_irradiance(
557
- self, cloud_cover: pd.Series, offset: Optional[int] = 35
591
+ self, cloud_cover: pd.Series, offset: int | None = 35
558
592
  ) -> pd.DataFrame:
559
593
  """
560
594
  Estimates irradiance from cloud cover in the following steps.
@@ -579,9 +613,7 @@ class Forecast(object):
579
613
  """
580
614
  location = Location(latitude=self.lat, longitude=self.lon)
581
615
  solpos = location.get_solarposition(cloud_cover.index)
582
- cs = location.get_clearsky(
583
- cloud_cover.index, model="ineichen", solar_position=solpos
584
- )
616
+ cs = location.get_clearsky(cloud_cover.index, model="ineichen", solar_position=solpos)
585
617
  # Using only the linear method
586
618
  offset = offset / 100.0
587
619
  cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.0
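The linear method referenced above scales clear-sky GHI down with cloud cover, never below the offset floor; a sketch of the relationship (the remaining lines of this method fall outside the hunk, so the exact expression is an assumption based on the standard pvlib clear-sky scaling):

    # offset and cloud_cover_unit are fractions in [0, 1]
    ghi = cs["ghi"] * (offset + (1 - offset) * (1.0 - cloud_cover_unit))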
@@ -599,6 +631,7 @@ class Forecast(object):
599
631
  alpha: float,
600
632
  beta: float,
601
633
  col: str,
634
+ ignore_pv_feedback: bool = False,
602
635
  ) -> pd.DataFrame:
603
636
  """A simple correction method for forecasted data using the current real values of a variable.
604
637
 
@@ -612,119 +645,385 @@ class Forecast(object):
612
645
  :type beta: float
613
646
  :param col: The column variable name
614
647
  :type col: str
648
+ :param ignore_pv_feedback: If True, bypass mixing and return the original forecast (used during curtailment)
649
+ :type ignore_pv_feedback: bool
615
650
  :return: The output DataFrame with the corrected values
616
651
  :rtype: pd.DataFrame
617
652
  """
653
+ # If ignoring PV feedback (e.g., during curtailment), return original forecast
654
+ if ignore_pv_feedback:
655
+ return df_forecast
656
+
618
657
  first_fcst = alpha * df_forecast.iloc[0] + beta * df_now[col].iloc[-1]
619
- df_forecast.iloc[0] = first_fcst
658
+ df_forecast.iloc[0] = int(round(first_fcst))
620
659
  return df_forecast
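The mixing step is a single convex combination applied to the first forecast value; a worked example with alpha=0.25 and beta=0.75:

    # forecast first step 1000 W, latest measured value 800 W
    first_fcst = 0.25 * 1000 + 0.75 * 800  # = 850.0, stored as int(round(850.0))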
621
660
 
661
+ def _get_model_power(self, params, device_type):
662
+ """
663
+ Helper to extract power rating based on device type and available parameters.
664
+ """
665
+ if device_type == "module":
666
+ if "STC" in params:
667
+ return params["STC"]
668
+ if "I_mp_ref" in params and "V_mp_ref" in params:
669
+ return params["I_mp_ref"] * params["V_mp_ref"]
670
+ elif device_type == "inverter":
671
+ if "Paco" in params:
672
+ return params["Paco"]
673
+ if "Pdco" in params:
674
+ return params["Pdco"]
675
+ return None
676
+
677
+ def _find_closest_model(self, target_power, database, device_type):
678
+ """
679
+ Find the model in the database that has a power rating closest to the target_power.
680
+ """
681
+ closest_model = None
682
+ min_diff = float("inf")
683
+ # Handle DataFrame (columns are models) or Dict (keys are models)
684
+ iterator = database.items() if hasattr(database, "items") else database.iteritems()
685
+ for _, params in iterator:
686
+ power = self._get_model_power(params, device_type)
687
+ if power is not None:
688
+ diff = abs(power - target_power)
689
+ if diff < min_diff:
690
+ min_diff = diff
691
+ closest_model = params
692
+ if closest_model is not None:
693
+ # Safely get name if it exists (DataFrame Series usually have a .name attribute)
694
+ model_name = getattr(closest_model, "name", "unknown")
695
+ self.logger.info(f"Closest {device_type} model to {target_power}W found: {model_name}")
696
+ else:
697
+ self.logger.warning(f"No suitable {device_type} model found close to {target_power}W")
698
+ return closest_model
699
+
700
+ def _get_model(self, model_spec, database, device_type):
701
+ """
702
+ Retrieve a model from the database by name or by power rating.
703
+ """
704
+ # If it's a string, try to find it by name
705
+ if isinstance(model_spec, str):
706
+ if model_spec in database:
707
+ return database[model_spec]
708
+ # If not found by name, check if it is a number string (e.g., "300")
709
+ try:
710
+ target_power = float(model_spec)
711
+ return self._find_closest_model(target_power, database, device_type)
712
+ except ValueError:
713
+ # Not a number, fallback to original behavior (will likely raise KeyError later)
714
+ self.logger.warning(f"{device_type} model '{model_spec}' not found in database.")
715
+ return database[model_spec]
716
+ # If it's a number (int or float), find closest by power
717
+ elif isinstance(model_spec, int | float):
718
+ return self._find_closest_model(model_spec, database, device_type)
719
+ else:
720
+ self.logger.error(f"Invalid type for {device_type} model: {type(model_spec)}")
721
+ return None
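The power-based lookup above can be illustrated with a toy database; a sketch with hypothetical module names:

    # Dict standing in for the CEC database; keys are models, values are parameters
    database = {"ModA": {"STC": 250.0}, "ModB": {"STC": 310.0}}
    closest = min(database.values(), key=lambda p: abs(p["STC"] - 300.0))  # ModB's params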
722
+
723
+ def _calculate_pvlib_power(self, df_weather: pd.DataFrame) -> pd.Series:
724
+ """
725
+ Helper to simulate PV power generation using PVLib when no direct forecast is available.
726
+ """
727
+ # Setting the main parameters of the PV plant
728
+ location = Location(latitude=self.lat, longitude=self.lon)
729
+ temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"]["close_mount_glass_glass"]
730
+ # Load CEC databases
731
+ cec_modules_path = self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2"
732
+ cec_inverters_path = self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2"
733
+ with bz2.BZ2File(cec_modules_path, "rb") as f:
734
+ cec_modules = cPickle.load(f)
735
+ with bz2.BZ2File(cec_inverters_path, "rb") as f:
736
+ cec_inverters = cPickle.load(f)
737
+
738
+ # Inner helper to run a single simulation configuration
739
+ def run_single_config(mod_spec, inv_spec, tilt, azimuth, mod_per_str, str_per_inv):
740
+ module = self._get_model(mod_spec, cec_modules, "module")
741
+ inverter = self._get_model(inv_spec, cec_inverters, "inverter")
742
+ system = PVSystem(
743
+ surface_tilt=tilt,
744
+ surface_azimuth=azimuth,
745
+ module_parameters=module,
746
+ inverter_parameters=inverter,
747
+ temperature_model_parameters=temp_params,
748
+ modules_per_string=mod_per_str,
749
+ strings_per_inverter=str_per_inv,
750
+ )
751
+ mc = ModelChain(system, location, aoi_model="physical")
752
+ mc.run_model(df_weather)
753
+ return mc.results.ac
754
+
755
+ # Handle list (mixed orientation) vs single configuration
756
+ if isinstance(self.plant_conf["pv_module_model"], list):
757
+ p_pv_forecast = pd.Series(0, index=df_weather.index)
758
+ for i in range(len(self.plant_conf["pv_module_model"])):
759
+ result = run_single_config(
760
+ self.plant_conf["pv_module_model"][i],
761
+ self.plant_conf["pv_inverter_model"][i],
762
+ self.plant_conf["surface_tilt"][i],
763
+ self.plant_conf["surface_azimuth"][i],
764
+ self.plant_conf["modules_per_string"][i],
765
+ self.plant_conf["strings_per_inverter"][i],
766
+ )
767
+ p_pv_forecast = p_pv_forecast + result
768
+ else:
769
+ p_pv_forecast = run_single_config(
770
+ self.plant_conf["pv_module_model"],
771
+ self.plant_conf["pv_inverter_model"],
772
+ self.plant_conf["surface_tilt"],
773
+ self.plant_conf["surface_azimuth"],
774
+ self.plant_conf["modules_per_string"],
775
+ self.plant_conf["strings_per_inverter"],
776
+ )
777
+ return p_pv_forecast
778
+
622
779
  def get_power_from_weather(
623
780
  self,
624
781
  df_weather: pd.DataFrame,
625
- set_mix_forecast: Optional[bool] = False,
626
- df_now: Optional[pd.DataFrame] = pd.DataFrame(),
782
+ set_mix_forecast: bool | None = False,
783
+ df_now: pd.DataFrame | None = pd.DataFrame(),
627
784
  ) -> pd.Series:
628
785
  r"""
629
- Convert wheater forecast data into electrical power.
630
-
786
+ Convert weather forecast data into electrical power.
787
+
631
788
  :param df_weather: The DataFrame containing the weather forecasted data. \
632
789
  This DF should be generated by the 'get_weather_forecast' method or at \
633
790
  least contain the same columns names filled with proper data.
634
791
  :type df_weather: pd.DataFrame
635
- :param set_mix_forecast: Use a mixed forcast strategy to integra now/current values.
792
+ :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
636
793
  :type set_mix_forecast: Bool, optional
637
794
  :param df_now: The DataFrame containing the now/current data.
638
795
  :type df_now: pd.DataFrame
639
796
  :return: The DataFrame containing the electrical power in Watts
640
797
  :rtype: pd.DataFrame
641
-
642
798
  """
643
799
  # If using csv method we consider that yhat is the PV power in W
644
800
  if (
645
801
  "solar_forecast_kwp" in self.retrieve_hass_conf.keys()
646
802
  and self.retrieve_hass_conf["solar_forecast_kwp"] == 0
647
803
  ):
648
- P_PV_forecast = pd.Series(0, index=df_weather.index)
804
+ p_pv_forecast = pd.Series(0, index=df_weather.index)
805
+ elif self.weather_forecast_method in [
806
+ "solcast",
807
+ "solar.forecast",
808
+ "csv",
809
+ "list",
810
+ ]:
811
+ p_pv_forecast = df_weather["yhat"]
812
+ p_pv_forecast.name = None
649
813
  else:
650
- if (
651
- self.weather_forecast_method == "solcast"
652
- or self.weather_forecast_method == "solar.forecast"
653
- or self.weather_forecast_method == "csv"
654
- or self.weather_forecast_method == "list"
655
- ):
656
- P_PV_forecast = df_weather["yhat"]
657
- P_PV_forecast.name = None
658
- else: # We will transform the weather data into electrical power
659
- # Transform to power (Watts)
660
- # Setting the main parameters of the PV plant
661
- location = Location(latitude=self.lat, longitude=self.lon)
662
- temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"][
663
- "close_mount_glass_glass"
664
- ]
665
- cec_modules = bz2.BZ2File(
666
- self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2", "rb"
667
- )
668
- cec_modules = cPickle.load(cec_modules)
669
- cec_inverters = bz2.BZ2File(
670
- self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2", "rb"
671
- )
672
- cec_inverters = cPickle.load(cec_inverters)
673
- if type(self.plant_conf["pv_module_model"]) == list:
674
- P_PV_forecast = pd.Series(0, index=df_weather.index)
675
- for i in range(len(self.plant_conf["pv_module_model"])):
676
- # Selecting correct module and inverter
677
- module = cec_modules[self.plant_conf["pv_module_model"][i]]
678
- inverter = cec_inverters[
679
- self.plant_conf["pv_inverter_model"][i]
680
- ]
681
- # Building the PV system in PVLib
682
- system = PVSystem(
683
- surface_tilt=self.plant_conf["surface_tilt"][i],
684
- surface_azimuth=self.plant_conf["surface_azimuth"][i],
685
- module_parameters=module,
686
- inverter_parameters=inverter,
687
- temperature_model_parameters=temp_params,
688
- modules_per_string=self.plant_conf["modules_per_string"][i],
689
- strings_per_inverter=self.plant_conf[
690
- "strings_per_inverter"
691
- ][i],
692
- )
693
- mc = ModelChain(system, location, aoi_model="physical")
694
- # Run the model on the weather DF indexes
695
- mc.run_model(df_weather)
696
- # Extracting results for AC power
697
- P_PV_forecast = P_PV_forecast + mc.results.ac
698
- else:
699
- # Selecting correct module and inverter
700
- module = cec_modules[self.plant_conf["pv_module_model"]]
701
- inverter = cec_inverters[self.plant_conf["pv_inverter_model"]]
702
- # Building the PV system in PVLib
703
- system = PVSystem(
704
- surface_tilt=self.plant_conf["surface_tilt"],
705
- surface_azimuth=self.plant_conf["surface_azimuth"],
706
- module_parameters=module,
707
- inverter_parameters=inverter,
708
- temperature_model_parameters=temp_params,
709
- modules_per_string=self.plant_conf["modules_per_string"],
710
- strings_per_inverter=self.plant_conf["strings_per_inverter"],
711
- )
712
- mc = ModelChain(system, location, aoi_model="physical")
713
- # Run the model on the weather DF indexes
714
- mc.run_model(df_weather)
715
- # Extracting results for AC power
716
- P_PV_forecast = mc.results.ac
814
+ # We will transform the weather data into electrical power
815
+ p_pv_forecast = self._calculate_pvlib_power(df_weather)
717
816
  if set_mix_forecast:
718
- P_PV_forecast = Forecast.get_mix_forecast(
817
+ ignore_pv_feedback = self.params["passed_data"].get(
818
+ "ignore_pv_feedback_during_curtailment", False
819
+ )
820
+ p_pv_forecast = Forecast.get_mix_forecast(
719
821
  df_now,
720
- P_PV_forecast,
822
+ p_pv_forecast,
721
823
  self.params["passed_data"]["alpha"],
722
824
  self.params["passed_data"]["beta"],
723
- self.var_PV,
825
+ self.var_pv,
826
+ ignore_pv_feedback,
827
+ )
828
+ p_pv_forecast[p_pv_forecast < 0] = 0 # replace any negative PV values with zero
829
+ self.logger.debug("get_power_from_weather returning:\n%s", p_pv_forecast)
830
+ return p_pv_forecast
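A brief sketch tying the two steps together, assuming a configured Forecast instance named fcst:

    import asyncio

    df_weather = asyncio.run(fcst.get_weather_forecast())
    p_pv = fcst.get_power_from_weather(df_weather)  # pd.Series in Watts, negatives clipped to 0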
831
+
832
+ @staticmethod
833
+ def compute_solar_angles(df: pd.DataFrame, latitude: float, longitude: float) -> pd.DataFrame:
834
+ """
835
+ Compute solar angles (elevation, azimuth) based on timestamps and location.
836
+
837
+ :param df: DataFrame with a DateTime index.
838
+ :param latitude: Latitude of the PV system.
839
+ :param longitude: Longitude of the PV system.
840
+ :return: DataFrame with added solar elevation and azimuth.
841
+ """
842
+ df = df.copy()
843
+ solpos = get_solarposition(df.index, latitude, longitude)
844
+ df["solar_elevation"] = solpos["elevation"]
845
+ df["solar_azimuth"] = solpos["azimuth"]
846
+ return df
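compute_solar_angles is a plain static helper, so it can be exercised standalone; a minimal sketch:

    import pandas as pd

    from emhass.forecast import Forecast

    idx = pd.date_range("2024-06-01", periods=4, freq="15min", tz="UTC")
    df = Forecast.compute_solar_angles(pd.DataFrame(index=idx), latitude=45.0, longitude=6.0)
    # df now carries 'solar_elevation' and 'solar_azimuth' columns from pvlib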
847
+
848
+ def adjust_pv_forecast_data_prep(self, data: pd.DataFrame) -> pd.DataFrame:
849
+ """
850
+ Prepare data for adjusting the photovoltaic (PV) forecast.
851
+
852
+ This method aligns the actual PV production data with the forecasted data,
853
+ adds additional features for analysis, and separates the predictors (X)
854
+ from the target variable (y).
855
+
856
+ :param data: A DataFrame containing the actual PV production data and the
857
+ forecasted PV production data.
858
+ :type data: pd.DataFrame
859
+ :return: DataFrame with data for adjusted PV model train.
860
+ """
861
+ # Extract target and predictor
862
+ self.logger.debug("adjust_pv_forecast_data_prep using data:\n%s", data)
863
+ if self.logger.isEnabledFor(logging.DEBUG):
864
+ data.to_csv(
865
+ self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-input-data.csv"
866
+ )
867
+ P_PV = data[self.var_pv] # Actual PV production
868
+ p_pv_forecast = data[self.var_pv_forecast] # Forecasted PV production
869
+ # Define time ranges
870
+ last_day = data.index.max().normalize() # Last available day
871
+ three_months_ago = last_day - pd.DateOffset(
872
+ days=self.retrieve_hass_conf["historic_days_to_retrieve"]
873
+ )
874
+ # Train/Test: Last historic_days_to_retrieve days (excluding the last day)
875
+ train_test_mask = (data.index >= three_months_ago) & (data.index < last_day)
876
+ self.p_pv_train_test = P_PV[train_test_mask]
877
+ self.p_pv_forecast_train_test = p_pv_forecast[train_test_mask]
878
+ # Validation: Last day only
879
+ validation_mask = data.index >= last_day
880
+ self.p_pv_validation = P_PV[validation_mask]
881
+ self.p_pv_forecast_validation = p_pv_forecast[validation_mask]
882
+ # Ensure data is aligned
883
+ self.data_adjust_pv = pd.concat(
884
+ [P_PV.rename("actual"), p_pv_forecast.rename("forecast")], axis=1
885
+ ).dropna()
886
+ # Add more features
887
+ self.data_adjust_pv = add_date_features(self.data_adjust_pv)
888
+ self.data_adjust_pv = Forecast.compute_solar_angles(self.data_adjust_pv, self.lat, self.lon)
889
+ # Features (X) and target (y)
890
+ self.x_adjust_pv = self.data_adjust_pv.drop(columns=["actual"]) # Predictors
891
+ self.y_adjust_pv = self.data_adjust_pv["actual"] # Target: actual PV production
892
+ self.logger.debug("adjust_pv_forecast_data_prep output data:\n%s", self.data_adjust_pv)
893
+ if self.logger.isEnabledFor(logging.DEBUG):
894
+ self.data_adjust_pv.to_csv(
895
+ self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-data-prep-output-data.csv"
724
896
  )
725
- return P_PV_forecast
726
897
 
727
- def get_forecast_days_csv(self, timedelta_days: Optional[int] = 1) -> pd.date_range:
898
+ async def adjust_pv_forecast_fit(
899
+ self,
900
+ n_splits: int = 5,
901
+ regression_model: str = "LassoRegression",
902
+ debug: bool | None = False,
903
+ ) -> pd.DataFrame:
904
+ """
905
+ Fit a regression model to adjust the photovoltaic (PV) forecast.
906
+
907
+ This method uses historical actual and forecasted PV production data, along with
908
+ additional solar and date features, to train a regression model. The model is
909
+ optimized using a grid search with time-series cross-validation.
910
+
911
+ :param n_splits: The number of splits for time-series cross-validation, defaults to 5.
912
+ :type n_splits: int, optional
913
+ :param regression_model: The type of regression model to use. See REGRESSION_METHODS \
914
+ in machine_learning_regressor.py for the authoritative list of supported models. \
915
+ Currently: 'LinearRegression', 'RidgeRegression', 'LassoRegression', 'ElasticNet', \
916
+ 'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'RandomForestRegressor', \
917
+ 'ExtraTreesRegressor', 'GradientBoostingRegressor', 'AdaBoostRegressor', \
918
+ 'MLPRegressor'. Defaults to "LassoRegression".
919
+ :type regression_model: str, optional
920
+ :param debug: If True, the model is not saved to disk, useful for debugging, defaults to False.
921
+ :type debug: bool, optional
922
+ :return: None. The fitted model is stored in self.model_adjust_pv.
923
+ :rtype: None
924
+ """
925
+ # Get regression model and hyperparameter grid
926
+ mlr = MLRegressor(
927
+ self.data_adjust_pv,
928
+ "adjusted_pv_forecast",
929
+ regression_model,
930
+ list(self.x_adjust_pv.columns),
931
+ [self.y_adjust_pv.name],  # wrap the target name in a list; list() would split the string into characters
932
+ None,
933
+ self.logger,
934
+ )
935
+ pipeline, param_grid = mlr._get_model_and_params()
936
+ # Time-series split
937
+ tscv = TimeSeriesSplit(n_splits=n_splits)
938
+ grid_search = GridSearchCV(
939
+ pipeline, param_grid, cv=tscv, scoring="neg_mean_squared_error", verbose=0
940
+ )
941
+ # Train model
942
+ await asyncio.to_thread(grid_search.fit, self.x_adjust_pv, self.y_adjust_pv)
943
+ self.model_adjust_pv = grid_search.best_estimator_
944
+ # Calculate training metrics
945
+ y_pred_train = self.model_adjust_pv.predict(self.x_adjust_pv)
946
+ self.rmse = np.sqrt(mean_squared_error(self.y_adjust_pv, y_pred_train))
947
+ self.r2 = r2_score(self.y_adjust_pv, y_pred_train)
948
+ # Log the metrics
949
+ self.logger.info(f"PV adjust Training metrics: RMSE = {self.rmse}, R2 = {self.r2}")
950
+ # Save model
951
+ if not debug:
952
+ filename = "adjust_pv_regressor.pkl"
953
+ filename_path = self.emhass_conf["data_path"] / filename
954
+ async with aiofiles.open(filename_path, "wb") as outp:
955
+ await outp.write(pickle.dumps(self.model_adjust_pv, pickle.HIGHEST_PROTOCOL))
956
+
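For orientation, the GridSearchCV + TimeSeriesSplit pattern used here in a self-contained form (the pipeline and parameter grid below are placeholders, not the ones MLRegressor builds):

    import numpy as np
    from sklearn.linear_model import Lasso
    from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.default_rng(0)
    X = rng.random((200, 3))
    y = X @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.random(200)
    pipeline = make_pipeline(StandardScaler(), Lasso())
    param_grid = {"lasso__alpha": [0.01, 0.1, 1.0]}
    tscv = TimeSeriesSplit(n_splits=5)  # each fold trains on the past, tests on the future
    search = GridSearchCV(pipeline, param_grid, cv=tscv, scoring="neg_mean_squared_error")
    search.fit(X, y)
    best_model = search.best_estimator_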
957
+ def adjust_pv_forecast_predict(self, forecasted_pv: pd.DataFrame | None = None) -> pd.DataFrame:
958
+ """
959
+ Predict the adjusted photovoltaic (PV) forecast.
960
+
961
+ This method uses the trained regression model to predict the adjusted PV forecast
962
+ based on either the validation data stored in `self` or new forecasted PV data
963
+ passed as input. It applies additional features such as date and solar angles to
964
+ the forecasted PV production data before making predictions. The solar elevation
965
+ is used to zero out nighttime values and damp unrealistic values at the beginning and end of the day.
966
+
967
+ :param forecasted_pv: Optional. A DataFrame containing the forecasted PV production data.
968
+ It must have a DateTime index and a column named "forecast".
969
+ If not provided, the method will use `self.p_pv_forecast_validation`.
970
+ :type forecasted_pv: pd.DataFrame, optional
971
+ :return: A DataFrame containing the adjusted PV forecast with additional features.
972
+ :rtype: pd.DataFrame
973
+ """
974
+ # Use the provided forecasted PV data or fall back to the validation data in `self`
975
+ if forecasted_pv is not None:
976
+ # Ensure the input DataFrame has the required structure
977
+ if "forecast" not in forecasted_pv.columns:
978
+ raise ValueError("The input DataFrame must contain a 'forecast' column.")
979
+ forecast_data = forecasted_pv.copy()
980
+ else:
981
+ # Use the validation data stored in `self`
982
+ forecast_data = self.p_pv_forecast_validation.rename("forecast").to_frame()
983
+ # Prepare the forecasted PV data
984
+ forecast_data = add_date_features(forecast_data)
985
+ forecast_data = Forecast.compute_solar_angles(forecast_data, self.lat, self.lon)
986
+ # Predict the adjusted forecast
987
+ forecast_data["adjusted_forecast"] = self.model_adjust_pv.predict(forecast_data)
988
+
989
+ # Apply solar elevation weighting only for specific cases
990
+ def apply_weighting(row):
991
+ if row["solar_elevation"] <= 0: # Nighttime or negative solar elevation
992
+ return 0
993
+ elif (
994
+ row["solar_elevation"] < self.optim_conf["adjusted_pv_solar_elevation_threshold"]
995
+ ): # Early morning or late evening
996
+ return max(
997
+ row["adjusted_forecast"]
998
+ * (
999
+ row["solar_elevation"]
1000
+ / self.optim_conf["adjusted_pv_solar_elevation_threshold"]
1001
+ ),
1002
+ 0,
1003
+ )
1004
+ else: # Daytime with sufficient solar elevation
1005
+ return row["adjusted_forecast"]
1006
+
1007
+ forecast_data["adjusted_forecast"] = forecast_data.apply(apply_weighting, axis=1)
1008
+ # If using validation data, calculate validation metrics
1009
+ if forecasted_pv is None:
1010
+ y_true = self.p_pv_validation.values
1011
+ y_pred = forecast_data["adjusted_forecast"].values
1012
+ self.validation_rmse = np.sqrt(mean_squared_error(y_true, y_pred))
1013
+ self.validation_r2 = r2_score(y_true, y_pred)
1014
+ # Log the validation metrics
1015
+ self.logger.info(
1016
+ f"PV adjust Validation metrics: RMSE = {self.validation_rmse}, R2 = {self.validation_r2}"
1017
+ )
1018
+ self.logger.debug("adjust_pv_forecast_predict forecast data:\n%s", forecast_data)
1019
+ if self.logger.isEnabledFor(logging.DEBUG):
1020
+ forecast_data.to_csv(
1021
+ self.emhass_conf["data_path"] / "debug-adjust-pv-forecast-predict-forecast-data.csv"
1022
+ )
1023
+ # Return the DataFrame with the adjusted forecast
1024
+ return forecast_data
1025
+
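The row-wise apply_weighting above amounts to a linear ramp on solar elevation; a vectorized equivalent for reference (threshold plays the role of adjusted_pv_solar_elevation_threshold):

    import pandas as pd

    def weight_by_elevation(adjusted: pd.Series, elevation: pd.Series, threshold: float) -> pd.Series:
        # Linear ramp between the horizon and the elevation threshold
        ramp = (elevation / threshold).clip(lower=0.0, upper=1.0)
        out = adjusted.where(elevation >= threshold, (adjusted * ramp).clip(lower=0.0))
        return out.mask(elevation <= 0, 0.0)  # force nighttime values to zero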
1026
+ def get_forecast_days_csv(self, timedelta_days: int | None = 1) -> pd.date_range:
728
1027
  r"""
729
1028
  Get the date range vector of forecast dates that will be used when loading a CSV file.
730
1029
 
@@ -732,30 +1031,22 @@ class Forecast(object):
732
1031
  :rtype: pd.date_range
733
1032
 
734
1033
  """
735
- start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(
736
- microsecond=0
737
- )
1034
+ start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(microsecond=0)
738
1035
  if self.method_ts_round == "nearest":
739
- start_forecast_csv = pd.Timestamp(
740
- datetime.now(), tz=self.time_zone
741
- ).replace(microsecond=0)
1036
+ start_forecast_csv = pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0)
742
1037
  elif self.method_ts_round == "first":
743
1038
  start_forecast_csv = (
744
- pd.Timestamp(datetime.now(), tz=self.time_zone)
745
- .replace(microsecond=0)
746
- .floor(freq=self.freq)
1039
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).floor(freq=self.freq)
747
1040
  )
748
1041
  elif self.method_ts_round == "last":
749
1042
  start_forecast_csv = (
750
- pd.Timestamp(datetime.now(), tz=self.time_zone)
751
- .replace(microsecond=0)
752
- .ceil(freq=self.freq)
1043
+ pd.Timestamp.now(tz=self.time_zone).replace(microsecond=0).ceil(freq=self.freq)
753
1044
  )
754
1045
  else:
755
1046
  self.logger.error("Wrong method_ts_round passed parameter")
756
- end_forecast_csv = (
757
- start_forecast_csv + self.optim_conf["delta_forecast_daily"]
758
- ).replace(microsecond=0)
1047
+ end_forecast_csv = (start_forecast_csv + self.optim_conf["delta_forecast_daily"]).replace(
1048
+ microsecond=0
1049
+ )
759
1050
  forecast_dates_csv = (
760
1051
  pd.date_range(
761
1052
  start=start_forecast_csv,
@@ -767,21 +1058,100 @@ class Forecast(object):
767
1058
  .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
768
1059
  .tz_convert(self.time_zone)
769
1060
  )
770
- if self.params is not None:
771
- if "prediction_horizon" in list(self.params["passed_data"].keys()):
772
- if self.params["passed_data"]["prediction_horizon"] is not None:
773
- forecast_dates_csv = forecast_dates_csv[
774
- 0 : self.params["passed_data"]["prediction_horizon"]
775
- ]
1061
+ if (
1062
+ self.params is not None
1063
+ and "prediction_horizon" in list(self.params["passed_data"].keys())
1064
+ and self.params["passed_data"]["prediction_horizon"] is not None
1065
+ ):
1066
+ forecast_dates_csv = forecast_dates_csv[
1067
+ 0 : self.params["passed_data"]["prediction_horizon"]
1068
+ ]
776
1069
  return forecast_dates_csv
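The returned vector is a timezone-aware date_range at the optimization frequency, e.g. with method_ts_round = 'first' and a 30-minute step (timezone is illustrative):

    import pandas as pd

    start = pd.Timestamp.now(tz="Europe/Paris").replace(microsecond=0).floor("30min")
    end = start + pd.Timedelta(days=1)
    forecast_dates = pd.date_range(start=start, end=end - pd.Timedelta("30min"), freq="30min")
    print(len(forecast_dates))  # 48 half-hour steps on a normal day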
777
1070
 
1071
+ def _load_forecast_data(
1072
+ self,
1073
+ csv_path: str,
1074
+ data_list: list | None,
1075
+ forecast_dates_csv: pd.date_range,
1076
+ ) -> pd.DataFrame:
1077
+ """
1078
+ Helper to load and format forecast data from a CSV file or a list.
1079
+ """
1080
+ if csv_path is None:
1081
+ data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
1082
+ df_csv = pd.DataFrame.from_dict(data_dict)
1083
+ df_csv.index = forecast_dates_csv
1084
+ df_csv = df_csv.drop(["ts"], axis=1)
1085
+ df_csv = set_df_index_freq(df_csv)
1086
+ else:
1087
+ if not os.path.exists(csv_path):
1088
+ csv_path = self.emhass_conf["data_path"] / csv_path
1089
+ df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
1090
+ # Check if first column is a valid datetime
1091
+ first_col = df_csv.iloc[:, 0]
1092
+ if pd.to_datetime(first_col, errors="coerce").notna().all():
1093
+ df_csv["ts"] = pd.to_datetime(df_csv["ts"], utc=True)
1094
+ df_csv.set_index("ts", inplace=True)
1095
+ df_csv.index = df_csv.index.tz_convert(self.time_zone)
1096
+ else:
1097
+ df_csv.index = forecast_dates_csv
1098
+ df_csv = df_csv.drop(["ts"], axis=1)
1099
+ df_csv = set_df_index_freq(df_csv)
1100
+ return df_csv
1101
+
1102
+ def _extract_daily_forecast(
1103
+ self,
1104
+ day: int,
1105
+ df_timing: pd.DataFrame,
1106
+ df_csv: pd.DataFrame,
1107
+ csv_path: str,
1108
+ list_and_perfect: bool,
1109
+ ) -> pd.DataFrame:
1110
+ """
1111
+ Helper to extract a specific day's forecast data based on timing configuration.
1112
+ """
1113
+ # Find the start and end indices for the specific day in the timing DataFrame
1114
+ day_mask = df_timing.index.day == day
1115
+ day_indices = [i for i, x in enumerate(day_mask) if x]
1116
+ first_elm_index = day_indices[0]
1117
+ last_elm_index = day_indices[-1]
1118
+ # Define the target forecast index based on the timing DataFrame
1119
+ fcst_index = pd.date_range(
1120
+ start=df_timing.index[first_elm_index],
1121
+ end=df_timing.index[last_elm_index],
1122
+ freq=df_timing.index.freq,
1123
+ )
1124
+ first_hour = f"{df_timing.index[first_elm_index].hour:02d}:{df_timing.index[first_elm_index].minute:02d}"
1125
+ last_hour = f"{df_timing.index[last_elm_index].hour:02d}:{df_timing.index[last_elm_index].minute:02d}"
1126
+ # Extract data
1127
+ if csv_path is None:
1128
+ if list_and_perfect:
1129
+ values_array = df_csv.between_time(first_hour, last_hour).values
1130
+ # Adjust index length if necessary
1131
+ fcst_index = fcst_index[0 : len(values_array)]
1132
+ return pd.DataFrame(values_array, index=fcst_index)
1133
+ else:
1134
+ return pd.DataFrame(
1135
+ df_csv.loc[fcst_index, :].between_time(first_hour, last_hour).values,
1136
+ index=fcst_index,
1137
+ )
1138
+ else:
1139
+ # For CSV path, filter by date string first
1140
+ df_csv_filtered_date = df_csv.loc[
1141
+ df_csv.index.strftime("%Y-%m-%d") == fcst_index[0].date().strftime("%Y-%m-%d")
1142
+ ]
1143
+ return pd.DataFrame(
1144
+ df_csv_filtered_date.between_time(first_hour, last_hour).values,
1145
+ index=fcst_index,
1146
+ )
1147
+
778
1148
  def get_forecast_out_from_csv_or_list(
779
1149
  self,
780
1150
  df_final: pd.DataFrame,
781
1151
  forecast_dates_csv: pd.date_range,
782
1152
  csv_path: str,
783
- data_list: Optional[list] = None,
784
- list_and_perfect: Optional[bool] = False,
1153
+ data_list: list | None = None,
1154
+ list_and_perfect: bool | None = False,
785
1155
  ) -> pd.DataFrame:
786
1156
  r"""
787
1157
  Get the forecast data as a DataFrame from a CSV file.
@@ -800,119 +1170,294 @@ class Forecast(object):
800
1170
  :rtype: pd.DataFrame
801
1171
 
802
1172
  """
803
- if csv_path is None:
804
- data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
805
- df_csv = pd.DataFrame.from_dict(data_dict)
806
- df_csv.index = forecast_dates_csv
807
- df_csv.drop(["ts"], axis=1, inplace=True)
808
- df_csv = set_df_index_freq(df_csv)
809
- if list_and_perfect:
810
- days_list = df_final.index.day.unique().tolist()
811
- else:
812
- days_list = df_csv.index.day.unique().tolist()
813
- else:
814
- if not os.path.exists(csv_path):
815
- csv_path = self.emhass_conf["data_path"] / csv_path
816
- load_csv_file_path = csv_path
817
- df_csv = pd.read_csv(load_csv_file_path, header=None, names=["ts", "yhat"])
818
- df_csv.index = forecast_dates_csv
819
- df_csv.drop(["ts"], axis=1, inplace=True)
820
- df_csv = set_df_index_freq(df_csv)
1173
+ # Load the source data (df_csv)
1174
+ df_csv = self._load_forecast_data(csv_path, data_list, forecast_dates_csv)
1175
+ # Configure timing source (df_timing) and iteration list
1176
+ if csv_path is None or list_and_perfect:
1177
+ df_final = set_df_index_freq(df_final)
1178
+ df_timing = copy.deepcopy(df_final)
821
1179
  days_list = df_final.index.day.unique().tolist()
822
- forecast_out = pd.DataFrame()
1180
+ else:
1181
+ df_timing = copy.deepcopy(df_csv)
1182
+ days_list = df_csv.index.day.unique().tolist()
1183
+ # Iterate over days and collect forecast parts
1184
+ forecast_parts = []
823
1185
  for day in days_list:
824
- if csv_path is None:
825
- if list_and_perfect:
826
- df_tmp = copy.deepcopy(df_final)
827
- else:
828
- df_tmp = copy.deepcopy(df_csv)
829
- else:
830
- df_tmp = copy.deepcopy(df_final)
831
- first_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][0]
832
- last_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][-1]
833
- fcst_index = pd.date_range(
834
- start=df_tmp.index[first_elm_index],
835
- end=df_tmp.index[last_elm_index],
836
- freq=df_tmp.index.freq,
1186
+ daily_df = self._extract_daily_forecast(
1187
+ day, df_timing, df_csv, csv_path, list_and_perfect
837
1188
  )
838
- first_hour = (
839
- str(df_tmp.index[first_elm_index].hour)
840
- + ":"
841
- + str(df_tmp.index[first_elm_index].minute)
1189
+ forecast_parts.append(daily_df)
1190
+ if forecast_parts:
1191
+ forecast_out = pd.concat(forecast_parts, axis=0)
1192
+ else:
1193
+ forecast_out = pd.DataFrame()
1194
+ # Merge with final DataFrame to align indices
1195
+ merged = pd.merge_asof(
1196
+ df_final.sort_index(),
1197
+ forecast_out.sort_index(),
1198
+ left_index=True,
1199
+ right_index=True,
1200
+ direction="nearest",
1201
+ )
1202
+ # Keep only forecast_out columns
1203
+ forecast_out = merged[forecast_out.columns]
1204
+ return forecast_out
1205
+
1206
+ @staticmethod
1207
+ def resample_data(data, freq, current_freq):
1208
+ r"""
1209
+ Resample a DataFrame with a custom frequency.
1210
+
1211
+ :param data: Original time series data with a DateTimeIndex.
1212
+ :type data: pd.DataFrame
1213
+ :param freq: Desired frequency for resampling (e.g., pd.Timedelta("10min")).
1214
+ :type freq: pd.Timedelta
1215
+ :return: Resampled data at the specified frequency.
1216
+ :rtype: pd.DataFrame
1217
+ """
1218
+ if freq > current_freq:
1219
+ # Downsampling
1220
+ # Use 'mean' to aggregate or choose other options ('sum', 'max', etc.)
1221
+ resampled_data = data.resample(freq).mean()
1222
+ elif freq < current_freq:
1223
+ # Upsampling
1224
+ # Use 'asfreq' to create empty slots, then interpolate
1225
+ resampled_data = data.resample(freq).asfreq()
1226
+ resampled_data = resampled_data.interpolate(method="time")
1227
+ else:
1228
+ # No resampling needed
1229
+ resampled_data = data.copy()
1230
+ return resampled_data
1231
+
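Under these rules a 30-minute series is averaged when coarsened to hourly and time-interpolated when refined to 10-minute steps; a quick synthetic check:

    import pandas as pd

    idx = pd.date_range("2024-06-01", periods=6, freq="30min")
    series = pd.DataFrame({"load": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}, index=idx)
    hourly = series.resample(pd.Timedelta("1h")).mean()  # downsampling: mean per hour
    fine = series.resample(pd.Timedelta("10min")).asfreq().interpolate(method="time")  # upsampling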
1232
+ @staticmethod
1233
+ def get_typical_load_forecast(data, forecast_date):
1234
+ r"""
1235
+ Forecast the load profile for the next day based on historic data.
1236
+
1237
+ :param data: A DataFrame with a DateTimeIndex containing the historic load data.
1238
+ Must include a 'load' column.
1239
+ :type data: pd.DataFrame
1240
+ :param forecast_date: The date for which the forecast will be generated.
1241
+ :type forecast_date: pd.Timestamp
1242
+ :return: A Series with the forecasted load profile for the next day and a list of days used
1243
+ to calculate the forecast.
1244
+ :rtype: tuple (pd.Series, list)
1245
+ """
1246
+ # Ensure the 'load' column exists
1247
+ if "load" not in data.columns:
1248
+ raise ValueError("Data must have a 'load' column.")
1249
+ # Filter historic data for the same month and day of the week
1250
+ month = forecast_date.month
1251
+ day_of_week = forecast_date.dayofweek
1252
+ historic_data = data[(data.index.month == month) & (data.index.dayofweek == day_of_week)]
1253
+ used_days = np.unique(historic_data.index.date)
1254
+ # Align all historic data to the forecast day
1255
+ aligned_data = []
1256
+ for day in used_days:
1257
+ daily_data = data[data.index.date == pd.Timestamp(day).date()]
1258
+ aligned_daily_data = daily_data.copy()
1259
+ aligned_daily_data.index = aligned_daily_data.index.map(
1260
+ lambda x: x.replace(
1261
+ year=forecast_date.year,
1262
+ month=forecast_date.month,
1263
+ day=forecast_date.day,
1264
+ )
842
1265
  )
843
- last_hour = (
844
- str(df_tmp.index[last_elm_index].hour)
845
- + ":"
846
- + str(df_tmp.index[last_elm_index].minute)
1266
+ aligned_data.append(aligned_daily_data)
1267
+ # Combine all aligned historic data into a single DataFrame
1268
+ combined_data = pd.concat(aligned_data)
1269
+ # Compute the mean load for each timestamp
1270
+ forecast = combined_data.groupby(combined_data.index).mean()
1271
+ return forecast, used_days
1272
+
1273
+ async def _prepare_hass_load_data(
1274
+ self, days_min_load_forecast: int, method: str
1275
+ ) -> pd.DataFrame | bool:
1276
+ """Helper to retrieve and prepare load data from Home Assistant."""
1277
+ self.logger.info(f"Retrieving data from hass for load forecast using method = {method}")
1278
+ var_list = [self.var_load]
1279
+ var_replace_zero = None
1280
+ var_interp = [self.var_load]
1281
+ time_zone_load_forecast = None
1282
+ rh = RetrieveHass(
1283
+ self.retrieve_hass_conf["hass_url"],
1284
+ self.retrieve_hass_conf["long_lived_token"],
1285
+ self.freq,
1286
+ time_zone_load_forecast,
1287
+ self.params,
1288
+ self.emhass_conf,
1289
+ self.logger,
1290
+ )
1291
+ if self.get_data_from_file:
1292
+ filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
1293
+ async with aiofiles.open(filename_path, "rb") as inp:
1294
+ content = await inp.read()
1295
+ rh.df_final, days_list, var_list, rh.ha_config = pickle.loads(content)
1296
+ self.var_load = var_list[0]
1297
+ self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = self.var_load
1298
+ var_interp = [var_list[0]]
1299
+ self.var_list = [var_list[0]]
1300
+ rh.var_list = self.var_list
1301
+ self.var_load_new = self.var_load + "_positive"
1302
+ else:
1303
+ days_list = get_days_list(days_min_load_forecast)
1304
+ if not await rh.get_data(days_list, var_list):
1305
+ return False
1306
+ if not rh.prepare_data(
1307
+ self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
1308
+ load_negative=self.retrieve_hass_conf["load_negative"],
1309
+ set_zero_min=self.retrieve_hass_conf["set_zero_min"],
1310
+ var_replace_zero=var_replace_zero,
1311
+ var_interp=var_interp,
1312
+ ):
1313
+ return False
1314
+ return rh.df_final.copy()[[self.var_load_new]]
1315
+
1316
+ async def _get_load_forecast_typical(self) -> pd.DataFrame:
1317
+ """Helper to generate typical load forecast."""
1318
+ model_type = "long_train_data"
1319
+ data_path = self.emhass_conf["data_path"] / str(model_type + ".pkl")
1320
+ async with aiofiles.open(data_path, "rb") as fid:
1321
+ content = await fid.read()
1322
+ data, _, _, _ = pickle.loads(content)
1323
+ # Ensure the data index is timezone-aware
1324
+ data.index = (
1325
+ data.index.tz_localize(
1326
+ self.forecast_dates.tz,
1327
+ ambiguous="infer",
1328
+ nonexistent="shift_forward",
847
1329
  )
848
- if len(forecast_out) == 0:
849
- if csv_path is None:
850
- if list_and_perfect:
851
- forecast_out = pd.DataFrame(
852
- df_csv.between_time(first_hour, last_hour).values,
853
- index=fcst_index,
854
- )
855
- else:
856
- forecast_out = pd.DataFrame(
857
- df_csv.loc[fcst_index, :]
858
- .between_time(first_hour, last_hour)
859
- .values,
860
- index=fcst_index,
861
- )
862
- else:
863
- forecast_out = pd.DataFrame(
864
- df_csv.between_time(first_hour, last_hour).values,
865
- index=fcst_index,
866
- )
1330
+ if data.index.tz is None
1331
+ else data.index.tz_convert(self.forecast_dates.tz)
1332
+ )
1333
+ data = data[[self.var_load]]
1334
+ current_freq = pd.Timedelta("30min")
1335
+ if self.freq != current_freq:
1336
+ data = Forecast.resample_data(data, self.freq, current_freq)
1337
+ dates_list = np.unique(self.forecast_dates.date).tolist()
1338
+ forecast = pd.DataFrame()
1339
+ for date in dates_list:
1340
+ forecast_date = pd.Timestamp(date)
1341
+ data.columns = ["load"]
1342
+ forecast_tmp, used_days = Forecast.get_typical_load_forecast(data, forecast_date)
1343
+ self.logger.debug(f"Using {len(used_days)} days of data to generate the forecast.")
1344
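+ # Scale the typical profile to this plant; the 9000 W divisor appears to be the reference profile's rated grid power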
+ forecast_tmp = forecast_tmp * self.plant_conf["maximum_power_from_grid"] / 9000
1345
+ if len(forecast) == 0:
1346
+ forecast = forecast_tmp
867
1347
  else:
868
- if csv_path is None:
869
- if list_and_perfect:
870
- forecast_tp = pd.DataFrame(
871
- df_csv.between_time(first_hour, last_hour).values,
872
- index=fcst_index,
873
- )
874
- else:
875
- forecast_tp = pd.DataFrame(
876
- df_csv.loc[fcst_index, :]
877
- .between_time(first_hour, last_hour)
878
- .values,
879
- index=fcst_index,
880
- )
881
- else:
882
- forecast_tp = pd.DataFrame(
883
- df_csv.between_time(first_hour, last_hour).values,
884
- index=fcst_index,
885
- )
886
- forecast_out = pd.concat([forecast_out, forecast_tp], axis=0)
887
- return forecast_out
1348
+ forecast = pd.concat([forecast, forecast_tmp], axis=0)
1349
+ forecast_out = forecast.loc[forecast.index.intersection(self.forecast_dates)]
1350
+ forecast_out.index = self.forecast_dates
1351
+ forecast_out.index.name = "ts"
1352
+ return forecast_out.rename(columns={"load": "yhat"})
1353
+
1354
+ def _get_load_forecast_naive(self, df: pd.DataFrame) -> pd.DataFrame:
1355
+ """Helper for naive forecast."""
1356
+ forecast_horizon = len(self.forecast_dates)
1357
+ historical_values = df.iloc[-forecast_horizon:]
1358
+ return pd.DataFrame(historical_values.values, index=self.forecast_dates, columns=["yhat"])
1359
+
1360
+ async def _get_load_forecast_ml(
1361
+ self, df: pd.DataFrame, use_last_window: bool, mlf, debug: bool
1362
+ ) -> pd.DataFrame | bool:
1363
+ """Helper for ML forecast."""
1364
+ model_type = self.params["passed_data"]["model_type"]
1365
+ filename = model_type + "_mlf.pkl"
1366
+ filename_path = self.emhass_conf["data_path"] / filename
1367
+ if not debug:
1368
+ if filename_path.is_file():
1369
+ async with aiofiles.open(filename_path, "rb") as inp:
1370
+ content = await inp.read()
1371
+ mlf = pickle.loads(content)
1372
+ else:
1373
+ self.logger.error(
1374
+ "The ML forecaster file was not found, please run a model fit method before this predict method"
1375
+ )
1376
+ return False
1377
+ data_last_window = None
1378
+ if use_last_window:
1379
+ data_last_window = copy.deepcopy(df)
1380
+ data_last_window = data_last_window.rename(columns={self.var_load_new: self.var_load})
1381
+ forecast_out = await mlf.predict(data_last_window)
1382
+ self.logger.debug(
1383
+ "Number of ML predict forcast data generated (lags_opt): "
1384
+ + str(len(forecast_out.index))
1385
+ )
1386
+ self.logger.debug(
1387
+ "Number of forcast dates obtained (prediction_horizon): "
1388
+ + str(len(self.forecast_dates))
1389
+ )
1390
+ if len(self.forecast_dates) < len(forecast_out.index):
1391
+ forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1392
+ elif len(self.forecast_dates) > len(forecast_out.index):
1393
+ self.logger.error(
1394
+ "Unable to obtain: "
1395
+ + str(len(self.forecast_dates))
1396
+ + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
1397
+ )
1398
+ return False
1399
+ data_dict = {
1400
+ "ts": self.forecast_dates,
1401
+ "yhat": forecast_out.values.tolist(),
1402
+ }
1403
+ data = pd.DataFrame.from_dict(data_dict)
1404
+ data.set_index("ts", inplace=True)
1405
+ return data.copy().loc[self.forecast_dates]
1406
+
1407
+ def _get_load_forecast_csv(self, csv_path: str) -> pd.DataFrame | None:
1408
+ """Helper to retrieve load data from CSV."""
1409
+ df_csv = pd.read_csv(csv_path, header=None, names=["ts", "yhat"])
1410
+ if len(df_csv) < len(self.forecast_dates):
1411
+ self.logger.error("Passed data from CSV is not long enough")
1412
+ return None
1413
+ df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
1414
+ df_csv.index = self.forecast_dates
1415
+ df_csv = df_csv.drop(["ts"], axis=1)
1416
+ return df_csv.copy().loc[self.forecast_dates]
1417
+
1418
+ def _get_load_forecast_list(self) -> pd.DataFrame | bool:
1419
+ """Helper to retrieve load data from a passed list."""
1420
+ data_list = self.params["passed_data"]["load_power_forecast"]
1421
+ if (
1422
+ len(data_list) < len(self.forecast_dates)
1423
+ and self.params["passed_data"]["prediction_horizon"] is None
1424
+ ):
1425
+ self.logger.error(error_msg_list_not_long_enough)
1426
+ return False
1427
+ data_list = data_list[0 : len(self.forecast_dates)]
1428
+ data_dict = {"ts": self.forecast_dates, "yhat": data_list}
1429
+ data = pd.DataFrame.from_dict(data_dict)
1430
+ data.set_index("ts", inplace=True)
1431
+ return data.copy().loc[self.forecast_dates]
888
1432
 
889
- def get_load_forecast(
1433
+ async def get_load_forecast(
890
1434
  self,
891
- days_min_load_forecast: Optional[int] = 3,
892
- method: Optional[str] = "naive",
893
- csv_path: Optional[str] = "data_load_forecast.csv",
894
- set_mix_forecast: Optional[bool] = False,
895
- df_now: Optional[pd.DataFrame] = pd.DataFrame(),
896
- use_last_window: Optional[bool] = True,
897
- mlf: Optional[MLForecaster] = None,
898
- debug: Optional[bool] = False,
1435
+ days_min_load_forecast: int | None = 3,
1436
+ method: str | None = "typical",
1437
+ csv_path: str | None = "data_load_forecast.csv",
1438
+ set_mix_forecast: bool | None = False,
1439
+ df_now: pd.DataFrame | None = pd.DataFrame(),
1440
+ use_last_window: bool | None = True,
1441
+ mlf: MLForecaster | None = None,
1442
+ debug: bool | None = False,
899
1443
  ) -> pd.Series:
900
- r"""
1444
+ """
901
1445
  Get and generate the load forecast data.
902
-
1446
+
903
1447
  :param days_min_load_forecast: The number of last days to retrieve that \
904
1448
  will be used to generate a naive forecast, defaults to 3
905
1449
  :type days_min_load_forecast: int, optional
906
1450
  :param method: The method to be used to generate load forecast, the options \
907
- are 'naive' for a persistance model, 'mlforecaster' for using a custom \
1451
+ are 'typical' for a typical household load consumption curve, \
1452
+ 'naive' for a persistence model, 'mlforecaster' for using a custom \
908
1453
  previously fitted machine learning model, 'csv' to read the forecast from \
909
1454
  a CSV file and 'list' to use data directly passed at runtime as a list of \
910
- values. Defaults to 'naive'.
1455
+ values. Defaults to 'typical'.
911
1456
  :type method: str, optional
912
1457
  :param csv_path: The path to the CSV file used when method = 'csv', \
913
1458
  defaults to "/data/data_load_forecast.csv"
914
1459
  :type csv_path: str, optional
915
- :param set_mix_forecast: Use a mixed forcast strategy to integra now/current values.
1460
+ :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
916
1461
  :type set_mix_forecast: Bool, optional
917
1462
  :param df_now: The DataFrame containing the now/current data.
918
1463
  :type df_now: pd.DataFrame, optional
@@ -932,169 +1477,59 @@ class Forecast(object):
932
1477
 
933
1478
  """
934
1479
  csv_path = self.emhass_conf["data_path"] / csv_path
935
-
936
- if (
937
- method == "naive" or method == "mlforecaster"
938
- ): # retrieving needed data for these methods
939
- self.logger.info(
940
- "Retrieving data from hass for load forecast using method = " + method
941
- )
942
- var_list = [self.var_load]
943
- var_replace_zero = None
944
- var_interp = [self.var_load]
945
- time_zone_load_foreacast = None
946
- # We will need to retrieve a new set of load data according to the days_min_load_forecast parameter
947
- rh = RetrieveHass(
948
- self.retrieve_hass_conf["hass_url"],
949
- self.retrieve_hass_conf["long_lived_token"],
950
- self.freq,
951
- time_zone_load_foreacast,
952
- self.params,
953
- self.emhass_conf,
954
- self.logger,
955
- )
956
- if self.get_data_from_file:
957
- filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
958
- with open(filename_path, "rb") as inp:
959
- rh.df_final, days_list, var_list = pickle.load(inp)
960
- self.var_load = var_list[0]
961
- self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = (
962
- self.var_load
963
- )
964
- var_interp = [var_list[0]]
965
- self.var_list = [var_list[0]]
966
- self.var_load_new = self.var_load + "_positive"
967
- else:
968
- days_list = get_days_list(days_min_load_forecast)
969
- if not rh.get_data(days_list, var_list):
970
- return False
971
- if not rh.prepare_data(
972
- self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
973
- load_negative=self.retrieve_hass_conf["load_negative"],
974
- set_zero_min=self.retrieve_hass_conf["set_zero_min"],
975
- var_replace_zero=var_replace_zero,
976
- var_interp=var_interp,
977
- ):
1480
+ # Retrieve Data from Home Assistant if needed
1481
+ df = None
1482
+ if method in ["naive", "mlforecaster"]:
1483
+ df = await self._prepare_hass_load_data(days_min_load_forecast, method)
1484
+ if df is False:
978
1485
  return False
979
- df = rh.df_final.copy()[[self.var_load_new]]
980
- if method == "naive": # using a naive approach
981
- mask_forecast_out = (
982
- df.index > days_list[-1] - self.optim_conf["delta_forecast_daily"]
983
- )
984
- forecast_out = df.copy().loc[mask_forecast_out]
985
- forecast_out = forecast_out.rename(columns={self.var_load_new: "yhat"})
986
- # Force forecast_out length to avoid mismatches
987
- forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
988
- forecast_out.index = self.forecast_dates
989
- elif (
990
- method == "mlforecaster"
991
- ): # using a custom forecast model with machine learning
992
- # Load model
993
- model_type = self.params["passed_data"]["model_type"]
994
- filename = model_type + "_mlf.pkl"
995
- filename_path = self.emhass_conf["data_path"] / filename
996
- if not debug:
997
- if filename_path.is_file():
998
- with open(filename_path, "rb") as inp:
999
- mlf = pickle.load(inp)
1000
- else:
1001
- self.logger.error(
1002
- "The ML forecaster file was not found, please run a model fit method before this predict method"
1003
- )
1004
- return False
1005
- # Make predictions
1006
- if use_last_window:
1007
- data_last_window = copy.deepcopy(df)
1008
- data_last_window = data_last_window.rename(
1009
- columns={self.var_load_new: self.var_load}
1010
- )
1011
- else:
1012
- data_last_window = None
1013
- forecast_out = mlf.predict(data_last_window)
1014
- # Force forecast length to avoid mismatches
1015
- self.logger.debug(
1016
- "Number of ML predict forcast data generated (lags_opt): "
1017
- + str(len(forecast_out.index))
1018
- )
1019
- self.logger.debug(
1020
- "Number of forcast dates obtained: " + str(len(self.forecast_dates))
1021
- )
1022
- if len(self.forecast_dates) < len(forecast_out.index):
1023
- forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1024
- # To be removed once bug is fixed
1025
- elif len(self.forecast_dates) > len(forecast_out.index):
1026
- self.logger.error(
1027
- "Unable to obtain: "
1028
- + str(len(self.forecast_dates))
1029
- + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
1030
- )
1486
+ # Generate Forecast based on Method
1487
+ if method == "typical":
1488
+ forecast_out = await self._get_load_forecast_typical()
1489
+ elif method == "naive":
1490
+ forecast_out = self._get_load_forecast_naive(df)
1491
+ elif method == "mlforecaster":
1492
+ forecast_out = await self._get_load_forecast_ml(df, use_last_window, mlf, debug)
1493
+ if forecast_out is False:
1031
1494
  return False
1032
- # Define DataFrame
1033
- data_dict = {
1034
- "ts": self.forecast_dates,
1035
- "yhat": forecast_out.values.tolist(),
1036
- }
1037
- data = pd.DataFrame.from_dict(data_dict)
1038
- # Define index
1039
- data.set_index("ts", inplace=True)
1040
- forecast_out = data.copy().loc[self.forecast_dates]
1041
- elif method == "csv": # reading from a csv file
1042
- load_csv_file_path = csv_path
1043
- df_csv = pd.read_csv(load_csv_file_path, header=None, names=["ts", "yhat"])
1044
- if len(df_csv) < len(self.forecast_dates):
1045
- self.logger.error("Passed data from CSV is not long enough")
1046
- else:
1047
- # Ensure correct length
1048
- df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
1049
- # Define index
1050
- df_csv.index = self.forecast_dates
1051
- df_csv.drop(["ts"], axis=1, inplace=True)
1052
- forecast_out = df_csv.copy().loc[self.forecast_dates]
1053
- elif method == "list": # reading a list of values
1054
- # Loading data from passed list
1055
- data_list = self.params["passed_data"]["load_power_forecast"]
1056
- # Check if the passed data has the correct length
1057
- if (
1058
- len(data_list) < len(self.forecast_dates)
1059
- and self.params["passed_data"]["prediction_horizon"] is None
1060
- ):
1061
- self.logger.error("Passed data from passed list is not long enough")
1495
+ elif method == "csv":
1496
+ forecast_out = self._get_load_forecast_csv(csv_path)
1497
+ if forecast_out is None:
1498
+ return False
1499
+ elif method == "list":
1500
+ forecast_out = self._get_load_forecast_list()
1501
+ if forecast_out is False:
1062
1502
  return False
1063
- else:
1064
- # Ensure correct length
1065
- data_list = data_list[0 : len(self.forecast_dates)]
1066
- # Define DataFrame
1067
- data_dict = {"ts": self.forecast_dates, "yhat": data_list}
1068
- data = pd.DataFrame.from_dict(data_dict)
1069
- # Define index
1070
- data.set_index("ts", inplace=True)
1071
- forecast_out = data.copy().loc[self.forecast_dates]
1072
1503
  else:
1073
- self.logger.error("Passed method is not valid")
1504
+ self.logger.error(error_msg_method_not_valid)
1074
1505
  return False
1075
- P_Load_forecast = copy.deepcopy(forecast_out["yhat"])
1506
+ # Post-processing (Mix Forecast)
1507
+ p_load_forecast = copy.deepcopy(forecast_out["yhat"])
1076
1508
  if set_mix_forecast:
1077
- P_Load_forecast = Forecast.get_mix_forecast(
1509
+ # Load forecasts don't need curtailment protection, so always use feedback
1510
+ p_load_forecast = Forecast.get_mix_forecast(
1078
1511
  df_now,
1079
- P_Load_forecast,
1512
+ p_load_forecast,
1080
1513
  self.params["passed_data"]["alpha"],
1081
1514
  self.params["passed_data"]["beta"],
1082
1515
  self.var_load_new,
1516
+ False, # Never ignore feedback for load forecasts
1083
1517
  )
1084
- return P_Load_forecast
1518
+ self.logger.debug("get_load_forecast returning:\n%s", p_load_forecast)
1519
+ return p_load_forecast
1085
1520
 
1086
1521
  def get_load_cost_forecast(
1087
1522
  self,
1088
1523
  df_final: pd.DataFrame,
1089
- method: Optional[str] = "hp_hc_periods",
1090
- csv_path: Optional[str] = "data_load_cost_forecast.csv",
1091
- list_and_perfect: Optional[bool] = False,
1524
+ method: str | None = "hp_hc_periods",
1525
+ csv_path: str | None = "data_load_cost_forecast.csv",
1526
+ list_and_perfect: bool | None = False,
1092
1527
  ) -> pd.DataFrame:
1093
1528
  r"""
1094
1529
  Get the unit cost for the load consumption based on multiple tariff \
1095
1530
  periods. This is the cost of the energy from the utility in a vector \
1096
1531
  sampled at the fixed freq value.
1097
-
1532
+
1098
1533
  :param df_final: The DataFrame containing the input data.
1099
1534
  :type df_final: pd.DataFrame
1100
1535
  :param method: The method to be used to generate load cost forecast, \
@@ -1113,7 +1548,7 @@ class Forecast(object):
1113
1548
  if method == "hp_hc_periods":
1114
1549
  df_final[self.var_load_cost] = self.optim_conf["load_offpeak_hours_cost"]
1115
1550
  list_df_hp = []
1116
- for key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
1551
+ for _key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
1117
1552
  list_df_hp.append(
1118
1553
  df_final[self.var_load_cost].between_time(
1119
1554
  period_hp[0]["start"], period_hp[1]["end"]
@@ -1128,7 +1563,14 @@ class Forecast(object):
1128
1563
  forecast_out = self.get_forecast_out_from_csv_or_list(
1129
1564
  df_final, forecast_dates_csv, csv_path
1130
1565
  )
1131
- df_final[self.var_load_cost] = forecast_out
1566
+ # Ensure correct length
1567
+ if not list_and_perfect:
1568
+ forecast_out = forecast_out[0 : len(self.forecast_dates)]
1569
+ df_final = df_final[0 : len(self.forecast_dates)].copy()
1570
+ # Convert to Series if needed and align index
1571
+ if not isinstance(forecast_out, pd.Series):
1572
+ forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
1573
+ df_final.loc[:, self.var_load_cost] = forecast_out
1132
1574
  elif method == "list": # reading a list of values
1133
1575
  # Loading data from passed list
1134
1576
  data_list = self.params["passed_data"]["load_cost_forecast"]
@@ -1137,11 +1579,13 @@ class Forecast(object):
1137
1579
  len(data_list) < len(self.forecast_dates)
1138
1580
  and self.params["passed_data"]["prediction_horizon"] is None
1139
1581
  ):
1140
- self.logger.error("Passed data from passed list is not long enough")
1582
+ self.logger.error(error_msg_list_not_long_enough)
1141
1583
  return False
1142
1584
  else:
1143
1585
  # Ensure correct length
1144
1586
  data_list = data_list[0 : len(self.forecast_dates)]
1587
+ if not list_and_perfect:
1588
+ df_final = df_final.iloc[0 : len(self.forecast_dates)]
1145
1589
  # Define the correct dates
1146
1590
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1147
1591
  forecast_out = self.get_forecast_out_from_csv_or_list(
@@ -1151,25 +1595,26 @@ class Forecast(object):
1151
1595
  data_list=data_list,
1152
1596
  list_and_perfect=list_and_perfect,
1153
1597
  )
1154
- # Fill the final DF
1598
+ df_final = df_final.copy()
1155
1599
  df_final[self.var_load_cost] = forecast_out
1156
1600
  else:
1157
- self.logger.error("Passed method is not valid")
1601
+ self.logger.error(error_msg_method_not_valid)
1158
1602
  return False
1603
+ self.logger.debug("get_load_cost_forecast returning:\n%s", df_final)
1159
1604
  return df_final
1160
1605
 
1161
1606
  def get_prod_price_forecast(
1162
1607
  self,
1163
1608
  df_final: pd.DataFrame,
1164
- method: Optional[str] = "constant",
1165
- csv_path: Optional[str] = "data_prod_price_forecast.csv",
1166
- list_and_perfect: Optional[bool] = False,
1609
+ method: str | None = "constant",
1610
+ csv_path: str | None = "data_prod_price_forecast.csv",
1611
+ list_and_perfect: bool | None = False,
1167
1612
  ) -> pd.DataFrame:
1168
1613
  r"""
1169
1614
  Get the unit power production price for the energy injected to the grid.\
1170
1615
  This is the price of the energy injected to the utility in a vector \
1171
1616
  sampled at the fixed freq value.
1172
-
1617
+
1173
1618
  :param df_input_data: The DataFrame containing all the input data retrieved
1174
1619
  from hass
1175
1620
  :type df_input_data: pd.DataFrame
@@ -1187,15 +1632,20 @@ class Forecast(object):
1187
1632
  """
1188
1633
  csv_path = self.emhass_conf["data_path"] / csv_path
1189
1634
  if method == "constant":
1190
- df_final[self.var_prod_price] = self.optim_conf[
1191
- "photovoltaic_production_sell_price"
1192
- ]
1635
+ df_final[self.var_prod_price] = self.optim_conf["photovoltaic_production_sell_price"]
1193
1636
  elif method == "csv":
1194
1637
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1195
1638
  forecast_out = self.get_forecast_out_from_csv_or_list(
1196
1639
  df_final, forecast_dates_csv, csv_path
1197
1640
  )
1198
- df_final[self.var_prod_price] = forecast_out
1641
+ # Ensure correct length
1642
+ if not list_and_perfect:
1643
+ forecast_out = forecast_out[0 : len(self.forecast_dates)]
1644
+ df_final = df_final[0 : len(self.forecast_dates)].copy()
1645
+ # Convert to Series if needed and align index
1646
+ if not isinstance(forecast_out, pd.Series):
1647
+ forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
1648
+ df_final.loc[:, self.var_prod_price] = forecast_out
1199
1649
  elif method == "list": # reading a list of values
1200
1650
  # Loading data from passed list
1201
1651
  data_list = self.params["passed_data"]["prod_price_forecast"]
@@ -1204,11 +1654,13 @@ class Forecast(object):
1204
1654
  len(data_list) < len(self.forecast_dates)
1205
1655
  and self.params["passed_data"]["prediction_horizon"] is None
1206
1656
  ):
1207
- self.logger.error("Passed data from passed list is not long enough")
1657
+ self.logger.error(error_msg_list_not_long_enough)
1208
1658
  return False
1209
1659
  else:
1210
1660
  # Ensure correct length
1211
1661
  data_list = data_list[0 : len(self.forecast_dates)]
1662
+ if not list_and_perfect:
1663
+ df_final = df_final.iloc[0 : len(self.forecast_dates)]
1212
1664
  # Define the correct dates
1213
1665
  forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1214
1666
  forecast_out = self.get_forecast_out_from_csv_or_list(
@@ -1218,9 +1670,90 @@ class Forecast(object):
1218
1670
  data_list=data_list,
1219
1671
  list_and_perfect=list_and_perfect,
1220
1672
  )
1221
- # Fill the final DF
1673
+ df_final = df_final.copy()
1222
1674
  df_final[self.var_prod_price] = forecast_out
1223
1675
  else:
1224
- self.logger.error("Passed method is not valid")
1676
+ self.logger.error(error_msg_method_not_valid)
1225
1677
  return False
1678
+ self.logger.debug("get_prod_price_forecast returning:\n%s", df_final)
1226
1679
  return df_final
1680
+
1681
+ async def get_cached_forecast_data(self, w_forecast_cache_path) -> pd.DataFrame | bool:
1682
+ r"""
1683
+ Get cached weather forecast data from file.
1684
+
1685
+ :param w_forecast_cache_path: the path to file.
1686
+ :type method: Any
1687
+ :return: The DataFrame containing the forecasted data
1688
+ :rtype: pd.DataFrame
1689
+
1690
+ """
1691
+ async with aiofiles.open(w_forecast_cache_path, "rb") as file:
1692
+ content = await file.read()
1693
+ data = pickle.loads(content)
1694
+ if not isinstance(data, pd.DataFrame) or len(data) < len(self.forecast_dates):
1695
+ self.logger.error("There has been a error obtaining cached forecast data.")
1696
+ self.logger.error(
1697
+ "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from forecast API and cache."
1698
+ )
1699
+ self.logger.warning(
1700
+ "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
1701
+ )
1702
+ os.remove(w_forecast_cache_path)
1703
+ return False
1704
+ # Filter cached forecast data to the current forecast_dates range (trim the DataFrame to the appropriate length)
1705
+ if self.forecast_dates[0] in data.index and self.forecast_dates[-1] in data.index:
1706
+ data = data.loc[self.forecast_dates[0] : self.forecast_dates[-1]]
1707
+ self.logger.info("Retrieved forecast data from the previously saved cache.")
1708
+ else:
1709
+ self.logger.error(
1710
+ "Unable to obtain cached forecast data within the requested timeframe range."
1711
+ )
1712
+ self.logger.error(
1713
+ "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from forecast API and cache."
1714
+ )
1715
+ self.logger.warning(
1716
+ "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
1717
+ )
1718
+ os.remove(w_forecast_cache_path)
1719
+ return False
1720
+ return data
1721
+
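The cache itself is just a pickled DataFrame on disk; a minimal async round-trip in the same aiofiles style (file name is illustrative):

    import asyncio
    import pickle

    import aiofiles
    import pandas as pd

    async def roundtrip(path: str = "weather_cache.pkl") -> pd.DataFrame:
        df = pd.DataFrame({"temp_air": [20.1, 19.8]})
        async with aiofiles.open(path, "wb") as f:
            await f.write(pickle.dumps(df))
        async with aiofiles.open(path, "rb") as f:
            return pickle.loads(await f.read())

    print(asyncio.run(roundtrip()))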
1722
+ async def set_cached_forecast_data(self, w_forecast_cache_path, data) -> pd.DataFrame:
1723
+ r"""
1724
+ Save generated weather forecast data to file.
1725
+ Trim data to match the originally requested forecast dates.
1726
+
1727
+ :param w_forecast_cache_path: the path to the cache file.
1728
+ :type w_forecast_cache_path: Any
1729
+ :param data: The DataFrame containing the forecasted data
1730
+ :type data: pd.DataFrame
1731
+ :return: The DataFrame containing the forecasted data
1732
+ :rtype: pd.DataFrame
1733
+
1734
+ """
1735
+ async with aiofiles.open(w_forecast_cache_path, "wb") as file:
1736
+ content = pickle.dumps(data)
1737
+ await file.write(content)
1738
+ if not os.path.isfile(w_forecast_cache_path):
1739
+ self.logger.warning("forecast data could not be saved to file.")
1740
+ else:
1741
+ self.logger.info("Saved the forecast results to cache, for later reference.")
1742
+
1743
+ # Trim cached data to match requested dates
1744
+ end_forecast = (self.start_forecast + self.optim_conf["delta_forecast_daily"]).replace(
1745
+ microsecond=0
1746
+ )
1747
+ forecast_dates = (
1748
+ pd.date_range(
1749
+ start=self.start_forecast,
1750
+ end=end_forecast - self.freq,
1751
+ freq=self.freq,
1752
+ tz=self.time_zone,
1753
+ )
1754
+ .tz_convert("utc")
1755
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
1756
+ .tz_convert(self.time_zone)
1757
+ )
1758
+ data = data.loc[forecast_dates[0] : forecast_dates[-1]]
1759
+ return data