emhass 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
emhass/forecast.py ADDED
@@ -0,0 +1,1768 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import bz2
4
+ import copy
5
+ import json
6
+ import logging
7
+ import os
8
+ import pickle
9
+ import pickle as cPickle
10
+ import re
11
+ from datetime import datetime, timedelta
12
+ from itertools import zip_longest
13
+ from urllib.parse import quote
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from pvlib.irradiance import disc
18
+ from pvlib.location import Location
19
+ from pvlib.modelchain import ModelChain
20
+ from pvlib.pvsystem import PVSystem
21
+ from pvlib.solarposition import get_solarposition
22
+ from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
23
+ from requests import get
24
+ from requests.exceptions import RequestException
25
+ from sklearn.metrics import mean_squared_error, r2_score
26
+ from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
27
+ from sklearn.pipeline import make_pipeline
28
+ from sklearn.preprocessing import StandardScaler
29
+
30
+ from emhass.machine_learning_forecaster import MLForecaster
31
+ from emhass.machine_learning_regressor import MLRegressor
32
+ from emhass.retrieve_hass import RetrieveHass
33
+ from emhass.utils import add_date_features, get_days_list, set_df_index_freq
34
+
35
+
36
+ class Forecast:
37
+ r"""
38
+ Generate weather, load and costs forecasts needed as inputs to the optimization.
39
+
40
+ In EMHASS we have basically 4 forecasts to deal with:
41
+
42
+ - PV power production forecast (internally based on the weather forecast and the
43
+ characteristics of your PV plant). This is given in Watts.
44
+
45
+ - Load power forecast: how much power your house will demand over the next 24h. This
46
+ is given in Watts.
47
+
48
+ - PV production selling price forecast: at what price are you selling your excess
49
+ PV production over the next 24h. This is given in EUR/kWh.
50
+
51
+ - Load cost forecast: the price of the energy from the grid over the next 24h. This
52
+ is given in EUR/kWh.
53
+
54
+ There are methods that are generalized to the 4 forecasts needed. For all these
55
+ forecasts it is possible to pass the data either as a list of values or by
56
+ reading from a CSV file. With these methods it is then possible to use data from
57
+ external forecast providers.
58
+
59
+ Then there are the methods that are specific to each type of forecast, with the
60
+ forecast generated and treated internally by this EMHASS forecast class.
61
+ For the weather forecast, a first method (`open-meteo`) uses the Open-Meteo API,
62
+ which provides detailed forecasts based on latitude/longitude coordinates.
63
+ This method is generally stable, but as with any external API it will fail if changes
64
+ are made to the API. Another method (`solcast`) uses the Solcast PV
65
+ production forecast service. A final method (`solar.forecast`) uses another
66
+ external service: Solar.Forecast, for which just the nominal PV peak installed
67
+ power should be provided. Search the forecast section in the documentation for examples
68
+ on how to implement these different methods.
69
+
70
+ The `get_power_from_weather` method is proposed here to convert from irradiance
71
+ data to electrical power. The PVLib module is used to model the PV plant.
72
+
73
+ For the load forecast, a first specific method (`naive`) uses
74
+ a naive approach, also called persistence. It simply assumes that the forecast for
75
+ a future period will be equal to the observed values in a past period. The past
76
+ period is controlled using parameter `delta_forecast`. A second method (`mlforecaster`)
77
+ uses an internal custom forecasting model using machine learning. There is a section
78
+ in the documentation explaining how to use this method.
79
+
80
+ .. note:: This custom machine learning model is introduced from v0.4.0. EMHASS \
81
+ proposed this new `mlforecaster` class with `fit`, `predict` and `tune` methods. \
82
+ Only the `predict` method is used here to generate new forecasts, but it is \
83
+ necessary to previously fit a forecaster model and it is a good idea to \
84
+ optimize the model hyperparameters using the `tune` method. See the dedicated \
85
+ section in the documentation for more help.
86
+
87
+ For the PV production selling price and load cost forecasts, the preferred method
88
+ is a direct read from a user provided list of values. The list should be passed
89
+ as a runtime parameter during the `curl` to the EMHASS API.
90
+
91
+ If reading from a CSV file, it should contain no header and the timestamped data
92
+ should have the following format:
93
+
94
+ 2021-04-29 00:00:00+00:00,287.07
95
+
96
+ 2021-04-29 00:30:00+00:00,274.27
97
+
98
+ 2021-04-29 01:00:00+00:00,243.38
99
+
100
+ ...
101
+
102
+ The data column in these files should contain values in the units expected
103
+ for each forecasting method.
104
+
105
+ """
106
+
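For reference, a CSV in the format described by this docstring can be produced with pandas; a minimal sketch (the 30-minute step, the values and the file name are illustrative assumptions, use your own optimization_time_step and data path):

    import pandas as pd

    index = pd.date_range("2021-04-29", periods=48, freq="30min", tz="UTC")
    values = [287.07, 274.27, 243.38] + [250.0] * 45  # arbitrary example values
    # No header, one timestamped value per row, as expected by the CSV readers
    pd.Series(values, index=index).to_csv("data_weather_forecast.csv", header=False)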
107
+ def __init__(
108
+ self,
109
+ retrieve_hass_conf: dict,
110
+ optim_conf: dict,
111
+ plant_conf: dict,
112
+ params: str,
113
+ emhass_conf: dict,
114
+ logger: logging.Logger,
115
+ opt_time_delta: int | None = 24,
116
+ get_data_from_file: bool | None = False,
117
+ ) -> None:
118
+ """
119
+ Define constructor for the forecast class.
120
+
121
+ :param retrieve_hass_conf: Dictionary containing the needed configuration
122
+ data from the configuration file, specific to retrieve data from HASS
123
+ :type retrieve_hass_conf: dict
124
+ :param optim_conf: Dictionary containing the needed configuration
125
+ data from the configuration file, specific for the optimization task
126
+ :type optim_conf: dict
127
+ :param plant_conf: Dictionary containing the needed configuration
128
+ data from the configuration file, specific for the modeling of the PV plant
129
+ :type plant_conf: dict
130
+ :param params: Configuration parameters passed from data/options.json
131
+ :type params: str
132
+ :param emhass_conf: Dictionary containing the needed emhass paths
133
+ :type emhass_conf: dict
134
+ :param logger: The passed logger object
135
+ :type logger: logging object
136
+ :param opt_time_delta: The time delta in hours used to generate forecasts,
137
+ a value of 24 will generate 24 hours of forecast data, defaults to 24
138
+ :type opt_time_delta: int, optional
139
+ :param get_data_from_file: Select if data should be retrieved from a
140
+ previously saved pickle (useful for testing) or directly from a connection to
141
+ the hass database
142
+ :type get_data_from_file: bool, optional
143
+
144
+ """
145
+ self.retrieve_hass_conf = retrieve_hass_conf
146
+ self.optim_conf = optim_conf
147
+ self.plant_conf = plant_conf
148
+ self.freq = self.retrieve_hass_conf["optimization_time_step"]
149
+ self.time_zone = self.retrieve_hass_conf["time_zone"]
150
+ self.method_ts_round = self.retrieve_hass_conf["method_ts_round"]
151
+ self.timeStep = self.freq.seconds / 3600 # in hours
152
+ self.time_delta = pd.to_timedelta(opt_time_delta, "hours")
153
+ self.var_PV = self.retrieve_hass_conf["sensor_power_photovoltaics"]
154
+ self.var_PV_forecast = self.retrieve_hass_conf[
155
+ "sensor_power_photovoltaics_forecast"
156
+ ]
157
+ self.var_load = self.retrieve_hass_conf["sensor_power_load_no_var_loads"]
158
+ self.var_load_new = self.var_load + "_positive"
159
+ self.lat = self.retrieve_hass_conf["Latitude"]
160
+ self.lon = self.retrieve_hass_conf["Longitude"]
161
+ self.emhass_conf = emhass_conf
162
+ self.logger = logger
163
+ self.get_data_from_file = get_data_from_file
164
+ self.var_load_cost = "unit_load_cost"
165
+ self.var_prod_price = "unit_prod_price"
166
+ if (params is None) or (params == "null"):
167
+ self.params = {}
168
+ elif type(params) is dict:
169
+ self.params = params
170
+ else:
171
+ self.params = json.loads(params)
172
+
173
+ if self.method_ts_round == "nearest":
174
+ self.start_forecast = pd.Timestamp(
175
+ datetime.now(), tz=self.time_zone
176
+ ).replace(microsecond=0)
177
+ elif self.method_ts_round == "first":
178
+ self.start_forecast = (
179
+ pd.Timestamp(datetime.now(), tz=self.time_zone)
180
+ .replace(microsecond=0)
181
+ .floor(freq=self.freq)
182
+ )
183
+ elif self.method_ts_round == "last":
184
+ self.start_forecast = (
185
+ pd.Timestamp(datetime.now(), tz=self.time_zone)
186
+ .replace(microsecond=0)
187
+ .ceil(freq=self.freq)
188
+ )
189
+ else:
190
+ self.logger.error("Wrong method_ts_round passed parameter")
191
+ # check if weather_forecast_cache, if so get 2x the amount of forecast
192
+ if self.params["passed_data"].get("weather_forecast_cache", False):
193
+ self.end_forecast = (
194
+ self.start_forecast + (self.optim_conf["delta_forecast_daily"] * 2)
195
+ ).replace(microsecond=0)
196
+ else:
197
+ self.end_forecast = (
198
+ self.start_forecast + self.optim_conf["delta_forecast_daily"]
199
+ ).replace(microsecond=0)
200
+ self.forecast_dates = (
201
+ pd.date_range(
202
+ start=self.start_forecast,
203
+ end=self.end_forecast - self.freq,
204
+ freq=self.freq,
205
+ tz=self.time_zone,
206
+ )
207
+ .tz_convert("utc")
208
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
209
+ .tz_convert(self.time_zone)
210
+ )
211
+ if params is not None:
212
+ if "prediction_horizon" in list(self.params["passed_data"].keys()):
213
+ if self.params["passed_data"]["prediction_horizon"] is not None:
214
+ self.forecast_dates = self.forecast_dates[
215
+ 0 : self.params["passed_data"]["prediction_horizon"]
216
+ ]
217
+
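The three method_ts_round options above differ only in how the current time is snapped onto the optimization grid; a standalone sketch of the same pandas calls (a 30-minute time step is assumed):

    import pandas as pd

    now = pd.Timestamp("2021-04-29 10:12:34.567", tz="UTC")
    freq = pd.Timedelta("30min")
    nearest = now.replace(microsecond=0)  # 'nearest': keep the raw timestamp
    first = nearest.floor(freq=freq)      # 'first':   2021-04-29 10:00:00+00:00
    last = nearest.ceil(freq=freq)        # 'last':    2021-04-29 10:30:00+00:00
    print(nearest, first, last)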
218
+ def get_cached_open_meteo_forecast_json(
219
+ self,
220
+ max_age: int | None = 30,
221
+ ) -> dict:
222
+ r"""
223
+ Get weather forecast json from Open-Meteo and cache it for re-use.
224
+ The response json is cached in the local file system and returned
225
+ on subsequent calls until it is older than max_age, at which point
226
+ attempts will be made to replace it with a new version.
227
+ The cached version will not be overwritten until a new version has
228
+ been successfully fetched from Open-Meteo.
229
+ In the event of connectivity issues, the cached version will continue
230
+ to be returned until such time as a new version can be successfully
231
+ fetched from Open-Meteo.
232
+ To force a reload, pass a max_age value of zero.
233
+
234
+ :param max_age: The maximum age of the cached json file, in minutes,
235
+ before it is discarded and a new version fetched from Open-Meteo.
236
+ Defaults to 30 minutes.
237
+ :type max_age: int, optional
238
+ :return: The json containing the Open-Meteo forecast data
239
+ :rtype: dict
240
+
241
+ """
242
+ json_path = os.path.abspath(
243
+ self.emhass_conf["data_path"] / "cached-open-meteo-forecast.json"
244
+ )
245
+ # The cached JSON file is always loaded, if it exists, as it is also a fallback
246
+ # in case the REST API call to Open-Meteo fails - the cached JSON will continue to
247
+ # be used until it can successfully fetch a new version from Open-Meteo.
248
+ data = None
249
+ use_cache = False
250
+ if os.path.exists(json_path):
251
+ delta = datetime.now() - datetime.fromtimestamp(os.path.getmtime(json_path))
252
+ json_age = int(delta / timedelta(seconds=60))
253
+ use_cache = json_age < max_age
254
+ self.logger.info(
255
+ "Loading existing cached Open-Meteo JSON file: %s", json_path
256
+ )
257
+ with open(json_path) as json_file:
258
+ data = json.load(json_file)
259
+ if use_cache:
260
+ self.logger.info(
261
+ "The cached Open-Meteo JSON file is recent (age=%.0fm, max_age=%sm)",
262
+ json_age,
263
+ max_age,
264
+ )
265
+ else:
266
+ self.logger.info(
267
+ "The cached Open-Meteo JSON file is old (age=%.0fm, max_age=%sm)",
268
+ json_age,
269
+ max_age,
270
+ )
271
+
272
+ if not use_cache:
273
+ self.logger.info("Fetching a new weather forecast from Open-Meteo")
274
+ headers = {"User-Agent": "EMHASS", "Accept": "application/json"}
275
+ url = (
276
+ "https://api.open-meteo.com/v1/forecast?"
277
+ + "latitude="
278
+ + str(round(self.lat, 2))
279
+ + "&longitude="
280
+ + str(round(self.lon, 2))
281
+ + "&minutely_15="
282
+ + "temperature_2m,"
283
+ + "relative_humidity_2m,"
284
+ + "rain,"
285
+ + "cloud_cover,"
286
+ + "wind_speed_10m,"
287
+ + "shortwave_radiation_instant,"
288
+ + "diffuse_radiation_instant,"
289
+ + "direct_normal_irradiance_instant"
290
+ + "&timezone="
291
+ + quote(str(self.time_zone), safe="")
292
+ )
293
+ try:
294
+ response = get(url, headers=headers)
295
+ self.logger.debug("Returned HTTP status code: %s", response.status_code)
296
+ response.raise_for_status()
297
+ """import bz2 # Uncomment to save a serialized data for tests
298
+ import _pickle as cPickle
299
+ with bz2.BZ2File("data/test_response_openmeteo_get_method.pbz2", "w") as f:
300
+ cPickle.dump(response, f)"""
301
+ data = response.json()
302
+ self.logger.info(
303
+ "Saving response in Open-Meteo JSON cache file: %s", json_path
304
+ )
305
+ with open(json_path, "w") as json_file:
306
+ json.dump(response.json(), json_file, indent=2)
307
+ except RequestException:
308
+ self.logger.error(
309
+ "Failed to fetch weather forecast from Open-Meteo", exc_info=True
310
+ )
311
+ if data is not None:
312
+ self.logger.warning(
313
+ "Returning old cached data until next Open-Meteo attempt"
314
+ )
315
+
316
+ return data
317
+
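A minimal usage sketch of the caching behaviour described above (fcst is assumed to be an already-constructed Forecast instance):

    data = fcst.get_cached_open_meteo_forecast_json()            # cached JSON if fresh enough
    fresh = fcst.get_cached_open_meteo_forecast_json(max_age=0)  # force a new fetch
    print(fresh["minutely_15"]["time"][:4])                      # first 15-min timestamps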
318
+ def get_weather_forecast(
319
+ self,
320
+ method: str | None = "open-meteo",
321
+ csv_path: str | None = "data_weather_forecast.csv",
322
+ use_legacy_pvlib: bool | None = False,
323
+ ) -> pd.DataFrame:
324
+ r"""
325
+ Get and generate weather forecast data.
326
+
327
+ :param method: The desired method, options are 'open-meteo', 'csv', 'list', 'solcast' and \
328
+ 'solar.forecast'. Defaults to 'open-meteo'.
329
+ :type method: str, optional
+ :param csv_path: The path to the CSV file used when method = 'csv', relative to \
+ the data path, defaults to "data_weather_forecast.csv"
+ :type csv_path: str, optional
+ :param use_legacy_pvlib: If True, discard the irradiance values returned by \
+ Open-Meteo and re-estimate them from cloud cover with the legacy PVLib \
+ method, defaults to False
+ :type use_legacy_pvlib: bool, optional
330
+ :return: The DataFrame containing the forecasted data
331
+ :rtype: pd.DataFrame
332
+
333
+ """
334
+ csv_path = self.emhass_conf["data_path"] / csv_path
335
+ w_forecast_cache_path = os.path.abspath(
336
+ self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
337
+ )
338
+
339
+ self.logger.info("Retrieving weather forecast data using method = " + method)
340
+ if method == "scrapper":
341
+ self.logger.warning(
342
+ "The scrapper method has been deprecated and the keyword is accepted just for backward compatibility, please change the PV forecast method to open-meteo"
343
+ )
344
+ self.weather_forecast_method = (
345
+ method # Saving this attribute for later use to identify csv method usage
346
+ )
347
+ if (
348
+ method == "open-meteo" or method == "scrapper"
349
+ ): # The scrapper option is being left here for backward compatibility
350
+ if not os.path.isfile(w_forecast_cache_path):
351
+ data_raw = self.get_cached_open_meteo_forecast_json(
352
+ self.optim_conf["open_meteo_cache_max_age"]
353
+ )
354
+ data_15min = pd.DataFrame.from_dict(data_raw["minutely_15"])
355
+ data_15min["time"] = pd.to_datetime(data_15min["time"])
356
+ data_15min.set_index("time", inplace=True)
357
+ data_15min.index = data_15min.index.tz_localize(self.time_zone)
358
+
359
+ data_15min = data_15min.rename(
360
+ columns={
361
+ "temperature_2m": "temp_air",
362
+ "relative_humidity_2m": "relative_humidity",
363
+ "rain": "precipitable_water",
364
+ "cloud_cover": "cloud_cover",
365
+ "wind_speed_10m": "wind_speed",
366
+ "shortwave_radiation_instant": "ghi",
367
+ "diffuse_radiation_instant": "dhi",
368
+ "direct_normal_irradiance_instant": "dni",
369
+ }
370
+ )
371
+
372
+ data = data_15min.reindex(self.forecast_dates)
373
+ data.interpolate(
374
+ method="linear",
375
+ axis=0,
376
+ limit=None,
377
+ limit_direction="both",
378
+ inplace=True,
379
+ )
380
+ data = set_df_index_freq(data)
381
+ index_utc = data.index.tz_convert("utc")
382
+ index_tz = index_utc.round(
383
+ freq=data.index.freq, ambiguous="infer", nonexistent="shift_forward"
384
+ ).tz_convert(self.time_zone)
385
+ data.index = index_tz
386
+ data = set_df_index_freq(data)
387
+
388
+ # Convert mm to cm and clip the minimum value to 0.1 cm as expected by PVLib
389
+ data["precipitable_water"] = (data["precipitable_water"] / 10).clip(
390
+ lower=0.1
391
+ )
392
+
393
+ if use_legacy_pvlib:
394
+ # Converting the cloud cover into Global Horizontal Irradiance with a PVLib method
395
+ data = data.drop(columns=["ghi", "dhi", "dni"])
396
+ ghi_est = self.cloud_cover_to_irradiance(data["cloud_cover"])
397
+ data["ghi"] = ghi_est["ghi"]
398
+ data["dni"] = ghi_est["dni"]
399
+ data["dhi"] = ghi_est["dhi"]
400
+
401
+ # If runtime weather_forecast_cache is true save forecast result to file as cache
402
+ if self.params["passed_data"].get("weather_forecast_cache", False):
403
+ data = self.set_cached_forecast_data(w_forecast_cache_path, data)
404
+ # Else, open stored weather_forecast_data.pkl file for previous forecast data (cached data)
405
+ # Trim data to match the current required dates
406
+ else:
407
+ data = self.get_cached_forecast_data(w_forecast_cache_path)
408
+
409
+ elif method == "solcast": # using Solcast API
410
+ # Use the stored weather forecast cache file if it already exists
411
+ if os.path.isfile(w_forecast_cache_path):
412
+ data = self.get_cached_forecast_data(w_forecast_cache_path)
413
+ # open stored weather_forecast_data.pkl file for previous forecast data (cached data)
414
+ else:
415
+ # Check if weather_forecast_cache_only is true, if so produce error for not finding cache file
416
+ if self.params["passed_data"].get("weather_forecast_cache_only", False):
417
+ self.logger.error("Unable to obtain Solcast cache file.")
418
+ self.logger.error(
419
+ "Try running optimization again with 'weather_forecast_cache_only': false"
420
+ )
421
+ self.logger.error(
422
+ "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true in an optimization, or run the `weather-forecast-cache` action, to pull new data from Solcast and cache."
423
+ )
424
+ return False
425
+ else:
426
+ # Retrieve data from the Solcast API
427
+ if "solcast_api_key" not in self.retrieve_hass_conf:
428
+ self.logger.error(
429
+ "The solcast_api_key parameter was not defined"
430
+ )
431
+ return False
432
+ if "solcast_rooftop_id" not in self.retrieve_hass_conf:
433
+ self.logger.error(
434
+ "The solcast_rooftop_id parameter was not defined"
435
+ )
436
+ return False
437
+ headers = {
438
+ "User-Agent": "EMHASS",
439
+ "Authorization": "Bearer "
440
+ + self.retrieve_hass_conf["solcast_api_key"],
441
+ "content-type": "application/json",
442
+ }
443
+ days_solcast = int(
444
+ len(self.forecast_dates) * self.freq.seconds / 3600
445
+ )
446
+ # Split `roof_id` into a list (support comma or space as separator)
447
+ roof_ids = re.split(
448
+ r"[,\s]+", self.retrieve_hass_conf["solcast_rooftop_id"].strip()
449
+ )
450
+ # Summary list of data
451
+ total_data_list = [0] * len(self.forecast_dates)
452
+ # Iteration over individual `roof_id`
453
+ for roof_id in roof_ids:
454
+ url = f"https://api.solcast.com.au/rooftop_sites/{roof_id}/forecasts?hours={days_solcast}"
455
+ response = get(url, headers=headers)
456
+ """import bz2 # Uncomment to save a serialized data for tests
457
+ import _pickle as cPickle
458
+ with bz2.BZ2File("data/test_response_solcast_get_method.pbz2", "w") as f:
459
+ cPickle.dump(response, f)"""
460
+ # Verify the request passed
461
+ if int(response.status_code) == 200:
462
+ data = response.json()
463
+ elif (
464
+ int(response.status_code) == 402
465
+ or int(response.status_code) == 429
466
+ ):
467
+ self.logger.error(
468
+ "Solcast error: May have exceeded your subscription limit."
469
+ )
470
+ return False
471
+ elif int(response.status_code) >= 400 or (
472
+ int(response.status_code) >= 202
473
+ and int(response.status_code) <= 299
474
+ ):
475
+ self.logger.error(
476
+ "Solcast error: There was a issue with the solcast request, check solcast API key and rooftop ID."
477
+ )
478
+ self.logger.error(
479
+ "Solcast error: Check that your subscription is valid and your network can connect to Solcast."
480
+ )
481
+ return False
482
+ # Data processing for the current `roof_id`
483
+ data_list = []
484
+ for elm in data["forecasts"]:
485
+ data_list.append(
486
+ elm["pv_estimate"] * 1000
487
+ ) # Converting kW to W
488
+ # Check if the retrieved data has the correct length
489
+ if len(data_list) < len(self.forecast_dates):
490
+ self.logger.error(
491
+ "Not enough data retrieved from Solcast service, try increasing the time step or use MPC."
492
+ )
493
+ return False
494
+ # Adding the data of the current `roof_id` to the total
495
+ total_data_list = [
496
+ total + current
497
+ for total, current in zip_longest(
498
+ total_data_list, data_list, fillvalue=0
499
+ )
500
+ ]
501
+ # Trim request results to forecast_dates
502
+ total_data_list = total_data_list[0 : len(self.forecast_dates)]
503
+ data_dict = {"ts": self.forecast_dates, "yhat": total_data_list}
504
+ # Define DataFrame
505
+ data = pd.DataFrame.from_dict(data_dict)
506
+ # Define index
507
+ data.set_index("ts", inplace=True)
508
+ # If runtime weather_forecast_cache is true save forecast result to file as cache
509
+ # Trim data to match the current required dates
510
+ if self.params["passed_data"].get("weather_forecast_cache", False):
511
+ data = self.set_cached_forecast_data(
512
+ w_forecast_cache_path, data
513
+ )
514
+
515
+ elif method == "solar.forecast": # using the solar.forecast API
516
+ # Retrieve data from the solar.forecast API
517
+ if os.path.isfile(w_forecast_cache_path):
518
+ data = self.get_cached_forecast_data(w_forecast_cache_path)
519
+ else:
520
+ if "solar_forecast_kwp" not in self.retrieve_hass_conf:
521
+ self.logger.warning(
522
+ "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
523
+ )
524
+ self.retrieve_hass_conf["solar_forecast_kwp"] = 5
525
+ if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
526
+ self.logger.warning(
527
+ "The solar_forecast_kwp parameter is set to zero, setting to default 5"
528
+ )
529
+ self.retrieve_hass_conf["solar_forecast_kwp"] = 5
530
+ if self.optim_conf["delta_forecast_daily"].days > 1:
531
+ self.logger.warning(
532
+ "The free public tier for solar.forecast only provides one day forecasts"
533
+ )
534
+ self.logger.warning(
535
+ "Continuing with just the first day of data, the other days are filled with 0.0."
536
+ )
537
+ self.logger.warning(
538
+ "Use the other available methods for delta_forecast_daily > 1"
539
+ )
540
+ headers = {"Accept": "application/json"}
541
+ data = pd.DataFrame()
542
+ for i in range(len(self.plant_conf["pv_module_model"])):
543
+ url = (
544
+ "https://api.forecast.solar/estimate/"
545
+ + str(round(self.lat, 2))
546
+ + "/"
547
+ + str(round(self.lon, 2))
548
+ + "/"
549
+ + str(self.plant_conf["surface_tilt"][i])
550
+ + "/"
551
+ + str(self.plant_conf["surface_azimuth"][i] - 180)
552
+ + "/"
553
+ + str(self.retrieve_hass_conf["solar_forecast_kwp"])
554
+ )
555
+ response = get(url, headers=headers)
556
+ """import bz2 # Uncomment to save a serialized data for tests
557
+ import _pickle as cPickle
558
+ with bz2.BZ2File("data/test_response_solarforecast_get_method.pbz2", "w") as f:
559
+ cPickle.dump(response.json(), f)"""
560
+ data_raw = response.json()
561
+ data_dict = {
562
+ "ts": list(data_raw["result"]["watts"].keys()),
563
+ "yhat": list(data_raw["result"]["watts"].values()),
564
+ }
565
+ # Form the final DataFrame
566
+ data_tmp = pd.DataFrame.from_dict(data_dict)
567
+ data_tmp.set_index("ts", inplace=True)
568
+ data_tmp.index = pd.to_datetime(data_tmp.index)
569
+ data_tmp = data_tmp.tz_localize(self.forecast_dates.tz)
570
+ data_tmp = data_tmp.reindex(index=self.forecast_dates)
571
+ mask_up_data_df = (
572
+ data_tmp.copy(deep=True).fillna(method="ffill").isnull()
573
+ )
574
+ mask_down_data_df = (
575
+ data_tmp.copy(deep=True).fillna(method="bfill").isnull()
576
+ )
577
+ data_tmp.loc[mask_up_data_df["yhat"], :] = 0.0
578
+ data_tmp.loc[mask_down_data_df["yhat"], :] = 0.0
579
+ data_tmp.interpolate(inplace=True, limit=1)
580
+ data_tmp = data_tmp.fillna(0.0)
581
+ if len(data) == 0:
582
+ data = copy.deepcopy(data_tmp)
583
+ else:
584
+ data = data + data_tmp
585
+ # If runtime weather_forecast_cache is true save forecast result to file as cache.
586
+ # Trim data to match the current required dates
587
+ if self.params["passed_data"].get("weather_forecast_cache", False):
588
+ data = self.set_cached_forecast_data(
589
+ w_forecast_cache_path, data
590
+ )
591
+ elif method == "csv": # reading from a csv file
592
+ weather_csv_file_path = csv_path
593
+ # Loading the csv file, we will consider that this is the PV power in W
594
+ data = pd.read_csv(weather_csv_file_path, header=None, names=["ts", "yhat"])
595
+ # Check if the passed data has the correct length
596
+ if len(data) < len(self.forecast_dates):
597
+ self.logger.error("Passed data from CSV is not long enough")
598
+ else:
599
+ # Ensure correct length
600
+ data = data.loc[data.index[0 : len(self.forecast_dates)], :]
601
+ # Define index
602
+ data.index = self.forecast_dates
603
+ data.drop("ts", axis=1, inplace=True)
604
+ data = data.copy().loc[self.forecast_dates]
605
+ elif method == "list": # reading a list of values
606
+ # Loading data from passed list
607
+ data_list = self.params["passed_data"]["pv_power_forecast"]
608
+ # Check if the passed data has the correct length
609
+ if (
610
+ len(data_list) < len(self.forecast_dates)
611
+ and self.params["passed_data"]["prediction_horizon"] is None
612
+ ):
613
+ self.logger.error("Passed data from passed list is not long enough")
614
+ else:
615
+ # Ensure correct length
616
+ data_list = data_list[0 : len(self.forecast_dates)]
617
+ # Define DataFrame
618
+ data_dict = {"ts": self.forecast_dates, "yhat": data_list}
619
+ data = pd.DataFrame.from_dict(data_dict)
620
+ # Define index
621
+ data.set_index("ts", inplace=True)
622
+ else:
623
+ self.logger.error("Method %r is not valid", method)
624
+ data = None
625
+ self.logger.debug("get_weather_forecast returning:\n%s", data)
626
+ return data
627
+
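For the 'list' method the PV power values come from the runtime parameters; a hedged sketch of posting them to a running EMHASS instance (the localhost:5000 endpoint reflects a default standalone install, adjust to your setup):

    import requests

    payload = {"pv_power_forecast": [0.0, 70.0, 141.0, 426.0, 610.0, 1257.0]}
    response = requests.post(
        "http://localhost:5000/action/dayahead-optim", json=payload
    )
    print(response.status_code)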
628
+ def cloud_cover_to_irradiance(
629
+ self, cloud_cover: pd.Series, offset: int | None = 35
630
+ ) -> pd.DataFrame:
631
+ """
632
+ Estimates irradiance from cloud cover in the following steps.
633
+
634
+ 1. Determine clear sky GHI using Ineichen model and
635
+ climatological turbidity.
636
+
637
+ 2. Estimate cloudy sky GHI using a function of cloud_cover
638
+
639
+ 3. Estimate cloudy sky DNI using the DISC model.
640
+
641
+ 4. Calculate DHI from DNI and GHI.
642
+
643
+ (This function was copied and modified from PVLib)
644
+
645
+ :param cloud_cover: Cloud cover in %.
646
+ :type cloud_cover: pd.Series
647
+ :param offset: Determines the minimum GHI, defaults to 35
648
+ :type offset: Optional[int], optional
649
+ :return: Estimated GHI, DNI, and DHI.
650
+ :rtype: pd.DataFrame
651
+ """
652
+ location = Location(latitude=self.lat, longitude=self.lon)
653
+ solpos = location.get_solarposition(cloud_cover.index)
654
+ cs = location.get_clearsky(
655
+ cloud_cover.index, model="ineichen", solar_position=solpos
656
+ )
657
+ # Using only the linear method
658
+ offset = offset / 100.0
659
+ cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.0
660
+ ghi = (offset + (1 - offset) * (1 - cloud_cover_unit)) * cs["ghi"]
661
+ # Using disc model
662
+ dni = disc(ghi, solpos["zenith"], cloud_cover.index)["dni"]
663
+ dhi = ghi - dni * np.cos(np.radians(solpos["zenith"]))
664
+ irrads = pd.DataFrame({"ghi": ghi, "dni": dni, "dhi": dhi}).fillna(0)
665
+ return irrads
666
+
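The linear step in cloud_cover_to_irradiance scales the clear-sky GHI down with cloud cover, floored by the offset; a quick check of the arithmetic with the default offset of 35:

    offset = 35 / 100.0
    cloud_cover = 80 / 100.0  # 80 % cloud cover
    ghi_clear = 600.0         # W/m2, clear-sky GHI from the Ineichen model
    ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear
    print(ghi)                # (0.35 + 0.65 * 0.2) * 600 = 288.0 W/m2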
667
+ @staticmethod
668
+ def get_mix_forecast(
669
+ df_now: pd.DataFrame,
670
+ df_forecast: pd.DataFrame,
671
+ alpha: float,
672
+ beta: float,
673
+ col: str,
674
+ ) -> pd.DataFrame:
675
+ """A simple correction method for forecasted data using the current real values of a variable.
676
+
677
+ :param df_now: The DataFrame containing the current/real values
678
+ :type df_now: pd.DataFrame
679
+ :param df_forecast: The DataFrame containing the forecast data
680
+ :type df_forecast: pd.DataFrame
681
+ :param alpha: A weight for the forecast data side
682
+ :type alpha: float
683
+ :param beta: A weight for the current/real values side
684
+ :type beta: float
685
+ :param col: The column variable name
686
+ :type col: str
687
+ :return: The output DataFrame with the corrected values
688
+ :rtype: pd.DataFrame
689
+ """
690
+ first_fcst = alpha * df_forecast.iloc[0] + beta * df_now[col].iloc[-1]
691
+ df_forecast.iloc[0] = int(round(first_fcst))
692
+ return df_forecast
693
+
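get_mix_forecast only corrects the first forecast step; with alpha = beta = 0.5 it reduces to a plain average of the forecast and the latest measurement:

    alpha, beta = 0.5, 0.5
    forecast_first = 1200.0  # W, first forecasted value
    measured_last = 800.0    # W, latest real value from df_now
    print(int(round(alpha * forecast_first + beta * measured_last)))  # 1000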
694
+ def get_power_from_weather(
695
+ self,
696
+ df_weather: pd.DataFrame,
697
+ set_mix_forecast: bool | None = False,
698
+ df_now: pd.DataFrame | None = pd.DataFrame(),
699
+ ) -> pd.Series:
700
+ r"""
701
+ Convert weather forecast data into electrical power.
702
+
703
+ :param df_weather: The DataFrame containing the weather forecasted data. \
704
+ This DF should be generated by the 'get_weather_forecast' method or at \
705
+ least contain the same columns names filled with proper data.
706
+ :type df_weather: pd.DataFrame
707
+ :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
708
+ :type set_mix_forecast: Bool, optional
709
+ :param df_now: The DataFrame containing the now/current data.
710
+ :type df_now: pd.DataFrame
711
+ :return: The Series containing the electrical power in Watts
712
+ :rtype: pd.Series
713
+
714
+ """
715
+ # If using csv method we consider that yhat is the PV power in W
716
+ if (
717
+ "solar_forecast_kwp" in self.retrieve_hass_conf.keys()
718
+ and self.retrieve_hass_conf["solar_forecast_kwp"] == 0
719
+ ):
720
+ P_PV_forecast = pd.Series(0, index=df_weather.index)
721
+ else:
722
+ if (
723
+ self.weather_forecast_method == "solcast"
724
+ or self.weather_forecast_method == "solar.forecast"
725
+ or self.weather_forecast_method == "csv"
726
+ or self.weather_forecast_method == "list"
727
+ ):
728
+ P_PV_forecast = df_weather["yhat"]
729
+ P_PV_forecast.name = None
730
+ else: # We will transform the weather data into electrical power
731
+ # Transform to power (Watts)
732
+ # Setting the main parameters of the PV plant
733
+ location = Location(latitude=self.lat, longitude=self.lon)
734
+ temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"][
735
+ "close_mount_glass_glass"
736
+ ]
737
+ cec_modules = bz2.BZ2File(
738
+ self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2", "rb"
739
+ )
740
+ cec_modules = cPickle.load(cec_modules)
741
+ cec_inverters = bz2.BZ2File(
742
+ self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2", "rb"
743
+ )
744
+ cec_inverters = cPickle.load(cec_inverters)
745
+ if isinstance(self.plant_conf["pv_module_model"], list):
746
+ P_PV_forecast = pd.Series(0, index=df_weather.index)
747
+ for i in range(len(self.plant_conf["pv_module_model"])):
748
+ # Selecting correct module and inverter
749
+ module = cec_modules[self.plant_conf["pv_module_model"][i]]
750
+ inverter = cec_inverters[
751
+ self.plant_conf["pv_inverter_model"][i]
752
+ ]
753
+ # Building the PV system in PVLib
754
+ system = PVSystem(
755
+ surface_tilt=self.plant_conf["surface_tilt"][i],
756
+ surface_azimuth=self.plant_conf["surface_azimuth"][i],
757
+ module_parameters=module,
758
+ inverter_parameters=inverter,
759
+ temperature_model_parameters=temp_params,
760
+ modules_per_string=self.plant_conf["modules_per_string"][i],
761
+ strings_per_inverter=self.plant_conf[
762
+ "strings_per_inverter"
763
+ ][i],
764
+ )
765
+ mc = ModelChain(system, location, aoi_model="physical")
766
+ # Run the model on the weather DF indexes
767
+ mc.run_model(df_weather)
768
+ # Extracting results for AC power
769
+ P_PV_forecast = P_PV_forecast + mc.results.ac
770
+ else:
771
+ # Selecting correct module and inverter
772
+ module = cec_modules[self.plant_conf["pv_module_model"]]
773
+ inverter = cec_inverters[self.plant_conf["pv_inverter_model"]]
774
+ # Building the PV system in PVLib
775
+ system = PVSystem(
776
+ surface_tilt=self.plant_conf["surface_tilt"],
777
+ surface_azimuth=self.plant_conf["surface_azimuth"],
778
+ module_parameters=module,
779
+ inverter_parameters=inverter,
780
+ temperature_model_parameters=temp_params,
781
+ modules_per_string=self.plant_conf["modules_per_string"],
782
+ strings_per_inverter=self.plant_conf["strings_per_inverter"],
783
+ )
784
+ mc = ModelChain(system, location, aoi_model="physical")
785
+ # Run the model on the weather DF indexes
786
+ mc.run_model(df_weather)
787
+ # Extracting results for AC power
788
+ P_PV_forecast = mc.results.ac
789
+ if set_mix_forecast:
790
+ P_PV_forecast = Forecast.get_mix_forecast(
791
+ df_now,
792
+ P_PV_forecast,
793
+ self.params["passed_data"]["alpha"],
794
+ self.params["passed_data"]["beta"],
795
+ self.var_PV,
796
+ )
797
+ P_PV_forecast[P_PV_forecast < 0] = 0 # replace any negative PV values with zero
798
+ self.logger.debug("get_power_from_weather returning:\n%s", P_PV_forecast)
799
+ return P_PV_forecast
800
+
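The PVLib chain used above can be reproduced standalone against the SAM CEC databases bundled with PVLib; a hedged sketch (the plant geometry is arbitrary, the first database entries stand in for the configured module/inverter models, and spectral losses are ignored for simplicity):

    from pvlib.location import Location
    from pvlib.modelchain import ModelChain
    from pvlib.pvsystem import PVSystem, retrieve_sam
    from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS

    modules = retrieve_sam("CECMod")
    inverters = retrieve_sam("CECInverter")
    system = PVSystem(
        surface_tilt=30,
        surface_azimuth=205,
        module_parameters=modules[modules.columns[0]],
        inverter_parameters=inverters[inverters.columns[0]],
        temperature_model_parameters=TEMPERATURE_MODEL_PARAMETERS["sapm"][
            "close_mount_glass_glass"
        ],
        modules_per_string=16,
        strings_per_inverter=1,
    )
    mc = ModelChain(
        system,
        Location(latitude=45.8, longitude=6.8),
        aoi_model="physical",
        spectral_model="no_loss",  # simplification for this sketch
    )
    # mc.run_model(df_weather) then exposes the AC power series as mc.results.ac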
801
+ @staticmethod
802
+ def compute_solar_angles(
803
+ df: pd.DataFrame, latitude: float, longitude: float
804
+ ) -> pd.DataFrame:
805
+ """
806
+ Compute solar angles (elevation, azimuth) based on timestamps and location.
807
+
808
+ :param df: DataFrame with a DateTime index.
809
+ :param latitude: Latitude of the PV system.
810
+ :param longitude: Longitude of the PV system.
811
+ :return: DataFrame with added solar elevation and azimuth.
812
+ """
813
+ df = df.copy()
814
+ solpos = get_solarposition(df.index, latitude, longitude)
815
+ df["solar_elevation"] = solpos["elevation"]
816
+ df["solar_azimuth"] = solpos["azimuth"]
817
+ return df
818
+
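A quick standalone check of compute_solar_angles (the coordinates are arbitrary):

    import pandas as pd
    from emhass.forecast import Forecast

    idx = pd.date_range("2021-06-21 04:00", periods=4, freq="4h", tz="UTC")
    df = Forecast.compute_solar_angles(pd.DataFrame(index=idx), 45.8, 6.8)
    print(df[["solar_elevation", "solar_azimuth"]])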
819
+ def adjust_pv_forecast_data_prep(self, data: pd.DataFrame) -> pd.DataFrame:
820
+ """
821
+ Prepare data for adjusting the photovoltaic (PV) forecast.
822
+
823
+ This method aligns the actual PV production data with the forecasted data,
824
+ adds additional features for analysis, and separates the predictors (X)
825
+ from the target variable (y).
826
+
827
+ :param data: A DataFrame containing the actual PV production data and the
828
+ forecasted PV production data.
829
+ :type data: pd.DataFrame
830
+ """
831
+ # Extract target and predictor
832
+ self.logger.debug("adjust_pv_forecast_data_prep using data:\n%s", data)
833
+ if self.logger.isEnabledFor(logging.DEBUG):
834
+ data.to_csv(
835
+ self.emhass_conf["data_path"]
836
+ / "debug-adjust-pv-forecast-data-prep-input-data.csv"
837
+ )
838
+ P_PV = data[self.var_PV] # Actual PV production
839
+ P_PV_forecast = data[self.var_PV_forecast] # Forecasted PV production
840
+ # Define time ranges
841
+ last_day = data.index.max().normalize() # Last available day
842
+ history_start = last_day - pd.DateOffset(
843
+ days=self.retrieve_hass_conf["historic_days_to_retrieve"]
844
+ )
845
+ # Train/Test: Last historic_days_to_retrieve days (excluding the last day)
846
+ train_test_mask = (data.index >= history_start) & (data.index < last_day)
847
+ self.P_PV_train_test = P_PV[train_test_mask]
848
+ self.P_PV_forecast_train_test = P_PV_forecast[train_test_mask]
849
+ # Validation: Last day only
850
+ validation_mask = data.index >= last_day
851
+ self.P_PV_validation = P_PV[validation_mask]
852
+ self.P_PV_forecast_validation = P_PV_forecast[validation_mask]
853
+ # Ensure data is aligned
854
+ self.data_adjust_pv = pd.concat(
855
+ [P_PV.rename("actual"), P_PV_forecast.rename("forecast")], axis=1
856
+ ).dropna()
857
+ # Add more features
858
+ self.data_adjust_pv = add_date_features(self.data_adjust_pv)
859
+ self.data_adjust_pv = Forecast.compute_solar_angles(
860
+ self.data_adjust_pv, self.lat, self.lon
861
+ )
862
+ # Features (X) and target (y)
863
+ self.X_adjust_pv = self.data_adjust_pv.drop(columns=["actual"]) # Predictors
864
+ self.y_adjust_pv = self.data_adjust_pv["actual"] # Target: actual PV production
865
+ self.logger.debug(
866
+ "adjust_pv_forecast_data_prep output data:\n%s", self.data_adjust_pv
867
+ )
868
+ if self.logger.isEnabledFor(logging.DEBUG):
869
+ self.data_adjust_pv.to_csv(
870
+ self.emhass_conf["data_path"]
871
+ / "debug-adjust-pv-forecast-data-prep-output-data.csv"
872
+ )
873
+
874
+ def adjust_pv_forecast_fit(
875
+ self,
876
+ n_splits: int = 5,
877
+ regression_model: str = "LassoRegression",
878
+ debug: bool | None = False,
879
+ ) -> pd.DataFrame:
880
+ """
881
+ Fit a regression model to adjust the photovoltaic (PV) forecast.
882
+
883
+ This method uses historical actual and forecasted PV production data, along with
884
+ additional solar and date features, to train a regression model. The model is
885
+ optimized using a grid search with time-series cross-validation.
886
+
887
+ :param n_splits: The number of splits for time-series cross-validation, defaults to 5.
888
+ :type n_splits: int, optional
889
+ :param regression_model: The type of regression model to use. Options include \
890
+ "LassoRegression", "RidgeRegression", etc., defaults to "LassoRegression".
891
+ :type regression_model: str, optional
892
+ :param debug: If True, the model is not saved to disk, useful for debugging, defaults to False.
893
+ :type debug: bool, optional
894
+ :return: A DataFrame containing the adjusted PV forecast.
895
+ :rtype: pd.DataFrame
896
+ """
897
+ # Get regression model and hyperparameter grid
898
+ mlr = MLRegressor(
899
+ self.data_adjust_pv,
900
+ "adjusted_pv_forecast",
901
+ regression_model,
902
+ list(self.X_adjust_pv.columns),
903
+ list(self.y_adjust_pv.name),
904
+ None,
905
+ self.logger,
906
+ )
907
+ base_model, param_grid = mlr.get_regression_model()
908
+ model = make_pipeline(StandardScaler(), base_model)
909
+ # Time-series split
910
+ tscv = TimeSeriesSplit(n_splits=n_splits)
911
+ grid_search = GridSearchCV(
912
+ model, param_grid, cv=tscv, scoring="neg_mean_squared_error", verbose=0
913
+ )
914
+ # Train model
915
+ grid_search.fit(self.X_adjust_pv, self.y_adjust_pv)
916
+ self.model_adjust_pv = grid_search.best_estimator_
917
+ # Calculate training metrics
918
+ y_pred_train = self.model_adjust_pv.predict(self.X_adjust_pv)
919
+ self.rmse = np.sqrt(mean_squared_error(self.y_adjust_pv, y_pred_train))
920
+ self.r2 = r2_score(self.y_adjust_pv, y_pred_train)
921
+ # Log the metrics
922
+ self.logger.info(
923
+ f"PV adjust Training metrics: RMSE = {self.rmse}, R2 = {self.r2}"
924
+ )
925
+ # Save model
926
+ if not debug:
927
+ filename = "adjust_pv_regressor.pkl"
928
+ filename_path = self.emhass_conf["data_path"] / filename
929
+ with open(filename_path, "wb") as outp:
930
+ pickle.dump(self.model_adjust_pv, outp, pickle.HIGHEST_PROTOCOL)
931
+
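The adjust-PV workflow chains the preparation, fit and predict steps; a hedged sketch (fcst is an assumed Forecast instance and df a DataFrame holding the two sensor columns named by sensor_power_photovoltaics and sensor_power_photovoltaics_forecast):

    fcst.adjust_pv_forecast_data_prep(df)  # build X/y with date and solar features
    fcst.adjust_pv_forecast_fit(n_splits=5, regression_model="LassoRegression")
    adjusted = fcst.adjust_pv_forecast_predict()  # validates on the last day
    print(adjusted["adjusted_forecast"].head())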
932
+ def adjust_pv_forecast_predict(
933
+ self, forecasted_pv: pd.DataFrame | None = None
934
+ ) -> pd.DataFrame:
935
+ """
936
+ Predict the adjusted photovoltaic (PV) forecast.
937
+
938
+ This method uses the trained regression model to predict the adjusted PV forecast
939
+ based on either the validation data stored in `self` or a new forecasted PV data
940
+ passed as input. It applies additional features such as date and solar angles to
941
+ the forecasted PV production data before making predictions. The solar elevation
942
+ is used to avoid negative values and to fix values at the beginning and end of the day.
943
+
944
+ :param forecasted_pv: Optional. A DataFrame containing the forecasted PV production data.
945
+ It must have a DateTime index and a column named "forecast".
946
+ If not provided, the method will use `self.P_PV_forecast_validation`.
947
+ :type forecasted_pv: pd.DataFrame, optional
948
+ :return: A DataFrame containing the adjusted PV forecast with additional features.
949
+ :rtype: pd.DataFrame
950
+ """
951
+ # Use the provided forecasted PV data or fall back to the validation data in `self`
952
+ if forecasted_pv is not None:
953
+ # Ensure the input DataFrame has the required structure
954
+ if "forecast" not in forecasted_pv.columns:
955
+ raise ValueError(
956
+ "The input DataFrame must contain a 'forecast' column."
957
+ )
958
+ forecast_data = forecasted_pv.copy()
959
+ else:
960
+ # Use the validation data stored in `self`
961
+ forecast_data = self.P_PV_forecast_validation.rename("forecast").to_frame()
962
+ # Prepare the forecasted PV data
963
+ forecast_data = add_date_features(forecast_data)
964
+ forecast_data = Forecast.compute_solar_angles(forecast_data, self.lat, self.lon)
965
+ # Predict the adjusted forecast
966
+ forecast_data["adjusted_forecast"] = self.model_adjust_pv.predict(forecast_data)
967
+
968
+ # Apply solar elevation weighting only for specific cases
969
+ def apply_weighting(row):
970
+ if row["solar_elevation"] <= 0: # Nighttime or negative solar elevation
971
+ return 0
972
+ elif (
973
+ row["solar_elevation"]
974
+ < self.optim_conf["adjusted_pv_solar_elevation_threshold"]
975
+ ): # Early morning or late evening
976
+ return max(
977
+ row["adjusted_forecast"]
978
+ * (
979
+ row["solar_elevation"]
980
+ / self.optim_conf["adjusted_pv_solar_elevation_threshold"]
981
+ ),
982
+ 0,
983
+ )
984
+ else: # Daytime with sufficient solar elevation
985
+ return row["adjusted_forecast"]
986
+
987
+ forecast_data["adjusted_forecast"] = forecast_data.apply(
988
+ apply_weighting, axis=1
989
+ )
990
+ # If using validation data, calculate validation metrics
991
+ if forecasted_pv is None:
992
+ y_true = self.P_PV_validation.values
993
+ y_pred = forecast_data["adjusted_forecast"].values
994
+ self.validation_rmse = np.sqrt(mean_squared_error(y_true, y_pred))
995
+ self.validation_r2 = r2_score(y_true, y_pred)
996
+ # Log the validation metrics
997
+ self.logger.info(
998
+ f"PV adjust Validation metrics: RMSE = {self.validation_rmse}, R2 = {self.validation_r2}"
999
+ )
1000
+ self.logger.debug(
1001
+ "adjust_pv_forecast_predict forecast data:\n%s", forecast_data
1002
+ )
1003
+ if self.logger.isEnabledFor(logging.DEBUG):
1004
+ forecast_data.to_csv(
1005
+ self.emhass_conf["data_path"]
1006
+ / "debug-adjust-pv-forecast-predict-forecast-data.csv"
1007
+ )
1008
+ # Return the DataFrame with the adjusted forecast
1009
+ return forecast_data
1010
+
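The weighting applied above scales the prediction linearly below the configured elevation threshold; a worked example with adjusted_pv_solar_elevation_threshold = 10 degrees:

    threshold = 10.0
    for elevation, prediction in [(-2.0, 500.0), (4.0, 500.0), (25.0, 500.0)]:
        if elevation <= 0:  # nighttime
            adjusted = 0.0
        elif elevation < threshold:  # early morning or late evening
            adjusted = max(prediction * elevation / threshold, 0.0)
        else:  # full daytime
            adjusted = prediction
        print(elevation, adjusted)  # -> 0.0, 200.0 and 500.0 respectively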
1011
+ def get_forecast_days_csv(self, timedelta_days: int | None = 1) -> pd.date_range:
1012
+ r"""
1013
+ Get the date range vector of forecast dates that will be used when loading a CSV file.
1014
+
1015
+ :return: The forecast dates vector
1016
+ :rtype: pd.date_range
1017
+
1018
+ """
1019
+ start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(
1020
+ microsecond=0
1021
+ )
1022
+ if self.method_ts_round == "nearest":
1023
+ start_forecast_csv = pd.Timestamp(
1024
+ datetime.now(), tz=self.time_zone
1025
+ ).replace(microsecond=0)
1026
+ elif self.method_ts_round == "first":
1027
+ start_forecast_csv = (
1028
+ pd.Timestamp(datetime.now(), tz=self.time_zone)
1029
+ .replace(microsecond=0)
1030
+ .floor(freq=self.freq)
1031
+ )
1032
+ elif self.method_ts_round == "last":
1033
+ start_forecast_csv = (
1034
+ pd.Timestamp(datetime.now(), tz=self.time_zone)
1035
+ .replace(microsecond=0)
1036
+ .ceil(freq=self.freq)
1037
+ )
1038
+ else:
1039
+ self.logger.error("Wrong method_ts_round passed parameter")
1040
+ end_forecast_csv = (
1041
+ start_forecast_csv + self.optim_conf["delta_forecast_daily"]
1042
+ ).replace(microsecond=0)
1043
+ forecast_dates_csv = (
1044
+ pd.date_range(
1045
+ start=start_forecast_csv,
1046
+ end=end_forecast_csv + timedelta(days=timedelta_days) - self.freq,
1047
+ freq=self.freq,
1048
+ tz=self.time_zone,
1049
+ )
1050
+ .tz_convert("utc")
1051
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
1052
+ .tz_convert(self.time_zone)
1053
+ )
1054
+ if self.params is not None:
1055
+ if "prediction_horizon" in list(self.params["passed_data"].keys()):
1056
+ if self.params["passed_data"]["prediction_horizon"] is not None:
1057
+ forecast_dates_csv = forecast_dates_csv[
1058
+ 0 : self.params["passed_data"]["prediction_horizon"]
1059
+ ]
1060
+ return forecast_dates_csv
1061
+
1062
+ def get_forecast_out_from_csv_or_list(
1063
+ self,
1064
+ df_final: pd.DataFrame,
1065
+ forecast_dates_csv: pd.date_range,
1066
+ csv_path: str,
1067
+ data_list: list | None = None,
1068
+ list_and_perfect: bool | None = False,
1069
+ ) -> pd.DataFrame:
1070
+ r"""
1071
+ Get the forecast data as a DataFrame from a CSV file.
1072
+
1073
+ The data contained in the CSV file should be a 24h forecast with the same frequency as
1074
+ the main 'optimization_time_step' parameter in the configuration file. The timestamp will not be used and
1075
+ a new DateTimeIndex is generated to fit the timestamp index of the input data in 'df_final'.
1076
+
1077
+ :param df_final: The DataFrame containing the input data.
1078
+ :type df_final: pd.DataFrame
1079
+ :param forecast_dates_csv: The forecast dates vector
1080
+ :type forecast_dates_csv: pd.date_range
1081
+ :param csv_path: The path to the CSV file
1082
+ :type csv_path: str
+ :param data_list: A list of values to use instead of a CSV file, defaults to None
+ :type data_list: list, optional
+ :param list_and_perfect: Set to True when combining the 'list' method with a \
+ perfect-optimization run, defaults to False
+ :type list_and_perfect: bool, optional
1083
+ :return: The data from the CSV file
1084
+ :rtype: pd.DataFrame
1085
+
1086
+ """
1087
+ if csv_path is None:
1088
+ data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
1089
+ df_csv = pd.DataFrame.from_dict(data_dict)
1090
+ df_csv.index = forecast_dates_csv
1091
+ df_csv.drop(["ts"], axis=1, inplace=True)
1092
+ df_csv = set_df_index_freq(df_csv)
1093
+ if list_and_perfect:
1094
+ days_list = df_final.index.day.unique().tolist()
1095
+ else:
1096
+ days_list = df_csv.index.day.unique().tolist()
1097
+ else:
1098
+ if not os.path.exists(csv_path):
1099
+ csv_path = self.emhass_conf["data_path"] / csv_path
1100
+ load_csv_file_path = csv_path
1101
+ df_csv = pd.read_csv(load_csv_file_path, header=None, names=["ts", "yhat"])
1102
+
1103
+ first_col = df_csv.iloc[:, 0]
1104
+ # If the entire column can be converted to datetime, set it as index
1105
+ if pd.to_datetime(first_col, errors="coerce").notna().all():
1106
+ df_csv["ts"] = pd.to_datetime(df_csv["ts"], utc=True)
1107
+ # Set the timestamp column as the index
1108
+ df_csv.set_index("ts", inplace=True)
1109
+ df_csv.index = df_csv.index.tz_convert(self.time_zone)
1110
+ else:
1111
+ df_csv.index = forecast_dates_csv
1112
+ df_csv.drop(["ts"], axis=1, inplace=True)
1113
+ df_csv = set_df_index_freq(df_csv)
1114
+ if list_and_perfect:
1115
+ days_list = df_final.index.day.unique().tolist()
1116
+ else:
1117
+ days_list = df_csv.index.day.unique().tolist()
1118
+ forecast_out = pd.DataFrame()
1119
+ for day in days_list:
1120
+ if csv_path is None:
1121
+ df_final = set_df_index_freq(df_final)
1122
+ df_tmp = copy.deepcopy(df_final)
1123
+ else:
1124
+ if list_and_perfect:
1125
+ df_final = set_df_index_freq(df_final)
1126
+ df_tmp = copy.deepcopy(df_final)
1127
+ else:
1128
+ df_tmp = copy.deepcopy(df_csv)
1129
+ first_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][0]
1130
+ last_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][-1]
1131
+ fcst_index = pd.date_range(
1132
+ start=df_tmp.index[first_elm_index],
1133
+ end=df_tmp.index[last_elm_index],
1134
+ freq=df_tmp.index.freq,
1135
+ )
1136
+ first_hour = (
1137
+ f"{df_tmp.index[first_elm_index].hour:02d}"
1138
+ + ":"
1139
+ + f"{df_tmp.index[first_elm_index].minute:02d}"
1140
+ )
1141
+ last_hour = (
1142
+ f"{df_tmp.index[last_elm_index].hour:02d}"
1143
+ + ":"
1144
+ + f"{df_tmp.index[last_elm_index].minute:02d}"
1145
+ )
1146
+ if len(forecast_out) == 0:
1147
+ if csv_path is None:
1148
+ if list_and_perfect:
1149
+ values_array = df_csv.between_time(first_hour, last_hour).values
1150
+ fcst_index = fcst_index[
1151
+ 0 : len(values_array)
1152
+ ] # Fix for different lengths
1153
+ forecast_out = pd.DataFrame(
1154
+ values_array,
1155
+ index=fcst_index,
1156
+ )
1157
+ else:
1158
+ forecast_out = pd.DataFrame(
1159
+ df_csv.loc[fcst_index, :]
1160
+ .between_time(first_hour, last_hour)
1161
+ .values,
1162
+ index=fcst_index,
1163
+ )
1164
+ else:
1165
+ df_csv_filtered_date = df_csv.loc[
1166
+ df_csv.index.strftime("%Y-%m-%d")
1167
+ == fcst_index[0].date().strftime("%Y-%m-%d")
1168
+ ]
1169
+ forecast_out = pd.DataFrame(
1170
+ df_csv_filtered_date.between_time(first_hour, last_hour).values,
1171
+ index=fcst_index,
1172
+ )
1173
+ else:
1174
+ if csv_path is None:
1175
+ if list_and_perfect:
1176
+ values_array = df_csv.between_time(first_hour, last_hour).values
1177
+ fcst_index = fcst_index[
1178
+ 0 : len(values_array)
1179
+ ] # Fix for different lengths
1180
+ forecast_tp = pd.DataFrame(
1181
+ values_array,
1182
+ index=fcst_index,
1183
+ )
1184
+ else:
1185
+ forecast_tp = pd.DataFrame(
1186
+ df_csv.loc[fcst_index, :]
1187
+ .between_time(first_hour, last_hour)
1188
+ .values,
1189
+ index=fcst_index,
1190
+ )
1191
+ else:
1192
+ df_csv_filtered_date = df_csv.loc[
1193
+ df_csv.index.strftime("%Y-%m-%d")
1194
+ == fcst_index[0].date().strftime("%Y-%m-%d")
1195
+ ]
1196
+ forecast_tp = pd.DataFrame(
1197
+ df_csv_filtered_date.between_time(first_hour, last_hour).values,
1198
+ index=fcst_index,
1199
+ )
1200
+ forecast_out = pd.concat([forecast_out, forecast_tp], axis=0)
1201
+ return forecast_out
1202
+
1203
+ @staticmethod
1204
+ def resample_data(data, freq, current_freq):
1205
+ r"""
1206
+ Resample a DataFrame with a custom frequency.
1207
+
1208
+ :param data: Original time series data with a DateTimeIndex.
1209
+ :type data: pd.DataFrame
1210
+ :param freq: Desired frequency for resampling (e.g., pd.Timedelta("10min")).
1211
+ :type freq: pd.Timedelta
+ :param current_freq: Current frequency of the data (e.g., pd.Timedelta("30min")).
+ :type current_freq: pd.Timedelta
1212
+ :return: Resampled data at the specified frequency.
1213
+ :rtype: pd.DataFrame
1214
+ """
1215
+ if freq > current_freq:
1216
+ # Downsampling
1217
+ # Use 'mean' to aggregate or choose other options ('sum', 'max', etc.)
1218
+ resampled_data = data.resample(freq).mean()
1219
+ elif freq < current_freq:
1220
+ # Upsampling
1221
+ # Use 'asfreq' to create empty slots, then interpolate
1222
+ resampled_data = data.resample(freq).asfreq()
1223
+ resampled_data = resampled_data.interpolate(method="time")
1224
+ else:
1225
+ # No resampling needed
1226
+ resampled_data = data.copy()
1227
+ return resampled_data
1228
+
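Both resampling directions on a toy series (values chosen so the means are easy to verify):

    import pandas as pd
    from emhass.forecast import Forecast

    idx = pd.date_range("2021-04-29", periods=4, freq="30min", tz="UTC")
    df = pd.DataFrame({"load": [100.0, 200.0, 300.0, 400.0]}, index=idx)
    down = Forecast.resample_data(df, pd.Timedelta("1h"), pd.Timedelta("30min"))
    up = Forecast.resample_data(df, pd.Timedelta("15min"), pd.Timedelta("30min"))
    print(down["load"].tolist())  # [150.0, 350.0] (mean of each hour)
    print(len(up))                # 7 points at 15-min steps, time-interpolated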
1229
+ @staticmethod
1230
+ def get_typical_load_forecast(data, forecast_date):
1231
+ r"""
1232
+ Forecast the load profile for the next day based on historic data.
1233
+
1234
+ :param data: A DataFrame with a DateTimeIndex containing the historic load data.
1235
+ Must include a 'load' column.
1236
+ :type data: pd.DataFrame
1237
+ :param forecast_date: The date for which the forecast will be generated.
1238
+ :type forecast_date: pd.Timestamp
1239
+ :return: A Series with the forecasted load profile for the next day and a list of days used
1240
+ to calculate the forecast.
1241
+ :rtype: tuple (pd.Series, list)
1242
+ """
1243
+ # Ensure the 'load' column exists
1244
+ if "load" not in data.columns:
1245
+ raise ValueError("Data must have a 'load' column.")
1246
+ # Filter historic data for the same month and day of the week
1247
+ month = forecast_date.month
1248
+ day_of_week = forecast_date.dayofweek
1249
+ historic_data = data[
1250
+ (data.index.month == month) & (data.index.dayofweek == day_of_week)
1251
+ ]
1252
+ used_days = np.unique(historic_data.index.date)
1253
+ # Align all historic data to the forecast day
1254
+ aligned_data = []
1255
+ for day in used_days:
1256
+ daily_data = data[data.index.date == pd.Timestamp(day).date()]
1257
+ aligned_daily_data = daily_data.copy()
1258
+ aligned_daily_data.index = aligned_daily_data.index.map(
1259
+ lambda x: x.replace(
1260
+ year=forecast_date.year,
1261
+ month=forecast_date.month,
1262
+ day=forecast_date.day,
1263
+ )
1264
+ )
1265
+ aligned_data.append(aligned_daily_data)
1266
+ # Combine all aligned historic data into a single DataFrame
1267
+ combined_data = pd.concat(aligned_data)
1268
+ # Compute the mean load for each timestamp
1269
+ forecast = combined_data.groupby(combined_data.index).mean()
1270
+ return forecast, used_days
1271
+
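get_typical_load_forecast averages every same-month, same-weekday day in the history, aligned onto the target date; a sketch on synthetic data (2021-04-29 is a Thursday, so the four April Thursdays are used):

    import numpy as np
    import pandas as pd
    from emhass.forecast import Forecast

    idx = pd.date_range("2021-04-01", "2021-04-28 23:30", freq="30min", tz="UTC")
    rng = np.random.default_rng(0)
    hist = pd.DataFrame({"load": rng.uniform(100, 1000, len(idx))}, index=idx)
    profile, used_days = Forecast.get_typical_load_forecast(
        hist, pd.Timestamp("2021-04-29", tz="UTC")
    )
    print(len(used_days))          # 4 (April 1, 8, 15 and 22)
    print(profile["load"].head())  # mean profile indexed on 2021-04-29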
1272
+ def get_load_forecast(
1273
+ self,
1274
+ days_min_load_forecast: int | None = 3,
1275
+ method: str | None = "typical",
1276
+ csv_path: str | None = "data_load_forecast.csv",
1277
+ set_mix_forecast: bool | None = False,
1278
+ df_now: pd.DataFrame | None = pd.DataFrame(),
1279
+ use_last_window: bool | None = True,
1280
+ mlf: MLForecaster | None = None,
1281
+ debug: bool | None = False,
1282
+ ) -> pd.Series:
1283
+ r"""
1284
+ Get and generate the load forecast data.
1285
+
1286
+ :param days_min_load_forecast: The number of last days to retrieve that \
1287
+ will be used to generate a naive forecast, defaults to 3
1288
+ :type days_min_load_forecast: int, optional
1289
+ :param method: The method to be used to generate the load forecast, the options \
1290
+ are 'typical' for a typical household load consumption curve, \
1291
+ 'naive' for a persistence model, 'mlforecaster' for using a custom \
1292
+ previously fitted machine learning model, 'csv' to read the forecast from \
1293
+ a CSV file and 'list' to use data directly passed at runtime as a list of \
1294
+ values. Defaults to 'typical'.
1295
+ :type method: str, optional
1296
+ :param csv_path: The path to the CSV file used when method = 'csv', \
1297
+ defaults to "/data/data_load_forecast.csv"
1298
+ :type csv_path: str, optional
1299
+ :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
1300
+ :type set_mix_forecast: Bool, optional
1301
+ :param df_now: The DataFrame containing the now/current data.
1302
+ :type df_now: pd.DataFrame, optional
1303
+ :param use_last_window: True if the 'last_window' option should be used for the \
1304
+ custom machine learning forecast model. The 'last_window=True' means that the data \
1305
+ that will be used to generate the new forecast will be freshly retrieved from \
1306
+ Home Assistant. This data is needed because the forecast model is an auto-regressive \
1307
+ model with lags. If 'False' then the data using during the model train is used.
1308
+ :type use_last_window: Bool, optional
1309
+ :param mlf: The 'mlforecaster' object previously trained. This is mainly used for debug \
1310
+ and unit testing. In production the actual model will be read from a saved pickle file.
1311
+ :type mlf: mlforecaster, optional
1312
+ :param debug: If True, use the passed mlf object instead of loading a saved model, useful for testing.
1313
+ :type debug: Bool, optional
1314
+ :return: The Series containing the electrical load power in Watts
1315
+ :rtype: pd.Series
1316
+
1317
+ """
1318
+ csv_path = self.emhass_conf["data_path"] / csv_path
1319
+
1320
+ if (
1321
+ method == "naive" or method == "mlforecaster"
1322
+ ): # retrieving needed data for these methods
1323
+ self.logger.info(
1324
+ "Retrieving data from hass for load forecast using method = " + method
1325
+ )
1326
+ var_list = [self.var_load]
1327
+ var_replace_zero = None
1328
+ var_interp = [self.var_load]
1329
+ time_zone_load_forecast = None
1330
+ # We will need to retrieve a new set of load data according to the days_min_load_forecast parameter
1331
+ rh = RetrieveHass(
1332
+ self.retrieve_hass_conf["hass_url"],
1333
+ self.retrieve_hass_conf["long_lived_token"],
1334
+ self.freq,
1335
+ time_zone_load_forecast,
1336
+ self.params,
1337
+ self.emhass_conf,
1338
+ self.logger,
1339
+ )
1340
+ if self.get_data_from_file:
1341
+ filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
1342
+ with open(filename_path, "rb") as inp:
1343
+ rh.df_final, days_list, var_list, rh.ha_config = pickle.load(inp)
1344
+ self.var_load = var_list[0]
1345
+ self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = (
1346
+ self.var_load
1347
+ )
1348
+ var_interp = [var_list[0]]
1349
+ self.var_list = [var_list[0]]
1350
+ rh.var_list = self.var_list
1351
+ self.var_load_new = self.var_load + "_positive"
1352
+ else:
1353
+ days_list = get_days_list(days_min_load_forecast)
1354
+ if not rh.get_data(days_list, var_list):
1355
+ return False
1356
+ if not rh.prepare_data(
1357
+ self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
1358
+ load_negative=self.retrieve_hass_conf["load_negative"],
1359
+ set_zero_min=self.retrieve_hass_conf["set_zero_min"],
1360
+ var_replace_zero=var_replace_zero,
1361
+ var_interp=var_interp,
1362
+ ):
1363
+ return False
1364
+ df = rh.df_final.copy()[[self.var_load_new]]
1365
+ if (
1366
+ method == "typical"
1367
+ ): # using typical statistical data from a household power consumption
1368
+ # Loading data from history file
1369
+ model_type = "long_train_data"
1370
+ data_path = self.emhass_conf["data_path"] / str(model_type + ".pkl")
1371
+ with open(data_path, "rb") as fid:
1372
+ data, _, _, _ = pickle.load(fid)
1373
+ # Ensure the data index is timezone-aware and matches self.forecast_dates' timezone
1374
+ data.index = (
1375
+ data.index.tz_localize(self.forecast_dates.tz)
1376
+ if data.index.tz is None
1377
+ else data.index.tz_convert(self.forecast_dates.tz)
1378
+ )
1379
+ # Resample the data if needed
1380
+ data = data[[self.var_load]]
1381
+ current_freq = pd.Timedelta("30min")
1382
+ if self.freq != current_freq:
1383
+ data = Forecast.resample_data(data, self.freq, current_freq)
1384
+ # Generate forecast
1385
+ data_list = []
1386
+ dates_list = np.unique(self.forecast_dates.date).tolist()
1387
+ forecast = pd.DataFrame()
1388
+ for date in dates_list:
1389
+ forecast_date = pd.Timestamp(date)
1390
+ data.columns = ["load"]
1391
+ forecast_tmp, used_days = Forecast.get_typical_load_forecast(
1392
+ data, forecast_date
1393
+ )
1394
+ self.logger.debug(
1395
+ f"Using {len(used_days)} days of data to generate the forecast."
1396
+ )
1397
+ # Normalize the forecast
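+ # Scaling by maximum_power_from_grid/9000 assumes the bundled typical
+ # household data corresponds to a 9 kVA (9000 W) reference subscription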
1398
+ forecast_tmp = (
1399
+ forecast_tmp * self.plant_conf["maximum_power_from_grid"] / 9000
1400
+ )
1401
+ data_list.extend(forecast_tmp.values.ravel().tolist())
1402
+ if len(forecast) == 0:
1403
+ forecast = forecast_tmp
1404
+ else:
1405
+ forecast = pd.concat([forecast, forecast_tmp], axis=0)
1406
+ forecast_out = forecast.loc[
1407
+ forecast.index.intersection(self.forecast_dates)
1408
+ ]
1409
+ forecast_out.index = self.forecast_dates
1410
+ forecast_out.index.name = "ts"
1411
+ forecast_out = forecast_out.rename(columns={"load": "yhat"})
1412
+ elif method == "naive": # using a naive approach
1413
+ mask_forecast_out = (
1414
+ df.index > days_list[-1] - self.optim_conf["delta_forecast_daily"]
1415
+ )
1416
+ forecast_out = df.copy().loc[mask_forecast_out]
1417
+ forecast_out = forecast_out.rename(columns={self.var_load_new: "yhat"})
1418
+ # Force forecast_out length to avoid mismatches
1419
+ forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1420
+ forecast_out.index = self.forecast_dates
1421
+ elif (
1422
+ method == "mlforecaster"
1423
+ ): # using a custom forecast model with machine learning
1424
+ # Load model
1425
+ model_type = self.params["passed_data"]["model_type"]
1426
+ filename = model_type + "_mlf.pkl"
1427
+ filename_path = self.emhass_conf["data_path"] / filename
1428
+ if not debug:
1429
+ if filename_path.is_file():
1430
+ with open(filename_path, "rb") as inp:
1431
+ mlf = pickle.load(inp)
1432
+ else:
1433
+ self.logger.error(
1434
+ "The ML forecaster file was not found, please run a model fit method before this predict method"
1435
+ )
1436
+ return False
1437
+ # Make predictions
1438
+ if use_last_window:
1439
+ data_last_window = copy.deepcopy(df)
1440
+ data_last_window = data_last_window.rename(
1441
+ columns={self.var_load_new: self.var_load}
1442
+ )
1443
+ else:
1444
+ data_last_window = None
1445
+ forecast_out = mlf.predict(data_last_window)
1446
+ # Force forecast length to avoid mismatches
1447
+ self.logger.debug(
1448
+ "Number of ML predict forcast data generated (lags_opt): "
1449
+ + str(len(forecast_out.index))
1450
+ )
1451
+ self.logger.debug(
1452
+ "Number of forcast dates obtained: " + str(len(self.forecast_dates))
1453
+ )
1454
+ if len(self.forecast_dates) < len(forecast_out.index):
1455
+ forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1456
+ # To be removed once bug is fixed
1457
+ elif len(self.forecast_dates) > len(forecast_out.index):
1458
+ self.logger.error(
1459
+ "Unable to obtain: "
1460
+ + str(len(self.forecast_dates))
1461
+ + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
1462
+ )
1463
+ return False
1464
+ # Define DataFrame
1465
+ data_dict = {
1466
+ "ts": self.forecast_dates,
1467
+ "yhat": forecast_out.values.tolist(),
1468
+ }
1469
+ data = pd.DataFrame.from_dict(data_dict)
1470
+ # Define index
1471
+ data.set_index("ts", inplace=True)
1472
+ forecast_out = data.copy().loc[self.forecast_dates]
1473
+ elif method == "csv": # reading from a csv file
1474
+ load_csv_file_path = csv_path
1475
+ df_csv = pd.read_csv(load_csv_file_path, header=None, names=["ts", "yhat"])
1476
+ if len(df_csv) < len(self.forecast_dates):
1477
+ self.logger.error("Passed data from CSV is not long enough")
1478
+ else:
1479
+ # Ensure correct length
1480
+ df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
1481
+ # Define index
1482
+ df_csv.index = self.forecast_dates
1483
+ df_csv.drop(["ts"], axis=1, inplace=True)
1484
+ forecast_out = df_csv.copy().loc[self.forecast_dates]
1485
+ elif method == "list": # reading a list of values
1486
+ # Loading data from passed list
1487
+ data_list = self.params["passed_data"]["load_power_forecast"]
1488
+ # Check if the passed data has the correct length
1489
+ if (
1490
+ len(data_list) < len(self.forecast_dates)
1491
+ and self.params["passed_data"]["prediction_horizon"] is None
1492
+ ):
1493
+ self.logger.error("Passed data from passed list is not long enough")
1494
+ return False
1495
+ else:
1496
+ # Ensure correct length
1497
+ data_list = data_list[0 : len(self.forecast_dates)]
1498
+ # Define DataFrame
1499
+ data_dict = {"ts": self.forecast_dates, "yhat": data_list}
1500
+ data = pd.DataFrame.from_dict(data_dict)
1501
+ # Define index
1502
+ data.set_index("ts", inplace=True)
1503
+ forecast_out = data.copy().loc[self.forecast_dates]
1504
+ else:
1505
+ self.logger.error("Passed method is not valid")
1506
+ return False
1507
+ P_Load_forecast = copy.deepcopy(forecast_out["yhat"])
1508
+ if set_mix_forecast:
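+ # Blend the latest measured value from df_now with the raw forecast,
+ # using the passed 'alpha' and 'beta' weighting coefficients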
1509
+ P_Load_forecast = Forecast.get_mix_forecast(
1510
+ df_now,
1511
+ P_Load_forecast,
1512
+ self.params["passed_data"]["alpha"],
1513
+ self.params["passed_data"]["beta"],
1514
+ self.var_load_new,
1515
+ )
1516
+ self.logger.debug("get_load_forecast returning:\n%s", P_Load_forecast)
1517
+ return P_Load_forecast
1518
+
1519
+ def get_load_cost_forecast(
1520
+ self,
1521
+ df_final: pd.DataFrame,
1522
+ method: str | None = "hp_hc_periods",
1523
+ csv_path: str | None = "data_load_cost_forecast.csv",
1524
+ list_and_perfect: bool | None = False,
1525
+ ) -> pd.DataFrame:
1526
+ r"""
1527
+ Get the unit cost for the load consumption based on multiple tariff \
1528
+ periods. This is the cost of the energy from the utility in a vector \
1529
+ sampled at the fixed freq value.
1530
+
1531
+ :param df_final: The DataFrame containing the input data.
1532
+ :type df_final: pd.DataFrame
1533
+ :param method: The method to be used to generate load cost forecast, \
1534
+ the options are 'hp_hc_periods' for peak and non-peak hours contracts\
1535
+ and 'csv' to load a CSV file, defaults to 'hp_hc_periods'
1536
+ :type method: str, optional
1537
+ :param csv_path: The path to the CSV file used when method = 'csv', \
1538
+ defaults to "data_load_cost_forecast.csv"
1539
+ :type csv_path: str, optional
1540
+ :return: The input DataFrame with one additional column appended containing
1541
+ the load cost for each time observation.
1542
+ :rtype: pd.DataFrame
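+
+ Example, a minimal sketch assuming 'fcst' is a configured instance of this \
+ class and 'df_input' the optimization input DataFrame (names illustrative)::
+
+ df_input = fcst.get_load_cost_forecast(df_input, method="hp_hc_periods")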
1543
+
1544
+ """
1545
+ csv_path = self.emhass_conf["data_path"] / csv_path
1546
+ if method == "hp_hc_periods":
1547
+ df_final[self.var_load_cost] = self.optim_conf["load_offpeak_hours_cost"]
1548
+ list_df_hp = []
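+ # Each peak period is expected as [{"start": "HH:MM"}, {"end": "HH:MM"}],
+ # hence the period_hp[0]["start"] / period_hp[1]["end"] accesses below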
1549
+ for _key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
1550
+ list_df_hp.append(
1551
+ df_final[self.var_load_cost].between_time(
1552
+ period_hp[0]["start"], period_hp[1]["end"]
1553
+ )
1554
+ )
1555
+ for df_hp in list_df_hp:
1556
+ df_final.loc[df_hp.index, self.var_load_cost] = self.optim_conf[
1557
+ "load_peak_hours_cost"
1558
+ ]
1559
+ elif method == "csv":
1560
+ forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1561
+ forecast_out = self.get_forecast_out_from_csv_or_list(
1562
+ df_final, forecast_dates_csv, csv_path
1563
+ )
1564
+ # Ensure correct length
1565
+ if not list_and_perfect:
1566
+ forecast_out = forecast_out[0 : len(self.forecast_dates)]
1567
+ df_final = df_final[0 : len(self.forecast_dates)].copy()
1568
+ # Convert to Series if needed and align index
1569
+ if not isinstance(forecast_out, pd.Series):
1570
+ forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
1571
+ df_final.loc[:, self.var_load_cost] = forecast_out
1572
+ elif method == "list": # reading a list of values
1573
+ # Loading data from passed list
1574
+ data_list = self.params["passed_data"]["load_cost_forecast"]
1575
+ # Check if the passed data has the correct length
1576
+ if (
1577
+ len(data_list) < len(self.forecast_dates)
1578
+ and self.params["passed_data"]["prediction_horizon"] is None
1579
+ ):
1580
+ self.logger.error("Passed data from passed list is not long enough")
1581
+ return False
1582
+ else:
1583
+ # Ensure correct length
1584
+ data_list = data_list[0 : len(self.forecast_dates)]
1585
+ if not list_and_perfect:
1586
+ df_final = df_final.iloc[0 : len(self.forecast_dates)]
1587
+ # Define the correct dates
1588
+ forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1589
+ forecast_out = self.get_forecast_out_from_csv_or_list(
1590
+ df_final,
1591
+ forecast_dates_csv,
1592
+ None,
1593
+ data_list=data_list,
1594
+ list_and_perfect=list_and_perfect,
1595
+ )
1596
+ df_final = df_final.copy()
1597
+ df_final[self.var_load_cost] = forecast_out
1598
+ else:
1599
+ self.logger.error("Passed method is not valid")
1600
+ return False
1601
+ self.logger.debug("get_load_cost_forecast returning:\n%s", df_final)
1602
+ return df_final
1603
+
1604
+ def get_prod_price_forecast(
1605
+ self,
1606
+ df_final: pd.DataFrame,
1607
+ method: str | None = "constant",
1608
+ csv_path: str | None = "data_prod_price_forecast.csv",
1609
+ list_and_perfect: bool | None = False,
1610
+ ) -> pd.DataFrame:
1611
+ r"""
1612
+ Get the unit power production price for the energy injected to the grid.\
1613
+ This is the price of the energy injected to the utility in a vector \
1614
+ sampled at the fixed freq value.
1615
+
1616
+ :param df_final: The DataFrame containing all the input data retrieved
1617
+ from hass
1618
+ :type df_final: pd.DataFrame
1619
+ :param method: The method to be used to generate the production price forecast, \
1620
+ the options are 'constant' for a fixed constant value and 'csv'\
1621
+ to load a CSV file, defaults to 'constant'
1622
+ :type method: str, optional
1623
+ :param csv_path: The path to the CSV file used when method = 'csv', \
1624
+ defaults to "data_prod_price_forecast.csv"
1625
+ :type csv_path: str, optional
1626
+ :return: The input DataFrame with one additional column appended containing
1627
+ the power production price for each time observation.
1628
+ :rtype: pd.DataFrame
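+
+ Example, a minimal sketch reusing the illustrative 'fcst' and 'df_input' names::
+
+ df_input = fcst.get_prod_price_forecast(df_input, method="constant")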
1629
+
1630
+ """
1631
+ csv_path = self.emhass_conf["data_path"] / csv_path
1632
+ if method == "constant":
1633
+ df_final[self.var_prod_price] = self.optim_conf[
1634
+ "photovoltaic_production_sell_price"
1635
+ ]
1636
+ elif method == "csv":
1637
+ forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1638
+ forecast_out = self.get_forecast_out_from_csv_or_list(
1639
+ df_final, forecast_dates_csv, csv_path
1640
+ )
1641
+ # Ensure correct length
1642
+ if not list_and_perfect:
1643
+ forecast_out = forecast_out[0 : len(self.forecast_dates)]
1644
+ df_final = df_final[0 : len(self.forecast_dates)].copy()
1645
+ # Convert to Series if needed and align index
1646
+ if not isinstance(forecast_out, pd.Series):
1647
+ forecast_out = pd.Series(np.ravel(forecast_out), index=df_final.index)
1648
+ df_final.loc[:, self.var_prod_price] = forecast_out
1649
+ elif method == "list": # reading a list of values
1650
+ # Loading data from passed list
1651
+ data_list = self.params["passed_data"]["prod_price_forecast"]
1652
+ # Check if the passed data has the correct length
1653
+ if (
1654
+ len(data_list) < len(self.forecast_dates)
1655
+ and self.params["passed_data"]["prediction_horizon"] is None
1656
+ ):
1657
+ self.logger.error("Passed data from passed list is not long enough")
1658
+ return False
1659
+ else:
1660
+ # Ensure correct length
1661
+ data_list = data_list[0 : len(self.forecast_dates)]
1662
+ if not list_and_perfect:
1663
+ df_final = df_final.iloc[0 : len(self.forecast_dates)]
1664
+ # Define the correct dates
1665
+ forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1666
+ forecast_out = self.get_forecast_out_from_csv_or_list(
1667
+ df_final,
1668
+ forecast_dates_csv,
1669
+ None,
1670
+ data_list=data_list,
1671
+ list_and_perfect=list_and_perfect,
1672
+ )
1673
+ df_final = df_final.copy()
1674
+ df_final[self.var_prod_price] = forecast_out
1675
+ else:
1676
+ self.logger.error("Passed method is not valid")
1677
+ return False
1678
+ self.logger.debug("get_prod_price_forecast returning:\n%s", df_final)
1679
+ return df_final
1680
+
1681
+ def get_cached_forecast_data(self, w_forecast_cache_path) -> pd.DataFrame:
1682
+ r"""
1683
+ Get cached weather forecast data from file.
1684
+
1685
+ :param w_forecast_cache_path: The path to the cache file.
1686
+ :type w_forecast_cache_path: Path
1687
+ :return: The DataFrame containing the forecasted data
1688
+ :rtype: pd.DataFrame
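+
+ Example, a minimal sketch with an illustrative cache file name::
+
+ cache_path = self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
+ data = self.get_cached_forecast_data(cache_path)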
1689
+
1690
+ """
1691
+ with open(w_forecast_cache_path, "rb") as file:
1692
+ data = cPickle.load(file)
1693
+ if not isinstance(data, pd.DataFrame) or len(data) < len(
1694
+ self.forecast_dates
1695
+ ):
1696
+ self.logger.error(
1697
+ "There has been a error obtaining cached forecast data."
1698
+ )
1699
+ self.logger.error(
1700
+ "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from forecast API and cache."
1701
+ )
1702
+ self.logger.warning(
1703
+ "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
1704
+ )
1705
+ os.remove(w_forecast_cache_path)
1706
+ return False
1707
+ # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast DataFrame size to appropriate length)
1708
+ if (
1709
+ self.forecast_dates[0] in data.index
1710
+ and self.forecast_dates[-1] in data.index
1711
+ ):
1712
+ data = data.loc[self.forecast_dates[0] : self.forecast_dates[-1]]
1713
+ self.logger.info(
1714
+ "Retrieved forecast data from the previously saved cache."
1715
+ )
1716
+ else:
1717
+ self.logger.error(
1718
+ "Unable to obtain cached forecast data within the requested timeframe range."
1719
+ )
1720
+ self.logger.error(
1721
+ "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from forecast API and cache."
1722
+ )
1723
+ self.logger.warning(
1724
+ "Removing old forecast cache file. Next optimization will pull data from forecast API, unless 'weather_forecast_cache_only': true"
1725
+ )
1726
+ os.remove(w_forecast_cache_path)
1727
+ return False
1728
+ return data
1729
+
1730
+ def set_cached_forecast_data(self, w_forecast_cache_path, data) -> pd.DataFrame:
1731
+ r"""
1732
+ Save generated weather forecast data to file.
1733
+ Trim data to match the originally requested forecast dates.
1734
+
1735
+ :param w_forecast_cache_path: The path to the cache file.
1736
+ :type w_forecast_cache_path: Path
1737
+ :param data: The DataFrame containing the forecasted data.
1738
+ :type data: pd.DataFrame
1739
+ :return: The DataFrame containing the forecasted data
1740
+ :rtype: pd.DataFrame
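+
+ Example, a minimal sketch following on from the illustrative 'cache_path' above::
+
+ data = self.set_cached_forecast_data(cache_path, data)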
1741
+
1742
+ """
1743
+ with open(w_forecast_cache_path, "wb") as file:
1744
+ cPickle.dump(data, file)
1745
+ if not os.path.isfile(w_forecast_cache_path):
1746
+ self.logger.warning("forecast data could not be saved to file.")
1747
+ else:
1748
+ self.logger.info(
1749
+ "Saved the forecast results to cache, for later reference."
1750
+ )
1751
+
1752
+ # Trim cached data to match requested dates
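+ # The UTC round-trip and rounding below guard against DST transitions
+ # (ambiguous or nonexistent local timestamps)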
1753
+ end_forecast = (
1754
+ self.start_forecast + self.optim_conf["delta_forecast_daily"]
1755
+ ).replace(microsecond=0)
1756
+ forecast_dates = (
1757
+ pd.date_range(
1758
+ start=self.start_forecast,
1759
+ end=end_forecast - self.freq,
1760
+ freq=self.freq,
1761
+ tz=self.time_zone,
1762
+ )
1763
+ .tz_convert("utc")
1764
+ .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
1765
+ .tz_convert(self.time_zone)
1766
+ )
1767
+ data = data.loc[forecast_dates[0] : forecast_dates[-1]]
1768
+ return data