emhass 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
emhass/forecast.py DELETED
@@ -1,1348 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- import bz2
5
- import copy
6
- import json
7
- import logging
8
- import os
9
- import pickle
10
- import pickle as cPickle
11
- from datetime import datetime, timedelta
12
- import re
13
- from itertools import zip_longest
14
- from typing import Optional
15
-
16
- import numpy as np
17
- import pandas as pd
18
- import pvlib
19
- from bs4 import BeautifulSoup
20
- from pvlib.irradiance import disc
21
- from pvlib.location import Location
22
- from pvlib.modelchain import ModelChain
23
- from pvlib.pvsystem import PVSystem
24
- from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
25
- from requests import get
26
-
27
- from emhass.machine_learning_forecaster import MLForecaster
28
- from emhass.retrieve_hass import RetrieveHass
29
- from emhass.utils import get_days_list, set_df_index_freq
30
-
31
-
32
- class Forecast(object):
33
- r"""
34
- Generate weather, load and costs forecasts needed as inputs to the optimization.
35
-
36
- In EMHASS we have basically 4 forecasts to deal with:
37
-
38
- - PV power production forecast (internally based on the weather forecast and the
39
- characteristics of your PV plant). This is given in Watts.
40
-
41
- Load power forecast: how much power your house will demand over the next 24h. This
42
- is given in Watts.
43
-
44
- - PV production selling price forecast: at what price are you selling your excess
45
- PV production over the next 24h. This is given in EUR/kWh.
46
-
47
- Load cost forecast: the price of the energy from the grid over the next 24h. This
48
- is given in EUR/kWh.
49
-
50
- There are methods that are generalized to the 4 forecasts needed. For all these
51
- forecasts it is possible to pass the data either as a list of values or by
52
- reading from a CSV file. With these methods it is then possible to use data from
53
- external forecast providers.
54
-
55
- Then there are the methods that are specific to each type of forecast, with the
56
- proposed forecasts treated and generated internally by this EMHASS forecast class.
57
- For the weather forecast a first method (`scrapper`) scrapes the
58
- ClearOutside webpage which proposes detailed forecasts based on Lat/Lon locations.
59
- This method seems stable but, as with any scraping method, it will fail if any changes
60
- are made to the webpage. Another method (`solcast`) uses the Solcast PV
61
- production forecast service. A final method (`solar.forecast`) uses another
62
- external service: Solar.Forecast, for which just the nominal PV peak installed
63
- power should be provided. Search the forecast section on the documentation for examples
64
- on how to implement these different methods.
65
-
66
- The `get_power_from_weather` method is proposed here to convert from irradiance
67
- data to electrical power. The PVLib module is used to model the PV plant.
68
-
69
- The specific methods for the load forecast are a first method (`naive`) that uses
70
- a naive approach, also called persistence. It simply assumes that the forecast for
71
- a future period will be equal to the observed values in a past period. The past
72
- period is controlled using parameter `delta_forecast`. A second method (`mlforecaster`)
73
- uses an internal custom forecasting model using machine learning. There is a section
74
- in the documentation explaining how to use this method.
75
-
76
- .. note:: This custom machine learning model was introduced in v0.4.0. EMHASS \
77
- provides this `mlforecaster` class with `fit`, `predict` and `tune` methods. \
78
- Only the `predict` method is used here to generate new forecasts, but it is \
79
- necessary to previously fit a forecaster model and it is a good idea to \
80
- optimize the model hyperparameters using the `tune` method. See the dedicated \
81
- section in the documentation for more help.
82
-
83
- For the PV production selling price and load cost forecasts the preferred method
84
- is a direct read from a user-provided list of values. The list should be passed
85
- as a runtime parameter during the `curl` to the EMHASS API.
86
-
87
- If reading from a CSV file, it should contain no header and the timestamped data
88
- should have the following format:
89
-
90
- 2021-04-29 00:00:00+00:00,287.07
91
-
92
- 2021-04-29 00:30:00+00:00,274.27
93
-
94
- 2021-04-29 01:00:00+00:00,243.38
95
-
96
- ...
97
-
98
- The data columns in these files will correspond to the data in the units expected
99
- for each forecasting method.
100
-
101
- """
102
-
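- # A minimal sketch (not part of the class API; pandas assumed, file name
- # illustrative) of reading a forecast CSV in the format documented above,
- # with no header and two columns (timestamp, value):
- #
- #     import pandas as pd
- #     df = pd.read_csv("data_load_cost_forecast.csv", header=None, names=["ts", "yhat"])
- #     df["ts"] = pd.to_datetime(df["ts"], utc=True)
- #     df = df.set_index("ts")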
103
- def __init__(
104
- self,
105
- retrieve_hass_conf: dict,
106
- optim_conf: dict,
107
- plant_conf: dict,
108
- params: str,
109
- emhass_conf: dict,
110
- logger: logging.Logger,
111
- opt_time_delta: Optional[int] = 24,
112
- get_data_from_file: Optional[bool] = False,
113
- ) -> None:
114
- """
115
- Define constructor for the forecast class.
116
-
117
- :param retrieve_hass_conf: Dictionary containing the needed configuration
118
- data from the configuration file, specific to retrieve data from HASS
119
- :type retrieve_hass_conf: dict
120
- :param optim_conf: Dictionary containing the needed configuration
121
- data from the configuration file, specific for the optimization task
122
- :type optim_conf: dict
123
- :param plant_conf: Dictionary containing the needed configuration
124
- data from the configuration file, specific for the modeling of the PV plant
125
- :type plant_conf: dict
126
- :param params: Configuration parameters passed from data/options.json
127
- :type params: str
128
- :param emhass_conf: Dictionary containing the needed emhass paths
129
- :type emhass_conf: dict
130
- :param logger: The passed logger object
131
- :type logger: logging object
132
- :param opt_time_delta: The time delta in hours used to generate forecasts,
133
- a value of 24 will generate 24 hours of forecast data, defaults to 24
134
- :type opt_time_delta: int, optional
135
- :param get_data_from_file: Select if data should be retrieved from a
136
- previously saved pickle (useful for testing) or directly from a connection to the
137
- hass database
138
- :type get_data_from_file: bool, optional
139
-
140
- """
141
- self.retrieve_hass_conf = retrieve_hass_conf
142
- self.optim_conf = optim_conf
143
- self.plant_conf = plant_conf
144
- self.freq = self.retrieve_hass_conf["optimization_time_step"]
145
- self.time_zone = self.retrieve_hass_conf["time_zone"]
146
- self.method_ts_round = self.retrieve_hass_conf["method_ts_round"]
147
- self.timeStep = self.freq.seconds / 3600 # in hours
148
- self.time_delta = pd.to_timedelta(opt_time_delta, "hours")
149
- self.var_PV = self.retrieve_hass_conf["sensor_power_photovoltaics"]
150
- self.var_load = self.retrieve_hass_conf["sensor_power_load_no_var_loads"]
151
- self.var_load_new = self.var_load + "_positive"
152
- self.lat = self.retrieve_hass_conf["Latitude"]
153
- self.lon = self.retrieve_hass_conf["Longitude"]
154
- self.emhass_conf = emhass_conf
155
- self.logger = logger
156
- self.get_data_from_file = get_data_from_file
157
- self.var_load_cost = "unit_load_cost"
158
- self.var_prod_price = "unit_prod_price"
159
- if (params is None) or (params == "null"):
160
- self.params = {}
161
- elif isinstance(params, dict):
162
- self.params = params
163
- else:
164
- self.params = json.loads(params)
165
- if self.method_ts_round == "nearest":
166
- self.start_forecast = pd.Timestamp(
167
- datetime.now(), tz=self.time_zone
168
- ).replace(microsecond=0)
169
- elif self.method_ts_round == "first":
170
- self.start_forecast = (
171
- pd.Timestamp(datetime.now(), tz=self.time_zone)
172
- .replace(microsecond=0)
173
- .floor(freq=self.freq)
174
- )
175
- elif self.method_ts_round == "last":
176
- self.start_forecast = (
177
- pd.Timestamp(datetime.now(), tz=self.time_zone)
178
- .replace(microsecond=0)
179
- .ceil(freq=self.freq)
180
- )
181
- else:
182
- self.logger.error("Wrong method_ts_round passed parameter")
183
- self.end_forecast = (
184
- self.start_forecast + self.optim_conf["delta_forecast_daily"]
185
- ).replace(microsecond=0)
186
- self.forecast_dates = (
187
- pd.date_range(
188
- start=self.start_forecast,
189
- end=self.end_forecast - self.freq,
190
- freq=self.freq,
191
- tz=self.time_zone,
192
- )
193
- .tz_convert("utc")
194
- .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
195
- .tz_convert(self.time_zone)
196
- )
197
- if params is not None:
198
- if "prediction_horizon" in list(self.params["passed_data"].keys()):
199
- if self.params["passed_data"]["prediction_horizon"] is not None:
200
- self.forecast_dates = self.forecast_dates[
201
- 0 : self.params["passed_data"]["prediction_horizon"]
202
- ]
203
-
204
- def get_weather_forecast(
205
- self,
206
- method: Optional[str] = "scrapper",
207
- csv_path: Optional[str] = "data_weather_forecast.csv",
208
- ) -> pd.DataFrame:
209
- r"""
210
- Get and generate weather forecast data.
211
-
212
- :param method: The desired method, options are 'scrapper', 'csv', 'list', 'solcast' and \
213
- 'solar.forecast'. Defaults to 'scrapper'.
214
- :type method: str, optional
215
- :param csv_path: The path to a CSV file used when method = 'csv', \
- defaults to "data_weather_forecast.csv"
- :type csv_path: str, optional
- :return: The DataFrame containing the forecasted data
216
- :rtype: pd.DataFrame
217
-
218
- """
219
- csv_path = self.emhass_conf["data_path"] / csv_path
220
- w_forecast_cache_path = os.path.abspath(
221
- self.emhass_conf["data_path"] / "weather_forecast_data.pkl"
222
- )
223
-
224
- self.logger.info("Retrieving weather forecast data using method = " + method)
225
- self.weather_forecast_method = (
226
- method # Saving this attribute for later use to identify csv method usage
227
- )
228
- if method == "scrapper":
229
- freq_scrap = pd.to_timedelta(
230
- 60, "minutes"
231
- ) # The scraping time step is 60 min on ClearOutside
232
- forecast_dates_scrap = (
233
- pd.date_range(
234
- start=self.start_forecast,
235
- end=self.end_forecast - freq_scrap,
236
- freq=freq_scrap,
237
- tz=self.time_zone,
238
- )
239
- .tz_convert("utc")
240
- .round(freq_scrap, ambiguous="infer", nonexistent="shift_forward")
241
- .tz_convert(self.time_zone)
242
- )
243
- # Using the clearoutside webpage
244
- response = get(
245
- "https://clearoutside.com/forecast/"
246
- + str(round(self.lat, 2))
247
- + "/"
248
- + str(round(self.lon, 2))
249
- + "?desktop=true"
250
- )
251
- """import bz2 # Uncomment to save a serialized data for tests
252
- import _pickle as cPickle
253
- with bz2.BZ2File("data/test_response_scrapper_get_method.pbz2", "w") as f:
254
- cPickle.dump(response.content, f)"""
255
- soup = BeautifulSoup(response.content, "html.parser")
256
- table = soup.find_all(id="day_0")[0]
257
- list_names = table.find_all(class_="fc_detail_label")
258
- list_tables = table.find_all("ul")[1:]
259
- selected_cols = [0, 1, 2, 3, 10, 12, 15] # Selected variables
260
- col_names = [list_names[i].get_text() for i in selected_cols]
261
- list_tables = [list_tables[i] for i in selected_cols]
262
- # Building the raw DF container
263
- raw_data = pd.DataFrame(
264
- index=range(len(forecast_dates_scrap)), columns=col_names, dtype=float
265
- )
266
- for count_col, col in enumerate(col_names):
267
- list_rows = list_tables[count_col].find_all("li")
268
- for count_row, row in enumerate(list_rows):
269
- raw_data.loc[count_row, col] = float(row.get_text())
270
- # Treating index
271
- raw_data.set_index(forecast_dates_scrap, inplace=True)
272
- raw_data = raw_data[~raw_data.index.duplicated(keep="first")]
273
- raw_data = raw_data.reindex(self.forecast_dates)
274
- raw_data.interpolate(
275
- method="linear",
276
- axis=0,
277
- limit=None,
278
- limit_direction="both",
279
- inplace=True,
280
- )
281
- # Converting the cloud cover into Global Horizontal Irradiance with a PVLib method
282
- ghi_est = self.cloud_cover_to_irradiance(
283
- raw_data["Total Clouds (% Sky Obscured)"]
284
- )
285
- data = ghi_est
286
- data["temp_air"] = raw_data["Temperature (°C)"]
287
- data["wind_speed"] = (
288
- raw_data["Wind Speed/Direction (mph)"] * 1.60934
289
- ) # conversion to km/h
290
- data["relative_humidity"] = raw_data["Relative Humidity (%)"]
291
- data["precipitable_water"] = pvlib.atmosphere.gueymard94_pw(
292
- data["temp_air"], data["relative_humidity"]
293
- )
294
- elif method == "solcast": # using Solcast API
295
- # Check if weather_forecast_cache is true or if forecast_data file does not exist
296
- if not os.path.isfile(w_forecast_cache_path):
297
- # Check if weather_forecast_cache_only is true, if so produce error for not finding cache file
298
- if not self.params["passed_data"].get(
299
- "weather_forecast_cache_only", False
300
- ):
301
- # Retrieve data from the Solcast API
302
- if "solcast_api_key" not in self.retrieve_hass_conf:
303
- self.logger.error(
304
- "The solcast_api_key parameter was not defined"
305
- )
306
- return False
307
- if "solcast_rooftop_id" not in self.retrieve_hass_conf:
308
- self.logger.error(
309
- "The solcast_rooftop_id parameter was not defined"
310
- )
311
- return False
312
- headers = {
313
- "User-Agent": "EMHASS",
314
- "Authorization": "Bearer "
315
- + self.retrieve_hass_conf["solcast_api_key"],
316
- "content-type": "application/json",
317
- }
318
- days_solcast = int(
319
- len(self.forecast_dates) * self.freq.seconds / 3600
320
- )
321
- # If weather_forecast_cache, set request days as twice as long to avoid length issues (add a buffer)
322
- if self.params["passed_data"].get("weather_forecast_cache", False):
323
- days_solcast = min((days_solcast * 2), 336)
324
- # Split `roof_id` into a list (support comma or space as separator)
325
- roof_ids = re.split(r"[,\s]+", self.retrieve_hass_conf["solcast_rooftop_id"].strip())
326
- # Summary list of data
327
- total_data_list = [0] * len(self.forecast_dates)
328
- # Iteration over individual `roof_id`
329
- for roof_id in roof_ids:
330
- url = (
331
- f"https://api.solcast.com.au/rooftop_sites/{roof_id}/forecasts?hours={days_solcast}"
332
- )
333
- response = get(url, headers=headers)
334
- """import bz2 # Uncomment to save a serialized data for tests
335
- import _pickle as cPickle
336
- with bz2.BZ2File("data/test_response_solcast_get_method.pbz2", "w") as f:
337
- cPickle.dump(response, f)"""
338
- # Verify the request passed
339
- if int(response.status_code) == 200:
340
- data = response.json()
341
- elif (
342
- int(response.status_code) == 402
343
- or int(response.status_code) == 429
344
- ):
345
- self.logger.error(
346
- "Solcast error: May have exceeded your subscription limit."
347
- )
348
- return False
349
- elif (
350
- int(response.status_code) >= 400
351
- or (int(response.status_code) >= 202 and int(response.status_code) <= 299)
352
- ):
353
- self.logger.error(
354
- "Solcast error: There was a issue with the solcast request, check solcast API key and rooftop ID."
355
- )
356
- self.logger.error(
357
- "Solcast error: Check that your subscription is valid and your network can connect to Solcast."
358
- )
359
- return False
360
- # Data processing for the current `roof_id`
361
- data_list = []
362
- for elm in data["forecasts"]:
363
- data_list.append(
364
- elm["pv_estimate"] * 1000
365
- ) # Converting kW to W
366
- # Check if the retrieved data has the correct length
367
- if len(data_list) < len(self.forecast_dates):
368
- self.logger.error(
369
- "Not enough data retrieved from Solcast service, try increasing the time step or use MPC."
370
- )
371
- return False
372
- # Adding the data of the current `roof_id` to the total
373
- total_data_list = [
374
- total + current for total, current in zip_longest(total_data_list, data_list, fillvalue=0)
375
- ]
376
- # If runtime weather_forecast_cache is true save forecast result to file as cache
377
- if self.params["passed_data"].get(
378
- "weather_forecast_cache", False
379
- ):
380
- # Add x2 forecast periods for cached results. This adds an extra delta_forecast amount of days as a buffer
381
- cached_forecast_dates = self.forecast_dates.union(
382
- pd.date_range(
383
- self.forecast_dates[-1],
384
- periods=(len(self.forecast_dates) + 1),
385
- freq=self.freq,
386
- )[1:]
387
- )
388
- cache_data_list = total_data_list[0 : len(cached_forecast_dates)]
389
- cache_data_dict = {
390
- "ts": cached_forecast_dates,
391
- "yhat": cache_data_list,
392
- }
393
- data_cache = pd.DataFrame.from_dict(cache_data_dict)
394
- data_cache.set_index("ts", inplace=True)
395
- with open(w_forecast_cache_path, "wb") as file:
396
- cPickle.dump(data_cache, file)
397
- if not os.path.isfile(w_forecast_cache_path):
398
- self.logger.warning(
399
- "Solcast forecast data could not be saved to file."
400
- )
401
- else:
402
- self.logger.info(
403
- "Saved the Solcast results to cache, for later reference."
404
- )
405
- # Trim request results to forecast_dates
406
- total_data_list = total_data_list[0 : len(self.forecast_dates)]
407
- data_dict = {"ts": self.forecast_dates, "yhat": total_data_list}
408
- # Define DataFrame
409
- data = pd.DataFrame.from_dict(data_dict)
410
- # Define index
411
- data.set_index("ts", inplace=True)
412
- # Else, notify user to update cache
413
- else:
414
- self.logger.error("Unable to obtain Solcast cache file.")
415
- self.logger.error(
416
- "Try running optimization again with 'weather_forecast_cache_only': false"
417
- )
418
- self.logger.error(
419
- "Optionally, obtain new Solcast cache with runtime parameter 'weather_forecast_cache': true in an optimization, or run the `weather-forecast-cache` action, to pull new data from Solcast and cache."
420
- )
421
- return False
422
- # Else, open stored weather_forecast_data.pkl file for previous forecast data (cached data)
423
- else:
424
- with open(w_forecast_cache_path, "rb") as file:
425
- data = cPickle.load(file)
426
- if not isinstance(data, pd.DataFrame) or len(data) < len(
427
- self.forecast_dates
428
- ):
429
- self.logger.error(
430
- "There has been a error obtaining cached Solcast forecast data."
431
- )
432
- self.logger.error(
433
- "Try running optimization again with 'weather_forecast_cache': true, or run action `weather-forecast-cache`, to pull new data from Solcast and cache."
434
- )
435
- self.logger.warning(
436
- "Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true"
437
- )
438
- os.remove(w_forecast_cache_path)
439
- return False
440
- # Filter cached forecast data to match current forecast_dates start-end range (reduce forecast Dataframe size to appropriate length)
441
- if (
442
- self.forecast_dates[0] in data.index
443
- and self.forecast_dates[-1] in data.index
444
- ):
445
- data = data.loc[
446
- self.forecast_dates[0] : self.forecast_dates[-1]
447
- ]
448
- self.logger.info(
449
- "Retrieved Solcast data from the previously saved cache."
450
- )
451
- else:
452
- self.logger.error(
453
- "Unable to obtain cached Solcast forecast data within the requested timeframe range."
454
- )
455
- self.logger.error(
456
- "Try running optimization again (not using cache). Optionally, add runtime parameter 'weather_forecast_cache': true to pull new data from Solcast and cache."
457
- )
458
- self.logger.warning(
459
- "Removing old Solcast cache file. Next optimization will pull data from Solcast, unless 'weather_forecast_cache_only': true"
460
- )
461
- os.remove(w_forecast_cache_path)
462
- return False
463
- elif method == "solar.forecast": # using the solar.forecast API
464
- # Retrieve data from the solar.forecast API
465
- if "solar_forecast_kwp" not in self.retrieve_hass_conf:
466
- self.logger.warning(
467
- "The solar_forecast_kwp parameter was not defined, using dummy values for testing"
468
- )
469
- self.retrieve_hass_conf["solar_forecast_kwp"] = 5
470
- if self.retrieve_hass_conf["solar_forecast_kwp"] == 0:
471
- self.logger.warning(
472
- "The solar_forecast_kwp parameter is set to zero, setting to default 5"
473
- )
474
- self.retrieve_hass_conf["solar_forecast_kwp"] = 5
475
- if self.optim_conf["delta_forecast_daily"].days > 1:
476
- self.logger.warning(
477
- "The free public tier for solar.forecast only provides one day forecasts"
478
- )
479
- self.logger.warning(
480
- "Continuing with just the first day of data, the other days are filled with 0.0."
481
- )
482
- self.logger.warning(
483
- "Use the other available methods for delta_forecast_daily > 1"
484
- )
485
- headers = {"Accept": "application/json"}
486
- data = pd.DataFrame()
487
- for i in range(len(self.plant_conf["pv_module_model"])):
488
- url = (
489
- "https://api.forecast.solar/estimate/"
490
- + str(round(self.lat, 2))
491
- + "/"
492
- + str(round(self.lon, 2))
493
- + "/"
494
- + str(self.plant_conf["surface_tilt"][i])
495
- + "/"
496
- + str(self.plant_conf["surface_azimuth"][i] - 180)
497
- + "/"
498
- + str(self.retrieve_hass_conf["solar_forecast_kwp"])
499
- )
500
- response = get(url, headers=headers)
501
- """import bz2 # Uncomment to save a serialized data for tests
502
- import _pickle as cPickle
503
- with bz2.BZ2File("data/test_response_solarforecast_get_method.pbz2", "w") as f:
504
- cPickle.dump(response.json(), f)"""
505
- data_raw = response.json()
506
- data_dict = {
507
- "ts": list(data_raw["result"]["watts"].keys()),
508
- "yhat": list(data_raw["result"]["watts"].values()),
509
- }
510
- # Form the final DataFrame
511
- data_tmp = pd.DataFrame.from_dict(data_dict)
512
- data_tmp.set_index("ts", inplace=True)
513
- data_tmp.index = pd.to_datetime(data_tmp.index)
514
- data_tmp = data_tmp.tz_localize(self.forecast_dates.tz)
515
- data_tmp = data_tmp.reindex(index=self.forecast_dates)
516
- mask_up_data_df = (
517
- data_tmp.copy(deep=True).fillna(method="ffill").isnull()
518
- )
519
- mask_down_data_df = (
520
- data_tmp.copy(deep=True).fillna(method="bfill").isnull()
521
- )
522
- data_tmp.loc[data_tmp.index[mask_up_data_df["yhat"]], :] = 0.0
523
- data_tmp.loc[data_tmp.index[mask_down_data_df["yhat"]], :] = 0.0
524
- data_tmp.interpolate(inplace=True, limit=1)
525
- data_tmp = data_tmp.fillna(0.0)
526
- if len(data) == 0:
527
- data = copy.deepcopy(data_tmp)
528
- else:
529
- data = data + data_tmp
530
- elif method == "csv": # reading from a csv file
531
- weather_csv_file_path = csv_path
532
- # Loading the csv file, we will consider that this is the PV power in W
533
- data = pd.read_csv(weather_csv_file_path, header=None, names=["ts", "yhat"])
534
- # Check if the passed data has the correct length
535
- if len(data) < len(self.forecast_dates):
536
- self.logger.error("Passed data from CSV is not long enough")
537
- else:
538
- # Ensure correct length
539
- data = data.loc[data.index[0 : len(self.forecast_dates)], :]
540
- # Define index
541
- data.index = self.forecast_dates
542
- data.drop("ts", axis=1, inplace=True)
543
- data = data.copy().loc[self.forecast_dates]
544
- elif method == "list": # reading a list of values
545
- # Loading data from passed list
546
- data_list = self.params["passed_data"]["pv_power_forecast"]
547
- # Check if the passed data has the correct length
548
- if (
549
- len(data_list) < len(self.forecast_dates)
550
- and self.params["passed_data"]["prediction_horizon"] is None
551
- ):
552
- self.logger.error("Passed data from passed list is not long enough")
553
- else:
554
- # Ensure correct length
555
- data_list = data_list[0 : len(self.forecast_dates)]
556
- # Define DataFrame
557
- data_dict = {"ts": self.forecast_dates, "yhat": data_list}
558
- data = pd.DataFrame.from_dict(data_dict)
559
- # Define index
560
- data.set_index("ts", inplace=True)
561
- else:
562
- self.logger.error("Method %r is not valid", method)
563
- data = None
564
- return data
565
-
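- # A usage sketch, assuming the configuration dictionaries and logger were
- # built as in the EMHASS documentation (all names below are placeholders):
- #
- #     fcst = Forecast(retrieve_hass_conf, optim_conf, plant_conf,
- #                     params, emhass_conf, logger)
- #     df_weather = fcst.get_weather_forecast(method="scrapper")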
566
- def cloud_cover_to_irradiance(
567
- self, cloud_cover: pd.Series, offset: Optional[int] = 35
568
- ) -> pd.DataFrame:
569
- """
570
- Estimates irradiance from cloud cover in the following steps.
571
-
572
- 1. Determine clear sky GHI using Ineichen model and
573
- climatological turbidity.
574
-
575
- 2. Estimate cloudy sky GHI as a linear function of cloud_cover.
576
-
577
- 3. Estimate cloudy sky DNI using the DISC model.
578
-
579
- 4. Calculate DHI from DNI and GHI.
580
-
581
- (This function was copied and modified from PVLib)
582
-
583
- :param cloud_cover: Cloud cover in %.
584
- :type cloud_cover: pd.Series
585
- :param offset: Determines the minimum GHI, defaults to 35
586
- :type offset: Optional[int], optional
587
- :return: Estimated GHI, DNI, and DHI.
588
- :rtype: pd.DataFrame
589
- """
590
- location = Location(latitude=self.lat, longitude=self.lon)
591
- solpos = location.get_solarposition(cloud_cover.index)
592
- cs = location.get_clearsky(
593
- cloud_cover.index, model="ineichen", solar_position=solpos
594
- )
595
- # Using only the linear method
596
- offset = offset / 100.0
597
- cloud_cover_unit = copy.deepcopy(cloud_cover) / 100.0
598
- ghi = (offset + (1 - offset) * (1 - cloud_cover_unit)) * cs["ghi"]
599
- # Using disc model
600
- dni = disc(ghi, solpos["zenith"], cloud_cover.index)["dni"]
601
- dhi = ghi - dni * np.cos(np.radians(solpos["zenith"]))
602
- irrads = pd.DataFrame({"ghi": ghi, "dni": dni, "dhi": dhi}).fillna(0)
603
- return irrads
604
-
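- # The linear model above computes ghi = (offset + (1 - offset) * (1 - cc)) * ghi_clear
- # for a cloud cover fraction cc in [0, 1], so a fully overcast sky (cc = 1) still
- # yields 35% of clear-sky GHI with the default offset. For example, cc = 0.75
- # gives ghi = (0.35 + 0.65 * 0.25) * ghi_clear ≈ 0.51 * ghi_clear.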
605
- @staticmethod
606
- def get_mix_forecast(
607
- df_now: pd.DataFrame,
608
- df_forecast: pd.DataFrame,
609
- alpha: float,
610
- beta: float,
611
- col: str,
612
- ) -> pd.DataFrame:
613
- """A simple correction method for forecasted data using the current real values of a variable.
614
-
615
- :param df_now: The DataFrame containing the current/real values
616
- :type df_now: pd.DataFrame
617
- :param df_forecast: The DataFrame containing the forecast data
618
- :type df_forecast: pd.DataFrame
619
- :param alpha: A weight for the forecast data side
620
- :type alpha: float
621
- :param beta: A weight for the current/real values side
622
- :type beta: float
623
- :param col: The column variable name
624
- :type col: str
625
- :return: The output DataFrame with the corrected values
626
- :rtype: pd.DataFrame
627
- """
628
- first_fcst = alpha * df_forecast.iloc[0] + beta * df_now[col].iloc[-1]
629
- df_forecast.iloc[0] = first_fcst
630
- return df_forecast
631
-
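- # Worked example (illustrative values): with alpha = 0.75, beta = 0.25, a first
- # forecast point of 2000 W and a last measured value of 1000 W, the corrected
- # first point is 0.75 * 2000 + 0.25 * 1000 = 1750 W. Only the first forecast
- # point is blended; the rest of the horizon is left untouched.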
632
- def get_power_from_weather(
633
- self,
634
- df_weather: pd.DataFrame,
635
- set_mix_forecast: Optional[bool] = False,
636
- df_now: Optional[pd.DataFrame] = pd.DataFrame(),
637
- ) -> pd.Series:
638
- r"""
639
- Convert weather forecast data into electrical power.
640
-
641
- :param df_weather: The DataFrame containing the weather forecasted data. \
642
- This DF should be generated by the 'get_weather_forecast' method or at \
643
- least contain the same column names filled with proper data.
644
- :type df_weather: pd.DataFrame
645
- :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
646
- :type set_mix_forecast: Bool, optional
647
- :param df_now: The DataFrame containing the now/current data.
648
- :type df_now: pd.DataFrame
649
- :return: The Series containing the electrical power in Watts
650
- :rtype: pd.Series
651
-
652
- """
653
- # If using csv method we consider that yhat is the PV power in W
654
- if (
655
- "solar_forecast_kwp" in self.retrieve_hass_conf.keys()
656
- and self.retrieve_hass_conf["solar_forecast_kwp"] == 0
657
- ):
658
- P_PV_forecast = pd.Series(0, index=df_weather.index)
659
- else:
660
- if (
661
- self.weather_forecast_method == "solcast"
662
- or self.weather_forecast_method == "solar.forecast"
663
- or self.weather_forecast_method == "csv"
664
- or self.weather_forecast_method == "list"
665
- ):
666
- P_PV_forecast = df_weather["yhat"]
667
- P_PV_forecast.name = None
668
- else: # We will transform the weather data into electrical power
669
- # Transform to power (Watts)
670
- # Setting the main parameters of the PV plant
671
- location = Location(latitude=self.lat, longitude=self.lon)
672
- temp_params = TEMPERATURE_MODEL_PARAMETERS["sapm"][
673
- "close_mount_glass_glass"
674
- ]
675
- cec_modules = bz2.BZ2File(
676
- self.emhass_conf["root_path"] / "data" / "cec_modules.pbz2", "rb"
677
- )
678
- cec_modules = cPickle.load(cec_modules)
679
- cec_inverters = bz2.BZ2File(
680
- self.emhass_conf["root_path"] / "data" / "cec_inverters.pbz2", "rb"
681
- )
682
- cec_inverters = cPickle.load(cec_inverters)
683
- if isinstance(self.plant_conf["pv_module_model"], list):
684
- P_PV_forecast = pd.Series(0, index=df_weather.index)
685
- for i in range(len(self.plant_conf["pv_module_model"])):
686
- # Selecting correct module and inverter
687
- module = cec_modules[self.plant_conf["pv_module_model"][i]]
688
- inverter = cec_inverters[
689
- self.plant_conf["pv_inverter_model"][i]
690
- ]
691
- # Building the PV system in PVLib
692
- system = PVSystem(
693
- surface_tilt=self.plant_conf["surface_tilt"][i],
694
- surface_azimuth=self.plant_conf["surface_azimuth"][i],
695
- module_parameters=module,
696
- inverter_parameters=inverter,
697
- temperature_model_parameters=temp_params,
698
- modules_per_string=self.plant_conf["modules_per_string"][i],
699
- strings_per_inverter=self.plant_conf[
700
- "strings_per_inverter"
701
- ][i],
702
- )
703
- mc = ModelChain(system, location, aoi_model="physical")
704
- # Run the model on the weather DF indexes
705
- mc.run_model(df_weather)
706
- # Extracting results for AC power
707
- P_PV_forecast = P_PV_forecast + mc.results.ac
708
- else:
709
- # Selecting correct module and inverter
710
- module = cec_modules[self.plant_conf["pv_module_model"]]
711
- inverter = cec_inverters[self.plant_conf["pv_inverter_model"]]
712
- # Building the PV system in PVLib
713
- system = PVSystem(
714
- surface_tilt=self.plant_conf["surface_tilt"],
715
- surface_azimuth=self.plant_conf["surface_azimuth"],
716
- module_parameters=module,
717
- inverter_parameters=inverter,
718
- temperature_model_parameters=temp_params,
719
- modules_per_string=self.plant_conf["modules_per_string"],
720
- strings_per_inverter=self.plant_conf["strings_per_inverter"],
721
- )
722
- mc = ModelChain(system, location, aoi_model="physical")
723
- # Run the model on the weather DF indexes
724
- mc.run_model(df_weather)
725
- # Extracting results for AC power
726
- P_PV_forecast = mc.results.ac
727
- if set_mix_forecast:
728
- P_PV_forecast = Forecast.get_mix_forecast(
729
- df_now,
730
- P_PV_forecast,
731
- self.params["passed_data"]["alpha"],
732
- self.params["passed_data"]["beta"],
733
- self.var_PV,
734
- )
735
- return P_PV_forecast
736
-
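- # A usage sketch of the mixed-forecast correction (assumes `fcst` and
- # `df_weather` from the sketch above, `df_now` holding fresh sensor data, and
- # alpha/beta passed at runtime in params["passed_data"]):
- #
- #     P_PV = fcst.get_power_from_weather(df_weather, set_mix_forecast=True, df_now=df_now)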
737
- def get_forecast_days_csv(self, timedelta_days: Optional[int] = 1) -> pd.date_range:
738
- r"""
739
- Get the date range vector of forecast dates that will be used when loading a CSV file.
740
-
741
- :param timedelta_days: The number of extra days to append to the forecast dates, defaults to 1
- :type timedelta_days: int, optional
- :return: The forecast dates vector
742
- :rtype: pd.date_range
743
-
744
- """
745
- start_forecast_csv = pd.Timestamp(datetime.now(), tz=self.time_zone).replace(
746
- microsecond=0
747
- )
748
- if self.method_ts_round == "nearest":
749
- start_forecast_csv = pd.Timestamp(
750
- datetime.now(), tz=self.time_zone
751
- ).replace(microsecond=0)
752
- elif self.method_ts_round == "first":
753
- start_forecast_csv = (
754
- pd.Timestamp(datetime.now(), tz=self.time_zone)
755
- .replace(microsecond=0)
756
- .floor(freq=self.freq)
757
- )
758
- elif self.method_ts_round == "last":
759
- start_forecast_csv = (
760
- pd.Timestamp(datetime.now(), tz=self.time_zone)
761
- .replace(microsecond=0)
762
- .ceil(freq=self.freq)
763
- )
764
- else:
765
- self.logger.error("Wrong method_ts_round passed parameter")
766
- end_forecast_csv = (
767
- start_forecast_csv + self.optim_conf["delta_forecast_daily"]
768
- ).replace(microsecond=0)
769
- forecast_dates_csv = (
770
- pd.date_range(
771
- start=start_forecast_csv,
772
- end=end_forecast_csv + timedelta(days=timedelta_days) - self.freq,
773
- freq=self.freq,
774
- tz=self.time_zone,
775
- )
776
- .tz_convert("utc")
777
- .round(self.freq, ambiguous="infer", nonexistent="shift_forward")
778
- .tz_convert(self.time_zone)
779
- )
780
- if self.params is not None:
781
- if "prediction_horizon" in list(self.params["passed_data"].keys()):
782
- if self.params["passed_data"]["prediction_horizon"] is not None:
783
- forecast_dates_csv = forecast_dates_csv[
784
- 0 : self.params["passed_data"]["prediction_horizon"]
785
- ]
786
- return forecast_dates_csv
787
-
788
- def get_forecast_out_from_csv_or_list(
789
- self,
790
- df_final: pd.DataFrame,
791
- forecast_dates_csv: pd.date_range,
792
- csv_path: str,
793
- data_list: Optional[list] = None,
794
- list_and_perfect: Optional[bool] = False,
795
- ) -> pd.DataFrame:
796
- r"""
797
- Get the forecast data as a DataFrame from a CSV file.
798
-
799
- The data contained in the CSV file should be a 24h forecast with the same frequency as
800
- the main 'optimization_time_step' parameter in the configuration file. The timestamp will not be used and
801
- a new DateTimeIndex is generated to fit the timestamp index of the input data in 'df_final'.
802
-
803
- :param df_final: The DataFrame containing the input data.
804
- :type df_final: pd.DataFrame
805
- :param forecast_dates_csv: The forecast dates vector
806
- :type forecast_dates_csv: pd.date_range
807
- :param csv_path: The path to the CSV file
808
- :type csv_path: str
- :param data_list: The list of values passed at runtime, used when csv_path is None, defaults to None
- :type data_list: list, optional
- :param list_and_perfect: Set to True to align a passed list with the input data days (perfect forecast case), defaults to False
- :type list_and_perfect: bool, optional
809
- :return: The data from the CSV file
810
- :rtype: pd.DataFrame
811
-
812
- """
813
- if csv_path is None:
814
- data_dict = {"ts": forecast_dates_csv, "yhat": data_list}
815
- df_csv = pd.DataFrame.from_dict(data_dict)
816
- df_csv.index = forecast_dates_csv
817
- df_csv.drop(["ts"], axis=1, inplace=True)
818
- df_csv = set_df_index_freq(df_csv)
819
- if list_and_perfect:
820
- days_list = df_final.index.day.unique().tolist()
821
- else:
822
- days_list = df_csv.index.day.unique().tolist()
823
- else:
824
- if not os.path.exists(csv_path):
825
- csv_path = self.emhass_conf["data_path"] / csv_path
826
- load_csv_file_path = csv_path
827
- df_csv = pd.read_csv(load_csv_file_path, header=None, names=["ts", "yhat"])
828
- df_csv.index = forecast_dates_csv
829
- df_csv.drop(["ts"], axis=1, inplace=True)
830
- df_csv = set_df_index_freq(df_csv)
831
- days_list = df_final.index.day.unique().tolist()
832
- forecast_out = pd.DataFrame()
833
- for day in days_list:
834
- if csv_path is None:
835
- if list_and_perfect:
836
- df_tmp = copy.deepcopy(df_final)
837
- else:
838
- df_tmp = copy.deepcopy(df_csv)
839
- else:
840
- df_tmp = copy.deepcopy(df_final)
841
- first_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][0]
842
- last_elm_index = [i for i, x in enumerate(df_tmp.index.day == day) if x][-1]
843
- fcst_index = pd.date_range(
844
- start=df_tmp.index[first_elm_index],
845
- end=df_tmp.index[last_elm_index],
846
- freq=df_tmp.index.freq,
847
- )
848
- first_hour = (
849
- str(df_tmp.index[first_elm_index].hour)
850
- + ":"
851
- + str(df_tmp.index[first_elm_index].minute)
852
- )
853
- last_hour = (
854
- str(df_tmp.index[last_elm_index].hour)
855
- + ":"
856
- + str(df_tmp.index[last_elm_index].minute)
857
- )
858
- if len(forecast_out) == 0:
859
- if csv_path is None:
860
- if list_and_perfect:
861
- forecast_out = pd.DataFrame(
862
- df_csv.between_time(first_hour, last_hour).values,
863
- index=fcst_index,
864
- )
865
- else:
866
- forecast_out = pd.DataFrame(
867
- df_csv.loc[fcst_index, :]
868
- .between_time(first_hour, last_hour)
869
- .values,
870
- index=fcst_index,
871
- )
872
- else:
873
- forecast_out = pd.DataFrame(
874
- df_csv.between_time(first_hour, last_hour).values,
875
- index=fcst_index,
876
- )
877
- else:
878
- if csv_path is None:
879
- if list_and_perfect:
880
- forecast_tp = pd.DataFrame(
881
- df_csv.between_time(first_hour, last_hour).values,
882
- index=fcst_index,
883
- )
884
- else:
885
- forecast_tp = pd.DataFrame(
886
- df_csv.loc[fcst_index, :]
887
- .between_time(first_hour, last_hour)
888
- .values,
889
- index=fcst_index,
890
- )
891
- else:
892
- forecast_tp = pd.DataFrame(
893
- df_csv.between_time(first_hour, last_hour).values,
894
- index=fcst_index,
895
- )
896
- forecast_out = pd.concat([forecast_out, forecast_tp], axis=0)
897
- return forecast_out
898
-
899
- @staticmethod
900
- def resample_data(data, freq, current_freq):
901
- r"""
902
- Resample a DataFrame with a custom frequency.
903
-
904
- :param data: Original time series data with a DateTimeIndex.
905
- :type data: pd.DataFrame
906
- :param freq: Desired frequency for resampling (e.g., pd.Timedelta("10min")).
907
- :type freq: pd.Timedelta
- :param current_freq: The current frequency of the input data.
- :type current_freq: pd.Timedelta
908
- :return: Resampled data at the specified frequency.
909
- :rtype: pd.DataFrame
910
- """
911
- if freq > current_freq:
912
- # Downsampling
913
- # Use 'mean' to aggregate or choose other options ('sum', 'max', etc.)
914
- resampled_data = data.resample(freq).mean()
915
- elif freq < current_freq:
916
- # Upsampling
917
- # Use 'asfreq' to create empty slots, then interpolate
918
- resampled_data = data.resample(freq).asfreq()
919
- resampled_data = resampled_data.interpolate(method="time")
920
- else:
921
- # No resampling needed
922
- resampled_data = data.copy()
923
- return resampled_data
924
-
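- # A usage sketch (assuming `df` is a DataFrame with a DateTimeIndex at a 30 min
- # frequency): upsampling interpolates in time, downsampling averages.
- #
- #     up = Forecast.resample_data(df, pd.Timedelta("10min"), pd.Timedelta("30min"))
- #     down = Forecast.resample_data(df, pd.Timedelta("60min"), pd.Timedelta("30min"))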
925
- @staticmethod
926
- def get_typical_load_forecast(data, forecast_date):
927
- r"""
928
- Forecast the load profile for the next day based on historic data.
929
-
930
- :param data: A DataFrame with a DateTimeIndex containing the historic load data.
931
- Must include a 'load' column.
932
- :type data: pd.DataFrame
933
- :param forecast_date: The date for which the forecast will be generated.
934
- :type forecast_date: pd.Timestamp
935
- :return: A DataFrame with the forecasted load profile for the next day and an array of the days used
936
- to calculate the forecast.
937
- :rtype: tuple (pd.DataFrame, np.ndarray)
938
- """
939
- # Ensure the 'load' column exists
940
- if "load" not in data.columns:
941
- raise ValueError("Data must have a 'load' column.")
942
- # Filter historic data for the same month and day of the week
943
- month = forecast_date.month
944
- day_of_week = forecast_date.dayofweek
945
- historic_data = data[
946
- (data.index.month == month) & (data.index.dayofweek == day_of_week)
947
- ]
948
- used_days = np.unique(historic_data.index.date)
949
- # Align all historic data to the forecast day
950
- aligned_data = []
951
- for day in used_days:
952
- daily_data = data[data.index.date == pd.Timestamp(day).date()]
953
- aligned_daily_data = daily_data.copy()
954
- aligned_daily_data.index = aligned_daily_data.index.map(
955
- lambda x: x.replace(
956
- year=forecast_date.year,
957
- month=forecast_date.month,
958
- day=forecast_date.day,
959
- )
960
- )
961
- aligned_data.append(aligned_daily_data)
962
- # Combine all aligned historic data into a single DataFrame
963
- combined_data = pd.concat(aligned_data)
964
- # Compute the mean load for each timestamp
965
- forecast = combined_data.groupby(combined_data.index).mean()
966
- return forecast, used_days
967
-
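- # A usage sketch (assuming `hist` is a DataFrame with a DateTimeIndex and a
- # 'load' column): the forecast averages all historic days sharing the target
- # day's month and weekday.
- #
- #     forecast, used_days = Forecast.get_typical_load_forecast(
- #         hist, pd.Timestamp("2024-06-03")  # illustrative date, a Monday in June
- #     )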
968
- def get_load_forecast(
969
- self,
970
- days_min_load_forecast: Optional[int] = 3,
971
- method: Optional[str] = "typical",
972
- csv_path: Optional[str] = "data_load_forecast.csv",
973
- set_mix_forecast: Optional[bool] = False,
974
- df_now: Optional[pd.DataFrame] = pd.DataFrame(),
975
- use_last_window: Optional[bool] = True,
976
- mlf: Optional[MLForecaster] = None,
977
- debug: Optional[bool] = False,
978
- ) -> pd.Series:
979
- r"""
980
- Get and generate the load forecast data.
981
-
982
- :param days_min_load_forecast: The number of last days to retrieve that \
983
- will be used to generate a naive forecast, defaults to 3
984
- :type days_min_load_forecast: int, optional
985
- :param method: The method to be used to generate load forecast, the options \
986
- are 'typical' for a typical household load consumption curve, \
987
- 'naive' for a persistence model, 'mlforecaster' for using a custom \
988
- previously fitted machine learning model, 'csv' to read the forecast from \
989
- a CSV file and 'list' to use data directly passed at runtime as a list of \
990
- values. Defaults to 'typical'.
991
- :type method: str, optional
992
- :param csv_path: The path to the CSV file used when method = 'csv', \
993
- defaults to "/data/data_load_forecast.csv"
994
- :type csv_path: str, optional
995
- :param set_mix_forecast: Use a mixed forecast strategy to integrate now/current values.
996
- :type set_mix_forecast: Bool, optional
997
- :param df_now: The DataFrame containing the now/current data.
998
- :type df_now: pd.DataFrame, optional
999
- :param use_last_window: True if the 'last_window' option should be used for the \
1000
- custom machine learning forecast model. The 'last_window=True' means that the data \
1001
- that will be used to generate the new forecast will be freshly retrieved from \
1002
- Home Assistant. This data is needed because the forecast model is an auto-regressive \
1003
- model with lags. If 'False' then the data used during the model training is used.
1004
- :type use_last_window: Bool, optional
1005
- :param mlf: The 'mlforecaster' object previously trained. This is mainly used for debug \
1006
- and unit testing. In production the actual model will be read from a saved pickle file.
1007
- :type mlf: mlforecaster, optional
1008
- :param debug: If True, use the passed mlf object directly instead of loading a saved model, used for debug and unit testing.
1009
- :type debug: Bool, optional
1010
- :return: The Series containing the electrical load power in Watts
1011
- :rtype: pd.Series
1012
-
1013
- """
1014
- csv_path = self.emhass_conf["data_path"] / csv_path
1015
-
1016
- if (
1017
- method == "naive" or method == "mlforecaster"
1018
- ): # retrieving needed data for these methods
1019
- self.logger.info(
1020
- "Retrieving data from hass for load forecast using method = " + method
1021
- )
1022
- var_list = [self.var_load]
1023
- var_replace_zero = None
1024
- var_interp = [self.var_load]
1025
- time_zone_load_forecast = None
1026
- # We will need to retrieve a new set of load data according to the days_min_load_forecast parameter
1027
- rh = RetrieveHass(
1028
- self.retrieve_hass_conf["hass_url"],
1029
- self.retrieve_hass_conf["long_lived_token"],
1030
- self.freq,
1031
- time_zone_load_forecast,
1032
- self.params,
1033
- self.emhass_conf,
1034
- self.logger,
1035
- )
1036
- if self.get_data_from_file:
1037
- filename_path = self.emhass_conf["data_path"] / "test_df_final.pkl"
1038
- with open(filename_path, "rb") as inp:
1039
- rh.df_final, days_list, var_list, rh.ha_config = pickle.load(inp)
1040
- self.var_load = var_list[0]
1041
- self.retrieve_hass_conf["sensor_power_load_no_var_loads"] = (
1042
- self.var_load
1043
- )
1044
- var_interp = [var_list[0]]
1045
- self.var_list = [var_list[0]]
1046
- self.var_load_new = self.var_load + "_positive"
1047
- else:
1048
- days_list = get_days_list(days_min_load_forecast)
1049
- if not rh.get_data(days_list, var_list):
1050
- return False
1051
- if not rh.prepare_data(
1052
- self.retrieve_hass_conf["sensor_power_load_no_var_loads"],
1053
- load_negative=self.retrieve_hass_conf["load_negative"],
1054
- set_zero_min=self.retrieve_hass_conf["set_zero_min"],
1055
- var_replace_zero=var_replace_zero,
1056
- var_interp=var_interp,
1057
- ):
1058
- return False
1059
- df = rh.df_final.copy()[[self.var_load_new]]
1060
- if (
1061
- method == "typical"
1062
- ): # using typical statistical data for household power consumption
1063
- # Loading data from history file
1064
- model_type = "load_clustering"
1065
- data_path = self.emhass_conf["data_path"] / str(
1066
- "data_train_" + model_type + ".pkl"
1067
- )
1068
- with open(data_path, "rb") as fid:
1069
- data, _ = pickle.load(fid)
1070
- # Resample the data if needed
1071
- current_freq = pd.Timedelta("30min")
1072
- if self.freq != current_freq:
1073
- data = Forecast.resample_data(data, self.freq, current_freq)
1074
- # Generate forecast
1075
- data_list = []
1076
- dates_list = np.unique(self.forecast_dates.date).tolist()
1077
- forecast = pd.DataFrame()
1078
- for date in dates_list:
1079
- forecast_date = pd.Timestamp(date)
1080
- data.columns = ["load"]
1081
- forecast_tmp, used_days = Forecast.get_typical_load_forecast(
1082
- data, forecast_date
1083
- )
1084
- self.logger.debug(
1085
- f"Using {len(used_days)} days of data to generate the forecast."
1086
- )
1087
- # Normalize the forecast: scale the typical profile by maximum_power_from_grid (the bundled profile uses a 9 kW reference)
1088
- forecast_tmp = (
1089
- forecast_tmp * self.plant_conf["maximum_power_from_grid"] / 9000
1090
- )
1091
- data_list.extend(forecast_tmp.values.ravel().tolist())
1092
- if len(forecast) == 0:
1093
- forecast = forecast_tmp
1094
- else:
1095
- forecast = pd.concat([forecast, forecast_tmp], axis=0)
1096
- forecast.index = forecast.index.tz_convert(self.time_zone)
1097
- forecast_out = forecast.loc[
1098
- forecast.index.intersection(self.forecast_dates)
1099
- ]
1100
- forecast_out.index.name = "ts"
1101
- forecast_out = forecast_out.rename(columns={"load": "yhat"})
1102
- elif method == "naive": # using a naive approach
1103
- mask_forecast_out = (
1104
- df.index > days_list[-1] - self.optim_conf["delta_forecast_daily"]
1105
- )
1106
- forecast_out = df.copy().loc[mask_forecast_out]
1107
- forecast_out = forecast_out.rename(columns={self.var_load_new: "yhat"})
1108
- # Force forecast_out length to avoid mismatches
1109
- forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1110
- forecast_out.index = self.forecast_dates
1111
- elif (
1112
- method == "mlforecaster"
1113
- ): # using a custom forecast model with machine learning
1114
- # Load model
1115
- model_type = self.params["passed_data"]["model_type"]
1116
- filename = model_type + "_mlf.pkl"
1117
- filename_path = self.emhass_conf["data_path"] / filename
1118
- if not debug:
1119
- if filename_path.is_file():
1120
- with open(filename_path, "rb") as inp:
1121
- mlf = pickle.load(inp)
1122
- else:
1123
- self.logger.error(
1124
- "The ML forecaster file was not found, please run a model fit method before this predict method"
1125
- )
1126
- return False
1127
- # Make predictions
1128
- if use_last_window:
1129
- data_last_window = copy.deepcopy(df)
1130
- data_last_window = data_last_window.rename(
1131
- columns={self.var_load_new: self.var_load}
1132
- )
1133
- else:
1134
- data_last_window = None
1135
- forecast_out = mlf.predict(data_last_window)
1136
- # Force forecast length to avoid mismatches
1137
- self.logger.debug(
1138
- "Number of ML predict forcast data generated (lags_opt): "
1139
- + str(len(forecast_out.index))
1140
- )
1141
- self.logger.debug(
1142
- "Number of forcast dates obtained: " + str(len(self.forecast_dates))
1143
- )
1144
- if len(self.forecast_dates) < len(forecast_out.index):
1145
- forecast_out = forecast_out.iloc[0 : len(self.forecast_dates)]
1146
- # To be removed once bug is fixed
1147
- elif len(self.forecast_dates) > len(forecast_out.index):
1148
- self.logger.error(
1149
- "Unable to obtain: "
1150
- + str(len(self.forecast_dates))
1151
- + " lags_opt values from sensor: power load no var loads, check optimization_time_step/freq and historic_days_to_retrieve/days_to_retrieve parameters"
1152
- )
1153
- return False
1154
- # Define DataFrame
1155
- data_dict = {
1156
- "ts": self.forecast_dates,
1157
- "yhat": forecast_out.values.tolist(),
1158
- }
1159
- data = pd.DataFrame.from_dict(data_dict)
1160
- # Define index
1161
- data.set_index("ts", inplace=True)
1162
- forecast_out = data.copy().loc[self.forecast_dates]
1163
- elif method == "csv": # reading from a csv file
1164
- load_csv_file_path = csv_path
1165
- df_csv = pd.read_csv(load_csv_file_path, header=None, names=["ts", "yhat"])
1166
- if len(df_csv) < len(self.forecast_dates):
1167
- self.logger.error("Passed data from CSV is not long enough")
1168
- else:
1169
- # Ensure correct length
1170
- df_csv = df_csv.loc[df_csv.index[0 : len(self.forecast_dates)], :]
1171
- # Define index
1172
- df_csv.index = self.forecast_dates
1173
- df_csv.drop(["ts"], axis=1, inplace=True)
1174
- forecast_out = df_csv.copy().loc[self.forecast_dates]
1175
- elif method == "list": # reading a list of values
1176
- # Loading data from passed list
1177
- data_list = self.params["passed_data"]["load_power_forecast"]
1178
- # Check if the passed data has the correct length
1179
- if (
1180
- len(data_list) < len(self.forecast_dates)
1181
- and self.params["passed_data"]["prediction_horizon"] is None
1182
- ):
1183
- self.logger.error("Passed data from passed list is not long enough")
1184
- return False
1185
- else:
1186
- # Ensure correct length
1187
- data_list = data_list[0 : len(self.forecast_dates)]
1188
- # Define DataFrame
1189
- data_dict = {"ts": self.forecast_dates, "yhat": data_list}
1190
- data = pd.DataFrame.from_dict(data_dict)
1191
- # Define index
1192
- data.set_index("ts", inplace=True)
1193
- forecast_out = data.copy().loc[self.forecast_dates]
1194
- else:
1195
- self.logger.error("Passed method is not valid")
1196
- return False
1197
- P_Load_forecast = copy.deepcopy(forecast_out["yhat"])
1198
- if set_mix_forecast:
1199
- P_Load_forecast = Forecast.get_mix_forecast(
1200
- df_now,
1201
- P_Load_forecast,
1202
- self.params["passed_data"]["alpha"],
1203
- self.params["passed_data"]["beta"],
1204
- self.var_load_new,
1205
- )
1206
- return P_Load_forecast
1207
-
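- # A sketch of the runtime 'list' method (assumes `fcst` was built with a params
- # dict and that the list covers the whole forecast horizon):
- #
- #     params["passed_data"]["load_power_forecast"] = [500.0] * len(fcst.forecast_dates)
- #     P_load = fcst.get_load_forecast(method="list")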
1208
- def get_load_cost_forecast(
1209
- self,
1210
- df_final: pd.DataFrame,
1211
- method: Optional[str] = "hp_hc_periods",
1212
- csv_path: Optional[str] = "data_load_cost_forecast.csv",
1213
- list_and_perfect: Optional[bool] = False,
1214
- ) -> pd.DataFrame:
1215
- r"""
1216
- Get the unit cost for the load consumption based on multiple tariff \
1217
- periods. This is the cost of the energy from the utility in a vector \
1218
- sampled at the fixed freq value.
1219
-
1220
- :param df_final: The DataFrame containing the input data.
1221
- :type df_final: pd.DataFrame
1222
- :param method: The method to be used to generate load cost forecast, \
1223
- the options are 'hp_hc_periods' for peak and non-peak hours contracts \
1224
- and 'csv' to load a CSV file, defaults to 'hp_hc_periods'
1225
- :type method: str, optional
1226
- :param csv_path: The path to the CSV file used when method = 'csv', \
1227
- defaults to "data_load_cost_forecast.csv"
1228
- :type csv_path: str, optional
1229
- :return: The input DataFrame with one additional column appended containing
1230
- the load cost for each time observation.
1231
- :rtype: pd.DataFrame
1232
-
1233
- """
1234
- csv_path = self.emhass_conf["data_path"] / csv_path
1235
- if method == "hp_hc_periods":
1236
- df_final[self.var_load_cost] = self.optim_conf["load_offpeak_hours_cost"]
1237
- list_df_hp = []
1238
- for key, period_hp in self.optim_conf["load_peak_hour_periods"].items():
1239
- list_df_hp.append(
1240
- df_final[self.var_load_cost].between_time(
1241
- period_hp[0]["start"], period_hp[1]["end"]
1242
- )
1243
- )
1244
- for df_hp in list_df_hp:
1245
- df_final.loc[df_hp.index, self.var_load_cost] = self.optim_conf[
1246
- "load_peak_hours_cost"
1247
- ]
1248
- elif method == "csv":
1249
- forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1250
- forecast_out = self.get_forecast_out_from_csv_or_list(
1251
- df_final, forecast_dates_csv, csv_path
1252
- )
1253
- df_final[self.var_load_cost] = forecast_out
1254
- elif method == "list": # reading a list of values
1255
- # Loading data from passed list
1256
- data_list = self.params["passed_data"]["load_cost_forecast"]
1257
- # Check if the passed data has the correct length
1258
- if (
1259
- len(data_list) < len(self.forecast_dates)
1260
- and self.params["passed_data"]["prediction_horizon"] is None
1261
- ):
1262
- self.logger.error("Passed data from passed list is not long enough")
1263
- return False
1264
- else:
1265
- # Ensure correct length
1266
- data_list = data_list[0 : len(self.forecast_dates)]
1267
- # Define the correct dates
1268
- forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1269
- forecast_out = self.get_forecast_out_from_csv_or_list(
1270
- df_final,
1271
- forecast_dates_csv,
1272
- None,
1273
- data_list=data_list,
1274
- list_and_perfect=list_and_perfect,
1275
- )
1276
- # Fill the final DF
1277
- df_final[self.var_load_cost] = forecast_out
1278
- else:
1279
- self.logger.error("Passed method is not valid")
1280
- return False
1281
- return df_final
1282
-
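- # A sketch of the 'load_peak_hour_periods' structure expected by the
- # 'hp_hc_periods' method above (times are illustrative); each period maps to a
- # list holding a 'start' dict and an 'end' dict:
- #
- #     optim_conf["load_peak_hour_periods"] = {
- #         "period_hp_1": [{"start": "02:54"}, {"end": "15:24"}],
- #         "period_hp_2": [{"start": "17:24"}, {"end": "20:24"}],
- #     }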
1283
- def get_prod_price_forecast(
1284
- self,
1285
- df_final: pd.DataFrame,
1286
- method: Optional[str] = "constant",
1287
- csv_path: Optional[str] = "data_prod_price_forecast.csv",
1288
- list_and_perfect: Optional[bool] = False,
1289
- ) -> pd.DataFrame:
1290
- r"""
1291
- Get the unit power production price for the energy injected to the grid.\
1292
- This is the price of the energy injected to the utility in a vector \
1293
- sampled at the fixed freq value.
1294
-
1295
- :param df_final: The DataFrame containing all the input data retrieved
1296
- from hass
1297
- :type df_final: pd.DataFrame
1298
- :param method: The method to be used to generate the production price forecast, \
1299
- the options are 'constant' for a fixed constant value and 'csv'\
1300
- to load a CSV file, defaults to 'constant'
1301
- :type method: str, optional
1302
- :param csv_path: The path to the CSV file used when method = 'csv', \
1303
- defaults to "/data/data_load_cost_forecast.csv"
1304
- :type csv_path: str, optional
1305
- :return: The input DataFrame with one additional column appended containing
1306
- the power production price for each time observation.
1307
- :rtype: pd.DataFrame
1308
-
1309
- """
1310
- csv_path = self.emhass_conf["data_path"] / csv_path
1311
- if method == "constant":
1312
- df_final[self.var_prod_price] = self.optim_conf[
1313
- "photovoltaic_production_sell_price"
1314
- ]
1315
- elif method == "csv":
1316
- forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1317
- forecast_out = self.get_forecast_out_from_csv_or_list(
1318
- df_final, forecast_dates_csv, csv_path
1319
- )
1320
- df_final[self.var_prod_price] = forecast_out
1321
- elif method == "list": # reading a list of values
1322
- # Loading data from passed list
1323
- data_list = self.params["passed_data"]["prod_price_forecast"]
1324
- # Check if the passed data has the correct length
1325
- if (
1326
- len(data_list) < len(self.forecast_dates)
1327
- and self.params["passed_data"]["prediction_horizon"] is None
1328
- ):
1329
- self.logger.error("Passed data from passed list is not long enough")
1330
- return False
1331
- else:
1332
- # Ensure correct length
1333
- data_list = data_list[0 : len(self.forecast_dates)]
1334
- # Define the correct dates
1335
- forecast_dates_csv = self.get_forecast_days_csv(timedelta_days=0)
1336
- forecast_out = self.get_forecast_out_from_csv_or_list(
1337
- df_final,
1338
- forecast_dates_csv,
1339
- None,
1340
- data_list=data_list,
1341
- list_and_perfect=list_and_perfect,
1342
- )
1343
- # Fill the final DF
1344
- df_final[self.var_prod_price] = forecast_out
1345
- else:
1346
- self.logger.error("Passed method is not valid")
1347
- return False
1348
- return df_final