loone-data-prep 1.2.4__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
  2. loone_data_prep/LOONE_DATA_PREP.py +0 -1
  3. loone_data_prep/dbhydro_insights.py +195 -0
  4. loone_data_prep/flow_data/S65E_total.py +57 -57
  5. loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
  6. loone_data_prep/flow_data/get_forecast_flows.py +19 -105
  7. loone_data_prep/flow_data/get_inflows.py +18 -8
  8. loone_data_prep/flow_data/get_outflows.py +16 -7
  9. loone_data_prep/flow_data/hydro.py +62 -91
  10. loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
  11. loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
  12. loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
  13. loone_data_prep/utils.py +262 -32
  14. loone_data_prep/water_level_data/get_all.py +52 -44
  15. loone_data_prep/water_level_data/hydro.py +49 -68
  16. loone_data_prep/water_quality_data/get_inflows.py +69 -27
  17. loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
  18. loone_data_prep/water_quality_data/wq.py +114 -88
  19. loone_data_prep/weather_data/get_all.py +5 -3
  20. loone_data_prep/weather_data/weather.py +117 -180
  21. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
  22. loone_data_prep-1.3.1.dist-info/RECORD +38 -0
  23. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
  24. loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
  25. loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
  26. loone_data_prep-1.2.4.dist-info/RECORD +0 -38
  27. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
  28. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
loone_data_prep/flow_data/get_inflows.py

@@ -45,20 +45,22 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
    Returns:
        dict: Success or error message
    """
+    # Make a copy of the dbkeys dictionary because key value pairs will be removed as they are successfully downloaded
+    dbkeys = dbkeys.copy()

    # Retrieve inflow data
    for dbkey, station in dbkeys.copy().items():
-        file_name = f"{station}_FLOW_cmd.csv"
+        file_name = f"{station.replace(' ', '_')}_FLOW_cmd.csv"
        date_latest = find_last_date_in_csv(workspace, file_name)

        # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
        if date_latest is None:
            # Download all the data
            print(f'Downloading all inflow data for {station}')
-            hydro.get(workspace, dbkey)
+            hydro.get(workspace=workspace, dbkey=dbkey, station=station)
        else:
            # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            if dbhydro_data_is_latest(date_latest, dbkey):
                # Notify that the data is already up to date
                print(f'Downloading of new inflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')
@@ -67,8 +69,15 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
                continue

            # Download only the new data
-            print(f'Downloading new inflow data for {station} starting from date {date_latest}')
-            hydro.get(workspace, dbkey, date_latest)
+            date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+            print(f'Downloading new inflow data for {station} starting from date {date_next}')
+            hydro.get(workspace=workspace, dbkey=dbkey, date_min=date_next, station=station)
+
+            # Check if the station name contains a space
+            if ' ' in station:
+                # Replace space with underscore in the station name
+                station_previous = station
+                station = station.replace(' ', '_')

            # Make sure both our original data and newly downloaded data exist
            df_original_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
@@ -94,7 +103,7 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
        S65E_total.get(workspace, date_max=datetime.now().strftime("%Y-%m-%d"))
    else:
        # Check whether the latest data is already up to date.
-        if dbhydro_data_is_latest(date_latest):
+        if dbhydro_data_is_latest(date_latest, '91656') and dbhydro_data_is_latest(date_latest, 'AL760'):
            # Notify that the data is already up to date
            print(f'Downloading of new inflow data skipped for S65E_total. Data is already up to date.')
        else:
@@ -104,8 +113,9 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:

            try:
                # Download only the new data
-                print(f'Downloading new S65E_total data starting from date {date_latest}')
-                S65E_total.get(workspace, date_min=date_latest, date_max=datetime.now().strftime("%Y-%m-%d"))
+                date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+                print(f'Downloading new S65E_total data starting from date {date_next}')
+                S65E_total.get(workspace, date_min=date_next, date_max=datetime.now().strftime("%Y-%m-%d"))

                # Merge the new data with the original data
                df_original = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
loone_data_prep/flow_data/get_outflows.py

@@ -56,8 +56,8 @@ def _get_outflow_data_from_station_ids(workspace: str, station_ids: list) -> dict:
        dict: Success or error message
    """
    # Get dbkeys from station ids
-    dbkeys = list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF", detail_level="dbkey"))
-    dbkeys.extend(list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV", detail_level="dbkey")))
+    dbkeys = get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF")
+    dbkeys.extend(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV"))

    for dbkey in dbkeys:
        hydro.get(workspace, dbkey, "2000-01-01")
@@ -94,6 +94,8 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
    Returns:
        dict: Success or error message
    """
+    # Make a copy of the dbkeys dictionary because key value pairs will be removed as they are successfully downloaded
+    dbkeys = dbkeys.copy()

    # No dbkeys given, attempt to get data from station ids
    if dbkeys is None:
@@ -102,16 +104,16 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
    # Get outflow data from dbkeys
    for dbkey, station in dbkeys.copy().items():
        # Get the date of the latest data in the csv file (if any)
-        date_latest = find_last_date_in_csv(workspace, f"{station}_FLOW_cmd.csv")
+        date_latest = find_last_date_in_csv(workspace, f"{station.replace(' ', '_')}_FLOW_cmd.csv")

        # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
        if date_latest is None:
            # Download all data
            print(f'Downloading all outflow data for {station}')
-            hydro.get(workspace, dbkey, "2000-01-01")
+            hydro.get(workspace=workspace, dbkey=dbkey, date_min="2000-01-01", station=station)
        else:
            # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            if dbhydro_data_is_latest(date_latest, dbkey):
                # Notify that the data is already up to date
                print(f'Downloading of new outflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')
@@ -120,8 +122,15 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
                continue

            # Download only the new data
-            print(f'Downloading new outflow data for {station} starting from date {date_latest}')
-            hydro.get(workspace, dbkey, date_latest)
+            date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+            print(f'Downloading new outflow data for {station} starting from date {date_next}')
+            hydro.get(workspace=workspace, dbkey=dbkey, date_min=date_next, station=station)
+
+            # Check if the station name contains a space
+            if ' ' in station:
+                # Replace space with underscore in the station name
+                station_previous = station
+                station = station.replace(' ', '_')

            # Make sure both our original data and newly downloaded data exist
            df_old_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
loone_data_prep/flow_data/hydro.py

@@ -1,116 +1,68 @@
import sys
from datetime import datetime
-from glob import glob
from retry import retry
-import os
import pandas as pd
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api


DATE_NOW = datetime.now().strftime("%Y-%m-%d")


-@retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
def get(
    workspace: str,
    dbkey: str,
    date_min: str = "1990-01-01",
-    date_max: str = DATE_NOW
+    date_max: str = DATE_NOW,
+    station: str | None = None
) -> None:
-    r_str = f"""
-    download_flow_data <- function(workspace, dbkey, date_min, date_max)
-    {{
-        # Load the required libraries
-        library(dbhydroR)
-        library(dplyr)
-
-        # Retrieve data for the dbkey
-        data <- get_hydro(dbkey = "{dbkey}", date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-
-        # Check if data is empty or contains only the "date" column
-        if (ncol(data) <= 1) {{
-            cat("No data found for dbkey", "{dbkey}", "Skipping to the next dbkey.\n")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames(data) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow(data) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkey", "{dbkey}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkey}"))
-        }}
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        data$type <- "FLOW"
-        data$units <- "cfs"
-
-        # Get the station
-        station <- data$station[1]
-
-        # Clean the data.frame
-        data <- clean_hydro(data)
-
-        # Multiply all columns except "date" column by 0.0283168466 * 86400 to convert Flow rate from cfs to m³/day
-        data[, -1] <- data[, -1] * (0.0283168466 * 86400)
-
-        # Drop the " _FLOW_cfs" column
-        data <- data %>% select(-` _FLOW_cfs`)
-
-        # Sort the data by date
-        data <- data[order(data$date), ]
-
-        # Get the filename for the output CSV file
-        filename <- paste0(station, "_FLOW", "_{dbkey}_cmd.csv")
-
-        # Save data to a CSV file
-        write.csv(data, file = paste0("{workspace}/", filename))
-
-        # Print a message indicating the file has been saved
-        cat("CSV file", filename, "has been saved.\n")
-
-        # Add a delay between requests
-        Sys.sleep(1) # Wait for 1 second before the next iteration
-
-        # Return the station and dbkey to the python code
-        list(success = TRUE, station = station, dbkey = "{dbkey}")
-    }}
+    """Fetches daily flow data from DBHYDRO and saves it to a CSV file.
+
+    Args:
+        workspace (str): Path to the workspace directory where data will be saved.
+        dbkey (str): The DBHYDRO database key for the station.
+        date_min (str): Minimum date for data retrieval in 'YYYY-MM-DD' format.
+        date_max (str): Maximum date for data retrieval in 'YYYY-MM-DD' format.
+        station (str | None): The station name. If None, the station name will be fetched from DBHYDRO.
    """
-
-    r(r_str)
+    # Get a DbHydroApi instance
+    api = get_dbhydro_api()

-    # Call the R function to download the flow data
-    result = r.download_flow_data(workspace, dbkey, date_min, date_max)
+    # Get the daily data from DbHydro
+    response = api.get_daily_data([dbkey], 'id', date_min, date_max, 'NGVD29', False)

    # Check for failure
-    success = result.rx2("success")[0]
-
-    if not success:
+    if not response.has_data():
        return

-    # Get the station name for _reformat_flow_file()
-    station = result.rx2("station")[0]
+    # Get the station name for _reformat_flow_df()
+    if station is None:
+        station = response.get_site_codes()[0]
+
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)

-    # Reformat the flow data file to the expected layout
-    _reformat_flow_file(workspace, station, dbkey)
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)
+
+    # Convert flow from cfs to cmd
+    df['value'] = df['value'] * (0.0283168466 * 86400)
+
+    # Prepare the dataframe to be reformatted into the expected layout
+    df.reset_index(inplace=True)
+    df.rename(columns={'datetime': 'date', 'value': f'{station}_FLOW_cmd'}, inplace=True)
+
+    # Reformat the flow df to the expected layout
+    df = _reformat_flow_df(df, station)

    # Check if the station name contains a space
-    if " " in station:
+    if ' ' in station:
        # Replace space with underscore in the station name
        station_previous = station
-        station = station.replace(" ", "_")
-
-    # Rename the file
-    os.rename(f"{workspace}/{station_previous}_FLOW_{dbkey}_cmd.csv", f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+        station = station.replace(' ', '_')

-    # column values are converted to cmd in R. This snippet makes sure column names are updated accordingly.
-    file = glob(f'{workspace}/*FLOW*{dbkey}_cmd.csv')[0]
-    df = pd.read_csv(file, index_col=False)
-    df.columns = df.columns.astype(str).str.replace("_cfs", "_cmd")
-    df.to_csv(file, index=False)
+    # Write the data to a CSV file
+    df.to_csv(f'{workspace}/{station}_FLOW_{dbkey}_cmd.csv', index=True)


def _reformat_flow_file(workspace:str, station: str, dbkey: str):
@@ -130,8 +82,27 @@ def _reformat_flow_file(workspace:str, station: str, dbkey: str):
    # Read in the data
    df = pd.read_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")

+    # Reformat the data
+    df = _reformat_flow_df(df, station)
+
+    # Write the updated data back to the file
+    df.to_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+
+
+def _reformat_flow_df(df: pd.DataFrame, station: str) -> pd.DataFrame:
+    '''
+    Reformat the flow data file to the expected layout.
+    Converts the format of the dates in the file to 'YYYY-MM-DD' then sorts the data by date.
+
+    Args:
+        df (pd.DataFrame): The dataframe containing the flow data.
+        station (str): The station name.
+
+    Returns:
+        pd.DataFrame: The reformatted dataframe.
+    '''
    # Grab only the columns we need
-    df = df[['date', f'{station}_FLOW_cfs']]
+    df = df[['date', f'{station}_FLOW_cmd']].copy()

    # Convert date column to datetime
    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
@@ -143,10 +114,10 @@ def _reformat_flow_file(workspace:str, station: str, dbkey: str):
    df.reset_index(drop=True, inplace=True)

    # Drop rows that are missing values for both the date and value columns
-    df = df.drop(df[(df['date'].isna()) & (df[f'{station}_FLOW_cfs'].isna())].index)
+    df = df.drop(df[(df['date'].isna()) & (df[f'{station}_FLOW_cmd'].isna())].index)

-    # Write the updated data back to the file
-    df.to_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+    # Return the updated dataframe
+    return df


if __name__ == "__main__":
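For reference, the factor 0.0283168466 * 86400 used in get() converts a mean flow in cubic feet per second to cubic meters per day (1 cfs = 0.0283168466 m³/s, so roughly 2446.58 m³/day). A minimal standalone sketch of that conversion, with an illustrative column name and sample values:

import pandas as pd

# Illustrative daily-mean flows in cubic feet per second (cfs)
flows = pd.DataFrame({"date": ["2025-01-01", "2025-01-02"], "value": [100.0, 250.0]})

# 1 cfs = 0.0283168466 m^3/s and there are 86400 seconds in a day,
# so multiplying by 0.0283168466 * 86400 (about 2446.58) yields m^3/day ("cmd").
CFS_TO_CMD = 0.0283168466 * 86400
flows["value"] = flows["value"] * CFS_TO_CMD
print(flows)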
loone_data_prep/forecast_scripts/get_Chla_predicted.py

@@ -9,7 +9,7 @@ def get_Chla_predicted(input_dir, output_dir):
        output_dir: Directory where the output files will be saved.
    """
    # Read forecast inflow file and get overall date range
-    # TODO: Should this be an average/median of all of the ensembles? worst case?
+    # We are only taking the dates, so it is okay to just use one ensemble because they all have the same dates
    Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
    Q_in['date'] = pd.to_datetime(Q_in['date'])
    date_start = Q_in['date'].min()
loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py

@@ -9,7 +9,7 @@ def get_NO_Loads_predicted(input_dir, output_dir):
        output_dir: Directory where the output files will be saved.
    This function reads the forecast inflow file, retrieves nitrate data for specified stations,
    """
-    # TODO: Should this be an average/median of all of the ensembles? worst case?
+    # It is okay to use just one ensemble because they all have the same dates and we only use the dates
    Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))

    datetime_str = Q_in['date'].iloc[0]
loone_data_prep/forecast_scripts/new_combined_weather_forecast.py (new file)

@@ -0,0 +1,220 @@
+import os
+import warnings
+import pandas as pd
+from datetime import datetime
+from retry import retry
+from loone_data_prep.herbie_utils import get_fast_herbie_object
+from herbie import FastHerbie
+import openmeteo_requests
+from retry_requests import retry as retry_requests
+import requests_cache
+
+warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+
+POINTS = pd.DataFrame({
+    "station": ["L001", "L005", "L006", "LZ40"],
+    "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
+    "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
+})
+
+WIND_FILE_MAP = {
+    "L001": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
+    "L005": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
+    "L006": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
+    "LZ40": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
+}
+
+AIRT_FILE_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius_forecast.csv",
+    "L005": "L005_AIRT_Degrees Celsius_forecast.csv",
+    "L006": "L006_AIRT_Degrees Celsius_forecast.csv",
+    "LZ40": "LZ40_AIRT_Degrees Celsius_forecast.csv"
+}
+
+AIRT_COLUMN_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius",
+    "L005": "L005_AIRT_Degrees Celsius",
+    "L006": "L006_AIRT_Degrees Celsius",
+    "LZ40": "LZ40_AIRT_Degrees Celsius"
+}
+
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+def download_herbie_variable(FH, variable_key, variable_name, point_df):
+    """Download a Herbie variable for a given point and return a DataFrame."""
+    FH.download(f":{variable_key}")
+    ds = FH.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
+    dsi = ds.herbie.pick_points(point_df, method="nearest")
+
+    var_name = {
+        "10u": "u10",
+        "10v": "v10",
+        "2t": "t2m"
+    }.get(variable_name, variable_name)
+
+    ts = dsi[var_name].squeeze()
+    df = ts.to_dataframe().reset_index()
+    if "valid_time" in df.columns:
+        df.rename(columns={"valid_time": "datetime"}, inplace=True)
+    elif "time" in df.columns:
+        df.rename(columns={"time": "datetime"}, inplace=True)
+
+    df = df[["datetime", var_name]].drop_duplicates()
+    ds.close()
+    dsi.close()
+    del ds, dsi, ts
+    return df
+
+# Download ET from Open-Meteo
+def download_hourly_et(lat, lon):
+    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
+    retry_session = retry_requests(cache_session, retries=5, backoff_factor=0.2)
+    client = openmeteo_requests.Client(session=retry_session)
+
+    url = "https://api.open-meteo.com/v1/forecast"
+    params = {
+        "latitude": lat,
+        "longitude": lon,
+        "hourly": "evapotranspiration",
+        "forecast_days": 16,
+        "models": "gfs_seamless"
+    }
+    responses = client.weather_api(url, params=params)
+    response = responses[0]
+
+    hourly = response.Hourly()
+    hourly_evap = hourly.Variables(0).ValuesAsNumpy()
+    hourly_data = {"date": pd.date_range(
+        start=pd.to_datetime(hourly.Time(), unit="s"),
+        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
+        freq=pd.Timedelta(seconds=hourly.Interval()),
+        inclusive="left"
+    )}
+    hourly_data["evapotranspiration"] = hourly_evap
+    return pd.DataFrame(hourly_data)
+
+# Main generation function
+def generate_all_outputs(output_dir):
+    os.makedirs(output_dir, exist_ok=True)
+    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+    FH = get_fast_herbie_object(today_str)
+
+    # Forecasted weather data (single point)
+    point_df = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
+    forecast_vars = ["10u", "10v", "2t", "tp", "ssrd"]
+    data = {var: download_herbie_variable(FH, var, var, point_df) for var in forecast_vars}
+
+    merged = data["10u"].merge(data["10v"], on="datetime")
+    merged = merged.merge(data["2t"], on="datetime")
+    merged = merged.merge(data["tp"], on="datetime")
+    merged = merged.merge(data["ssrd"], on="datetime")
+
+    # Derived columns
+    merged["wind_speed"] = (merged["u10"]**2 + merged["v10"]**2)**0.5  # wind speed in m/s
+    merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
+    merged["tp_inc_m"] = merged["tp"].diff().clip(lower=0)
+    # Convert incremental meters → mm
+    merged["tp_inc_mm"] = merged["tp_inc_m"] * 1000.0
+    # Apply bias correction (in mm)
+    merged["tp_corrected_mm"] = 0.7247 * merged["tp_inc_mm"] + 0.1853
+    # convert to inches
+    merged["tp_corrected"] = merged["tp_corrected_mm"] * 0.0393701
+
+    merged["ssrd_kwm2"] = merged["ssrd"].diff() / merged["datetime"].diff().dt.total_seconds() / 1000
+    merged["ssrd_corrected"] = (1.0530 * merged["ssrd_kwm2"] - 0.0347).clip(lower=0)
+    merged = merged[[
+        "datetime",
+        "wind_speed_corrected",
+        "tp_corrected",
+        "ssrd_corrected"
+    ]]
+
+    # ET for main point
+    df_et = download_hourly_et(26.9690, -80.7976)
+    merged = merged.merge(df_et, left_on="datetime", right_on="date", how="left").drop(columns=["date"])
+    merged.to_csv(os.path.join(output_dir, "forecasted_weather_data.csv"), index=False)
+
+    # 4-point wind and air temp CSVs
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Wind
+        df_u = download_herbie_variable(FH, "10u", "10u", point_df)
+        df_v = download_herbie_variable(FH, "10v", "10v", point_df)
+        merged_ws = df_u.merge(df_v, on="datetime")
+        merged_ws["wind_speed"] = (merged_ws["u10"]**2 + merged_ws["v10"]**2)**0.5
+        merged_ws["wind_speed_corrected"] = 0.4167 * merged_ws["wind_speed"] + 4.1868
+
+        filename, new_col = WIND_FILE_MAP[station]
+        merged_ws[["datetime", "wind_speed_corrected"]].rename(
+            columns={"datetime": "date", "wind_speed_corrected": new_col}
+        ).to_csv(os.path.join(output_dir, filename), index=False)
+
+        # Air temp
+        df_t = download_herbie_variable(FH, "2t", "2t", point_df)
+        df_t["t2m"] = df_t["t2m"] - 273.15
+        df_t.rename(columns={"datetime": "date", "t2m": AIRT_COLUMN_MAP[station]}).to_csv(
+            os.path.join(output_dir, AIRT_FILE_MAP[station]), index=False
+        )
+
+    # Rainfall, ET, and SSRD 4-point CSVs
+    rainfall_dfs, et_dfs, ssrd_dfs = [], [], []
+
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Rainfall
+        df_tp = download_herbie_variable(FH, "tp", "tp", point_df)
+        # Convert cumulative meters → incremental meters
+        df_tp["tp_inc_m"] = df_tp["tp"].diff().clip(lower=0)
+        # Convert incremental meters → millimeters
+        df_tp["tp_inc_mm"] = df_tp["tp_inc_m"] * 1000.0
+        df_tp["date_only"] = df_tp["datetime"].dt.date
+        # Sum incremental precipitation per day
+        df_daily = df_tp.groupby("date_only")["tp_inc_mm"].sum().reset_index()
+        # Apply bias correction on daily totals (in mm)
+        df_daily["tp_corrected_mm"] = 0.7247 * df_daily["tp_inc_mm"] + 0.1853
+        # Convert corrected mm → inches
+        df_daily["tp_corrected_in"] = df_daily["tp_corrected_mm"] * 0.0393701
+        df_daily = df_daily.rename(columns={"date_only": "date", "tp_corrected_in": station})
+        rainfall_dfs.append(df_daily[["date", station]])
+
+        # ET
+        df_et_point = download_hourly_et(row.latitude, row.longitude)
+        df_et_point.rename(columns={"evapotranspiration": station}, inplace=True)
+        et_dfs.append(df_et_point)
+
+        # SSRD
+        df_ssrd = download_herbie_variable(FH, "ssrd", "ssrd", point_df)
+        df_ssrd["ssrd_kwm2"] = df_ssrd["ssrd"].diff() / df_ssrd["datetime"].diff().dt.total_seconds() / 1000
+        df_ssrd["ssrd_corrected"] = (1.0530 * df_ssrd["ssrd_kwm2"] - 0.0347).clip(lower=0)
+        df_ssrd = df_ssrd[["datetime", "ssrd_corrected"]].rename(columns={"datetime": "date", "ssrd_corrected": station})
+        ssrd_dfs.append(df_ssrd)
+
+    # Merge rainfall
+    rainfall_df = pd.concat(rainfall_dfs, axis=0).groupby("date").first().reset_index()
+    rainfall_df["average_rainfall"] = rainfall_df[POINTS["station"]].mean(axis=1)
+    rainfall_df.to_csv(os.path.join(output_dir, "LAKE_RAINFALL_DATA_FORECAST.csv"), index=False)
+
+    # Merge ET
+    et_df_all = pd.concat(et_dfs, axis=0).groupby("date").first().reset_index()
+    et_df_all["average_ETPI"] = et_df_all[POINTS["station"]].mean(axis=1)
+    et_df_all.to_csv(os.path.join(output_dir, "LOONE_AVERAGE_ETPI_DATA_FORECAST.csv"), index=False)
+
+    # Combine all SSRD DataFrames
+    ssrd_df_all = pd.concat(ssrd_dfs, axis=0)
+    ssrd_df_all["date"] = pd.to_datetime(ssrd_df_all["date"])
+
+    # Compute the daily mean for each station
+    daily_ssrd = (
+        ssrd_df_all.groupby(ssrd_df_all["date"].dt.date)[POINTS["station"]]
+        .mean()
+        .reset_index()
+    )
+
+    daily_ssrd = daily_ssrd.rename(columns={"date": "date"})
+    daily_ssrd["Mean_RADT"] = daily_ssrd[POINTS["station"]].mean(axis=1)
+    daily_ssrd.to_csv(os.path.join(output_dir, "LO_RADT_data_forecast.csv"), index=False)
+
+    print("All outputs generated successfully.")
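The new module's entry point is generate_all_outputs(output_dir), which fetches forecast fields via Herbie and Open-Meteo, applies the bias corrections shown above, and writes the combined and per-station forecast CSVs. A minimal usage sketch (the output directory path is illustrative):

from loone_data_prep.forecast_scripts.new_combined_weather_forecast import generate_all_outputs

# Writes forecasted_weather_data.csv, the per-station wind and air-temperature CSVs,
# and the rainfall/ET/solar-radiation forecast files into the given directory.
generate_all_outputs("/path/to/forecast_workspace")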