loone_data_prep-1.2.4-py3-none-any.whl → loone_data_prep-1.3.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
- loone_data_prep/LOONE_DATA_PREP.py +0 -1
- loone_data_prep/dbhydro_insights.py +195 -0
- loone_data_prep/flow_data/S65E_total.py +57 -57
- loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
- loone_data_prep/flow_data/get_forecast_flows.py +19 -105
- loone_data_prep/flow_data/get_inflows.py +18 -8
- loone_data_prep/flow_data/get_outflows.py +16 -7
- loone_data_prep/flow_data/hydro.py +62 -91
- loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
- loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
- loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
- loone_data_prep/utils.py +262 -32
- loone_data_prep/water_level_data/get_all.py +52 -44
- loone_data_prep/water_level_data/hydro.py +49 -68
- loone_data_prep/water_quality_data/get_inflows.py +69 -27
- loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
- loone_data_prep/water_quality_data/wq.py +114 -88
- loone_data_prep/weather_data/get_all.py +5 -3
- loone_data_prep/weather_data/weather.py +117 -180
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
- loone_data_prep-1.3.1.dist-info/RECORD +38 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
- loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
- loone_data_prep-1.2.4.dist-info/RECORD +0 -38
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
loone_data_prep/flow_data/get_inflows.py

```diff
@@ -45,20 +45,22 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
     Returns:
         dict: Success or error message
     """
+    # Make a copy of the dbkeys dictionary because key value pairs will be removed as they are successfully downloaded
+    dbkeys = dbkeys.copy()
 
     # Retrieve inflow data
     for dbkey, station in dbkeys.copy().items():
-        file_name = f"{station}_FLOW_cmd.csv"
+        file_name = f"{station.replace(' ', '_')}_FLOW_cmd.csv"
         date_latest = find_last_date_in_csv(workspace, file_name)
 
         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
         if date_latest is None:
             # Download all the data
             print(f'Downloading all inflow data for {station}')
-            hydro.get(workspace, dbkey)
+            hydro.get(workspace=workspace, dbkey=dbkey, station=station)
         else:
             # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            if dbhydro_data_is_latest(date_latest, dbkey):
                 # Notify that the data is already up to date
                 print(f'Downloading of new inflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')
 
@@ -67,8 +69,15 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
                 continue
 
             # Download only the new data
-
-
+            date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+            print(f'Downloading new inflow data for {station} starting from date {date_next}')
+            hydro.get(workspace=workspace, dbkey=dbkey, date_min=date_next, station=station)
+
+            # Check if the station name contains a space
+            if ' ' in station:
+                # Replace space with underscore in the station name
+                station_previous = station
+                station = station.replace(' ', '_')
 
             # Make sure both our original data and newly downloaded data exist
             df_original_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
@@ -94,7 +103,7 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
         S65E_total.get(workspace, date_max=datetime.now().strftime("%Y-%m-%d"))
     else:
         # Check whether the latest data is already up to date.
-        if dbhydro_data_is_latest(date_latest):
+        if dbhydro_data_is_latest(date_latest, '91656') and dbhydro_data_is_latest(date_latest, 'AL760'):
             # Notify that the data is already up to date
             print(f'Downloading of new inflow data skipped for S65E_total. Data is already up to date.')
         else:
@@ -104,8 +113,9 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
 
             try:
                 # Download only the new data
-
-
+                date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+                print(f'Downloading new S65E_total data starting from date {date_next}')
+                S65E_total.get(workspace, date_min=date_next, date_max=datetime.now().strftime("%Y-%m-%d"))
 
                 # Merge the new data with the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
```
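The incremental-download logic added to get_inflows.py (and mirrored in get_outflows.py below) resumes one day after the last date already stored in the station's CSV. A minimal sketch of that date arithmetic, assuming the last stored date has already been read from the file (the package obtains it via find_last_date_in_csv; the helper name here is hypothetical):

```python
import pandas as pd

def next_download_date(date_latest: str) -> str:
    """Day after the last date already on disk, formatted YYYY-MM-DD (hypothetical helper)."""
    return (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")

# If the CSV already ends on 2024-05-31, downloading resumes from 2024-06-01.
print(next_download_date("2024-05-31"))  # -> 2024-06-01
```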
loone_data_prep/flow_data/get_outflows.py

```diff
@@ -56,8 +56,8 @@ def _get_outflow_data_from_station_ids(workspace: str, station_ids: list) -> dict:
         dict: Success or error message
     """
     # Get dbkeys from station ids
-    dbkeys =
-    dbkeys.extend(
+    dbkeys = get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF")
+    dbkeys.extend(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV"))
 
     for dbkey in dbkeys:
         hydro.get(workspace, dbkey, "2000-01-01")
@@ -94,6 +94,8 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
     Returns:
         dict: Success or error message
     """
+    # Make a copy of the dbkeys dictionary because key value pairs will be removed as they are successfully downloaded
+    dbkeys = dbkeys.copy()
 
     # No dbkeys given, attempt to get data from station ids
     if dbkeys is None:
@@ -102,16 +104,16 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
     # Get outflow data from dbkeys
     for dbkey, station in dbkeys.copy().items():
         # Get the date of the latest data in the csv file (if any)
-        date_latest = find_last_date_in_csv(workspace, f"{station}_FLOW_cmd.csv")
+        date_latest = find_last_date_in_csv(workspace, f"{station.replace(' ', '_')}_FLOW_cmd.csv")
 
         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
         if date_latest is None:
             # Download all data
             print(f'Downloading all outflow data for {station}')
-            hydro.get(workspace, dbkey, "2000-01-01")
+            hydro.get(workspace=workspace, dbkey=dbkey, date_min="2000-01-01", station=station)
         else:
             # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            if dbhydro_data_is_latest(date_latest, dbkey):
                 # Notify that the data is already up to date
                 print(f'Downloading of new outflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')
 
@@ -120,8 +122,15 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
                 continue
 
             # Download only the new data
-
-
+            date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+            print(f'Downloading new outflow data for {station} starting from date {date_next}')
+            hydro.get(workspace=workspace, dbkey=dbkey, date_min=date_next, station=station)
+
+            # Check if the station name contains a space
+            if ' ' in station:
+                # Replace space with underscore in the station name
+                station_previous = station
+                station = station.replace(' ', '_')
 
             # Make sure both our original data and newly downloaded data exist
             df_old_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
```
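Several of the changes above normalize station names before building file names, replacing spaces with underscores so the CSV written for a station and the CSV looked up later agree. A small sketch of that normalization (the helper name and the second example station are hypothetical, used only for illustration):

```python
def flow_csv_name(station: str) -> str:
    """Build the <station>_FLOW_cmd.csv file name, replacing spaces with underscores."""
    return f"{station.replace(' ', '_')}_FLOW_cmd.csv"

print(flow_csv_name("S65E"))             # no space -> S65E_FLOW_cmd.csv
print(flow_csv_name("EXAMPLE STATION"))  # hypothetical name -> EXAMPLE_STATION_FLOW_cmd.csv
```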
loone_data_prep/flow_data/hydro.py

```diff
@@ -1,116 +1,68 @@
 import sys
 from datetime import datetime
-from glob import glob
 from retry import retry
-import os
 import pandas as pd
-from
-from rpy2.rinterface_lib.embedded import RRuntimeError
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
 
 
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")
 
 
-@retry(
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     dbkey: str,
     date_min: str = "1990-01-01",
-    date_max: str = DATE_NOW
+    date_max: str = DATE_NOW,
+    station: str | None = None
 ) -> None:
-
-
-
-
-
-
-
-
-    data <- get_hydro(dbkey = "{dbkey}", date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-
-    # Check if data is empty or contains only the "date" column
-    if (ncol(data) <= 1) {{
-        cat("No data found for dbkey", "{dbkey}", "Skipping to the next dbkey.\n")
-    }}
-
-    # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-    colnames(data) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-    # Check if the data.frame has any rows
-    if (nrow(data) == 0)
-    {{
-        # No data given back, It's possible that the dbkey has reached its end date.
-        print(paste("Empty data.frame returned for dbkey", "{dbkey}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-        return(list(success = FALSE, dbkey = "{dbkey}"))
-    }}
-
-    # Add a type and units column to data so it can be cleaned using the clean_hydro function
-    data$type <- "FLOW"
-    data$units <- "cfs"
-
-    # Get the station
-    station <- data$station[1]
-
-    # Clean the data.frame
-    data <- clean_hydro(data)
-
-    # Multiply all columns except "date" column by 0.0283168466 * 86400 to convert Flow rate from cfs to m³/day
-    data[, -1] <- data[, -1] * (0.0283168466 * 86400)
-
-    # Drop the " _FLOW_cfs" column
-    data <- data %>% select(-` _FLOW_cfs`)
-
-    # Sort the data by date
-    data <- data[order(data$date), ]
-
-    # Get the filename for the output CSV file
-    filename <- paste0(station, "_FLOW", "_{dbkey}_cmd.csv")
-
-    # Save data to a CSV file
-    write.csv(data, file = paste0("{workspace}/", filename))
-
-    # Print a message indicating the file has been saved
-    cat("CSV file", filename, "has been saved.\n")
-
-    # Add a delay between requests
-    Sys.sleep(1) # Wait for 1 second before the next iteration
-
-    # Return the station and dbkey to the python code
-    list(success = TRUE, station = station, dbkey = "{dbkey}")
-}}
+    """Fetches daily flow data from DBHYDRO and saves it to a CSV file.
+
+    Args:
+        workspace (str): Path to the workspace directory where data will be saved.
+        dbkey (str): The DBHYDRO database key for the station.
+        date_min (str): Minimum date for data retrieval in 'YYYY-MM-DD' format.
+        date_max (str): Maximum date for data retrieval in 'YYYY-MM-DD' format.
+        station (str | None): The station name. If None, the station name will be fetched from DBHYDRO.
     """
-
-
+    # Get a DbHydroApi instance
+    api = get_dbhydro_api()
 
-    #
-
+    # Get the daily data from DbHydro
+    response = api.get_daily_data([dbkey], 'id', date_min, date_max, 'NGVD29', False)
 
     # Check for failure
-
-
-    if not success:
+    if not response.has_data():
         return
 
-    # Get the station name for
-    station
+    # Get the station name for _reformat_flow_df()
+    if station is None:
+        station = response.get_site_codes()[0]
+
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)
 
-    #
-
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)
+
+    # Convert flow from cfs to cmd
+    df['value'] = df['value'] * (0.0283168466 * 86400)
+
+    # Prepare the dataframe to be reformatted into the expected layout
+    df.reset_index(inplace=True)
+    df.rename(columns={'datetime': 'date', 'value': f'{station}_FLOW_cmd'}, inplace=True)
+
+    # Reformat the flow df to the expected layout
+    df = _reformat_flow_df(df, station)
 
     # Check if the station name contains a space
-    if
+    if ' ' in station:
         # Replace space with underscore in the station name
         station_previous = station
-        station = station.replace(
-
-        # Rename the file
-        os.rename(f"{workspace}/{station_previous}_FLOW_{dbkey}_cmd.csv", f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+        station = station.replace(' ', '_')
 
-    #
-
-    df = pd.read_csv(file, index_col=False)
-    df.columns = df.columns.astype(str).str.replace("_cfs", "_cmd")
-    df.to_csv(file, index=False)
+    # Write the data to a CSV file
+    df.to_csv(f'{workspace}/{station}_FLOW_{dbkey}_cmd.csv', index=True)
 
 
 def _reformat_flow_file(workspace:str, station: str, dbkey: str):
```
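The rewritten get() keeps the same unit conversion the old R-based code applied: flow in cubic feet per second is multiplied by 0.0283168466 * 86400 to give cubic metres per day. A standalone illustration of that factor (values are examples only):

```python
import pandas as pd

CFS_TO_CMD = 0.0283168466 * 86400  # m³ per ft³ times seconds per day ≈ 2446.58

flow_cfs = pd.Series([100.0, 250.0])   # illustrative flows in cfs
flow_cmd = flow_cfs * CFS_TO_CMD       # the same flows in m³/day
print(flow_cmd.round(1).tolist())      # ≈ [244657.6, 611643.9]
```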
loone_data_prep/flow_data/hydro.py (continued)

```diff
@@ -130,8 +82,27 @@ def _reformat_flow_file(workspace:str, station: str, dbkey: str):
     # Read in the data
     df = pd.read_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
 
+    # Reformat the data
+    df = _reformat_flow_df(df, station)
+
+    # Write the updated data back to the file
+    df.to_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+
+
+def _reformat_flow_df(df: pd.DataFrame, station: str) -> pd.DataFrame:
+    '''
+    Reformat the flow data file to the expected layout.
+    Converts the format of the dates in the file to 'YYYY-MM-DD' then sorts the data by date.
+
+    Args:
+        df (pd.DataFrame): The dataframe containing the flow data.
+        station (str): The station name.
+
+    Returns:
+        pd.DataFrame: The reformatted dataframe.
+    '''
     # Grab only the columns we need
-    df = df[['date', f'{station}
+    df = df[['date', f'{station}_FLOW_cmd']].copy()
 
     # Convert date column to datetime
     df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
@@ -143,10 +114,10 @@ def _reformat_flow_file(workspace:str, station: str, dbkey: str):
     df.reset_index(drop=True, inplace=True)
 
     # Drop rows that are missing values for both the date and value columns
-    df = df.drop(df[(df['date'].isna()) & (df[f'{station}
+    df = df.drop(df[(df['date'].isna()) & (df[f'{station}_FLOW_cmd'].isna())].index)
 
-    #
-    df
+    # Return the updated dataframe
+    return df
 
 
 if __name__ == "__main__":
```
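The new _reformat_flow_df() helper normalizes DBHYDRO's 'DD-Mon-YYYY' date strings and sorts the frame chronologically. A toy illustration of the same pandas pattern (column name and values chosen only for the example):

```python
import pandas as pd

df = pd.DataFrame({
    "date": ["02-Jan-2024", "01-Jan-2024"],
    "S65E_FLOW_cmd": [1000.0, 900.0],   # illustrative values
})

# Parse DD-Mon-YYYY dates, sort by date, and re-emit them as YYYY-MM-DD strings.
df["date"] = pd.to_datetime(df["date"], format="%d-%b-%Y")
df = df.sort_values("date").reset_index(drop=True)
df["date"] = df["date"].dt.strftime("%Y-%m-%d")
print(df)
```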
loone_data_prep/forecast_scripts/get_Chla_predicted.py

```diff
@@ -9,7 +9,7 @@ def get_Chla_predicted(input_dir, output_dir):
         output_dir: Directory where the output files will be saved.
     """
     # Read forecast inflow file and get overall date range
-    #
+    # We are only taking the dates, so it is okay to just use one ensemble because they all have the same dates
     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
     Q_in['date'] = pd.to_datetime(Q_in['date'])
     date_start = Q_in['date'].min()
```
loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py

```diff
@@ -9,7 +9,7 @@ def get_NO_Loads_predicted(input_dir, output_dir):
         output_dir: Directory where the output files will be saved.
     This function reads the forecast inflow file, retrieves nitrate data for specified stations,
     """
-    #
+    # It is okay to use just one ensemble because they all have the same dates and we only use the dates
     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
 
     datetime_str = Q_in['date'].iloc[0]
```
loone_data_prep/forecast_scripts/new_combined_weather_forecast.py (new file)

```diff
@@ -0,0 +1,220 @@
+import os
+import warnings
+import pandas as pd
+from datetime import datetime
+from retry import retry
+from loone_data_prep.herbie_utils import get_fast_herbie_object
+from herbie import FastHerbie
+import openmeteo_requests
+from retry_requests import retry as retry_requests
+import requests_cache
+
+warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+
+POINTS = pd.DataFrame({
+    "station": ["L001", "L005", "L006", "LZ40"],
+    "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
+    "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
+})
+
+WIND_FILE_MAP = {
+    "L001": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
+    "L005": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
+    "L006": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
+    "LZ40": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
+}
+
+AIRT_FILE_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius_forecast.csv",
+    "L005": "L005_AIRT_Degrees Celsius_forecast.csv",
+    "L006": "L006_AIRT_Degrees Celsius_forecast.csv",
+    "LZ40": "LZ40_AIRT_Degrees Celsius_forecast.csv"
+}
+
+AIRT_COLUMN_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius",
+    "L005": "L005_AIRT_Degrees Celsius",
+    "L006": "L006_AIRT_Degrees Celsius",
+    "LZ40": "LZ40_AIRT_Degrees Celsius"
+}
+
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+def download_herbie_variable(FH, variable_key, variable_name, point_df):
+    """Download a Herbie variable for a given point and return a DataFrame."""
+    FH.download(f":{variable_key}")
+    ds = FH.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
+    dsi = ds.herbie.pick_points(point_df, method="nearest")
+
+    var_name = {
+        "10u": "u10",
+        "10v": "v10",
+        "2t": "t2m"
+    }.get(variable_name, variable_name)
+
+    ts = dsi[var_name].squeeze()
+    df = ts.to_dataframe().reset_index()
+    if "valid_time" in df.columns:
+        df.rename(columns={"valid_time": "datetime"}, inplace=True)
+    elif "time" in df.columns:
+        df.rename(columns={"time": "datetime"}, inplace=True)
+
+    df = df[["datetime", var_name]].drop_duplicates()
+    ds.close()
+    dsi.close()
+    del ds, dsi, ts
+    return df
+
+# Download ET from Open-Meteo
+def download_hourly_et(lat, lon):
+    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
+    retry_session = retry_requests(cache_session, retries=5, backoff_factor=0.2)
+    client = openmeteo_requests.Client(session=retry_session)
+
+    url = "https://api.open-meteo.com/v1/forecast"
+    params = {
+        "latitude": lat,
+        "longitude": lon,
+        "hourly": "evapotranspiration",
+        "forecast_days": 16,
+        "models": "gfs_seamless"
+    }
+    responses = client.weather_api(url, params=params)
+    response = responses[0]
+
+    hourly = response.Hourly()
+    hourly_evap = hourly.Variables(0).ValuesAsNumpy()
+    hourly_data = {"date": pd.date_range(
+        start=pd.to_datetime(hourly.Time(), unit="s"),
+        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
+        freq=pd.Timedelta(seconds=hourly.Interval()),
+        inclusive="left"
+    )}
+    hourly_data["evapotranspiration"] = hourly_evap
+    return pd.DataFrame(hourly_data)
+
+# Main generation function
+def generate_all_outputs(output_dir):
+    os.makedirs(output_dir, exist_ok=True)
+    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+    FH = get_fast_herbie_object(today_str)
+
+    # Forecasted weather data (single point)
+    point_df = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
+    forecast_vars = ["10u", "10v", "2t", "tp", "ssrd"]
+    data = {var: download_herbie_variable(FH, var, var, point_df) for var in forecast_vars}
+
+    merged = data["10u"].merge(data["10v"], on="datetime")
+    merged = merged.merge(data["2t"], on="datetime")
+    merged = merged.merge(data["tp"], on="datetime")
+    merged = merged.merge(data["ssrd"], on="datetime")
+
+    # Derived columns
+    merged["wind_speed"] = (merged["u10"]**2 + merged["v10"]**2)**0.5  # wind speed in m/s
+    merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
+    merged["tp_inc_m"] = merged["tp"].diff().clip(lower=0)
+    # Convert incremental meters → mm
+    merged["tp_inc_mm"] = merged["tp_inc_m"] * 1000.0
+    # Apply bias correction (in mm)
+    merged["tp_corrected_mm"] = 0.7247 * merged["tp_inc_mm"] + 0.1853
+    # convert to inches
+    merged["tp_corrected"] = merged["tp_corrected_mm"] * 0.0393701
+
+    merged["ssrd_kwm2"] = merged["ssrd"].diff() / merged["datetime"].diff().dt.total_seconds() / 1000
+    merged["ssrd_corrected"] = (1.0530 * merged["ssrd_kwm2"] - 0.0347).clip(lower=0)
+    merged = merged[[
+        "datetime",
+        "wind_speed_corrected",
+        "tp_corrected",
+        "ssrd_corrected"
+    ]]
+
+    # ET for main point
+    df_et = download_hourly_et(26.9690, -80.7976)
+    merged = merged.merge(df_et, left_on="datetime", right_on="date", how="left").drop(columns=["date"])
+    merged.to_csv(os.path.join(output_dir, "forecasted_weather_data.csv"), index=False)
+
+    # 4-point wind and air temp CSVs
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Wind
+        df_u = download_herbie_variable(FH, "10u", "10u", point_df)
+        df_v = download_herbie_variable(FH, "10v", "10v", point_df)
+        merged_ws = df_u.merge(df_v, on="datetime")
+        merged_ws["wind_speed"] = (merged_ws["u10"]**2 + merged_ws["v10"]**2)**0.5
+        merged_ws["wind_speed_corrected"] = 0.4167 * merged_ws["wind_speed"] + 4.1868
+
+        filename, new_col = WIND_FILE_MAP[station]
+        merged_ws[["datetime", "wind_speed_corrected"]].rename(
+            columns={"datetime": "date", "wind_speed_corrected": new_col}
+        ).to_csv(os.path.join(output_dir, filename), index=False)
+
+        # Air temp
+        df_t = download_herbie_variable(FH, "2t", "2t", point_df)
+        df_t["t2m"] = df_t["t2m"] - 273.15
+        df_t.rename(columns={"datetime": "date", "t2m": AIRT_COLUMN_MAP[station]}).to_csv(
+            os.path.join(output_dir, AIRT_FILE_MAP[station]), index=False
+        )
+
+    # Rainfall, ET, and SSRD 4-point CSVs
+    rainfall_dfs, et_dfs, ssrd_dfs = [], [], []
+
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Rainfall
+        df_tp = download_herbie_variable(FH, "tp", "tp", point_df)
+        # Convert cumulative meters → incremental meters
+        df_tp["tp_inc_m"] = df_tp["tp"].diff().clip(lower=0)
+        # Convert incremental meters → millimeters
+        df_tp["tp_inc_mm"] = df_tp["tp_inc_m"] * 1000.0
+        df_tp["date_only"] = df_tp["datetime"].dt.date
+        # Sum incremental precipitation per day
+        df_daily = df_tp.groupby("date_only")["tp_inc_mm"].sum().reset_index()
+        # Apply bias correction on daily totals (in mm)
+        df_daily["tp_corrected_mm"] = 0.7247 * df_daily["tp_inc_mm"] + 0.1853
+        # Convert corrected mm → inches
+        df_daily["tp_corrected_in"] = df_daily["tp_corrected_mm"] * 0.0393701
+        df_daily = df_daily.rename(columns={"date_only": "date", "tp_corrected_in": station})
+        rainfall_dfs.append(df_daily[["date", station]])
+
+        # ET
+        df_et_point = download_hourly_et(row.latitude, row.longitude)
+        df_et_point.rename(columns={"evapotranspiration": station}, inplace=True)
+        et_dfs.append(df_et_point)
+
+        # SSRD
+        df_ssrd = download_herbie_variable(FH, "ssrd", "ssrd", point_df)
+        df_ssrd["ssrd_kwm2"] = df_ssrd["ssrd"].diff() / df_ssrd["datetime"].diff().dt.total_seconds() / 1000
+        df_ssrd["ssrd_corrected"] = (1.0530 * df_ssrd["ssrd_kwm2"] - 0.0347).clip(lower=0)
+        df_ssrd = df_ssrd[["datetime", "ssrd_corrected"]].rename(columns={"datetime": "date", "ssrd_corrected": station})
+        ssrd_dfs.append(df_ssrd)
+
+    # Merge rainfall
+    rainfall_df = pd.concat(rainfall_dfs, axis=0).groupby("date").first().reset_index()
+    rainfall_df["average_rainfall"] = rainfall_df[POINTS["station"]].mean(axis=1)
+    rainfall_df.to_csv(os.path.join(output_dir, "LAKE_RAINFALL_DATA_FORECAST.csv"), index=False)
+
+    # Merge ET
+    et_df_all = pd.concat(et_dfs, axis=0).groupby("date").first().reset_index()
+    et_df_all["average_ETPI"] = et_df_all[POINTS["station"]].mean(axis=1)
+    et_df_all.to_csv(os.path.join(output_dir, "LOONE_AVERAGE_ETPI_DATA_FORECAST.csv"), index=False)
+
+    # Combine all SSRD DataFrames
+    ssrd_df_all = pd.concat(ssrd_dfs, axis=0)
+    ssrd_df_all["date"] = pd.to_datetime(ssrd_df_all["date"])
+
+    # Compute the daily mean for each station
+    daily_ssrd = (
+        ssrd_df_all.groupby(ssrd_df_all["date"].dt.date)[POINTS["station"]]
+        .mean()
+        .reset_index()
+    )
+
+    daily_ssrd = daily_ssrd.rename(columns={"date": "date"})
+    daily_ssrd["Mean_RADT"] = daily_ssrd[POINTS["station"]].mean(axis=1)
+    daily_ssrd.to_csv(os.path.join(output_dir, "LO_RADT_data_forecast.csv"), index=False)
+
+    print("All outputs generated successfully.")
```
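The new script applies simple linear bias corrections to the raw GFS forecasts: wind speed derived from the 10 m u/v components is corrected as 0.4167 * speed + 4.1868, and incremental precipitation in metres is converted to millimetres, corrected as 0.7247 * mm + 0.1853, then expressed in inches. A self-contained sketch of those two formulas (the function names are illustrative, not part of the package):

```python
import math

def corrected_wind_speed(u10: float, v10: float) -> float:
    """Wind speed from 10 m u/v components (m/s), with the linear correction used in the script above."""
    speed = math.hypot(u10, v10)
    return 0.4167 * speed + 4.1868

def corrected_precip_inches(tp_increment_m: float) -> float:
    """Incremental precipitation in metres -> bias-corrected inches, as in the script above."""
    mm = tp_increment_m * 1000.0
    corrected_mm = 0.7247 * mm + 0.1853
    return corrected_mm * 0.0393701

print(round(corrected_wind_speed(3.0, 4.0), 2))   # 5 m/s raw -> 6.27 corrected
print(round(corrected_precip_inches(0.002), 4))   # 2 mm raw -> about 0.0644 in
```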