loone-data-prep 0.1.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +252 -228
- loone_data_prep/LOONE_DATA_PREP.py +34 -17
- loone_data_prep/flow_data/forecast_bias_correction.py +52 -34
- loone_data_prep/flow_data/get_forecast_flows.py +131 -88
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py +127 -0
- loone_data_prep/forecast_scripts/forecast_stages.py +40 -0
- loone_data_prep/forecast_scripts/predict_PI.py +51 -0
- loone_data_prep/forecast_scripts/trib_cond.py +84 -0
- loone_data_prep/forecast_scripts/weather_forecast.py +155 -0
- loone_data_prep/utils.py +52 -19
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.1.dist-info}/METADATA +9 -4
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.1.dist-info}/RECORD +15 -10
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.1.dist-info}/WHEEL +1 -1
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.1.dist-info/licenses}/LICENSE +0 -0
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.1.dist-info}/top_level.txt +0 -0
@@ -545,7 +545,7 @@ def main(input_dir: str, output_dir: str) -> None:
  LO_TP_data_Inter['Mean_TP'] = LO_TP_data_Inter.mean(axis=1, numeric_only=True)
  LO_TP_data_Inter = LO_TP_data_Inter.set_index(['date'])
  LO_TP_data_Inter.index = pd.to_datetime(LO_TP_data_Inter.index, unit='ns')
- LO_TP_Monthly_Inter = LO_TP_data_Inter.resample('
+ LO_TP_Monthly_Inter = LO_TP_data_Inter.resample('ME').mean()
  Max = LO_TP_Monthly_Inter.max(axis=1)
  Min = LO_TP_Monthly_Inter.min(axis=1)
  LO_TP_Monthly_Inter['Max'] = Max.values
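The only change in this hunk (and the next one) is the resample alias: newer pandas releases deprecate the month-end alias 'M' in favor of 'ME'. A minimal sketch of the same monthly-mean pattern, assuming pandas >= 2.2 and an illustrative frame (the column name here is made up, not from the package):

```python
import pandas as pd

# Illustrative daily data; the column name is hypothetical.
daily = pd.DataFrame(
    {"TP": [0.10, 0.20, 0.30]},
    index=pd.to_datetime(["2023-01-30", "2023-01-31", "2023-02-01"]),
)

# 'ME' (month end) replaces the deprecated 'M' alias in pandas >= 2.2.
monthly_mean = daily.resample("ME").mean()
print(monthly_mean)
```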
@@ -624,7 +624,7 @@ def main(input_dir: str, output_dir: str) -> None:
  LO_NH4_Clean_Inter.to_csv(f'{output_dir}/LO_NH4_Clean_daily.csv', index=False)
  LO_NH4_Clean_Inter = LO_NH4_Clean_Inter.set_index(['date'])
  LO_NH4_Clean_Inter.index = pd.to_datetime(LO_NH4_Clean_Inter.index, unit='ns')
- LO_NH4_Monthly_Inter = LO_NH4_Clean_Inter.resample('
+ LO_NH4_Monthly_Inter = LO_NH4_Clean_Inter.resample('ME').mean()
  LO_NH4_Monthly_Inter.to_csv(f'{output_dir}/LO_NH4_Monthly_Inter.csv')

  # Interpolated NO Observations in Lake
@@ -967,6 +967,7 @@ def main(input_dir: str, output_dir: str) -> None:
  NO_list = {'S65_NO': S65_NO, 'S71_NO': S71_NO, 'S72_NO': S72_NO, 'S84_NO': S84_NO, 'S127_NO': S127_NO,
  'S133_NO': S133_NO, 'S154_NO': S154_NO, 'S191_NO': S191_NO, 'S308_NO': S308_NO,
  'FISHP_NO': FISHP_NO, 'L8_NO': L8_NO, 'S4_NO': S4_NO}
+ #TODO: Why is this date hard coded into this part?
  date_NO = pd.date_range(start='1/1/2008', end='3/31/2023', freq='D')

  NO_df = pd.DataFrame(date_NO, columns=['date'])
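The added TODO flags that date_NO stays hard coded even though the surrounding code already filters with DF_Date_Range(..., St_Yr, St_M, St_D, En_Yr, En_M, En_D). A hedged sketch of deriving the range from those same variables instead; the variable names come from the diff, but treating them as plain integers here is an assumption:

```python
import pandas as pd

# Example values only; in the script these come from the configured start/end settings.
St_Yr, St_M, St_D = 2008, 1, 1
En_Yr, En_M, En_D = 2023, 3, 31

# Build the daily range from the configured bounds instead of hard-coded strings.
date_NO = pd.date_range(
    start=pd.Timestamp(St_Yr, St_M, St_D),
    end=pd.Timestamp(En_Yr, En_M, En_D),
    freq="D",
)
NO_df = pd.DataFrame(date_NO, columns=["date"])
```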
@@ -982,32 +983,48 @@ def main(input_dir: str, output_dir: str) -> None:
  Flow_df = DF_Date_Range(Flow_df, St_Yr, St_M, St_D, En_Yr, En_M, En_D)

  # Determine NO Loads
-
-
-
-
-
-
-
+ # Ensure 'date' is datetime
+ NO_df['date'] = pd.to_datetime(NO_df['date'])
+ Flow_df['date'] = pd.to_datetime(Flow_df['date'])
+
+ # Merge the two dataframes on date - this will ensure that the dates match
+ merged = pd.merge(NO_df, Flow_df, on='date', how='inner')
+
+ # Compute NO Loads
+ NO_Loads_In = merged[['date']].copy()
+ NO_Loads_In['S65_NO_Ld'] = merged['S65_Q'] * merged['S65_NO'] * 1000
+ NO_Loads_In['S71_NO_Ld'] = merged['S71_Q'] * merged['S71_NO'] * 1000
+ NO_Loads_In['S71_NO_Ld'] = merged['S71_Q'] * merged['S71_NO'] * 1000
+ NO_Loads_In['S72_NO_Ld'] = merged['S72_Q'] * merged['S72_NO'] * 1000
+ NO_Loads_In['S84_NO_Ld'] = merged['S84_Q'] * merged['S84_NO'] * 1000
+ NO_Loads_In['S127_NO_Ld'] = merged['S127_In'] * merged['S127_NO'] * 1000
+ NO_Loads_In['S133_NO_Ld'] = merged['S133_P_Q'] * merged['S133_NO'] * 1000
  # NO_Loads_In['S135_NO_Ld'] = Flow_df['S135_In'].values * NO_df['S135_NO'].values * 1000
- NO_Loads_In['S154_NO_Ld'] =
- NO_Loads_In['S191_NO_Ld'] =
- NO_Loads_In['S308_NO_Ld'] =
- NO_Loads_In['FISHP_NO_Ld'] =
- NO_Loads_In['L8_NO_Ld'] =
- NO_Loads_In['S4_NO_Ld'] =
+ NO_Loads_In['S154_NO_Ld'] = merged['S154_Q'] * merged['S154_NO'] * 1000
+ NO_Loads_In['S191_NO_Ld'] = merged['S191_Q'] * merged['S191_NO'] * 1000
+ NO_Loads_In['S308_NO_Ld'] = merged['S308_In'] * merged['S308_NO'] * 1000
+ NO_Loads_In['FISHP_NO_Ld'] = merged['FISHP_Q'] * merged['FISHP_NO'] * 1000
+ NO_Loads_In['L8_NO_Ld'] = merged['L8_In'] * merged['L8_NO'] * 1000
+ NO_Loads_In['S4_NO_Ld'] = merged['S4_P_Q'] * merged['S4_NO'] * 1000
  # Calculate the total External Loads to Lake Okeechobee
  NO_Loads_In['External_NO_Ld_mg'] = NO_Loads_In.sum(axis=1, numeric_only=True)
  NO_Loads_In.to_csv(f'{output_dir}/LO_External_Loadings_NO.csv', index=False)

  # Determine Chla Loads
  # Create File (Chla_Loads_In)
+ # Read and date-filter Chla data
  S65E_Chla = pd.read_csv(f'{output_dir}/S65E_Chla_Merged.csv')
+ S65E_Chla['date'] = pd.to_datetime(S65E_Chla['date']) # Ensure date column is datetime
  S65E_Chla = DF_Date_Range(S65E_Chla, St_Yr, St_M, St_D, En_Yr, En_M, En_D)
-
-
+ # Merge on date
+ merged = pd.merge(Flow_df[['date', 'Inflows']], S65E_Chla[['date', 'Data']], on='date', how='inner')
+ # Calculate Chlorophyll-a loads
+ merged['Chla_Loads'] = merged['Inflows'] * merged['Data']
+ # Save results
+ Chla_Loads_In = merged[['date', 'Chla_Loads']]
  Chla_Loads_In.to_csv(f'{output_dir}/Chla_Loads_In.csv', index=False)

+
  # Write Data into csv files
  # write Avg Stage (ft, m) Storage (acft, m3) SA (acres) to csv
  LO_Stg_Sto_SA_df.to_csv(f'{output_dir}/Average_LO_Storage_3MLag.csv', index=False)
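For context on the load arithmetic above: if the flow columns (*_Q, *_In) are in m^3/day and the nutrient columns are concentrations in mg/L, the factor of 1000 is the litres-per-m^3 conversion and each product is a load in mg/day. That unit reading is inferred from the column names, not stated in the diff; a tiny sketch with made-up numbers:

```python
# Hypothetical one-day example of the merge-and-multiply load pattern.
flow_m3_per_day = 2_500_000.0    # structure discharge, assumed to be in m^3/day
concentration_mg_per_L = 0.08    # nutrient concentration, assumed to be in mg/L

# 1 m^3 = 1000 L, so the product comes out in mg/day.
load_mg_per_day = flow_m3_per_day * concentration_mg_per_L * 1000
print(f"{load_mg_per_day:.0f} mg/day")
```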
@@ -37,47 +37,65 @@ def get_bias_corrected_data(

  # Prepare the observed data by filling NaN values with the 10yr average
  prepared_od = prep_observed_data(observed_data)
-
- # Get the historical simulation data for the given reach ID
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ historical_data = geoglows.data.retro_daily(reach_id)
+ # Get the historical simulation data for the given reach ID - TODO: Do we for sure want to cache the historical data?
+ # I am reading the observed data that we queried earlier instead of caching it
+ # historical_data = None
+
+ # if cache_path is None:
+ # historical_data = geoglows.streamflow.historic_simulation(reach_id)
+ # else:
+ # # Create the geoglows cache directory if it doesn't exist
+ # geoglows_cache_path = os.path.join(cache_path, "geoglows_cache")
+ # if not os.path.exists(geoglows_cache_path):
+ # os.makedirs(geoglows_cache_path)
+
+ # # Check if the historical simulation data is already cached
+ # if os.path.exists(
+ # os.path.join(
+ # geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
+ # )
+ # ):
+ # historical_data = pd.read_csv(
+ # os.path.join(
+ # geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
+ # ),
+ # index_col=0,
+ # )
+ # historical_data.index = pd.to_datetime(historical_data.index)
+ # else:
+ # historical_data = geoglows.streamflow.historic_simulation(reach_id)
+ # historical_data.to_csv(
+ # os.path.join(
+ # geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
+ # )
+ # )
+ # Drop 'ensemble_52' column if it exists - not necessary but we don't need it
+ station_ensembles.drop(columns=['ensemble_52'], inplace=True, errors='ignore')
+
+ # Drop all rows with any NaN values - again not necessary but we can drop them because we don't need it
+ station_ensembles.dropna(inplace=True)

  # Correct the forecast bias in the station ensembles
- station_ensembles =
+ station_ensembles = geoglows.bias.correct_forecast(
  station_ensembles, historical_data, prepared_od
  )
+
  # Correct the forecast bias in the station stats
- station_stats =
+ station_stats = geoglows.bias.correct_forecast(
  station_stats, historical_data, prepared_od
  )
+ #This is to clean out any infinite values that may have occurred during bias correction
+ station_ensembles = station_ensembles.replace([np.inf, -np.inf], np.nan)
+ station_ensembles = station_ensembles.interpolate(axis=0, limit_direction='both')
+
+ # Fill any remaining NaNs (e.g., at column ends)
+ station_ensembles = station_ensembles.ffill(axis=0).bfill(axis=0)
+ station_stats = station_stats.replace([np.inf, -np.inf], np.nan)
+ station_stats = station_stats.interpolate(axis=0, limit_direction='both')
+
+ # Fill any remaining NaNs (e.g., at column ends)
+ station_stats = station_stats.ffill(axis=0).bfill(axis=0)

  # Return the bias-corrected station ensembles and station stats
  return station_ensembles, station_stats
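The cleanup added at the end of this hunk (replace +/-inf, interpolate, then forward/back fill) is written out twice, once per DataFrame. A minimal sketch of the same steps factored into a helper; the function name is hypothetical, not part of the package:

```python
import numpy as np
import pandas as pd


def clean_bias_corrected(df: pd.DataFrame) -> pd.DataFrame:
    """Replace infinities left by bias correction and fill the resulting gaps."""
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.interpolate(axis=0, limit_direction="both")
    # Catch anything interpolation could not reach (e.g. all-NaN column ends).
    return df.ffill(axis=0).bfill(axis=0)
```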
@@ -1,6 +1,5 @@
  import os
  import sys
- import glob
  import pandas as pd
  import rpy2.robjects as ro
  from rpy2.robjects import pandas2ri
@@ -44,37 +43,16 @@ STATION_IDS = [
  "S49_S",
  ] # Added these stations. They seemed to be missing.

-
-
-
-
-
-
-
-
-
-
- "L8.441": 13082747,
- "S133_P": 13082709,
- "S127_C": 13082716,
- "S127_P": 13082716,
- "S129_C": 13082727,
- "S135_C": 13082725,
- "S2_P": 13082783,
- "S3_P": 13082809,
- "S4_P": 13082806,
- "S351_S": 13082804,
- "S352_S": 13082762,
- "S354_S": 13082809,
- "S129 PMP_P": 13082727,
- "S135 PMP_P": 13082725,
- "S77_S": 13082767,
- "INDUST": 13082806,
- "S79_S": 13082791,
- "S80_S": 13082718,
- "S40_S": 13082797,
- "S49_S": 13082696,
- }
+ INFLOW_IDS = [
+ 750059718, 750043742, 750035446, 750034865, 750055574, 750053211,
+ 750050248, 750065049, 750064453, 750049661, 750069195, 750051436,
+ 750068005, 750063868, 750069782, 750072741
+ ]
+ OUTFLOW_IDS = [750053809, 750057949]
+ MATCHED_IDS = [750052624, 750049656, 750057357,
+ 750038427, 750051428, 750068601, 750058536, 750038416,
+ 750050259, 750045514, 750053213, 750028935]
+

  SECONDS_IN_HOUR = 3600
  SECONDS_IN_DAY = 86400
@@ -140,7 +118,7 @@ def get_reach_id(latitude: float, longitude: float):
  Returns:
  (int): The reach id of the given latitude/longitude
  """
- reach_data = geoglows.
+ reach_data = geoglows.streams.latlon_to_reach(latitude, longitude)

  if "error" in reach_data:
  raise Exception(reach_data["error"])
@@ -159,8 +137,8 @@ def get_flow_forecast_ensembles(reach_id: str, forecast_date: str):
  Returns:
  (pandas.core.frame.DataFrame): The 52 ensemble flow forecasts.
  """
- return geoglows.
-
+ return geoglows.data.forecast_ensembles(
+ river_id=reach_id, date=forecast_date
  )

@@ -176,14 +154,15 @@ def get_flow_forecast_stats(reach_id: str, forecast_date: str):
  Returns:
  (pandas.core.frame.DataFrame): The forecast stats
  """
- return geoglows.
-
+ return geoglows.data.forecast_stats(
+ river_id=reach_id, date=forecast_date
  )


  def ensembles_to_csv(
  workspace: str,
-
+ flow_type: str,
+ reach_id: str,
  ensembles: pd.core.frame.DataFrame,
  stats: pd.core.frame.DataFrame,
  ):
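These two hunks move the forecast fetches to the geoglows.data interface and add flow_type/reach_id parameters to ensembles_to_csv. A hedged usage sketch of the new call shapes; the keyword names are taken from the diff, while the example river id and the date format are assumptions to verify against the geoglows documentation:

```python
import geoglows

river_id = 750072741        # example COMID taken from the diff's INFLOW_IDS
forecast_date = "20240101"  # assumed date format; confirm against the geoglows docs

ensembles = geoglows.data.forecast_ensembles(river_id=river_id, date=forecast_date)
stats = geoglows.data.forecast_stats(river_id=river_id, date=forecast_date)
```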
@@ -202,7 +181,7 @@ def ensembles_to_csv(
  data.
  """
  # Get the path to the file that will be written
- file_name = f"{
+ file_name = f"{reach_id}_{flow_type}_cmd_geoglows.csv"
  file_path = os.path.join(workspace, file_name)

  # Format DataFrames for LOONE
@@ -234,8 +213,8 @@ def _format_ensembles_DataFrame(dataframe: pd.core.frame.DataFrame):
  DataFrame.
  """
  # Remove high resolution columns (ensemble 52)
- if "
- dataframe.drop(columns="
+ if "ensemble_52" in dataframe.columns:
+ dataframe.drop(columns="ensemble_52", inplace=True)

  # Remove rows with null values
  dataframe.dropna(axis="index", inplace=True)
@@ -284,8 +263,8 @@ def _format_stats_DataFrame(dataframe: pd.core.frame.DataFrame):
  DataFrame.
  """
  # Remove high resolution columns (ensemble 52, high_res_m^3/s)
- if "
- dataframe.drop(columns="
+ if "high_res" in dataframe.columns:
+ dataframe.drop(columns="high_res", inplace=True)

  # Remove rows with null values
  dataframe.dropna(axis="index", inplace=True)
@@ -300,28 +279,28 @@ def _format_stats_DataFrame(dataframe: pd.core.frame.DataFrame):
  dataframe.clip(0, inplace=True)

  # Max Column (Max)
- column_max = dataframe[["
+ column_max = dataframe[["flow_max"]].copy()
  column_max = column_max.groupby([column_max.index]).max()

  # 75th Percentile Column (Average)
- column_75percentile = dataframe[["
+ column_75percentile = dataframe[["flow_75p"]].copy()
  column_75percentile = column_75percentile.groupby(
  [column_75percentile.index]
  ).mean()

  # Average Column (Weighted Average)
- column_average = dataframe[["
+ column_average = dataframe[["flow_avg"]].copy()
  column_average.transform(lambda x: x / 8)
  column_average = column_average.groupby([column_average.index]).sum()

  # 25th Percentile Column (Average)
- column_25percentile = dataframe[["
+ column_25percentile = dataframe[["flow_25p"]].copy()
  column_25percentile = column_25percentile.groupby(
  [column_25percentile.index]
  ).mean()

  # Min Column (Min)
- column_min = dataframe[["
+ column_min = dataframe[["flow_min"]].copy()
  column_min = column_min.groupby([column_min.index]).min()

  # Convert values in each column from m^3/h to m^3/d
@@ -338,17 +317,17 @@ def _format_stats_DataFrame(dataframe: pd.core.frame.DataFrame):
  # Append modified columns into one pandas DataFrame
  dataframe_result = pd.DataFrame()
  dataframe_result.index = dataframe.groupby([dataframe.index]).mean().index
- dataframe_result["flow_max_m^3/d"] = column_max["
+ dataframe_result["flow_max_m^3/d"] = column_max["flow_max"].tolist()
  dataframe_result["flow_75%_m^3/d"] = column_75percentile[
- "
+ "flow_75p"
  ].tolist()
  dataframe_result["flow_avg_m^3/d"] = column_average[
- "
+ "flow_avg"
  ].tolist()
  dataframe_result["flow_25%_m^3/d"] = column_25percentile[
- "
+ "flow_25p"
  ].tolist()
- dataframe_result["flow_min_m^3/d"] = column_min["
+ dataframe_result["flow_min_m^3/d"] = column_min["flow_min"].tolist()

  # Format datetimes to just dates
  dataframe_result.index = dataframe_result.index.strftime("%Y-%m-%d")
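A note on the flow_avg handling above: the diff's comment calls it a weighted average, and dividing each value by 8 before a per-day sum is equivalent to a daily mean when a day contains eight 3-hourly forecast steps (that cadence is an inference, not stated in the diff). A small pandas sketch of the equivalent daily aggregation, with illustrative data:

```python
import numpy as np
import pandas as pd

# Illustrative 3-hourly stats for one day; the column name follows the diff.
idx = pd.date_range("2024-01-01", periods=8, freq="3h")
stats = pd.DataFrame({"flow_avg": np.linspace(100.0, 170.0, 8)}, index=idx)

# Equivalent to the divide-by-8-then-sum approach when each day has eight steps.
daily_avg = stats["flow_avg"].resample("D").mean()
print(daily_avg)
```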
@@ -383,48 +362,111 @@ def main(
  cache_path (str): The path to the cache directory for geoglows data.
  Should hold a directory named geoglows_cache that holds the cached files. Use None to not use a cache.
  """
- # Local Variables
- reach_ids = {}
-
- # Get the latitude/longitude for each station
- station_locations = get_stations_latitude_longitude(station_ids)
-
- # Check for any download failures
- for station_id in station_ids:
-
-
-
-
-
-
-
-
- # Get station reach ids
- if station_id not in REACH_IDS.keys():
-
-
-
-
-
-
-
-
-
+ # # Local Variables
+ # reach_ids = {}
+
+ # # Get the latitude/longitude for each station
+ # station_locations = get_stations_latitude_longitude(station_ids)
+
+ # # Check for any download failures
+ # for station_id in station_ids:
+ # if station_id in REACH_IDS.keys():
+ # reach_ids[station_id] = REACH_IDS[station_id]
+ # elif station_id not in station_locations.keys():
+ # raise Exception(
+ # "Error: The longitude and latitude could not be downloaded "
+ # f"for station {station_id}"
+ # )
+
+ # # Get station reach ids
+ # if station_id not in REACH_IDS.keys():
+ # for station_id in station_locations.keys():
+ # location = station_locations[station_id]
+ # try:
+ # reach_ids[station_id] = get_reach_id(location[0], location[1])
+ # except Exception as e:
+ # print(
+ # "Error: Failed to get reach id for station "
+ # f"{station_id} ({str(e)})"
+ # )

  # Get the flow data for each station
-
-
+ stations_inflow_by_comid = {
+ 750072741: "S65E_S", # TODO: Should this be S65E_total or S65E_S? - this is a station we definitely want
+ 750069782: "S84_S", #
+ # 750053211: "S129_C", # TODO: Should this be S129_C or S129_PMP_P? - Also right now it is all 0s
+ # 750035446: "S133_P", # TODO: Should this be S133_P or S133_C? - Also right now it is all 0s
+ 750064453: "S154_C", # This is primarily 0s
+ }
+
+
+ for reach_id in INFLOW_IDS:
+ station_ensembles = get_flow_forecast_ensembles(
+ reach_id, forecast_date
+ )
+ station_stats = get_flow_forecast_stats(reach_id, forecast_date)
+
+ if bias_corrected:
+ if reach_id in stations_inflow_by_comid:
+ station_id = stations_inflow_by_comid[reach_id]
+ observed_data_path = os.path.join(observed_data_dir, f"{station_id}_FLOW_cmd.csv")
+ # if observed_data_list:
+ # observed_data_path = observed_data_list[0]
+ station_ensembles, station_stats = get_bias_corrected_data(
+ station_id,
+ reach_id,
+ observed_data_path,
+ station_ensembles,
+ station_stats,
+ cache_path,
+ )
+
+ ensembles_to_csv(
+ workspace,
+ "INFLOW",
+ reach_id,
+ station_ensembles,
+ station_stats,
+ )
+ for reach_id in OUTFLOW_IDS:
  station_ensembles = get_flow_forecast_ensembles(
  reach_id, forecast_date
  )
  station_stats = get_flow_forecast_stats(reach_id, forecast_date)

+ ensembles_to_csv(
+ workspace,
+ "OUTFLOW",
+ reach_id,
+ station_ensembles,
+ station_stats,
+ )
+ for reach_id in MATCHED_IDS:
+ stations_matched_by_comid = {
+ 750068601: "S71_S",
+ 750052624: "S135_C", # TODO: Should this be S135_C or S135_P?
+ # 750052624: "S308", # NOTE: Same COMID as S135 — only one key allowed!
+ 750053213: "FISHP",
+ 750038416: "S77_S",
+ 750050259: "S79_TOT",
+ 750045514: "S80_S",
+ 750058536: "S72_S",
+ 750051428: "S49_S",
+ # 750038427: "S40",
+ 750057357: "S191_S",
+ 750028935: "S127_C", #TODO: Should this be S127_C or S127_P?
+ }
+
+ station_ensembles = get_flow_forecast_ensembles(
+ reach_id, forecast_date
+ )
+ station_stats = get_flow_forecast_stats(reach_id, forecast_date)
  if bias_corrected:
-
-
-
-
- observed_data_path = observed_data_list[0]
+ if reach_id in stations_matched_by_comid:
+ station_id = stations_matched_by_comid[reach_id]
+ observed_data_path = os.path.join(observed_data_dir, f"{station_id}_FLOW_cmd.csv")
+ # if observed_data_list:
+ # observed_data_path = observed_data_list[0]
  station_ensembles, station_stats = get_bias_corrected_data(
  station_id,
  reach_id,
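With the updated ensembles_to_csv signature from the earlier hunk, every reach processed in these loops lands in a file named {reach_id}_{flow_type}_cmd_geoglows.csv. A small sketch of the resulting paths, using a hypothetical workspace and one example COMID per group from the diff:

```python
import os

workspace = "/tmp/loone_workspace"  # hypothetical output location
groups = {"INFLOW": [750072741], "OUTFLOW": [750053809], "MATCHED": [750068601]}

for flow_type, reach_ids in groups.items():
    for reach_id in reach_ids:
        # Mirrors the file naming used by ensembles_to_csv in this diff.
        print(os.path.join(workspace, f"{reach_id}_{flow_type}_cmd_geoglows.csv"))
```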
@@ -0,0 +1,127 @@
+ import os
+ from herbie import FastHerbie
+ from datetime import datetime
+ import pandas as pd
+ from retry_requests import retry
+ import warnings
+
+
+ def generate_wind_forecasts(output_dir):
+ # Ensure output directory exists
+ warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Define points of interest
+ points = pd.DataFrame({
+ "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
+ "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
+ })
+
+ # Station-specific file and column names
+ file_map = {
+ "Point_1": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
+ "Point_2": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
+ "Point_3": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
+ "Point_4": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
+ }
+
+ today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+ FH = FastHerbie([today_str], model="ifs", fxx=range(0, 360, 3))
+ dfs = []
+
+ variables = {
+ "10u": "10u",
+ "10v": "10v",
+ "2t": "2t",
+
+ }
+
+ # Loop through points and extract data
+ for index, point in points.iterrows():
+ print(f"\nProcessing Point {index + 1}: ({point.latitude}, {point.longitude})")
+
+ point_df = pd.DataFrame({
+ "longitude": [point.longitude],
+ "latitude": [point.latitude]
+ })
+
+ for var_key, var_name in variables.items():
+ print(f" Variable: {var_key}")
+
+ # Download and load dataset
+ FH.download(f":{var_key}")
+ ds = FH.xarray(f":{var_key}", backend_kwargs={"decode_timedelta": True})
+
+ # Extract point data
+ dsi = ds.herbie.pick_points(point_df, method="nearest")
+
+ # Get actual variable name
+ if var_name == "10u":
+ var_name_actual = "u10" # Map 10u to u10
+ elif var_name == "10v":
+ var_name_actual = "v10" # Map 10v to v10
+ elif var_name == "2t":
+ var_name_actual = "t2m" #TODO: check that this is correct
+
+ # Convert to DataFrame
+ time_series = dsi[var_name_actual].squeeze()
+ df = time_series.to_dataframe().reset_index()
+
+ # Handle datetime columns
+ if "valid_time" in df.columns:
+ df = df.rename(columns={"valid_time": "datetime"})
+ elif "step" in df.columns and "time" in dsi.coords:
+ df["datetime"] = dsi.time.values[0] + df["step"]
+
+ # Retain necessary columns
+ df = df[["datetime", var_name_actual]].drop_duplicates()
+ dfs.append((index, var_name_actual, df))
+
+ # Merge and process data per point
+ results = {}
+ for point_index in range(len(points)):
+ u_df = [df for idx, name, df in dfs if idx == point_index and name == "u10"][0]
+ v_df = [df for idx, name, df in dfs if idx == point_index and name == "v10"][0]
+ merged = u_df.merge(v_df, on="datetime", how="outer")
+
+ # Compute wind speed and correction
+ merged["wind_speed"] = (merged["u10"] ** 2 + merged["v10"] ** 2) ** 0.5
+ merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
+ merged["wind_speed_corrected"] = merged["wind_speed_corrected"] * 2.23694 # m/s to mph
+
+ results[f"Point_{point_index + 1}"] = merged
+
+ # Save outputs with station-specific column names
+ for key, (filename, new_col_name) in file_map.items():
+ df = results[key].copy()
+ df = df[["datetime", "wind_speed_corrected"]].rename(columns={
+ "wind_speed_corrected": new_col_name,
+ "datetime": "date"
+ })
+ filepath = os.path.join(output_dir, filename)
+ df.to_csv(filepath, index=False)
+ # Save 2-meter air temperature data
+ airt_file_map = {
+ "Point_1": "L001_AIRT_Degrees Celsius_forecast.csv",
+ "Point_2": "L005_AIRT_Degrees Celsius_forecast.csv",
+ "Point_3": "L006_AIRT_Degrees Celsius_forecast.csv",
+ "Point_4": "LZ40_AIRT_Degrees Celsius_forecast.csv"
+ }
+ airt_column_map = {
+ "Point_1": "L001_AIRT_Degrees Celsius",
+ "Point_2": "L005_AIRT_Degrees Celsius",
+ "Point_3": "L006_AIRT_Degrees Celsius",
+ "Point_4": "LZ40_AIRT_Degrees Celsius"
+ }
+
+ for key in airt_file_map:
+ point_index = int(key.split("_")[1]) - 1
+ df_airt = [df for idx, name, df in dfs if idx == point_index and name == "t2m"][0].copy()
+ df_airt["t2m"] = df_airt["t2m"] - 273.15 # Convert from Kelvin to Celsius
+ df_airt = df_airt.rename(columns={
+ "datetime": "date",
+ "t2m": airt_column_map[key]
+ })
+ filepath = os.path.join(output_dir, airt_file_map[key])
+ df_airt.to_csv(filepath, index=False)
+
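The new script derives wind speed from the IFS u10/v10 components, applies a linear correction (0.4167 * speed + 4.1868; presumably a local calibration, the diff does not say), and converts m/s to mph. A standalone sketch of that arithmetic:

```python
import math

u10, v10 = 3.2, -4.1  # example 10 m wind components in m/s

wind_speed = math.hypot(u10, v10)         # magnitude in m/s
corrected = 0.4167 * wind_speed + 4.1868  # linear correction used in the diff
corrected_mph = corrected * 2.23694       # 1 m/s = 2.23694 mph
print(f"{wind_speed:.2f} m/s -> {corrected_mph:.2f} mph")
```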