loone-data-prep 1.2.4__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
- loone_data_prep/LOONE_DATA_PREP.py +0 -1
- loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
- loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
- loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
- loone_data_prep/utils.py +19 -2
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/METADATA +1 -1
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/RECORD +11 -12
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
- loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/WHEEL +0 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/top_level.txt +0 -0
loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py CHANGED

@@ -15,7 +15,7 @@ from loone_data_prep.utils import stg2sto, stg2ar
 import datetime

 START_DATE = datetime.datetime.now()
-END_DATE = START_DATE + datetime.timedelta(days=
+END_DATE = START_DATE + datetime.timedelta(days=14)

 M3_Yr = 2008
 M3_M = 1
@@ -373,8 +373,8 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
     C44RO_df['C44RO_cmd'] = C44RO
     C43RO_df['C43RO'] = C43RO_df['C43RO_cmd']/(0.0283168466 * 86400)
     C44RO_df['C44RO'] = C44RO_df['C44RO_cmd']/(0.0283168466 * 86400)
-    C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv'
-    C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv'
+    C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv')
+    C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv')
     C43RO_df.index = pd.to_datetime(C43RO_df["date"])
     C43RO_df = C43RO_df.drop(columns="date")

@@ -384,13 +384,13 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
     C43Mon = C43RO_df.resample('ME').mean()
     C44Mon = C44RO_df.resample('ME').mean()

-    C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv'
-    C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv'
+    C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv')
+    C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv')
     Basin_RO = pd.DataFrame(C44Mon.index, columns=['date'])
     # Basin_RO['SLTRIB'] = SLTRIBMon['SLTRIB_cfs'].values * 1.9835 # cfs to acft
     Basin_RO['C44RO'] = C44Mon['C44RO'].values * 86400
     Basin_RO['C43RO'] = C43Mon['C43RO'].values * 86400
-    Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv'
+    Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv')

     # # Get monthly C43RO and C44RO from historical run
     # shutil.copyfile(os.path.join(historical_files_src, "C43RO_Monthly.csv"), os.path.join(output_dir, 'C43RO_Monthly.csv'))
@@ -461,16 +461,47 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
     LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")

     # # RFVol acft
-
-    #
-    #
-
-
-    #
-
-
-    #
-    #
+    RF_data = pd.read_csv(f'{input_dir}/LAKE_RAINFALL_DATA_FORECAST.csv')
+    # RF_data_copy = RF_data.copy()
+    # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+    RF_data['date'] = pd.to_datetime(RF_data['date'])
+    # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+    # LO_Stg_Sto_SA_df_copy.index.name = None
+
+
+    # merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+    # LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+    # on='date', how='inner')
+    #I am just using the most recent SA_acres value for all forecast dates since we do not have forecasted surface area
+    RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
+    RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+
+    date_reference = RFVol['date'].iloc[0]
+    date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+    df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
+    RFVol = pd.concat([df_insert, RFVol])
+    RFVol.to_csv(f'{output_dir}/RFVol_Forecast.csv', index=False)
+
+    # ETVol acft
+    # Create File (ETVol)
+    # Merge the DataFrames on date to ensure matching rows
+    ET_data = pd.read_csv(f'{input_dir}/LOONE_AVERAGE_ETPI_DATA_FORECAST.csv')
+    # ET_data_copy = ET_data.copy()
+    # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+    ET_data['date'] = pd.to_datetime(ET_data['date'])
+    # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+    # merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+    # LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+    # on='date', how='inner')
+
+    ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
+    ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+    date_reference = ETVol['date'].iloc[0]
+    date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+    df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
+    ETVol = pd.concat([df_insert, ETVol])
+    ETVol.to_csv(f'{output_dir}/ETVol_forecast.csv', index=False)
+

     # # WCA Stages
     # # Create File (WCA_Stages_Inputs)
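The new RFVol and ETVol blocks apply the same volume conversion: a depth in inches divided by 12 gives feet, and feet times the lake surface area in acres gives acre-feet. Because no forecasted surface area exists, the code reuses the latest observed SA_acres for every forecast day and prepends two zero-volume days so the series starts before the first forecast date. A minimal standalone sketch of that arithmetic (the rainfall and surface-area numbers below are invented for illustration):

```python
import pandas as pd

# Toy forecast rainfall in inches/day, plus a hypothetical latest SA_acres value
rf = pd.DataFrame({
    "date": pd.date_range("2024-06-01", periods=3, freq="D"),
    "average_rainfall": [0.30, 0.00, 1.20],
})
sa_acres = 450_000.0  # stand-in for LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]

# inches -> feet (divide by 12); feet * acres = acre-feet
rf["RFVol_acft"] = (rf["average_rainfall"] / 12) * sa_acres

# Prepend two zero-volume days, mirroring the date_inserts rows above
pad = pd.DataFrame({
    "date": [rf["date"].iloc[0] - pd.Timedelta(days=d) for d in (2, 1)],
    "RFVol_acft": [0.0, 0.0],
})
rf_out = pd.concat([pad, rf], ignore_index=True)
print(rf_out)
```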
loone_data_prep/LOONE_DATA_PREP.py CHANGED

@@ -351,7 +351,6 @@ def main(input_dir: str, output_dir: str) -> None:
     S65E.index = pd.to_datetime(S65E.index, unit='ns')
     S65E_Weekly = S65E.resample('W-FRI').mean()
     # PI
-    # TODO
     # This is prepared manually
     # Weekly data is downloaded from https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/time-series/0804
     # State:Florida Division:4.South Central
loone_data_prep/forecast_scripts/get_Chla_predicted.py CHANGED

@@ -9,7 +9,7 @@ def get_Chla_predicted(input_dir, output_dir):
         output_dir: Directory where the output files will be saved.
     """
     # Read forecast inflow file and get overall date range
-    #
+    # We are only taking the dates, so it is okay to just use one ensemble because they all have the same dates
    Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
    Q_in['date'] = pd.to_datetime(Q_in['date'])
    date_start = Q_in['date'].min()
loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py CHANGED

@@ -9,7 +9,7 @@ def get_NO_Loads_predicted(input_dir, output_dir):
         output_dir: Directory where the output files will be saved.
     This function reads the forecast inflow file, retrieves nitrate data for specified stations,
     """
-    #
+    # It is okay to use just one ensemble because they all have the same dates and we only use the dates
     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))

     datetime_str = Q_in['date'].iloc[0]
loone_data_prep/forecast_scripts/new_combined_weather_forecast.py ADDED

@@ -0,0 +1,220 @@
+import os
+import warnings
+import pandas as pd
+from datetime import datetime
+from retry import retry
+from loone_data_prep.herbie_utils import get_fast_herbie_object
+from herbie import FastHerbie
+import openmeteo_requests
+from retry_requests import retry as retry_requests
+import requests_cache
+
+warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+
+POINTS = pd.DataFrame({
+    "station": ["L001", "L005", "L006", "LZ40"],
+    "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
+    "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
+})
+
+WIND_FILE_MAP = {
+    "L001": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
+    "L005": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
+    "L006": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
+    "LZ40": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
+}
+
+AIRT_FILE_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius_forecast.csv",
+    "L005": "L005_AIRT_Degrees Celsius_forecast.csv",
+    "L006": "L006_AIRT_Degrees Celsius_forecast.csv",
+    "LZ40": "LZ40_AIRT_Degrees Celsius_forecast.csv"
+}
+
+AIRT_COLUMN_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius",
+    "L005": "L005_AIRT_Degrees Celsius",
+    "L006": "L006_AIRT_Degrees Celsius",
+    "LZ40": "LZ40_AIRT_Degrees Celsius"
+}
+
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+def download_herbie_variable(FH, variable_key, variable_name, point_df):
+    """Download a Herbie variable for a given point and return a DataFrame."""
+    FH.download(f":{variable_key}")
+    ds = FH.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
+    dsi = ds.herbie.pick_points(point_df, method="nearest")
+
+    var_name = {
+        "10u": "u10",
+        "10v": "v10",
+        "2t": "t2m"
+    }.get(variable_name, variable_name)
+
+    ts = dsi[var_name].squeeze()
+    df = ts.to_dataframe().reset_index()
+    if "valid_time" in df.columns:
+        df.rename(columns={"valid_time": "datetime"}, inplace=True)
+    elif "time" in df.columns:
+        df.rename(columns={"time": "datetime"}, inplace=True)
+
+    df = df[["datetime", var_name]].drop_duplicates()
+    ds.close()
+    dsi.close()
+    del ds, dsi, ts
+    return df
+
+# Download ET from Open-Meteo
+def download_hourly_et(lat, lon):
+    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
+    retry_session = retry_requests(cache_session, retries=5, backoff_factor=0.2)
+    client = openmeteo_requests.Client(session=retry_session)
+
+    url = "https://api.open-meteo.com/v1/forecast"
+    params = {
+        "latitude": lat,
+        "longitude": lon,
+        "hourly": "evapotranspiration",
+        "forecast_days": 16,
+        "models": "gfs_seamless"
+    }
+    responses = client.weather_api(url, params=params)
+    response = responses[0]
+
+    hourly = response.Hourly()
+    hourly_evap = hourly.Variables(0).ValuesAsNumpy()
+    hourly_data = {"date": pd.date_range(
+        start=pd.to_datetime(hourly.Time(), unit="s"),
+        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
+        freq=pd.Timedelta(seconds=hourly.Interval()),
+        inclusive="left"
+    )}
+    hourly_data["evapotranspiration"] = hourly_evap
+    return pd.DataFrame(hourly_data)
+
+# Main generation function
+def generate_all_outputs(output_dir):
+    os.makedirs(output_dir, exist_ok=True)
+    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+    FH = get_fast_herbie_object(today_str)
+
+    # Forecasted weather data (single point)
+    point_df = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
+    forecast_vars = ["10u", "10v", "2t", "tp", "ssrd"]
+    data = {var: download_herbie_variable(FH, var, var, point_df) for var in forecast_vars}
+
+    merged = data["10u"].merge(data["10v"], on="datetime")
+    merged = merged.merge(data["2t"], on="datetime")
+    merged = merged.merge(data["tp"], on="datetime")
+    merged = merged.merge(data["ssrd"], on="datetime")
+
+    # Derived columns
+    merged["wind_speed"] = (merged["u10"]**2 + merged["v10"]**2)**0.5  # wind speed in m/s
+    merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
+    merged["tp_inc_m"] = merged["tp"].diff().clip(lower=0)
+    # Convert incremental meters → mm
+    merged["tp_inc_mm"] = merged["tp_inc_m"] * 1000.0
+    # Apply bias correction (in mm)
+    merged["tp_corrected_mm"] = 0.7247 * merged["tp_inc_mm"] + 0.1853
+    # convert to inches
+    merged["tp_corrected"] = merged["tp_corrected_mm"] * 0.0393701
+
+    merged["ssrd_kwm2"] = merged["ssrd"].diff() / merged["datetime"].diff().dt.total_seconds() / 1000
+    merged["ssrd_corrected"] = (1.0530 * merged["ssrd_kwm2"] - 0.0347).clip(lower=0)
+    merged = merged[[
+        "datetime",
+        "wind_speed_corrected",
+        "tp_corrected",
+        "ssrd_corrected"
+    ]]
+
+    # ET for main point
+    df_et = download_hourly_et(26.9690, -80.7976)
+    merged = merged.merge(df_et, left_on="datetime", right_on="date", how="left").drop(columns=["date"])
+    merged.to_csv(os.path.join(output_dir, "forecasted_weather_data.csv"), index=False)
+
+    # 4-point wind and air temp CSVs
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Wind
+        df_u = download_herbie_variable(FH, "10u", "10u", point_df)
+        df_v = download_herbie_variable(FH, "10v", "10v", point_df)
+        merged_ws = df_u.merge(df_v, on="datetime")
+        merged_ws["wind_speed"] = (merged_ws["u10"]**2 + merged_ws["v10"]**2)**0.5
+        merged_ws["wind_speed_corrected"] = 0.4167 * merged_ws["wind_speed"] + 4.1868
+
+        filename, new_col = WIND_FILE_MAP[station]
+        merged_ws[["datetime", "wind_speed_corrected"]].rename(
+            columns={"datetime": "date", "wind_speed_corrected": new_col}
+        ).to_csv(os.path.join(output_dir, filename), index=False)
+
+        # Air temp
+        df_t = download_herbie_variable(FH, "2t", "2t", point_df)
+        df_t["t2m"] = df_t["t2m"] - 273.15
+        df_t.rename(columns={"datetime": "date", "t2m": AIRT_COLUMN_MAP[station]}).to_csv(
+            os.path.join(output_dir, AIRT_FILE_MAP[station]), index=False
+        )
+
+    # Rainfall, ET, and SSRD 4-point CSVs
+    rainfall_dfs, et_dfs, ssrd_dfs = [], [], []
+
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Rainfall
+        df_tp = download_herbie_variable(FH, "tp", "tp", point_df)
+        # Convert cumulative meters → incremental meters
+        df_tp["tp_inc_m"] = df_tp["tp"].diff().clip(lower=0)
+        # Convert incremental meters → millimeters
+        df_tp["tp_inc_mm"] = df_tp["tp_inc_m"] * 1000.0
+        df_tp["date_only"] = df_tp["datetime"].dt.date
+        # Sum incremental precipitation per day
+        df_daily = df_tp.groupby("date_only")["tp_inc_mm"].sum().reset_index()
+        # Apply bias correction on daily totals (in mm)
+        df_daily["tp_corrected_mm"] = 0.7247 * df_daily["tp_inc_mm"] + 0.1853
+        # Convert corrected mm → inches
+        df_daily["tp_corrected_in"] = df_daily["tp_corrected_mm"] * 0.0393701
+        df_daily = df_daily.rename(columns={"date_only": "date", "tp_corrected_in": station})
+        rainfall_dfs.append(df_daily[["date", station]])
+
+        # ET
+        df_et_point = download_hourly_et(row.latitude, row.longitude)
+        df_et_point.rename(columns={"evapotranspiration": station}, inplace=True)
+        et_dfs.append(df_et_point)
+
+        # SSRD
+        df_ssrd = download_herbie_variable(FH, "ssrd", "ssrd", point_df)
+        df_ssrd["ssrd_kwm2"] = df_ssrd["ssrd"].diff() / df_ssrd["datetime"].diff().dt.total_seconds() / 1000
+        df_ssrd["ssrd_corrected"] = (1.0530 * df_ssrd["ssrd_kwm2"] - 0.0347).clip(lower=0)
+        df_ssrd = df_ssrd[["datetime", "ssrd_corrected"]].rename(columns={"datetime": "date", "ssrd_corrected": station})
+        ssrd_dfs.append(df_ssrd)
+
+    # Merge rainfall
+    rainfall_df = pd.concat(rainfall_dfs, axis=0).groupby("date").first().reset_index()
+    rainfall_df["average_rainfall"] = rainfall_df[POINTS["station"]].mean(axis=1)
+    rainfall_df.to_csv(os.path.join(output_dir, "LAKE_RAINFALL_DATA_FORECAST.csv"), index=False)
+
+    # Merge ET
+    et_df_all = pd.concat(et_dfs, axis=0).groupby("date").first().reset_index()
+    et_df_all["average_ETPI"] = et_df_all[POINTS["station"]].mean(axis=1)
+    et_df_all.to_csv(os.path.join(output_dir, "LOONE_AVERAGE_ETPI_DATA_FORECAST.csv"), index=False)
+
+    # Combine all SSRD DataFrames
+    ssrd_df_all = pd.concat(ssrd_dfs, axis=0)
+    ssrd_df_all["date"] = pd.to_datetime(ssrd_df_all["date"])
+
+    # Compute the daily mean for each station
+    daily_ssrd = (
+        ssrd_df_all.groupby(ssrd_df_all["date"].dt.date)[POINTS["station"]]
+        .mean()
+        .reset_index()
+    )
+
+    daily_ssrd = daily_ssrd.rename(columns={"date": "date"})
+    daily_ssrd["Mean_RADT"] = daily_ssrd[POINTS["station"]].mean(axis=1)
+    daily_ssrd.to_csv(os.path.join(output_dir, "LO_RADT_data_forecast.csv"), index=False)
+
+    print("All outputs generated successfully.")
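In new_combined_weather_forecast.py above, total precipitation (tp) arrives as a running accumulation in meters, so the script differences consecutive values (clipping at zero) before converting units and applying the linear bias correction. A small sketch of that chain on a toy series; the 0.7247 and 0.1853 coefficients are the ones used in the file, while the sample accumulations are invented:

```python
import pandas as pd

# Cumulative precipitation in meters, as the model reports it (toy values)
tp_m = pd.Series([0.000, 0.002, 0.002, 0.007, 0.007])

tp_inc_m = tp_m.diff().clip(lower=0)      # per-step totals; clip guards float noise
tp_inc_mm = tp_inc_m * 1000.0             # meters -> millimeters
tp_corr_mm = 0.7247 * tp_inc_mm + 0.1853  # linear bias correction (mm)
tp_corr_in = tp_corr_mm * 0.0393701       # millimeters -> inches
print(tp_corr_in)
```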
loone_data_prep/utils.py CHANGED

@@ -996,14 +996,31 @@ def get_synthetic_data(date_start: str, df: pd.DataFrame):
     end_month_day = date_end.strftime('%m-%d')

     # Filter the DataFrame to include only rows between date_start and date_end for all previous years
-
+    # (handle year wrap, e.g., Dec -> Jan)
+    wraps_year = start_month_day > end_month_day
+
+    if wraps_year:
+        mask = (
+            (df['month_day'] >= start_month_day) |
+            (df['month_day'] <= end_month_day)
+        )
+    else:
+        mask = (
+            (df['month_day'] >= start_month_day) &
+            (df['month_day'] <= end_month_day)
+        )
+
     filtered_data = df.loc[mask]

     # Group by the month and day, then calculate the average for each group
     average_values = filtered_data.groupby('month_day')['Data'].mean()
     # Interpolate in case there are missing values:
     start_date = pd.to_datetime('2001-' + start_month_day)
-
+
+    if wraps_year:
+        end_date = pd.to_datetime('2002-' + end_month_day)
+    else:
+        end_date = pd.to_datetime('2001-' + end_month_day)

     full_dates = pd.date_range(start=start_date, end=end_date)
     full_index = full_dates.strftime('%m-%d')
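The rewritten mask works because zero-padded 'MM-DD' strings sort lexicographically in calendar order, so the only special case is a window that crosses New Year, where the AND must become an OR. A toy demonstration with made-up rows:

```python
import pandas as pd

df = pd.DataFrame({"month_day": ["12-30", "12-31", "01-01", "01-02", "06-15"]})
start_month_day, end_month_day = "12-30", "01-02"

wraps_year = start_month_day > end_month_day  # lexicographic 'MM-DD' comparison
if wraps_year:
    # Window crosses New Year: keep days at/after the start OR at/before the end
    mask = (df["month_day"] >= start_month_day) | (df["month_day"] <= end_month_day)
else:
    mask = (df["month_day"] >= start_month_day) & (df["month_day"] <= end_month_day)

print(df.loc[mask])  # 12-30, 12-31, 01-01, 01-02 selected; 06-15 excluded
```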
{loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loone_data_prep
-Version: 1.
+Version: 1.3.0
 Summary: Prepare data to run the LOONE model.
 Author-email: Osama Tarabih <osamatarabih@usf.edu>
 Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/RECORD CHANGED

@@ -1,9 +1,9 @@
-loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=
-loone_data_prep/LOONE_DATA_PREP.py,sha256=
+loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=P1CV9UtePWCzsPmni_U881eb10BXugeeBaj2JDbmI0M,37569
+loone_data_prep/LOONE_DATA_PREP.py,sha256=pk7AQMdsiq6nwOXlNClso2ICIasyDgUV16Wo9un99NE,69303
 loone_data_prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 loone_data_prep/data_analyses_fns.py,sha256=BZ7famrSKoUfExQvZfbl72CyADHLb-zzgdWZ-kLJxcQ,4603
 loone_data_prep/herbie_utils.py,sha256=O5Lcn6lSWUKjJwWhak7U3eyUPxo4jH59xdfBgl5ExJQ,914
-loone_data_prep/utils.py,sha256=
+loone_data_prep/utils.py,sha256=v26f6ckDhRLfy3acVsIRn1uIHTMgJgoSPmF5fq9LYRI,36111
 loone_data_prep/flow_data/S65E_total.py,sha256=szNUfj0EyyyDzuKNhTGAZtWc5owiOpxYS55YTt4u19k,2835
 loone_data_prep/flow_data/__init__.py,sha256=u7fENFUZsJjyl13Bc9ZE47sHMKmjxtqXhV9t7vDTm7Y,93
 loone_data_prep/flow_data/forecast_bias_correction.py,sha256=pcMH7qR3RZvXOHoYOtP7azNn5rVuRPL9mwgoVk2NeLA,11378
@@ -12,15 +12,14 @@ loone_data_prep/flow_data/get_inflows.py,sha256=xKuSyJBdPrpjqMdRiyNDyxwdhYVIgLhi
 loone_data_prep/flow_data/get_outflows.py,sha256=x7aisIkbXoTkcubFQLDghX-P8lztPq-tU0dQzoVRTtQ,5620
 loone_data_prep/flow_data/hydro.py,sha256=5MwrzSUTCgPgeC_YGhz-en1CbOMp379Qf5zjpJlp-HM,5312
 loone_data_prep/forecast_scripts/Chla_merged.py,sha256=PxVEbTrqHEka6Jg0QjEC6qfFtPNzY-0_71WmlelAfPY,1225
-loone_data_prep/forecast_scripts/create_forecast_LOWs.py,sha256=xUYO0_9EbtVDX6LPBAfDFyvQQIFN7dNaNYFO4D5pe8Y,6591
 loone_data_prep/forecast_scripts/forecast_stages.py,sha256=6S6aHlYi2_t6GAh901KBiBWPueYCwAzyb-AliHJexoU,1373
-loone_data_prep/forecast_scripts/get_Chla_predicted.py,sha256=
-loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py,sha256=
+loone_data_prep/forecast_scripts/get_Chla_predicted.py,sha256=_P5_op8P_Z8d2dYWG4E5zGXcmw5TEwaK4CKdveoyPN4,4789
+loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py,sha256=7HdRe7acw1AAzBgSdv_i7zIDIrl7gy6ykXwzWn96LPI,4223
 loone_data_prep/forecast_scripts/loone_q_predict.py,sha256=k8ndTnsRly4BxGS52Gznca75oX2wkPX6nkid6Ccb6aQ,5834
 loone_data_prep/forecast_scripts/loone_wq_predict.py,sha256=xCiH6QScTYdeZyAhqoqNiJEDTFoXJPh-Yma9VGN_-GY,2123
+loone_data_prep/forecast_scripts/new_combined_weather_forecast.py,sha256=6-_05pQ2Vj_I_218ROGrM5U5q7NZE8Wi6xfwv0DQQcY,9124
 loone_data_prep/forecast_scripts/predict_PI.py,sha256=f0n2-gt5t9FKNdpJ5QGpyP2QBFLDGetYzfTYL95Vi_8,1937
 loone_data_prep/forecast_scripts/trib_cond.py,sha256=LlMxD0a9jwtQ9grI4Ho0KpTgphl6VAjg1cBUtfXZ01A,4030
-loone_data_prep/forecast_scripts/weather_forecast.py,sha256=5RFA2Pg4j9Df3633SEt6vEAQH0HXjR3TVDgNqYqETEY,7108
 loone_data_prep/water_level_data/__init__.py,sha256=rgHDDkwccemsZnwUlw2M0h2ML4KmI89yPscmLoxbEHM,43
 loone_data_prep/water_level_data/get_all.py,sha256=arPSWpb0XfQm0GKZJmoWhWdLuuNDxtGVX6_6UuD1_Vs,10885
 loone_data_prep/water_level_data/hydro.py,sha256=PtsNdMXe1Y4e5CzEyLH6nJx_xv8sB90orGcSgxt7nao,3653
@@ -31,8 +30,8 @@ loone_data_prep/water_quality_data/wq.py,sha256=sl6G3iDCk6QUzpHTXPHpRZNMBG0-wHuc
 loone_data_prep/weather_data/__init__.py,sha256=TX58EPgGRzEK_LmLze79lC4L7kU_j3yZf5_iC4nOIP4,45
 loone_data_prep/weather_data/get_all.py,sha256=aCufuxORU51XhXt7LN9wN_V4qtjNt1qRC1UKlI2b3Ko,6918
 loone_data_prep/weather_data/weather.py,sha256=hvceksrGSnDkCjheBVBuPgY1DrdR0ZAtrFB-K2tYTtk,12043
-loone_data_prep-1.
-loone_data_prep-1.
-loone_data_prep-1.
-loone_data_prep-1.
-loone_data_prep-1.
+loone_data_prep-1.3.0.dist-info/licenses/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
+loone_data_prep-1.3.0.dist-info/METADATA,sha256=AdYgJqXbXIXt3cBrE5hRDrPPlsDGuddMrlda6zNCo1c,4343
+loone_data_prep-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+loone_data_prep-1.3.0.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
+loone_data_prep-1.3.0.dist-info/RECORD,,
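For reference, each RECORD row above has the form path,sha256=digest,size, where the digest is the unpadded urlsafe-base64 encoding of the SHA-256 of the file's bytes. A short sketch that reproduces the __init__.py row from this diff (an empty file):

```python
import base64
import hashlib

def record_entry(path: str, data: bytes) -> str:
    # Wheel RECORD rows use the unpadded urlsafe-base64 form of the SHA-256 digest
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

print(record_entry("loone_data_prep/__init__.py", b""))
# -> loone_data_prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
```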
loone_data_prep/forecast_scripts/create_forecast_LOWs.py DELETED

@@ -1,170 +0,0 @@
-import os
-from herbie import FastHerbie
-from datetime import datetime
-import pandas as pd
-from retry_requests import retry as retry_requests
-from retry import retry
-import warnings
-from typing import Tuple
-from loone_data_prep.herbie_utils import get_fast_herbie_object
-
-
-def generate_wind_forecasts(output_dir):
-    # Ensure output directory exists
-    warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
-    os.makedirs(output_dir, exist_ok=True)
-
-    # Define points of interest
-    points = pd.DataFrame({
-        "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
-        "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
-    })
-
-    # Station-specific file and column names
-    file_map = {
-        "Point_1": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
-        "Point_2": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
-        "Point_3": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
-        "Point_4": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
-    }
-
-    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
-    FH = get_fast_herbie_object(today_str)
-    print("FastHerbie initialized.")
-    dfs = []
-
-    variables = {
-        "10u": "10u",
-        "10v": "10v",
-        "2t": "2t",
-
-    }
-
-    # Loop through points and extract data
-    for index, point in points.iterrows():
-        print(f"\nProcessing Point {index + 1}: ({point.latitude}, {point.longitude})")
-
-        point_df = pd.DataFrame({
-            "longitude": [point.longitude],
-            "latitude": [point.latitude]
-        })
-
-        # Loop through variables for current point and extract data
-        for var_key, var_name in variables.items():
-            # Get the current variable data at the current point
-            print(f" Variable: {var_key}")
-            try:
-                df, var_name_actual = _download_herbie_variable(FH, var_key, var_name, point_df)
-            except Exception as e:
-                print(f"Error processing {var_key} for Point {index + 1} ({point.latitude}, {point.longitude}): {e}")
-                print(f'Skipping {var_key}')
-                continue
-
-            # Append the DataFrame and variable name to the list
-            if not df.empty:
-                dfs.append((index, var_name_actual, df))
-
-    # Merge and process data per point
-    results = {}
-    for point_index in range(len(points)):
-        u_df = [df for idx, name, df in dfs if idx == point_index and name == "u10"][0]
-        v_df = [df for idx, name, df in dfs if idx == point_index and name == "v10"][0]
-        merged = u_df.merge(v_df, on="datetime", how="outer")
-
-        # Compute wind speed and correction
-        merged["wind_speed"] = (merged["u10"] ** 2 + merged["v10"] ** 2) ** 0.5
-        merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
-        merged["wind_speed_corrected"] = merged["wind_speed_corrected"] * 2.23694 # m/s to mph
-
-        results[f"Point_{point_index + 1}"] = merged
-
-    # Save outputs with station-specific column names
-    for key, (filename, new_col_name) in file_map.items():
-        df = results[key].copy()
-        df = df[["datetime", "wind_speed_corrected"]].rename(columns={
-            "wind_speed_corrected": new_col_name,
-            "datetime": "date"
-        })
-        filepath = os.path.join(output_dir, filename)
-        df.to_csv(filepath, index=False)
-    # Save 2-meter air temperature data
-    airt_file_map = {
-        "Point_1": "L001_AIRT_Degrees Celsius_forecast.csv",
-        "Point_2": "L005_AIRT_Degrees Celsius_forecast.csv",
-        "Point_3": "L006_AIRT_Degrees Celsius_forecast.csv",
-        "Point_4": "LZ40_AIRT_Degrees Celsius_forecast.csv"
-    }
-    airt_column_map = {
-        "Point_1": "L001_AIRT_Degrees Celsius",
-        "Point_2": "L005_AIRT_Degrees Celsius",
-        "Point_3": "L006_AIRT_Degrees Celsius",
-        "Point_4": "LZ40_AIRT_Degrees Celsius"
-    }
-
-    for key in airt_file_map:
-        point_index = int(key.split("_")[1]) - 1
-        df_airt = [df for idx, name, df in dfs if idx == point_index and name == "t2m"][0].copy()
-        df_airt["t2m"] = df_airt["t2m"] - 273.15  # Convert from Kelvin to Celsius
-        df_airt = df_airt.rename(columns={
-            "datetime": "date",
-            "t2m": airt_column_map[key]
-        })
-        filepath = os.path.join(output_dir, airt_file_map[key])
-        df_airt.to_csv(filepath, index=False)
-
-
-@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
-def _download_herbie_variable(fast_herbie_object: FastHerbie, variable_key: str, variable_name: str, point_df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
-    """
-    Download a specific variable from the Herbie API.
-
-    Args:
-        fast_herbie_object: An instance of the FastHerbie class.
-        variable_key: The key of the variable to download.
-        variable_name: The name of the variable to download.
-        point_df: A DataFrame containing the point of interest (longitude and latitude).
-
-    Returns:
-        A DataFrame containing the downloaded variable data.
-
-    Example:
-        point_df = pd.DataFrame({"longitude": [-80.7934], "latitude": [27.1389]})
-        df, var_name_actual = _download_herbie_variable(FastHerbie('2020-05-16 00:00', model='ifs', fxx=range(0, 360, 3)), '10u', '10u', point_df)
-    """
-    # Download and load dataset
-    fast_herbie_object.download(f":{variable_key}")
-    ds = fast_herbie_object.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
-
-    # Extract point data
-    dsi = ds.herbie.pick_points(point_df, method="nearest")
-
-    # Close and delete the original dataset to free up resources
-    ds.close()
-    del ds
-
-    # Get actual variable name
-    if variable_name == "10u":
-        var_name_actual = "u10"  # Map 10u to u10
-    elif variable_name == "10v":
-        var_name_actual = "v10"  # Map 10v to v10
-    elif variable_name == "2t":
-        var_name_actual = "t2m" #TODO: check that this is correct
-
-    # Convert to DataFrame
-    time_series = dsi[var_name_actual].squeeze()
-    df = time_series.to_dataframe().reset_index()
-
-    # Handle datetime columns
-    if "valid_time" in df.columns:
-        df = df.rename(columns={"valid_time": "datetime"})
-    elif "step" in df.columns and "time" in dsi.coords:
-        df["datetime"] = dsi.time.values[0] + df["step"]
-
-    # Close and delete the intermediate dataset to free memory
-    dsi.close()
-    del dsi, time_series
-
-    # Retain necessary columns
-    df = df[["datetime", var_name_actual]].drop_duplicates()
-
-    return df, var_name_actual
loone_data_prep/forecast_scripts/weather_forecast.py DELETED

@@ -1,199 +0,0 @@
-from herbie import FastHerbie
-from datetime import datetime
-import pandas as pd
-import openmeteo_requests
-import argparse
-import requests_cache
-from retry_requests import retry as retry_requests
-from retry import retry
-import warnings
-from loone_data_prep.herbie_utils import get_fast_herbie_object
-
-warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
-
-
-def download_weather_forecast(file_path):
-    # Get today's date in the required format
-    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
-
-    # Define variables to download and extract
-    variables = {
-        "10u": "10u",
-        "ssrd": "ssrd",
-        "tp": "tp",
-        "10v": "10v",
-    }
-
-    # Initialize FastHerbie
-    FH = get_fast_herbie_object(today_str)
-    print("FastHerbie initialized.")
-
-    dfs = []
-
-    for var_key, var_name in variables.items():
-        # Download the current variable
-        print(f"Processing {var_key}...")
-        try:
-            df = _download_herbie_variable(FH, var_key, var_name)
-        except Exception as e:
-            print(f"Error processing {var_key}: {e}")
-            print(f'Skipping {var_key}')
-            continue
-
-        # Append to list
-        if not df.empty:
-            dfs.append(df)
-
-    try:
-        # Merge all variables into a single DataFrame
-        final_df = dfs[0]
-        for df in dfs[1:]:
-            final_df = final_df.merge(df, on="datetime", how="outer")
-        print(final_df)
-        # Calculate wind speed
-        final_df["wind_speed"] = (final_df["u10"] ** 2 + final_df["v10"] ** 2) ** 0.5
-
-        #rainfall corrected: OLS Regression Equation: Corrected Forecast = 0.7247 * Forecast + 0.1853
-        final_df["tp_corrected"] = 0.7247 * final_df["tp"] + 0.1853
-
-        #wind speed correction: Corrected Forecast = 0.4167 * Forecast + 4.1868
-        final_df["wind_speed_corrected"] = 0.4167 * final_df["wind_speed"] + 4.1868
-
-        #radiation correction will need to be fixed because it was done on fdir instead of ssdr
-        #radiation corrected: Corrected Forecast = 0.0553 * Forecast - 0.0081
-        final_df["ssrd_corrected"] = 0.0553 * final_df["ssrd"] - 0.0081
-    except Exception as e:
-        print(f'Error correcting herbie weather data: {e}')
-
-    try:
-        # Setup the Open-Meteo API client with cache and retry on error
-        cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
-        retry_session = retry_requests(cache_session, retries = 5, backoff_factor = 0.2)
-        openmeteo = openmeteo_requests.Client(session = retry_session)
-
-        # Make sure all required weather variables are listed here
-        # The order of variables in hourly or daily is important to assign them correctly below
-        url = "https://api.open-meteo.com/v1/forecast"
-        params = {
-            "latitude": 26.9690,
-            "longitude": -80.7976,
-            "hourly": "evapotranspiration",
-            "forecast_days": 16,
-            "models": "gfs_seamless"
-        }
-        responses = openmeteo.weather_api(url, params=params)
-
-
-        # Process first location. Add a for-loop for multiple locations or weather models
-        response = responses[0]
-
-        hourly = response.Hourly()
-        hourly_evapotranspiration = hourly.Variables(0).ValuesAsNumpy()
-
-        hourly_data = {"date": pd.date_range(
-            start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
-            end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
-            freq = pd.Timedelta(seconds = hourly.Interval()),
-            inclusive = "left"
-        )}
-
-        hourly_data["evapotranspiration"] = hourly_evapotranspiration
-
-        hourly_dataframe = pd.DataFrame(data = hourly_data)
-
-        # Convert datetime to date for merging
-        final_df['date'] = final_df['datetime']
-        # Ensure final_df['date'] is timezone-aware (convert to UTC)
-        final_df['date'] = pd.to_datetime(final_df['date'], utc=True)
-
-        # Ensure hourly_dataframe['date'] is also timezone-aware (convert to UTC)
-        hourly_dataframe['date'] = pd.to_datetime(hourly_dataframe['date'], utc=True)
-
-        # Merge while keeping only matching dates from final_df
-        merged_df = final_df.merge(hourly_dataframe, on='date', how='left')
-
-        # Print final combined DataFrame
-        merged_df.drop(columns=['date'], inplace=True)
-        # print(merged_df)
-
-        merged_df.to_csv(file_path, index=False)
-    except Exception as e:
-        print(f'Error retrieving openmeteo weather data: {e}')
-
-
-@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
-def _download_herbie_variable(fast_herbie_object: FastHerbie, variable_key: str, variable_name: str) -> pd.DataFrame:
-    """
-    Download a specific variable from the Herbie API.
-
-    Args:
-        fast_herbie_object: An instance of the FastHerbie class.
-        variable_key: The key of the variable to download.
-        variable_name: The name of the variable to download.
-
-    Returns:
-        A DataFrame containing the downloaded variable data.
-
-    Example:
-        df = _download_herbie_variable(FastHerbie('2020-05-16 00:00', model='ifs', fxx=range(0, 360, 3)), '10u', '10u')
-    """
-    # Define point of interest
-    points = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
-
-    # Download and load the dataset
-    fast_herbie_object.download(f":{variable_key}")
-    ds = fast_herbie_object.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
-
-    # Extract point data
-    dsi = ds.herbie.pick_points(points, method="nearest")
-
-    # Close and delete the original dataset to free up resources
-    ds.close()
-    del ds
-
-    # Extract the correct variable name dynamically
-    if variable_name == "10u":
-        var_name_actual = "u10"  # Map 10u to u10
-    elif variable_name == "10v":
-        var_name_actual = "v10"  # Map 10v to v10
-    else:
-        var_name_actual = variable_name  # For ssrd and tp, use the same name
-
-    # Extract time series
-    time_series = dsi[var_name_actual].squeeze()
-
-    # Convert to DataFrame
-    df = time_series.to_dataframe().reset_index()
-
-    # Convert `valid_time` to datetime
-    if "valid_time" in df.columns:
-        df = df.rename(columns={"valid_time": "datetime"})
-    elif "step" in df.columns and "time" in dsi.coords:
-        df["datetime"] = dsi.time.values[0] + df["step"]
-
-    # Keep only datetime and variable of interest
-    df = df[["datetime", var_name_actual]].drop_duplicates()
-
-    # Print extracted data
-    # print(df)
-
-    # Clean up intermediate datasets to free memory
-    del dsi, time_series
-
-    return df
-
-
-def main():
-    # Set up command-line argument parsing
-    parser = argparse.ArgumentParser(description="Download and process weather forecast data.")
-    parser.add_argument("file_path", help="Path to save the resulting CSV file.")
-
-    # Parse the arguments
-    args = parser.parse_args()
-
-    # Call the function with the provided file path
-    download_weather_forecast(args.file_path)
-
-
-if __name__ == "__main__":
-    main()
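Worth noting when comparing the two files: the deleted script applied its radiation correction directly to the raw ssrd accumulation (its own comment flags that as needing a fix), while new_combined_weather_forecast.py first converts the cumulative J/m² into an average kW/m² per step and then applies the 1.0530 and -0.0347 coefficients. A sketch of that conversion on invented 3-hourly values:

```python
import pandas as pd

# ssrd arrives as cumulative J/m^2; toy 3-hourly accumulations below
df = pd.DataFrame({
    "datetime": pd.date_range("2024-06-01", periods=4, freq="3h"),
    "ssrd": [0.0, 1.08e6, 3.24e6, 5.40e6],
})

# J/m^2 gained per step, divided by elapsed seconds, gives W/m^2; /1000 -> kW/m^2
df["ssrd_kwm2"] = df["ssrd"].diff() / df["datetime"].diff().dt.total_seconds() / 1000
# Linear bias correction from the new file, clipped at zero
df["ssrd_corrected"] = (1.0530 * df["ssrd_kwm2"] - 0.0347).clip(lower=0)
print(df[["datetime", "ssrd_kwm2", "ssrd_corrected"]])
```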
{loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/WHEEL: File without changes

{loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/licenses/LICENSE: File without changes

{loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.0.dist-info}/top_level.txt: File without changes