loone-data-prep 1.2.4-py3-none-any.whl → 1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ from loone_data_prep.utils import stg2sto, stg2ar
  import datetime

  START_DATE = datetime.datetime.now()
- END_DATE = START_DATE + datetime.timedelta(days=15)
+ END_DATE = START_DATE + datetime.timedelta(days=14)

  M3_Yr = 2008
  M3_M = 1
@@ -373,8 +373,8 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
  C44RO_df['C44RO_cmd'] = C44RO
  C43RO_df['C43RO'] = C43RO_df['C43RO_cmd']/(0.0283168466 * 86400)
  C44RO_df['C44RO'] = C44RO_df['C44RO_cmd']/(0.0283168466 * 86400)
- C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv', index=False)
- C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv', index=False)
+ C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv')
+ C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv')
  C43RO_df.index = pd.to_datetime(C43RO_df["date"])
  C43RO_df = C43RO_df.drop(columns="date")

@@ -384,13 +384,13 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
  C43Mon = C43RO_df.resample('ME').mean()
  C44Mon = C44RO_df.resample('ME').mean()

- C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv', index=False)
- C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv', index=False)
+ C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv')
+ C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv')
  Basin_RO = pd.DataFrame(C44Mon.index, columns=['date'])
  # Basin_RO['SLTRIB'] = SLTRIBMon['SLTRIB_cfs'].values * 1.9835 # cfs to acft
  Basin_RO['C44RO'] = C44Mon['C44RO'].values * 86400
  Basin_RO['C43RO'] = C43Mon['C43RO'].values * 86400
- Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv', index=False)
+ Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv')

  # # Get monthly C43RO and C44RO from historical run
  # shutil.copyfile(os.path.join(historical_files_src, "C43RO_Monthly.csv"), os.path.join(output_dir, 'C43RO_Monthly.csv'))
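A note on the `index=False` removals in the two hunks above: after `resample('ME').mean()` the dates live in the DataFrame's DatetimeIndex rather than in a `date` column, so writing with the default `index=True` is what keeps them in the CSV. A minimal sketch with synthetic values (column name taken from the diff, data invented):

import pandas as pd

# After resampling, the dates move into the index.
df = pd.DataFrame(
    {"C43RO": [1.0, 2.0, 3.0]},
    index=pd.to_datetime(["2024-01-15", "2024-02-15", "2024-03-15"]),
)
monthly = df.resample("ME").mean()
print(monthly.to_csv(index=False))  # values only; the dates are dropped
print(monthly.to_csv())             # default index=True keeps the dates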
@@ -461,16 +461,47 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
  LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")

  # # RFVol acft
- # # Create File (RF_Volume)
- # RFVol = pd.DataFrame(RF_data["date"], columns=["date"])
- # RFVol["RFVol_acft"] = (RF_data["average_rainfall"].values / 12) * LO_Stg_Sto_SA_df["SA_acres"].values
- # RFVol.to_csv(f"{output_dir}/RFVol_LORS_20082023.csv", index=False)
-
- # # ETVol acft
- # # Create File (ETVol)
- # ETVol = pd.DataFrame(ET_data["date"], columns=["date"])
- # ETVol["ETVol_acft"] = (ET_data["average_ETPI"].values / 12) * LO_Stg_Sto_SA_df["SA_acres"].values
- # ETVol.to_csv(f"{output_dir}/ETVol_LORS_20082023.csv", index=False)
+ RF_data = pd.read_csv(f'{input_dir}/LAKE_RAINFALL_DATA_FORECAST.csv')
+ # RF_data_copy = RF_data.copy()
+ # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+ RF_data['date'] = pd.to_datetime(RF_data['date'])
+ # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+ # LO_Stg_Sto_SA_df_copy.index.name = None
+
+
+ # merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+ #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+ #                         on='date', how='inner')
+ #I am just using the most recent SA_acres value for all forecast dates since we do not have forecasted surface area
+ RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
+ RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+
+ date_reference = RFVol['date'].iloc[0]
+ date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+ df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
+ RFVol = pd.concat([df_insert, RFVol])
+ RFVol.to_csv(f'{output_dir}/RFVol_Forecast.csv', index=False)
+
+ # ETVol acft
+ # Create File (ETVol)
+ # Merge the DataFrames on date to ensure matching rows
+ ET_data = pd.read_csv(f'{input_dir}/LOONE_AVERAGE_ETPI_DATA_FORECAST.csv')
+ # ET_data_copy = ET_data.copy()
+ # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+ ET_data['date'] = pd.to_datetime(ET_data['date'])
+ # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+ # merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+ #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+ #                         on='date', how='inner')
+
+ ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
+ ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+ date_reference = ETVol['date'].iloc[0]
+ date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+ df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
+ ETVol = pd.concat([df_insert, ETVol])
+ ETVol.to_csv(f'{output_dir}/ETVol_forecast.csv', index=False)
+

  # # WCA Stages
  # # Create File (WCA_Stages_Inputs)
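The RFVol and ETVol blocks added above follow the same pattern: build the volume series from the forecast rainfall or ET, scale by the most recent surface area, then prepend two zero-valued rows so the file also covers the two days before the first forecast date. A compact sketch of the padding step with synthetic numbers:

import datetime
import pandas as pd

# Synthetic stand-in for the RFVol frame built from the forecast files.
RFVol = pd.DataFrame({"date": pd.to_datetime(["2024-06-03", "2024-06-04"]),
                      "RFVol_acft": [120.5, 98.1]})
date_reference = RFVol["date"].iloc[0]
date_inserts = [date_reference - datetime.timedelta(days=2),
                date_reference - datetime.timedelta(days=1)]
df_insert = pd.DataFrame({"date": date_inserts, "RFVol_acft": [0.0, 0.0]})
RFVol = pd.concat([df_insert, RFVol])  # zero rows for 06-01 and 06-02, then the forecasts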
@@ -351,7 +351,6 @@ def main(input_dir: str, output_dir: str) -> None:
  S65E.index = pd.to_datetime(S65E.index, unit='ns')
  S65E_Weekly = S65E.resample('W-FRI').mean()
  # PI
- # TODO
  # This is prepared manually
  # Weekly data is downloaded from https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/time-series/0804
  # State:Florida Division:4.South Central
@@ -9,7 +9,7 @@ def get_Chla_predicted(input_dir, output_dir):
  output_dir: Directory where the output files will be saved.
  """
  # Read forecast inflow file and get overall date range
- # TODO: Should this be an average/median of all of the ensembles? worst case?
+ # We are only taking the dates, so it is okay to just use one ensemble because they all have the same dates
  Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
  Q_in['date'] = pd.to_datetime(Q_in['date'])
  date_start = Q_in['date'].min()
@@ -9,7 +9,7 @@ def get_NO_Loads_predicted(input_dir, output_dir):
  output_dir: Directory where the output files will be saved.
  This function reads the forecast inflow file, retrieves nitrate data for specified stations,
  """
- # TODO: Should this be an average/median of all of the ensembles? worst case?
+ # It is okay to use just one ensemble because they all have the same dates and we only use the dates
  Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))

  datetime_str = Q_in['date'].iloc[0]
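Both comment rewrites above record the same shortcut: the scripts only need the shared date column, so reading a single ensemble member is enough. A runnable sketch of that date-range extraction (the frame stands in for LO_Inflows_BK_forecast_01.csv; the `flow` column is invented):

import pandas as pd

# Any single ensemble member suffices; all members share the same dates.
Q_in = pd.DataFrame({"date": ["2024-06-01", "2024-06-02", "2024-06-03"],
                     "flow": [100.0, 110.0, 95.0]})
Q_in["date"] = pd.to_datetime(Q_in["date"])
date_start, date_end = Q_in["date"].min(), Q_in["date"].max()
print(date_start, date_end)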
@@ -0,0 +1,220 @@
+ import os
+ import warnings
+ import pandas as pd
+ from datetime import datetime
+ from retry import retry
+ from loone_data_prep.herbie_utils import get_fast_herbie_object
+ from herbie import FastHerbie
+ import openmeteo_requests
+ from retry_requests import retry as retry_requests
+ import requests_cache
+
+ warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+
+ POINTS = pd.DataFrame({
+     "station": ["L001", "L005", "L006", "LZ40"],
+     "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
+     "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
+ })
+
+ WIND_FILE_MAP = {
+     "L001": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
+     "L005": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
+     "L006": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
+     "LZ40": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
+ }
+
+ AIRT_FILE_MAP = {
+     "L001": "L001_AIRT_Degrees Celsius_forecast.csv",
+     "L005": "L005_AIRT_Degrees Celsius_forecast.csv",
+     "L006": "L006_AIRT_Degrees Celsius_forecast.csv",
+     "LZ40": "LZ40_AIRT_Degrees Celsius_forecast.csv"
+ }
+
+ AIRT_COLUMN_MAP = {
+     "L001": "L001_AIRT_Degrees Celsius",
+     "L005": "L005_AIRT_Degrees Celsius",
+     "L006": "L006_AIRT_Degrees Celsius",
+     "LZ40": "LZ40_AIRT_Degrees Celsius"
+ }
+
+ @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+ def download_herbie_variable(FH, variable_key, variable_name, point_df):
+     """Download a Herbie variable for a given point and return a DataFrame."""
+     FH.download(f":{variable_key}")
+     ds = FH.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
+     dsi = ds.herbie.pick_points(point_df, method="nearest")
+
+     var_name = {
+         "10u": "u10",
+         "10v": "v10",
+         "2t": "t2m"
+     }.get(variable_name, variable_name)
+
+     ts = dsi[var_name].squeeze()
+     df = ts.to_dataframe().reset_index()
+     if "valid_time" in df.columns:
+         df.rename(columns={"valid_time": "datetime"}, inplace=True)
+     elif "time" in df.columns:
+         df.rename(columns={"time": "datetime"}, inplace=True)
+
+     df = df[["datetime", var_name]].drop_duplicates()
+     ds.close()
+     dsi.close()
+     del ds, dsi, ts
+     return df
+
+ # Download ET from Open-Meteo
+ def download_hourly_et(lat, lon):
+     cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
+     retry_session = retry_requests(cache_session, retries=5, backoff_factor=0.2)
+     client = openmeteo_requests.Client(session=retry_session)
+
+     url = "https://api.open-meteo.com/v1/forecast"
+     params = {
+         "latitude": lat,
+         "longitude": lon,
+         "hourly": "evapotranspiration",
+         "forecast_days": 16,
+         "models": "gfs_seamless"
+     }
+     responses = client.weather_api(url, params=params)
+     response = responses[0]
+
+     hourly = response.Hourly()
+     hourly_evap = hourly.Variables(0).ValuesAsNumpy()
+     hourly_data = {"date": pd.date_range(
+         start=pd.to_datetime(hourly.Time(), unit="s"),
+         end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
+         freq=pd.Timedelta(seconds=hourly.Interval()),
+         inclusive="left"
+     )}
+     hourly_data["evapotranspiration"] = hourly_evap
+     return pd.DataFrame(hourly_data)
+
+ # Main generation function
+ def generate_all_outputs(output_dir):
+     os.makedirs(output_dir, exist_ok=True)
+     today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+     FH = get_fast_herbie_object(today_str)
+
+     # Forecasted weather data (single point)
+     point_df = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
+     forecast_vars = ["10u", "10v", "2t", "tp", "ssrd"]
+     data = {var: download_herbie_variable(FH, var, var, point_df) for var in forecast_vars}
+
+     merged = data["10u"].merge(data["10v"], on="datetime")
+     merged = merged.merge(data["2t"], on="datetime")
+     merged = merged.merge(data["tp"], on="datetime")
+     merged = merged.merge(data["ssrd"], on="datetime")
+
+     # Derived columns
+     merged["wind_speed"] = (merged["u10"]**2 + merged["v10"]**2)**0.5  # wind speed in m/s
+     merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
+     merged["tp_inc_m"] = merged["tp"].diff().clip(lower=0)
+     # Convert incremental meters → mm
+     merged["tp_inc_mm"] = merged["tp_inc_m"] * 1000.0
+     # Apply bias correction (in mm)
+     merged["tp_corrected_mm"] = 0.7247 * merged["tp_inc_mm"] + 0.1853
+     # convert to inches
+     merged["tp_corrected"] = merged["tp_corrected_mm"] * 0.0393701
+
+     merged["ssrd_kwm2"] = merged["ssrd"].diff() / merged["datetime"].diff().dt.total_seconds() / 1000
+     merged["ssrd_corrected"] = (1.0530 * merged["ssrd_kwm2"] - 0.0347).clip(lower=0)
+     merged = merged[[
+         "datetime",
+         "wind_speed_corrected",
+         "tp_corrected",
+         "ssrd_corrected"
+     ]]
+
+     # ET for main point
+     df_et = download_hourly_et(26.9690, -80.7976)
+     merged = merged.merge(df_et, left_on="datetime", right_on="date", how="left").drop(columns=["date"])
+     merged.to_csv(os.path.join(output_dir, "forecasted_weather_data.csv"), index=False)
+
+     # 4-point wind and air temp CSVs
+     for idx, row in POINTS.iterrows():
+         station = row["station"]
+         point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+         # Wind
+         df_u = download_herbie_variable(FH, "10u", "10u", point_df)
+         df_v = download_herbie_variable(FH, "10v", "10v", point_df)
+         merged_ws = df_u.merge(df_v, on="datetime")
+         merged_ws["wind_speed"] = (merged_ws["u10"]**2 + merged_ws["v10"]**2)**0.5
+         merged_ws["wind_speed_corrected"] = 0.4167 * merged_ws["wind_speed"] + 4.1868
+
+         filename, new_col = WIND_FILE_MAP[station]
+         merged_ws[["datetime", "wind_speed_corrected"]].rename(
+             columns={"datetime": "date", "wind_speed_corrected": new_col}
+         ).to_csv(os.path.join(output_dir, filename), index=False)
+
+         # Air temp
+         df_t = download_herbie_variable(FH, "2t", "2t", point_df)
+         df_t["t2m"] = df_t["t2m"] - 273.15
+         df_t.rename(columns={"datetime": "date", "t2m": AIRT_COLUMN_MAP[station]}).to_csv(
+             os.path.join(output_dir, AIRT_FILE_MAP[station]), index=False
+         )
+
+     # Rainfall, ET, and SSRD 4-point CSVs
+     rainfall_dfs, et_dfs, ssrd_dfs = [], [], []
+
+     for idx, row in POINTS.iterrows():
+         station = row["station"]
+         point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+         # Rainfall
+         df_tp = download_herbie_variable(FH, "tp", "tp", point_df)
+         # Convert cumulative meters → incremental meters
+         df_tp["tp_inc_m"] = df_tp["tp"].diff().clip(lower=0)
+         # Convert incremental meters → millimeters
+         df_tp["tp_inc_mm"] = df_tp["tp_inc_m"] * 1000.0
+         df_tp["date_only"] = df_tp["datetime"].dt.date
+         # Sum incremental precipitation per day
+         df_daily = df_tp.groupby("date_only")["tp_inc_mm"].sum().reset_index()
+         # Apply bias correction on daily totals (in mm)
+         df_daily["tp_corrected_mm"] = 0.7247 * df_daily["tp_inc_mm"] + 0.1853
+         # Convert corrected mm → inches
+         df_daily["tp_corrected_in"] = df_daily["tp_corrected_mm"] * 0.0393701
+         df_daily = df_daily.rename(columns={"date_only": "date", "tp_corrected_in": station})
+         rainfall_dfs.append(df_daily[["date", station]])
+
+         # ET
+         df_et_point = download_hourly_et(row.latitude, row.longitude)
+         df_et_point.rename(columns={"evapotranspiration": station}, inplace=True)
+         et_dfs.append(df_et_point)
+
+         # SSRD
+         df_ssrd = download_herbie_variable(FH, "ssrd", "ssrd", point_df)
+         df_ssrd["ssrd_kwm2"] = df_ssrd["ssrd"].diff() / df_ssrd["datetime"].diff().dt.total_seconds() / 1000
+         df_ssrd["ssrd_corrected"] = (1.0530 * df_ssrd["ssrd_kwm2"] - 0.0347).clip(lower=0)
+         df_ssrd = df_ssrd[["datetime", "ssrd_corrected"]].rename(columns={"datetime": "date", "ssrd_corrected": station})
+         ssrd_dfs.append(df_ssrd)
+
+     # Merge rainfall
+     rainfall_df = pd.concat(rainfall_dfs, axis=0).groupby("date").first().reset_index()
+     rainfall_df["average_rainfall"] = rainfall_df[POINTS["station"]].mean(axis=1)
+     rainfall_df.to_csv(os.path.join(output_dir, "LAKE_RAINFALL_DATA_FORECAST.csv"), index=False)
+
+     # Merge ET
+     et_df_all = pd.concat(et_dfs, axis=0).groupby("date").first().reset_index()
+     et_df_all["average_ETPI"] = et_df_all[POINTS["station"]].mean(axis=1)
+     et_df_all.to_csv(os.path.join(output_dir, "LOONE_AVERAGE_ETPI_DATA_FORECAST.csv"), index=False)
+
+     # Combine all SSRD DataFrames
+     ssrd_df_all = pd.concat(ssrd_dfs, axis=0)
+     ssrd_df_all["date"] = pd.to_datetime(ssrd_df_all["date"])
+
+     # Compute the daily mean for each station
+     daily_ssrd = (
+         ssrd_df_all.groupby(ssrd_df_all["date"].dt.date)[POINTS["station"]]
+         .mean()
+         .reset_index()
+     )
+
+     daily_ssrd = daily_ssrd.rename(columns={"date": "date"})
+     daily_ssrd["Mean_RADT"] = daily_ssrd[POINTS["station"]].mean(axis=1)
+     daily_ssrd.to_csv(os.path.join(output_dir, "LO_RADT_data_forecast.csv"), index=False)
+
+     print("All outputs generated successfully.")
loone_data_prep/utils.py CHANGED
@@ -996,14 +996,31 @@ def get_synthetic_data(date_start: str, df: pd.DataFrame):
  end_month_day = date_end.strftime('%m-%d')

  # Filter the DataFrame to include only rows between date_start and date_end for all previous years
- mask = (df['month_day'] >= start_month_day) & (df['month_day'] <= end_month_day)
+ # (handle year wrap, e.g., Dec -> Jan)
+ wraps_year = start_month_day > end_month_day
+
+ if wraps_year:
+     mask = (
+         (df['month_day'] >= start_month_day) |
+         (df['month_day'] <= end_month_day)
+     )
+ else:
+     mask = (
+         (df['month_day'] >= start_month_day) &
+         (df['month_day'] <= end_month_day)
+     )
+
  filtered_data = df.loc[mask]

  # Group by the month and day, then calculate the average for each group
  average_values = filtered_data.groupby('month_day')['Data'].mean()
  # Interpolate in case there are missing values:
  start_date = pd.to_datetime('2001-' + start_month_day)
- end_date = pd.to_datetime('2001-' + end_month_day)
+
+ if wraps_year:
+     end_date = pd.to_datetime('2002-' + end_month_day)
+ else:
+     end_date = pd.to_datetime('2001-' + end_month_day)

  full_dates = pd.date_range(start=start_date, end=end_date)
  full_index = full_dates.strftime('%m-%d')
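The wrap-aware mask above works because zero-padded 'MM-DD' strings compare lexicographically in calendar order within a year. A quick check with a window spanning New Year (synthetic data, mirroring the logic in the hunk):

import pandas as pd

df = pd.DataFrame({"month_day": ["12-30", "12-31", "01-01", "01-02", "06-15"],
                   "Data": [1.0, 2.0, 3.0, 4.0, 5.0]})
start_month_day, end_month_day = "12-30", "01-02"
wraps_year = start_month_day > end_month_day  # True for a Dec -> Jan window
if wraps_year:
    mask = (df["month_day"] >= start_month_day) | (df["month_day"] <= end_month_day)
else:
    mask = (df["month_day"] >= start_month_day) & (df["month_day"] <= end_month_day)
print(df.loc[mask])  # keeps 12-30 through 01-02, drops 06-15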
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loone_data_prep
- Version: 1.2.4
+ Version: 1.3.0
  Summary: Prepare data to run the LOONE model.
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
@@ -1,9 +1,9 @@
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=gfpnaOTjZ-YhWqOEvOaDvear4_59IbqARpLyg2Y_c8U,35851
- loone_data_prep/LOONE_DATA_PREP.py,sha256=vEWcGHKN10ipLk9o5I5aKu_LPfDyFW3HBJ8GgqISYjA,69315
+ loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=P1CV9UtePWCzsPmni_U881eb10BXugeeBaj2JDbmI0M,37569
+ loone_data_prep/LOONE_DATA_PREP.py,sha256=pk7AQMdsiq6nwOXlNClso2ICIasyDgUV16Wo9un99NE,69303
  loone_data_prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  loone_data_prep/data_analyses_fns.py,sha256=BZ7famrSKoUfExQvZfbl72CyADHLb-zzgdWZ-kLJxcQ,4603
  loone_data_prep/herbie_utils.py,sha256=O5Lcn6lSWUKjJwWhak7U3eyUPxo4jH59xdfBgl5ExJQ,914
- loone_data_prep/utils.py,sha256=UlNc84ofh3ZY3lYsgQmDsgGgohXIBwZ0bK9rX6SgGF4,35730
+ loone_data_prep/utils.py,sha256=v26f6ckDhRLfy3acVsIRn1uIHTMgJgoSPmF5fq9LYRI,36111
  loone_data_prep/flow_data/S65E_total.py,sha256=szNUfj0EyyyDzuKNhTGAZtWc5owiOpxYS55YTt4u19k,2835
  loone_data_prep/flow_data/__init__.py,sha256=u7fENFUZsJjyl13Bc9ZE47sHMKmjxtqXhV9t7vDTm7Y,93
  loone_data_prep/flow_data/forecast_bias_correction.py,sha256=pcMH7qR3RZvXOHoYOtP7azNn5rVuRPL9mwgoVk2NeLA,11378
@@ -12,15 +12,14 @@ loone_data_prep/flow_data/get_inflows.py,sha256=xKuSyJBdPrpjqMdRiyNDyxwdhYVIgLhi
  loone_data_prep/flow_data/get_outflows.py,sha256=x7aisIkbXoTkcubFQLDghX-P8lztPq-tU0dQzoVRTtQ,5620
  loone_data_prep/flow_data/hydro.py,sha256=5MwrzSUTCgPgeC_YGhz-en1CbOMp379Qf5zjpJlp-HM,5312
  loone_data_prep/forecast_scripts/Chla_merged.py,sha256=PxVEbTrqHEka6Jg0QjEC6qfFtPNzY-0_71WmlelAfPY,1225
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py,sha256=xUYO0_9EbtVDX6LPBAfDFyvQQIFN7dNaNYFO4D5pe8Y,6591
  loone_data_prep/forecast_scripts/forecast_stages.py,sha256=6S6aHlYi2_t6GAh901KBiBWPueYCwAzyb-AliHJexoU,1373
- loone_data_prep/forecast_scripts/get_Chla_predicted.py,sha256=wnGFJlu2zyO1QSUiQ3W8iAcLOtkDZpLhuRr037Nmgb4,4759
- loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py,sha256=MvJNgY7KPkjyot2BYInQCcp5lg8_N_D_SLSt8WpUmHQ,4200
+ loone_data_prep/forecast_scripts/get_Chla_predicted.py,sha256=_P5_op8P_Z8d2dYWG4E5zGXcmw5TEwaK4CKdveoyPN4,4789
+ loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py,sha256=7HdRe7acw1AAzBgSdv_i7zIDIrl7gy6ykXwzWn96LPI,4223
  loone_data_prep/forecast_scripts/loone_q_predict.py,sha256=k8ndTnsRly4BxGS52Gznca75oX2wkPX6nkid6Ccb6aQ,5834
  loone_data_prep/forecast_scripts/loone_wq_predict.py,sha256=xCiH6QScTYdeZyAhqoqNiJEDTFoXJPh-Yma9VGN_-GY,2123
+ loone_data_prep/forecast_scripts/new_combined_weather_forecast.py,sha256=6-_05pQ2Vj_I_218ROGrM5U5q7NZE8Wi6xfwv0DQQcY,9124
  loone_data_prep/forecast_scripts/predict_PI.py,sha256=f0n2-gt5t9FKNdpJ5QGpyP2QBFLDGetYzfTYL95Vi_8,1937
  loone_data_prep/forecast_scripts/trib_cond.py,sha256=LlMxD0a9jwtQ9grI4Ho0KpTgphl6VAjg1cBUtfXZ01A,4030
- loone_data_prep/forecast_scripts/weather_forecast.py,sha256=5RFA2Pg4j9Df3633SEt6vEAQH0HXjR3TVDgNqYqETEY,7108
  loone_data_prep/water_level_data/__init__.py,sha256=rgHDDkwccemsZnwUlw2M0h2ML4KmI89yPscmLoxbEHM,43
  loone_data_prep/water_level_data/get_all.py,sha256=arPSWpb0XfQm0GKZJmoWhWdLuuNDxtGVX6_6UuD1_Vs,10885
  loone_data_prep/water_level_data/hydro.py,sha256=PtsNdMXe1Y4e5CzEyLH6nJx_xv8sB90orGcSgxt7nao,3653
@@ -31,8 +30,8 @@ loone_data_prep/water_quality_data/wq.py,sha256=sl6G3iDCk6QUzpHTXPHpRZNMBG0-wHuc
  loone_data_prep/weather_data/__init__.py,sha256=TX58EPgGRzEK_LmLze79lC4L7kU_j3yZf5_iC4nOIP4,45
  loone_data_prep/weather_data/get_all.py,sha256=aCufuxORU51XhXt7LN9wN_V4qtjNt1qRC1UKlI2b3Ko,6918
  loone_data_prep/weather_data/weather.py,sha256=hvceksrGSnDkCjheBVBuPgY1DrdR0ZAtrFB-K2tYTtk,12043
- loone_data_prep-1.2.4.dist-info/licenses/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
- loone_data_prep-1.2.4.dist-info/METADATA,sha256=WIXZJw2ShnnkeaZGRYL7JtjE-yIIDerzWtFPCxt9SVQ,4343
- loone_data_prep-1.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- loone_data_prep-1.2.4.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
- loone_data_prep-1.2.4.dist-info/RECORD,,
+ loone_data_prep-1.3.0.dist-info/licenses/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
+ loone_data_prep-1.3.0.dist-info/METADATA,sha256=AdYgJqXbXIXt3cBrE5hRDrPPlsDGuddMrlda6zNCo1c,4343
+ loone_data_prep-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ loone_data_prep-1.3.0.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
+ loone_data_prep-1.3.0.dist-info/RECORD,,
@@ -1,170 +0,0 @@
- import os
- from herbie import FastHerbie
- from datetime import datetime
- import pandas as pd
- from retry_requests import retry as retry_requests
- from retry import retry
- import warnings
- from typing import Tuple
- from loone_data_prep.herbie_utils import get_fast_herbie_object
-
-
- def generate_wind_forecasts(output_dir):
-     # Ensure output directory exists
-     warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
-     os.makedirs(output_dir, exist_ok=True)
-
-     # Define points of interest
-     points = pd.DataFrame({
-         "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
-         "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
-     })
-
-     # Station-specific file and column names
-     file_map = {
-         "Point_1": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
-         "Point_2": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
-         "Point_3": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
-         "Point_4": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
-     }
-
-     today_str = datetime.today().strftime('%Y-%m-%d 00:00')
-     FH = get_fast_herbie_object(today_str)
-     print("FastHerbie initialized.")
-     dfs = []
-
-     variables = {
-         "10u": "10u",
-         "10v": "10v",
-         "2t": "2t",
-
-     }
-
-     # Loop through points and extract data
-     for index, point in points.iterrows():
-         print(f"\nProcessing Point {index + 1}: ({point.latitude}, {point.longitude})")
-
-         point_df = pd.DataFrame({
-             "longitude": [point.longitude],
-             "latitude": [point.latitude]
-         })
-
-         # Loop through variables for current point and extract data
-         for var_key, var_name in variables.items():
-             # Get the current variable data at the current point
-             print(f"  Variable: {var_key}")
-             try:
-                 df, var_name_actual = _download_herbie_variable(FH, var_key, var_name, point_df)
-             except Exception as e:
-                 print(f"Error processing {var_key} for Point {index + 1} ({point.latitude}, {point.longitude}): {e}")
-                 print(f'Skipping {var_key}')
-                 continue
-
-             # Append the DataFrame and variable name to the list
-             if not df.empty:
-                 dfs.append((index, var_name_actual, df))
-
-     # Merge and process data per point
-     results = {}
-     for point_index in range(len(points)):
-         u_df = [df for idx, name, df in dfs if idx == point_index and name == "u10"][0]
-         v_df = [df for idx, name, df in dfs if idx == point_index and name == "v10"][0]
-         merged = u_df.merge(v_df, on="datetime", how="outer")
-
-         # Compute wind speed and correction
-         merged["wind_speed"] = (merged["u10"] ** 2 + merged["v10"] ** 2) ** 0.5
-         merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
-         merged["wind_speed_corrected"] = merged["wind_speed_corrected"] * 2.23694  # m/s to mph
-
-         results[f"Point_{point_index + 1}"] = merged
-
-     # Save outputs with station-specific column names
-     for key, (filename, new_col_name) in file_map.items():
-         df = results[key].copy()
-         df = df[["datetime", "wind_speed_corrected"]].rename(columns={
-             "wind_speed_corrected": new_col_name,
-             "datetime": "date"
-         })
-         filepath = os.path.join(output_dir, filename)
-         df.to_csv(filepath, index=False)
-     # Save 2-meter air temperature data
-     airt_file_map = {
-         "Point_1": "L001_AIRT_Degrees Celsius_forecast.csv",
-         "Point_2": "L005_AIRT_Degrees Celsius_forecast.csv",
-         "Point_3": "L006_AIRT_Degrees Celsius_forecast.csv",
-         "Point_4": "LZ40_AIRT_Degrees Celsius_forecast.csv"
-     }
-     airt_column_map = {
-         "Point_1": "L001_AIRT_Degrees Celsius",
-         "Point_2": "L005_AIRT_Degrees Celsius",
-         "Point_3": "L006_AIRT_Degrees Celsius",
-         "Point_4": "LZ40_AIRT_Degrees Celsius"
-     }
-
-     for key in airt_file_map:
-         point_index = int(key.split("_")[1]) - 1
-         df_airt = [df for idx, name, df in dfs if idx == point_index and name == "t2m"][0].copy()
-         df_airt["t2m"] = df_airt["t2m"] - 273.15  # Convert from Kelvin to Celsius
-         df_airt = df_airt.rename(columns={
-             "datetime": "date",
-             "t2m": airt_column_map[key]
-         })
-         filepath = os.path.join(output_dir, airt_file_map[key])
-         df_airt.to_csv(filepath, index=False)
-
-
- @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
- def _download_herbie_variable(fast_herbie_object: FastHerbie, variable_key: str, variable_name: str, point_df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
-     """
-     Download a specific variable from the Herbie API.
-
-     Args:
-         fast_herbie_object: An instance of the FastHerbie class.
-         variable_key: The key of the variable to download.
-         variable_name: The name of the variable to download.
-         point_df: A DataFrame containing the point of interest (longitude and latitude).
-
-     Returns:
-         A DataFrame containing the downloaded variable data.
-
-     Example:
-         point_df = pd.DataFrame({"longitude": [-80.7934], "latitude": [27.1389]})
-         df, var_name_actual = _download_herbie_variable(FastHerbie('2020-05-16 00:00', model='ifs', fxx=range(0, 360, 3)), '10u', '10u', point_df)
-     """
-     # Download and load dataset
-     fast_herbie_object.download(f":{variable_key}")
-     ds = fast_herbie_object.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
-
-     # Extract point data
-     dsi = ds.herbie.pick_points(point_df, method="nearest")
-
-     # Close and delete the original dataset to free up resources
-     ds.close()
-     del ds
-
-     # Get actual variable name
-     if variable_name == "10u":
-         var_name_actual = "u10"  # Map 10u to u10
-     elif variable_name == "10v":
-         var_name_actual = "v10"  # Map 10v to v10
-     elif variable_name == "2t":
-         var_name_actual = "t2m"  # TODO: check that this is correct
-
-     # Convert to DataFrame
-     time_series = dsi[var_name_actual].squeeze()
-     df = time_series.to_dataframe().reset_index()
-
-     # Handle datetime columns
-     if "valid_time" in df.columns:
-         df = df.rename(columns={"valid_time": "datetime"})
-     elif "step" in df.columns and "time" in dsi.coords:
-         df["datetime"] = dsi.time.values[0] + df["step"]
-
-     # Close and delete the intermediate dataset to free memory
-     dsi.close()
-     del dsi, time_series
-
-     # Retain necessary columns
-     df = df[["datetime", var_name_actual]].drop_duplicates()
-
-     return df, var_name_actual
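One detail that exists only in this deleted script: its corrected wind speed was converted from m/s to mph with the factor 2.23694, which is 3600 seconds per hour divided by 1609.344 meters per mile. A one-line check:

# 1 m/s expressed in mph: seconds per hour over metres per mile.
print(3600 / 1609.344)  # 2.2369362920544025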
@@ -1,199 +0,0 @@
- from herbie import FastHerbie
- from datetime import datetime
- import pandas as pd
- import openmeteo_requests
- import argparse
- import requests_cache
- from retry_requests import retry as retry_requests
- from retry import retry
- import warnings
- from loone_data_prep.herbie_utils import get_fast_herbie_object
-
- warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
-
-
- def download_weather_forecast(file_path):
-     # Get today's date in the required format
-     today_str = datetime.today().strftime('%Y-%m-%d 00:00')
-
-     # Define variables to download and extract
-     variables = {
-         "10u": "10u",
-         "ssrd": "ssrd",
-         "tp": "tp",
-         "10v": "10v",
-     }
-
-     # Initialize FastHerbie
-     FH = get_fast_herbie_object(today_str)
-     print("FastHerbie initialized.")
-
-     dfs = []
-
-     for var_key, var_name in variables.items():
-         # Download the current variable
-         print(f"Processing {var_key}...")
-         try:
-             df = _download_herbie_variable(FH, var_key, var_name)
-         except Exception as e:
-             print(f"Error processing {var_key}: {e}")
-             print(f'Skipping {var_key}')
-             continue
-
-         # Append to list
-         if not df.empty:
-             dfs.append(df)
-
-     try:
-         # Merge all variables into a single DataFrame
-         final_df = dfs[0]
-         for df in dfs[1:]:
-             final_df = final_df.merge(df, on="datetime", how="outer")
-         print(final_df)
-         # Calculate wind speed
-         final_df["wind_speed"] = (final_df["u10"] ** 2 + final_df["v10"] ** 2) ** 0.5
-
-         # rainfall corrected: OLS Regression Equation: Corrected Forecast = 0.7247 * Forecast + 0.1853
-         final_df["tp_corrected"] = 0.7247 * final_df["tp"] + 0.1853
-
-         # wind speed correction: Corrected Forecast = 0.4167 * Forecast + 4.1868
-         final_df["wind_speed_corrected"] = 0.4167 * final_df["wind_speed"] + 4.1868
-
-         # radiation correction will need to be fixed because it was done on fdir instead of ssdr
-         # radiation corrected: Corrected Forecast = 0.0553 * Forecast - 0.0081
-         final_df["ssrd_corrected"] = 0.0553 * final_df["ssrd"] - 0.0081
-     except Exception as e:
-         print(f'Error correcting herbie weather data: {e}')
-
-     try:
-         # Setup the Open-Meteo API client with cache and retry on error
-         cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
-         retry_session = retry_requests(cache_session, retries=5, backoff_factor=0.2)
-         openmeteo = openmeteo_requests.Client(session=retry_session)
-
-         # Make sure all required weather variables are listed here
-         # The order of variables in hourly or daily is important to assign them correctly below
-         url = "https://api.open-meteo.com/v1/forecast"
-         params = {
-             "latitude": 26.9690,
-             "longitude": -80.7976,
-             "hourly": "evapotranspiration",
-             "forecast_days": 16,
-             "models": "gfs_seamless"
-         }
-         responses = openmeteo.weather_api(url, params=params)
-
-
-         # Process first location. Add a for-loop for multiple locations or weather models
-         response = responses[0]
-
-         hourly = response.Hourly()
-         hourly_evapotranspiration = hourly.Variables(0).ValuesAsNumpy()
-
-         hourly_data = {"date": pd.date_range(
-             start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
-             end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
-             freq=pd.Timedelta(seconds=hourly.Interval()),
-             inclusive="left"
-         )}
-
-         hourly_data["evapotranspiration"] = hourly_evapotranspiration
-
-         hourly_dataframe = pd.DataFrame(data=hourly_data)
-
-         # Convert datetime to date for merging
-         final_df['date'] = final_df['datetime']
-         # Ensure final_df['date'] is timezone-aware (convert to UTC)
-         final_df['date'] = pd.to_datetime(final_df['date'], utc=True)
-
-         # Ensure hourly_dataframe['date'] is also timezone-aware (convert to UTC)
-         hourly_dataframe['date'] = pd.to_datetime(hourly_dataframe['date'], utc=True)
-
-         # Merge while keeping only matching dates from final_df
-         merged_df = final_df.merge(hourly_dataframe, on='date', how='left')
-
-         # Print final combined DataFrame
-         merged_df.drop(columns=['date'], inplace=True)
-         # print(merged_df)
-
-         merged_df.to_csv(file_path, index=False)
-     except Exception as e:
-         print(f'Error retrieving openmeteo weather data: {e}')
-
-
- @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
- def _download_herbie_variable(fast_herbie_object: FastHerbie, variable_key: str, variable_name: str) -> pd.DataFrame:
-     """
-     Download a specific variable from the Herbie API.
-
-     Args:
-         fast_herbie_object: An instance of the FastHerbie class.
-         variable_key: The key of the variable to download.
-         variable_name: The name of the variable to download.
-
-     Returns:
-         A DataFrame containing the downloaded variable data.
-
-     Example:
-         df = _download_herbie_variable(FastHerbie('2020-05-16 00:00', model='ifs', fxx=range(0, 360, 3)), '10u', '10u')
-     """
-     # Define point of interest
-     points = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
-
-     # Download and load the dataset
-     fast_herbie_object.download(f":{variable_key}")
-     ds = fast_herbie_object.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
-
-     # Extract point data
-     dsi = ds.herbie.pick_points(points, method="nearest")
-
-     # Close and delete the original dataset to free up resources
-     ds.close()
-     del ds
-
-     # Extract the correct variable name dynamically
-     if variable_name == "10u":
-         var_name_actual = "u10"  # Map 10u to u10
-     elif variable_name == "10v":
-         var_name_actual = "v10"  # Map 10v to v10
-     else:
-         var_name_actual = variable_name  # For ssrd and tp, use the same name
-
-     # Extract time series
-     time_series = dsi[var_name_actual].squeeze()
-
-     # Convert to DataFrame
-     df = time_series.to_dataframe().reset_index()
-
-     # Convert `valid_time` to datetime
-     if "valid_time" in df.columns:
-         df = df.rename(columns={"valid_time": "datetime"})
-     elif "step" in df.columns and "time" in dsi.coords:
-         df["datetime"] = dsi.time.values[0] + df["step"]
-
-     # Keep only datetime and variable of interest
-     df = df[["datetime", var_name_actual]].drop_duplicates()
-
-     # Print extracted data
-     # print(df)
-
-     # Clean up intermediate datasets to free memory
-     del dsi, time_series
-
-     return df
-
-
- def main():
-     # Set up command-line argument parsing
-     parser = argparse.ArgumentParser(description="Download and process weather forecast data.")
-     parser.add_argument("file_path", help="Path to save the resulting CSV file.")
-
-     # Parse the arguments
-     args = parser.parse_args()
-
-     # Call the function with the provided file path
-     download_weather_forecast(args.file_path)
-
-
- if __name__ == "__main__":
-     main()