loone-data-prep 0.1.9__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +252 -228
- loone_data_prep/LOONE_DATA_PREP.py +34 -17
- loone_data_prep/flow_data/forecast_bias_correction.py +52 -34
- loone_data_prep/flow_data/get_forecast_flows.py +131 -88
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py +127 -0
- loone_data_prep/forecast_scripts/forecast_stages.py +40 -0
- loone_data_prep/forecast_scripts/predict_PI.py +51 -0
- loone_data_prep/forecast_scripts/trib_cond.py +84 -0
- loone_data_prep/forecast_scripts/weather_forecast.py +155 -0
- loone_data_prep/utils.py +52 -19
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info}/METADATA +9 -4
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info}/RECORD +15 -10
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info}/WHEEL +1 -1
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info/licenses}/LICENSE +0 -0
- {loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info}/top_level.txt +0 -0
loone_data_prep/forecast_scripts/forecast_stages.py
ADDED

```diff
@@ -0,0 +1,40 @@
+import pandas as pd
+from datetime import timedelta
+
+def forecast_stages(workspace_path: str):
+    """
+    Forecasts the next 15 days of WCA stages based on historical data.
+
+    Args:
+        workspace_path (str): Path to the workspace directory.
+    """
+    # Load historical data
+    stages = pd.read_csv(f"{workspace_path}/WCA_Stages_Inputs.csv")
+
+    # Convert 'date' column to datetime
+    stages['date'] = pd.to_datetime(stages['date'])
+
+    # Start forecast from today (normalized to remove time)
+    start_date = pd.Timestamp.today().normalize()
+
+    # Generate forecast for the next 15 days
+    forecast_rows = []
+    for i in range(16):
+        forecast_date = start_date + timedelta(days=i)
+        month = forecast_date.month
+        day = forecast_date.day
+
+        # Filter historical rows for the same month and day
+        same_day_rows = stages[(stages['date'].dt.month == month) & (stages['date'].dt.day == day)]
+
+        if not same_day_rows.empty:
+            mean_values = same_day_rows.drop(columns='date').mean()
+            forecast_row = {'date': forecast_date}
+            forecast_row.update(mean_values.to_dict())
+            forecast_rows.append(forecast_row)
+
+    # Create forecast DataFrame
+    forecast_df = pd.DataFrame(forecast_rows)
+
+    forecast_df.to_csv(f"{workspace_path}/WCA_Stages_Inputs_Predicted.csv", index=False)
+    return
```
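A minimal usage sketch of the new stage forecaster is below; the module path comes from the wheel RECORD shown later in this diff, while the workspace path is a placeholder that must already contain the WCA_Stages_Inputs.csv file the function reads.

```python
# Hypothetical usage sketch: forecast WCA stages from day-of-year climatology.
# The workspace path is a placeholder; it must contain WCA_Stages_Inputs.csv.
from loone_data_prep.forecast_scripts.forecast_stages import forecast_stages

forecast_stages("/path/to/workspace")  # writes WCA_Stages_Inputs_Predicted.csv into the same directory
```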
loone_data_prep/forecast_scripts/predict_PI.py
ADDED

```diff
@@ -0,0 +1,51 @@
+import pandas as pd
+from datetime import datetime, timedelta
+import argparse
+
+def extend_PI(PI_path, output_path):
+    df = pd.read_csv(PI_path, header=None, skiprows=1, names=["date", "PI"])
+    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
+    # Extract month and day for grouping
+    df["month_day"] = df["date"].dt.strftime("%m-%d")
+    # Sort data in case it's unordered
+    df = df.sort_values(by="date").reset_index(drop=True)
+
+    # Compute historical average PI for each month-day
+    average_pi = df.groupby("month_day")["PI"].mean().reset_index()
+
+    # Generate future dates (today + next 16 days)
+    today = datetime.today()
+    future_dates = [today + timedelta(days=i) for i in range(17)]
+    future_df = pd.DataFrame({"date": future_dates})
+
+    # Extract month-day from future dates to match historical data
+    future_df["month_day"] = future_df["date"].dt.strftime("%m-%d")
+
+    # Merge with historical averages
+    future_df = future_df.merge(average_pi, on="month_day", how="left")
+
+    # Append the new rows to the original dataframe
+    df_extended = pd.concat([df, future_df[["date", "PI"]]], ignore_index=True)
+    df_extended.drop(columns=["month_day"], inplace=True)
+    df_extended.set_index('date', inplace=True)
+    df_extended = df_extended.resample('W-FRI').mean().reset_index()
+
+    # Save the updated dataframe to CSV
+    df_extended.to_csv(output_path, index=False)
+
+def main():
+    # Set up command-line argument parsing
+    parser = argparse.ArgumentParser(description="Download and process weather forecast data.")
+    parser.add_argument("PI_path", help="Path to the historical palmer index file.")
+    parser.add_argument("output_path", help="Path to save the new output file path.")
+
+
+    # Parse the arguments
+    args = parser.parse_args()
+
+    # Call the function with the provided file path
+    extend_PI(args.PI_path, args.output_path)
+
+
+if __name__ == "__main__":
+    main()
```
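A usage sketch for the Palmer Index extension follows; it assumes a historical PI file whose dates are in the YYYYMMDD format the parser above expects, and both file names are placeholders.

```python
# Hypothetical usage sketch: extend a historical Palmer Index series with
# day-of-year averages and resample to weekly (Friday) means, per extend_PI above.
from loone_data_prep.forecast_scripts.predict_PI import extend_PI

extend_PI("PI_history.csv", "PI_extended.csv")  # placeholder input/output paths
```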
loone_data_prep/forecast_scripts/trib_cond.py
ADDED

```diff
@@ -0,0 +1,84 @@
+import pandas as pd
+from datetime import datetime
+from datetime import date, timedelta
+import argparse
+
+def create_trib_cond (weather_data, net_inflows, main_tributary, PI, output, ensemble):
+    # TODO - check that all these unit are right
+    future_date = date.today() + timedelta(days=15)
+    today = date.today()
+
+    all_data = pd.read_csv(weather_data)
+
+    all_data['date'] = pd.to_datetime(all_data['datetime']).dt.date
+    all_data = all_data[(all_data['date'] >= today) & (all_data['date'] <= future_date)]
+    all_data = all_data.set_index(['date'])
+    all_data.index = pd.to_datetime(all_data.index, unit='ns')
+    Net_RF_Weekly = all_data.resample('W-FRI').sum()
+    # Net Inflows cfs
+    Net_Inflows = pd.read_csv(net_inflows)
+    Net_Inflows['date'] = pd.to_datetime(Net_Inflows['date']).dt.date
+    Net_Inflows = Net_Inflows[(Net_Inflows['date'] >= today) & (Net_Inflows['date'] <= future_date)]
+    #This is just the sum of the inflows that we want to read in
+    Net_Inflows['Net_Inflows'] = Net_Inflows['Netflows_acft']*(43560) # acft to cf
+    Net_Inflows = Net_Inflows.set_index(['date'])
+    Net_Inflows.index = pd.to_datetime(Net_Inflows.index, unit='ns')
+    Net_Inflow_Weekly = Net_Inflows.resample('W-FRI').mean()
+    # S65 cfs
+    S65E = pd.read_csv(main_tributary)
+    S65E['date'] = pd.to_datetime(S65E['date']).dt.date
+    S65E = S65E[(S65E['date'] >= today) & (S65E['date'] <= future_date)]
+    #We want specifically S65_Q
+    #Check that the units are right
+    S65E = S65E.set_index(['date'])
+    S65E = S65E / (0.0283168466 * 86400) # Convert all columns from cmd to cfs
+    S65E.index = pd.to_datetime(S65E.index, unit='ns') # Ensure index is datetime
+    S65E_Weekly = S65E.resample('W-FRI').mean()
+    # PI
+    PI_week_data = pd.DataFrame(S65E_Weekly.index, columns=['date'])
+    PI_week_data['date'] = pd.to_datetime(PI_week_data['date'])
+
+    PI_data = pd.read_csv(PI)
+    PI_data['date'] = pd.to_datetime(PI_data['date'])
+
+    PI = PI_week_data.merge(PI_data[['date', 'PI']], on='date', how='left')
+
+    ensemble_col = f"ensemble_{ensemble:02d}"
+
+    # Create the initial DataFrame with the date
+    Trib_Cond_Wkly = pd.DataFrame(S65E_Weekly.index, columns=['date'])
+
+    # Calculate NetRF and NetInf
+    Trib_Cond_Wkly['NetRF'] = Net_RF_Weekly['tp_corrected'].values - Net_RF_Weekly['evapotranspiration'].values
+    Trib_Cond_Wkly['NetInf'] = Net_Inflow_Weekly['Net_Inflows'].values
+
+    # Select only the desired ensemble column and rename it
+    S65E_selected = S65E_Weekly[[ensemble_col]].rename(columns={ensemble_col: "S65E"})
+
+    # Merge it into Trib_Cond_Wkly
+    Trib_Cond_Wkly = Trib_Cond_Wkly.merge(S65E_selected, left_on="date", right_index=True, how="left")
+
+    # Add the Palmer Index
+    Trib_Cond_Wkly['Palmer'] = PI['PI'].values
+
+    # Export to CSV
+    Trib_Cond_Wkly.to_csv(output, index=False)
+
+def main():
+    # Set up command-line argument parsing
+    parser = argparse.ArgumentParser(description="Download and process weather forecast data.")
+    parser.add_argument("weather_data", help="Path to the weather forecasts.")
+    parser.add_argument("net_inflows", help="Path to the net inflow forecasts.")
+    parser.add_argument("main_tributary", help="Path to save the S65E forecasts.")
+    parser.add_argument("PI", help="Path to the Palmer Index forecasts.")
+    parser.add_argument("output", help="Path to save the trib file")
+
+    # Parse the arguments
+    args = parser.parse_args()
+
+    # Call the function with the provided file path
+    create_trib_cond(args.file_path, args.net_inflows, args.main_tributary, args.PI, args.output)
+
+
+if __name__ == "__main__":
+    main()
```
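Note that `main()` in this file calls `create_trib_cond` with `args.file_path`, which the parser never defines, and omits the required `ensemble` argument. A sketch of calling the function directly is below; all file names are placeholders, and the column expectations are read off the code above.

```python
# Hypothetical usage sketch: build the weekly tributary-condition file directly,
# since main() above passes args.file_path (undefined) and no ensemble number.
from loone_data_prep.forecast_scripts.trib_cond import create_trib_cond

create_trib_cond(
    weather_data="weather_forecast.csv",    # needs 'datetime', 'tp_corrected', 'evapotranspiration' columns
    net_inflows="net_inflows.csv",          # needs 'date' and 'Netflows_acft' columns
    main_tributary="S65E_forecast.csv",     # needs 'date' plus ensemble_01, ensemble_02, ... columns (cmd)
    PI="PI_extended.csv",                   # needs 'date' and 'PI' columns (e.g. output of predict_PI.py)
    output="Trib_Cond_Wkly.csv",
    ensemble=1,                             # selects the 'ensemble_01' column
)
```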
loone_data_prep/forecast_scripts/weather_forecast.py
ADDED

```diff
@@ -0,0 +1,155 @@
+from herbie import FastHerbie
+from datetime import datetime
+import pandas as pd
+import openmeteo_requests
+import argparse
+import requests_cache
+from retry_requests import retry
+import warnings
+
+warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+
+
+def download_weather_forecast (file_path):
+    # Get today's date in the required format
+    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+
+    # Define variables to download and extract
+    variables = {
+        "10u": "10u",
+        "ssrd": "ssrd",
+        "tp": "tp",
+        "10v": "10v",
+    }
+
+    # Define point of interest
+    points = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
+
+    # Initialize FastHerbie
+    FH = FastHerbie([today_str], model="ifs", fxx=range(0, 360, 3))
+    dfs = []
+
+    for var_key, var_name in variables.items():
+        print(f"Processing {var_key}...")
+
+        # Download and load the dataset
+        FH.download(f":{var_key}")
+        ds = FH.xarray(f":{var_key}", backend_kwargs={"decode_timedelta": True})
+
+        # Extract point data
+        dsi = ds.herbie.pick_points(points, method="nearest")
+
+        # Extract the correct variable name dynamically
+        if var_name == "10u":
+            var_name_actual = "u10"  # Map 10u to u10
+        elif var_name == "10v":
+            var_name_actual = "v10"  # Map 10v to v10
+        else:
+            var_name_actual = var_name  # For ssrd and tp, use the same name
+
+        # Extract time series
+        time_series = dsi[var_name_actual].squeeze()
+
+        # Convert to DataFrame
+        df = time_series.to_dataframe().reset_index()
+
+        # Convert `valid_time` to datetime
+        if "valid_time" in df.columns:
+            df = df.rename(columns={"valid_time": "datetime"})
+        elif "step" in df.columns and "time" in dsi.coords:
+            df["datetime"] = dsi.time.values[0] + df["step"]
+
+        # Keep only datetime and variable of interest
+        df = df[["datetime", var_name_actual]].drop_duplicates()
+
+        # Append to list
+        dfs.append(df)
+
+        # Print extracted data
+        # print(df)
+
+    # Merge all variables into a single DataFrame
+    final_df = dfs[0]
+    for df in dfs[1:]:
+        final_df = final_df.merge(df, on="datetime", how="outer")
+    print(final_df)
+    # Calculate wind speed
+    final_df["wind_speed"] = (final_df["u10"] ** 2 + final_df["v10"] ** 2) ** 0.5
+
+    #rainfall corrected: OLS Regression Equation: Corrected Forecast = 0.7247 * Forecast + 0.1853
+    final_df["tp_corrected"] = 0.7247 * final_df["tp"] + 0.1853
+
+    #wind speed correction: Corrected Forecast = 0.4167 * Forecast + 4.1868
+    final_df["wind_speed_corrected"] = 0.4167 * final_df["wind_speed"] + 4.1868
+
+    #radiation correction will need to be fixed because it was done on fdir instead of ssdr
+    #radiation corrected: Corrected Forecast = 0.0553 * Forecast - 0.0081
+    final_df["ssrd_corrected"] = 0.0553 * final_df["ssrd"] - 0.0081
+
+    # Setup the Open-Meteo API client with cache and retry on error
+    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
+    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
+    openmeteo = openmeteo_requests.Client(session = retry_session)
+
+    # Make sure all required weather variables are listed here
+    # The order of variables in hourly or daily is important to assign them correctly below
+    url = "https://api.open-meteo.com/v1/forecast"
+    params = {
+        "latitude": 26.9690,
+        "longitude": -80.7976,
+        "hourly": "evapotranspiration",
+        "forecast_days": 16,
+        "models": "gfs_seamless"
+    }
+    responses = openmeteo.weather_api(url, params=params)
+
+
+    # Process first location. Add a for-loop for multiple locations or weather models
+    response = responses[0]
+
+    hourly = response.Hourly()
+    hourly_evapotranspiration = hourly.Variables(0).ValuesAsNumpy()
+
+    hourly_data = {"date": pd.date_range(
+        start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
+        end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
+        freq = pd.Timedelta(seconds = hourly.Interval()),
+        inclusive = "left"
+    )}
+
+    hourly_data["evapotranspiration"] = hourly_evapotranspiration
+
+    hourly_dataframe = pd.DataFrame(data = hourly_data)
+
+    # Convert datetime to date for merging
+    final_df['date'] = final_df['datetime']
+    # Ensure final_df['date'] is timezone-aware (convert to UTC)
+    final_df['date'] = pd.to_datetime(final_df['date'], utc=True)
+
+    # Ensure hourly_dataframe['date'] is also timezone-aware (convert to UTC)
+    hourly_dataframe['date'] = pd.to_datetime(hourly_dataframe['date'], utc=True)
+
+    # Merge while keeping only matching dates from final_df
+    merged_df = final_df.merge(hourly_dataframe, on='date', how='left')
+
+    # Print final combined DataFrame
+    merged_df.drop(columns=['date'], inplace=True)
+    # print(merged_df)
+
+    merged_df.to_csv(file_path, index=False)
+
+
+def main():
+    # Set up command-line argument parsing
+    parser = argparse.ArgumentParser(description="Download and process weather forecast data.")
+    parser.add_argument("file_path", help="Path to save the resulting CSV file.")
+
+    # Parse the arguments
+    args = parser.parse_args()
+
+    # Call the function with the provided file path
+    download_weather_forecast(args.file_path)
+
+
+if __name__ == "__main__":
+    main()
```
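A usage sketch for the weather-forecast downloader follows; it requires network access for both the Herbie GRIB downloads and the Open-Meteo request, and the output path is a placeholder.

```python
# Hypothetical usage sketch: fetch today's IFS point forecast via Herbie and
# hourly evapotranspiration from Open-Meteo, apply the bias corrections above,
# and write one combined CSV. The output path is a placeholder.
from loone_data_prep.forecast_scripts.weather_forecast import download_weather_forecast

download_weather_forecast("weather_forecast.csv")
```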
loone_data_prep/utils.py
CHANGED

```diff
@@ -193,7 +193,7 @@ DEFAULT_PREDICTION_STATIONS_IDS = [
     "S135_P",
     "S135_C",
 ]
-DEFAULT_EXPFUNC_CONSTANTS = {
+DEFAULT_EXPFUNC_PHOSPHATE_CONSTANTS = {
     "S65E_S": {"a": 2.00040151533473, "b": 0.837387838314323},
     "S71_S": {"a": 2.55809777403484, "b": 0.765894033054918},
     "S72_S": {"a": 2.85270576092534, "b": 0.724935760736887},
@@ -208,7 +208,21 @@ DEFAULT_EXPFUNC_CONSTANTS = {
     "S135_P": {"a": 2.50975664040355, "b": 0.760702496334553},
     "S135_C": {"a": 2.43076251736749, "b": 0.759494593788417},
 }
-
+DEFAULT_EXPFUNC_NITROGEN_CONSTANTS = {
+    "FISHP": {"a": 3.45714698709252, "b": 0.592252136022012},
+    "S4_P": {"a": 1.2337557014752, "b": 1.04595934798695},
+    "S65E_S": {"a": 4.71575889172016, "b": 0.505549283553318},
+    "S71_S": {"a": 3.97701995028333, "b": 0.606281118481932},
+    "S72_S": {"a": 2.36651051985955, "b": 0.774589654354149},
+    "S84_S": {"a": 2.69855941365441, "b": 0.697201188144741},
+    "S127_C": {"a": 2.22368957908813, "b": 0.758610540522343},
+    "S127_P": {"a": 2.19477310222979, "b": 0.786485799309641},
+    "S133_P": {"a": 1.79092549100026, "b": 0.882497515298829},
+    "S154_C": {"a": 2.88850639994145, "b": 0.665252221554856},
+    "S191_S": {"a": 3.99798269355392, "b": 0.586177156114969},
+    "S135_C": {"a": 6.44418674308781, "b": 0.322821841402605},
+    "S135_P": {"a": 3.09890183766129, "b": 0.657896838486496},
+}
 
 @retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
 def get_dbkeys(
@@ -298,7 +312,7 @@ def data_interpolations(
     Data_df["Yr_M"] = pd.to_datetime(Data_df["date"]).dt.to_period("M")
     start_date = Data_df["date"].iloc[0]
     end_date = Data_df["date"].iloc[-1]
-    date_rng = pd.date_range(start=start_date, end=end_date, freq="
+    date_rng = pd.date_range(start=start_date, end=end_date, freq="ME")
     Monthly_df = pd.DataFrame(date_rng, columns=["date"])
     Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period(
         "M"
@@ -705,28 +719,47 @@ def nutrient_prediction(
     input_dir: str,
     output_dir: str,
     station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS,
-    constants: dict =
+    constants: dict = DEFAULT_EXPFUNC_PHOSPHATE_CONSTANTS,
+    nutrient: str = "PHOSPHATE",
 ) -> None:
+    """Predict nutrient loads for the given station IDs.
+
+    Args:
+        input_dir (str): Path to the directory where the input files are located.
+        output_dir (str): Path to the directory where the output files will be saved.
+        station_ids (list, optional): List with station IDs to do predictions for. Defaults to DEFAULT_PREDICTION_STATIONS_IDS.
+        constants (dict, optional): Dictionary with constants for the exponential function. Defaults to DEFAULT_EXPFUNC_PHOSPHATE_CONSTANTS.
+        nutrient (str, optional): Nutrient to predict. Defaults to "PHOSPHATE". Options are "PHOSPHATE" or "NITROGEN".
+    """
     for station in station_ids:
         print(f"Predicting nutrient loads for station: {station}.")
        # Construct paths for flow file
         flow_file_path = ""
         flow_file_path_exists = True
-[11 removed lines; content not shown in this rendering]
+        # Manually define matches for forecast case
+        station_file_map = {
+            'S65E_S': f"{input_dir}/750072741_INFLOW_cmd_geoglows.csv",
+            'S71_S': f"{input_dir}/750068601_MATCHED_cmd_geoglows.csv",
+            'FISHP': f"{input_dir}/750053213_MATCHED_cmd_geoglows.csv",
+            'S84_S': f"{input_dir}/750069782_INFLOW_cmd_geoglows.csv",
+            'S133_P': f"{input_dir}/750035446_INFLOW_cmd_geoglows.csv",
+            'S154_C': f"{input_dir}/750064453_INFLOW_cmd_geoglows.csv",
+            'S135_P': f"{input_dir}/750052624_MATCHED_cmd_geoglows.csv",
+            'S135_C': f"{input_dir}/750052624_MATCHED_cmd_geoglows.csv",
+        }
+
+        if station in station_file_map:
+            flow_file_path = station_file_map[station]
+            if os.path.exists(flow_file_path):
+                flow = pd.read_csv(flow_file_path)
+            else:
+                print(
+                    f"Skipping nutrient prediction for station: {station}. Forecast file path does not exist."
+                )
+                continue
         else:
-            # If it doesn't exist, skip to the next iteration of the loop
             print(
-                f"Skipping nutrient prediction for station: {station}.
+                f"Skipping nutrient prediction for station: {station}. No forecast match defined."
             )
             continue
 
@@ -788,13 +821,13 @@ def nutrient_prediction(
 
         # Save the predicted TP loads to a CSV file
         out_dataframe.to_csv(
-            os.path.join(output_dir, f"{station}
+            os.path.join(output_dir, f"{station}_{nutrient}_predicted.csv")
         )
 
         # Save the predicted TP loads to a CSV file (in input_dir)
         # Output is needed in input_dir by GEOGLOWS_LOONE_DATA_PREP.py and in output_dir for graph visualization in the app
         out_dataframe.to_csv(
-            os.path.join(input_dir, f"{station}
+            os.path.join(input_dir, f"{station}_{nutrient}_predicted.csv")
         )
 
 
```
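Since `nutrient_prediction` now accepts `constants` and `nutrient` parameters, the same routine can produce either phosphate or nitrogen loads. A sketch of a nitrogen run using the new constants dictionary is below; the directory paths are placeholders.

```python
# Hypothetical usage sketch: run the updated nutrient_prediction for nitrogen
# loads using the DEFAULT_EXPFUNC_NITROGEN_CONSTANTS dictionary added in this release.
from loone_data_prep import utils

utils.nutrient_prediction(
    input_dir="/data/forecast_inputs",      # placeholder; must hold the *_cmd_geoglows.csv flow files
    output_dir="/data/forecast_outputs",    # placeholder
    constants=utils.DEFAULT_EXPFUNC_NITROGEN_CONSTANTS,
    nutrient="NITROGEN",
)
```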
{loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: loone_data_prep
-Version:
+Version: 1.1.2
 Summary: Prepare data to run the LOONE model.
 Author-email: Osama Tarabih <osamatarabih@usf.edu>
 Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
@@ -20,10 +20,15 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: rpy2
 Requires-Dist: retry
-Requires-Dist: numpy
+Requires-Dist: numpy<2
 Requires-Dist: pandas
 Requires-Dist: scipy
-Requires-Dist: geoglows
+Requires-Dist: geoglows>=2.0.0
+Requires-Dist: herbie-data[extras]
+Requires-Dist: openmeteo_requests
+Requires-Dist: requests_cache
+Requires-Dist: retry-requests
+Dynamic: license-file
 
 LOONE_DATA_PREP
 # LOONE_DATA_PREP
```
{loone_data_prep-0.1.9.dist-info → loone_data_prep-1.1.2.dist-info}/RECORD
CHANGED

```diff
@@ -1,15 +1,20 @@
-loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=
-loone_data_prep/LOONE_DATA_PREP.py,sha256=
+loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=QQ2VMp7TmcKiffxwYhOc3uDUG4wXf9CTOqWMy7c4xMI,35809
+loone_data_prep/LOONE_DATA_PREP.py,sha256=aHC1gYR4D7yUWk6I_ydR6nzBC3nvZlt7tD6kKfOqh2A,68302
 loone_data_prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 loone_data_prep/data_analyses_fns.py,sha256=BZ7famrSKoUfExQvZfbl72CyADHLb-zzgdWZ-kLJxcQ,4603
-loone_data_prep/utils.py,sha256
+loone_data_prep/utils.py,sha256=CObio2wEPvdNhMJjn36jq0ArQURyGYZBqidh5crb0JU,35229
 loone_data_prep/flow_data/S65E_total.py,sha256=szNUfj0EyyyDzuKNhTGAZtWc5owiOpxYS55YTt4u19k,2835
 loone_data_prep/flow_data/__init__.py,sha256=u7fENFUZsJjyl13Bc9ZE47sHMKmjxtqXhV9t7vDTm7Y,93
-loone_data_prep/flow_data/forecast_bias_correction.py,sha256=
-loone_data_prep/flow_data/get_forecast_flows.py,sha256
+loone_data_prep/flow_data/forecast_bias_correction.py,sha256=pcMH7qR3RZvXOHoYOtP7azNn5rVuRPL9mwgoVk2NeLA,11378
+loone_data_prep/flow_data/get_forecast_flows.py,sha256=0TvOMR_Z2FbqnIEQATugpdluwhzQ0Y4tEyZHoRIaAYU,16580
 loone_data_prep/flow_data/get_inflows.py,sha256=xKuSyJBdPrpjqMdRiyNDyxwdhYVIgLhiTP0k_1I1uWI,6456
 loone_data_prep/flow_data/get_outflows.py,sha256=x7aisIkbXoTkcubFQLDghX-P8lztPq-tU0dQzoVRTtQ,5620
 loone_data_prep/flow_data/hydro.py,sha256=5MwrzSUTCgPgeC_YGhz-en1CbOMp379Qf5zjpJlp-HM,5312
+loone_data_prep/forecast_scripts/create_forecast_LOWs.py,sha256=_n6haJvGWuCfcgXO1x3ondmsaEDxzPlhJJQJHQhhd10,4864
+loone_data_prep/forecast_scripts/forecast_stages.py,sha256=6S6aHlYi2_t6GAh901KBiBWPueYCwAzyb-AliHJexoU,1373
+loone_data_prep/forecast_scripts/predict_PI.py,sha256=f0n2-gt5t9FKNdpJ5QGpyP2QBFLDGetYzfTYL95Vi_8,1937
+loone_data_prep/forecast_scripts/trib_cond.py,sha256=F2t1FHDmqU9CFOnv1eaSkNE6tsNwsQdKYlhMRgj9xkE,3653
+loone_data_prep/forecast_scripts/weather_forecast.py,sha256=AIHRjcdGUuA2a15ZcLyYws1gtH016QRU0bWIp4Ty2K0,5457
 loone_data_prep/water_level_data/__init__.py,sha256=rgHDDkwccemsZnwUlw2M0h2ML4KmI89yPscmLoxbEHM,43
 loone_data_prep/water_level_data/get_all.py,sha256=arPSWpb0XfQm0GKZJmoWhWdLuuNDxtGVX6_6UuD1_Vs,10885
 loone_data_prep/water_level_data/hydro.py,sha256=PtsNdMXe1Y4e5CzEyLH6nJx_xv8sB90orGcSgxt7nao,3653
@@ -20,8 +25,8 @@ loone_data_prep/water_quality_data/wq.py,sha256=sl6G3iDCk6QUzpHTXPHpRZNMBG0-wHuc
 loone_data_prep/weather_data/__init__.py,sha256=TX58EPgGRzEK_LmLze79lC4L7kU_j3yZf5_iC4nOIP4,45
 loone_data_prep/weather_data/get_all.py,sha256=aCufuxORU51XhXt7LN9wN_V4qtjNt1qRC1UKlI2b3Ko,6918
 loone_data_prep/weather_data/weather.py,sha256=hvceksrGSnDkCjheBVBuPgY1DrdR0ZAtrFB-K2tYTtk,12043
-loone_data_prep-
-loone_data_prep-
-loone_data_prep-
-loone_data_prep-
-loone_data_prep-
+loone_data_prep-1.1.2.dist-info/licenses/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
+loone_data_prep-1.1.2.dist-info/METADATA,sha256=1J8LtPpM_xq-LoCzvgrqhuvdRAK7bG5lye9ow99AY54,4270
+loone_data_prep-1.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+loone_data_prep-1.1.2.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
+loone_data_prep-1.1.2.dist-info/RECORD,,
```