loone-data-prep 1.1.2.tar.gz → 1.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/PKG-INFO +1 -1
  2. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +3 -2
  3. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/LOONE_DATA_PREP.py +23 -5
  4. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_forecast_flows.py +6 -7
  5. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/Chla_merged.py +27 -0
  6. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_Chla_predicted.py +109 -0
  7. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +97 -0
  8. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_q_predict.py +159 -0
  9. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_wq_predict.py +71 -0
  10. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/trib_cond.py +9 -1
  11. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/utils.py +12 -1
  12. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/PKG-INFO +1 -1
  13. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/SOURCES.txt +5 -0
  14. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/pyproject.toml +1 -1
  15. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/LICENSE +0 -0
  16. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/README.md +0 -0
  17. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/__init__.py +0 -0
  18. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/data_analyses_fns.py +0 -0
  19. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/S65E_total.py +0 -0
  20. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/__init__.py +0 -0
  21. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/forecast_bias_correction.py +0 -0
  22. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_inflows.py +0 -0
  23. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_outflows.py +0 -0
  24. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/hydro.py +0 -0
  25. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -0
  26. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/forecast_stages.py +0 -0
  27. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/predict_PI.py +0 -0
  28. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/weather_forecast.py +0 -0
  29. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_level_data/__init__.py +0 -0
  30. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_level_data/get_all.py +0 -0
  31. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_level_data/hydro.py +0 -0
  32. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/__init__.py +0 -0
  33. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/get_inflows.py +0 -0
  34. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/get_lake_wq.py +0 -0
  35. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/wq.py +0 -0
  36. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/weather_data/__init__.py +0 -0
  37. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/weather_data/get_all.py +0 -0
  38. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/weather_data/weather.py +0 -0
  39. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/dependency_links.txt +0 -0
  40. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/requires.txt +0 -0
  41. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/top_level.txt +0 -0
  42. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/setup.cfg +0 -0
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loone_data_prep
- Version: 1.1.2
+ Version: 1.2.1
  Summary: Prepare data to run the LOONE model.
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py
@@ -44,7 +44,7 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      # Read LO Average Stage (ft)
      LO_Stage = pd.read_csv(f"{input_dir}/LO_Stage.csv")
      # Create Column (EOD Stg(ft, NGVD)) in File (SFWMM_Daily_Outputs)
-     LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
+     # LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
      LO_Stage.index = LO_Stage["date"]
      # Calculate average
      if "Average_Stage" not in LO_Stage.columns:
@@ -457,6 +457,7 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      LOWS["LZ40WS"] = LZ40WS["LZ40_WNDS_MPH"]
      LOWS = LOWS.set_index("date")
      LOWS["LO_Avg_WS_MPH"] = LOWS.mean(axis=1)
+     LOWS = LOWS.resample("D").mean()
      LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")

      # # RFVol acft
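The added resample line collapses the predicted wind records to one row per calendar day, which is why the hunk sets "date" as the index just above. A minimal sketch of the step in isolation, with hypothetical sub-daily data (resample requires a DatetimeIndex):

    import pandas as pd

    # Two readings on Jan 1 and one on Jan 2 (hypothetical values)
    idx = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 12:00", "2024-01-02 00:00"])
    lows = pd.DataFrame({"LO_Avg_WS_MPH": [8.0, 10.0, 6.0]}, index=idx)
    daily = lows.resample("D").mean()
    # 2024-01-01 -> 9.0, 2024-01-02 -> 6.0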
@@ -592,7 +593,7 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi

      # Write Data into csv files
      # write Avg Stage (ft, m) Storage (acft, m3) SA (acres) to csv
-     LO_Stg_Sto_SA_df.to_csv(f"{output_dir}/Average_LO_Storage_3MLag_{ensemble_number}.csv", index=False)
+     # LO_Stg_Sto_SA_df.to_csv(f"{output_dir}/Average_LO_Storage_3MLag_{ensemble_number}.csv", index=False)
      # Write S65 TP concentrations (mg/L)
      S65_total_TP.to_csv(f"{output_dir}/S65_TP_3MLag_{ensemble_number}.csv", index=False)
      # TP External Loads 3 Months Lag (mg)
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/LOONE_DATA_PREP.py
@@ -34,7 +34,7 @@ def main(input_dir: str, output_dir: str) -> None:
      # Read LO Average Stage (ft)
      LO_Stage = pd.read_csv(f'{input_dir}/LO_Stage.csv')
      # Create Column (EOD Stg(ft, NGVD)) in File (SFWMM_Daily_Outputs)
-     LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
+     # LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
      # Calculate average
      if "Average_Stage" not in LO_Stage.columns:
          LO_Stage = LO_Stage.loc[:, ~LO_Stage.columns.str.contains('^Unnamed')]
@@ -386,8 +386,17 @@ def main(input_dir: str, output_dir: str) -> None:

      # RFVol acft
      # Create File (RF_Volume)
-     RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
-     RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df['SA_acres'].values
+     # Merge the DataFrames on date to ensure matching rows
+     RF_data_copy = RF_data.copy()
+     LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+     RF_data_copy['date'] = pd.to_datetime(RF_data_copy['date'])
+     LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+     merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+                             LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+                             on='date', how='inner')
+
+     RFVol = pd.DataFrame(merged_rf_sa['date'], columns=['date'])
+     RFVol['RFVol_acft'] = (merged_rf_sa['average_rainfall'].values/12) * merged_rf_sa['SA_acres'].values
      date_reference = RFVol['date'].iloc[0]
      date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
      df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
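The replaced lines multiplied two .values arrays positionally, which misaligns (or raises) whenever RF_data and LO_Stg_Sto_SA_df cover different dates; the inner merge instead keys every rainfall value to the surface area of the same day. The ETVol hunk below applies the identical pattern. A stripped-down sketch with hypothetical mismatched inputs:

    import pandas as pd

    rf = pd.DataFrame({"date": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
                       "average_rainfall": [0.12, 0.00, 0.30]})
    sa = pd.DataFrame({"date": pd.to_datetime(["2024-01-02", "2024-01-03"]),
                       "SA_acres": [448000.0, 449500.0]})

    merged = pd.merge(rf, sa, on="date", how="inner")  # keeps only the shared dates
    # rainfall / 12 (apparently inches -> feet), times acres -> acre-feet
    merged["RFVol_acft"] = (merged["average_rainfall"] / 12) * merged["SA_acres"]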
@@ -396,8 +405,17 @@ def main(input_dir: str, output_dir: str) -> None:

      # ETVol acft
      # Create File (ETVol)
-     ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
-     ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df['SA_acres'].values
+     # Merge the DataFrames on date to ensure matching rows
+     ET_data_copy = ET_data.copy()
+     LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+     ET_data_copy['date'] = pd.to_datetime(ET_data_copy['date'])
+     LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+     merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+                             LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+                             on='date', how='inner')
+
+     ETVol = pd.DataFrame(merged_et_sa['date'], columns=['date'])
+     ETVol['ETVol_acft'] = (merged_et_sa['average_ETPI'].values/12) * merged_et_sa['SA_acres'].values
      date_reference = ETVol['date'].iloc[0]
      date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
      df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_forecast_flows.py
@@ -392,10 +392,10 @@ def main(

      # Get the flow data for each station
      stations_inflow_by_comid = {
-         750072741: "S65E_S",  # TODO: Should this be S65E_total or S65E_S? - this is a station we definitely want
-         750069782: "S84_S",  #
-         # 750053211: "S129_C",  # TODO: Should this be S129_C or S129_PMP_P? - Also right now it is all 0s
-         # 750035446: "S133_P",  # TODO: Should this be S133_P or S133_C? - Also right now it is all 0s
+         750072741: "S65E_S",
+         750069782: "S84_S",
+         # 750053211: "S129_C",
+         # 750035446: "S133_P",
          750064453: "S154_C",  # This is primarily 0s
      }
@@ -444,8 +444,7 @@ def main(
      for reach_id in MATCHED_IDS:
          stations_matched_by_comid = {
              750068601: "S71_S",
-             750052624: "S135_C",  # TODO: Should this be S135_C or S135_P?
-             # 750052624: "S308",  # NOTE: Same COMID as S135 — only one key allowed!
+             750052624: "S135_C",
              750053213: "FISHP",
              750038416: "S77_S",
              750050259: "S79_TOT",
@@ -454,7 +453,7 @@ def main(
              750051428: "S49_S",
              # 750038427: "S40",
              750057357: "S191_S",
-             750028935: "S127_C",  #TODO: Should this be S127_C or S127_P?
+             750028935: "S127_C",
          }

          station_ensembles = get_flow_forecast_ensembles(
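The deleted NOTE above records a real constraint: S135 and S308 resolve to the same COMID, and a Python dict literal silently keeps only the last value for a duplicated key, so the mapping can carry one station per COMID. In isolation:

    # Duplicate keys in a dict literal do not raise; the last one wins.
    stations = {
        750052624: "S135_C",
        750052624: "S308",  # silently overwrites the entry above
    }
    print(stations)  # {750052624: 'S308'}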
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/Chla_merged.py
@@ -0,0 +1,27 @@
+ import pandas as pd
+ def loads_predicted(input_dir, output_dir):
+     """
+     Calculate Chlorophyll-a loads based on inflows and chlorophyll-a data.
+
+     input_dir: Directory where the input files are located.
+     output_dir: Directory where the output files will be saved.
+     St_Yr, St_M, St_D: Start date (year, month, day).
+     En_Yr, En_M, En_D: End date (year, month, day).
+     """
+
+     # Read forecast inflow file
+     # TODO: Should this be an average/median of all of the ensembles? worst case?
+     Flow_df = pd.read_csv(f"{input_dir}/geoglows_flow_df_ens_01_predicted.csv")
+     Flow_df['date'] = pd.to_datetime(Flow_df['date'])
+
+
+     # Read S65E Chlorophyll-a data
+     S65E_Chla = pd.read_csv(f'{output_dir}/S65E_Chla_Merged_forecast.csv')
+     S65E_Chla['date'] = pd.to_datetime(S65E_Chla['date'])  # Ensure date column is datetime
+     # Merge on date
+     merged = pd.merge(Flow_df[['date', 'Inflows']], S65E_Chla[['date', 'Data']], on='date', how='inner')
+     # Calculate Chlorophyll-a loads
+     merged['Chla_Loads'] = merged['Inflows'] * merged['Data']
+     # Save results
+     Chla_Loads_In = merged[['date', 'Chla_Loads']]
+     Chla_Loads_In.to_csv(f'{output_dir}/Chla_Loads_In_forecast.csv', index=False)
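A hedged usage sketch for the new module (the directory paths are placeholders): loads_predicted expects geoglows_flow_df_ens_01_predicted.csv (with an Inflows column) under input_dir and S65E_Chla_Merged_forecast.csv (with a Data column) under output_dir, and writes Chla_Loads_In_forecast.csv back to output_dir.

    from loone_data_prep.forecast_scripts.Chla_merged import loads_predicted

    loads_predicted("/data/loone/inputs", "/data/loone/outputs")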
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_Chla_predicted.py
@@ -0,0 +1,109 @@
+ import os
+ import pandas as pd
+ import datetime
+ from loone_data_prep.utils import get_synthetic_data
+
+ def get_Chla_predicted(input_dir, output_dir):
+     """
+     input_dir: Directory where the input files are located.
+     output_dir: Directory where the output files will be saved.
+     """
+     # Read forecast inflow file and get overall date range
+     # TODO: Should this be an average/median of all of the ensembles? worst case?
+     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
+     Q_in['date'] = pd.to_datetime(Q_in['date'])
+     date_start = Q_in['date'].min()
+     date_end = Q_in['date'].max()
+
+     # Define stations
+     stations = {
+         "L001": True,
+         "L004": True,
+         "L005": True,
+         "L006": True,
+         "L007": True,
+         "L008": True,
+         "LZ40": True
+     }
+
+     def load_and_check_forecast(station, suffix, start_date, end_date, forecast_suffix="_forecast"):
+         fname = f"water_quality_{station}_CHLOROPHYLL-A{suffix}.csv"
+         fpath = os.path.join(input_dir, fname)
+         df_full = pd.read_csv(fpath).drop(columns=["days"], errors="ignore")
+         df_full['date'] = pd.to_datetime(df_full['date'])
+         # Rename the specific column if it exists
+         possible_cols = [
+             f"{station}_CHLOROPHYLL-A, CORRECTED_ug/L",
+             f"{station}_CHLOROPHYLL-A(LC)_ug/L"
+         ]
+
+         original_col_name = None
+         for col in possible_cols:
+             if col in df_full.columns:
+                 df_full.rename(columns={col: "Data"}, inplace=True)
+                 original_col_name = col
+                 break
+
+         # Filter df to only rows between start_date and end_date
+         df_filtered = df_full[(df_full['date'] >= start_date) & (df_full['date'] <= end_date)]
+
+         # Check if full date range is covered; if not, fill with synthetic data
+         missing_dates = pd.date_range(start_date, end_date).difference(df_filtered['date'])
+         if len(missing_dates) > 0:
+             # Pass the original full historical df_full to get_synthetic_data, along with the forecast start_date
+             synthetic_df = get_synthetic_data(start_date, df_full)
+
+             # Rename "Data" back to original column name before saving
+             if original_col_name is not None:
+                 synthetic_df.rename(columns={"Data": original_col_name}, inplace=True)
+
+             # Save synthetic forecast file
+             forecast_fname = f"water_quality_{station}_CHLOROPHYLL-A{suffix}{forecast_suffix}.csv"
+             synthetic_df.to_csv(os.path.join(input_dir, forecast_fname), index=False)
+
+             return synthetic_df
+
+         return df_filtered
+
+     # Load data for all stations and both suffix types
+     chla_data = {}
+     chla_data_lc = {}
+
+     for station in stations:
+         chla_data[station] = load_and_check_forecast(station, ", CORRECTED", date_start, date_end)
+         chla_data_lc[station] = load_and_check_forecast(station, "(LC)", date_start, date_end)
+
+     # Merge function
+     def merge_chla_sources(chla_dict):
+         merged = None
+         for df in chla_dict.values():
+             if merged is None:
+                 merged = df
+             else:
+                 merged = pd.merge(merged, df, on="date", how="left")
+         merged = merged.loc[:, ~merged.columns.str.startswith("Unnamed")]
+         return merged
+
+     # Calculate aggregates
+     def calculate_chla_aggregates(df, suffix=""):
+         df = df.set_index("date")
+         df["Mean_Chla"] = df.mean(axis=1)
+         df["Chla_North"] = df[[col for col in df.columns if any(site in col for site in ["L001", "L005", "L008"])]].mean(axis=1)
+         df["Chla_South"] = df[[col for col in df.columns if any(site in col for site in ["L004", "L006", "L007", "L008", "LZ40"])]].mean(axis=1)
+         df = df.reset_index()
+         return df[["date", "Mean_Chla", "Chla_North", "Chla_South"]].rename(
+             columns={"Mean_Chla": f"Chla{suffix}", "Chla_North": f"Chla_N{suffix}", "Chla_South": f"Chla_S{suffix}"}
+         )
+
+     # Process and merge
+     LO_Chla = calculate_chla_aggregates(merge_chla_sources(chla_data))
+     LO_Chla_LC = calculate_chla_aggregates(merge_chla_sources(chla_data_lc))
+
+     # Merge the two dataframes (no date slicing here since all are limited by Q_in dates)
+     LO_Chla_Merge = pd.concat([LO_Chla, LO_Chla_LC]).reset_index(drop=True)
+
+     # Export
+     LO_Chla_Merge.to_csv(os.path.join(output_dir, "LO_Chla_Obs_predicted.csv"), index=False)
+     LO_Chla_Merge[["date", "Chla_N"]].rename(columns={"Chla_N": "Chla"}).to_csv(os.path.join(output_dir, "N_Merged_Chla_predicted.csv"), index=False)
+     LO_Chla_Merge[["date", "Chla_S"]].rename(columns={"Chla_S": "Chla"}).to_csv(os.path.join(output_dir, "S_Merged_Chla_predicted.csv"), index=False)
+     return
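The coverage check in load_and_check_forecast hinges on DatetimeIndex.difference, which returns the forecast dates missing from the station record; only when that set is non-empty does the function fall back to get_synthetic_data. The check in isolation, with hypothetical dates:

    import pandas as pd

    have = pd.to_datetime(["2024-06-01", "2024-06-03"])   # dates on file
    need = pd.date_range("2024-06-01", "2024-06-04")      # forecast window
    missing = need.difference(have)
    print(list(missing.strftime("%Y-%m-%d")))  # ['2024-06-02', '2024-06-04']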
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py
@@ -0,0 +1,97 @@
+ import os
+ import pandas as pd
+ import datetime
+ from loone_data_prep.utils import photo_period, get_synthetic_data
+
+ def get_NO_Loads_predicted(input_dir, output_dir):
+     """
+     input_dir: Directory where the input files are located.
+     output_dir: Directory where the output files will be saved.
+     This function reads the forecast inflow file, retrieves nitrate data for specified stations,
+     """
+     # TODO: Should this be an average/median of all of the ensembles? worst case?
+     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
+
+     datetime_str = Q_in['date'].iloc[0]
+     date_start = datetime.datetime.strptime(datetime_str, '%Y-%m-%d')
+     stations = [
+         "S65E", "S71", "S72", "S84", "S127", "S133",
+         "S154", "S191", "S308C", "FECSR78", "CULV10A", "S4"
+     ]
+
+     station_alias_map = {
+         "S65E": "S65_NO",
+         "S71": "S71_NO",
+         "S72": "S72_NO",
+         "S84": "S84_NO",
+         "S127": "S127_NO",
+         "S133": "S133_NO",
+         "S154": "S154_NO",
+         "S191": "S191_NO",
+         "S308C": "S308_NO",
+         "FECSR78": "FISHP_NO",
+         "CULV10A": "L8_NO",
+         "S4": "S4_NO"
+     }
+
+     NO_list = {}
+     NO_names = []
+
+     for station, alias in station_alias_map.items():
+         filename = f'water_quality_{station}_NITRATE+NITRITE-N_Interpolated.csv'
+         file_path = os.path.join(input_dir, filename)
+
+         try:
+             df = pd.read_csv(file_path)
+         except FileNotFoundError:
+             print(f"{filename} not found.")
+             continue
+
+         # Forecast if needed
+         if datetime_str not in df['date'].values:
+             df = get_synthetic_data(date_start, df)
+             df.to_csv(os.path.join(input_dir, f'water_quality_{station}_NITRATE+NITRITE-N_Interpolated_forecast.csv'), index=False)
+
+         NO_list[alias] = df
+         NO_names.append(alias)
+
+     # date_NO = pd.date_range(start='1/1/2008', end='3/31/2023', freq='D')
+     # Because of the flow df, I think this will be generated for every single ensemble member
+     for ensemble in range(1, 52):
+         Flow_df = pd.read_csv(f"{input_dir}/geoglows_flow_df_ens_{ensemble:02d}_predicted.csv")
+         Flow_df['date'] = pd.to_datetime(Flow_df['date'])
+
+         # Use Flow_df as the base for merging nitrate data
+         NO_df = Flow_df[['date']].copy()
+
+         for name in NO_names:
+             y = NO_list[name]
+             y.rename(columns={y.columns[-1]: name}, inplace=True)
+             NO_df = pd.merge(NO_df, y[['date', name]], on='date', how='left')
+
+         # Flow_df = DF_Date_Range(Flow_df, St_Yr, St_M, St_D, En_Yr, En_M, En_D)

+         NO_df['date'] = pd.to_datetime(NO_df['date'])
+
+         merged = pd.merge(NO_df, Flow_df, on='date', how='inner')
+
+         NO_Loads_In = merged[['date']].copy()
+
+         # Compute individual loads (edit flow variable names if needed)
+         NO_Loads_In['S65_NO_Ld'] = merged['S65_Q'] * merged['S65_NO'] * 1000
+         NO_Loads_In['S71_NO_Ld'] = merged['S71_Q'] * merged['S71_NO'] * 1000
+         # NO_Loads_In['S72_NO_Ld'] = merged['S72_Q'] * merged['S72_NO'] * 1000  # No RFS forecast data
+         NO_Loads_In['S84_NO_Ld'] = merged['S84_Q'] * merged['S84_NO'] * 1000
+         # NO_Loads_In['S127_NO_Ld'] = merged['S127_In'] * merged['S127_NO'] * 1000  # This should be in here, figure out where it went
+         NO_Loads_In['S133_NO_Ld'] = merged['S133_P_Q'] * merged['S133_NO'] * 1000
+         NO_Loads_In['S154_NO_Ld'] = merged['S154_Q'] * merged['S154_NO'] * 1000
+         # NO_Loads_In['S191_NO_Ld'] = merged['S191_Q'] * merged['S191_NO'] * 1000  # This should be in here, figure out where it went
+         NO_Loads_In['S308_NO_Ld'] = merged['S308_In'] * merged['S308_NO'] * 1000
+         NO_Loads_In['FISHP_NO_Ld'] = merged['FISHP_Q'] * merged['FISHP_NO'] * 1000
+         # NO_Loads_In['L8_NO_Ld'] = merged['L8_In'] * merged['L8_NO'] * 1000  # No RFS forecast data
+         # NO_Loads_In['S4_NO_Ld'] = merged['S4_P_Q'] * merged['S4_NO'] * 1000  # No RFS Forecast data
+
+         NO_Loads_In['External_NO_Ld_mg'] = NO_Loads_In.sum(axis=1, numeric_only=True)
+
+         NO_Loads_In.to_csv(f'{output_dir}/LO_External_Loadings_NO_ens_{ensemble:02d}_predicted.csv', index=False)
+     return
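Each active load line above follows the same shape: an element-wise flow-times-concentration product per date, with a constant 1000 as a unit conversion the diff does not spell out. A minimal sketch with hypothetical numbers, including the numeric_only row sum that produces External_NO_Ld_mg:

    import pandas as pd

    merged = pd.DataFrame({
        "date": pd.to_datetime(["2024-06-01", "2024-06-02"]),
        "S65_Q": [120.0, 95.5],    # flow (hypothetical values)
        "S65_NO": [0.031, 0.028],  # nitrate concentration (hypothetical values)
    })
    NO_Loads_In = merged[["date"]].copy()
    NO_Loads_In["S65_NO_Ld"] = merged["S65_Q"] * merged["S65_NO"] * 1000
    # sum(axis=1, numeric_only=True) totals the *_Ld columns, skipping 'date'
    NO_Loads_In["External_NO_Ld_mg"] = NO_Loads_In.sum(axis=1, numeric_only=True)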
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_q_predict.py
@@ -0,0 +1,159 @@
+ import os
+ import pandas as pd
+ from datetime import datetime, timedelta
+
+ def generate_historical_predictions(workspace, forecast_days=16):
+     """
+     Generate predictions for the next `forecast_days` days using historical daily averages
+     from the same calendar dates across previous years. Includes all the files for loone_q.
+
+     Args:
+         workspace : str
+             Path to the folder containing the CSV files.
+
+         forecast_days : int
+             Number of future days to predict (default = 16).
+
+     """
+
+     file_list = [
+         "Estuary_needs_water_Input.csv",
+         "Multi_Seasonal_LONINO.csv",
+         "Seasonal_LONINO.csv",
+         "SFWMM_Daily_Outputs.csv",
+         "Water_dmd.csv",
+         "EAA_MIA_RUNOFF_Inputs.csv",
+     ]
+
+     possible_date_cols = ['date', 'Date']
+     today = datetime.today()
+     current_year = today.year
+
+     for filename in file_list:
+         path = os.path.join(workspace, filename)
+
+         try:
+             df = pd.read_csv(path)
+         except Exception as e:
+             print(f"Could not read {filename}. Error: {e}")
+             continue
+
+         if filename in ["Multi_Seasonal_LONINO.csv", "Seasonal_LONINO.csv"]:
+             if "Year" not in df.columns:
+                 print(f"No 'Year' column in {filename}. Skipping.")
+                 continue
+
+             # Skip if current year already exists
+             if current_year in df["Year"].values:
+                 print(f"{current_year} already present in {filename}. No changes made.")
+                 continue
+
+             # Otherwise calculate averages and append
+             month_cols = [col for col in df.columns if col != "Year"]
+             monthly_means = df[month_cols].mean()
+
+             new_row = {"Year": current_year}
+             new_row.update(monthly_means.to_dict())
+
+             updated_df = pd.concat(
+                 [df, pd.DataFrame([new_row])],
+                 ignore_index=True
+             )
+             output_name = filename.replace(".csv", f"_forecast.csv")
+             output_path = os.path.join(workspace, output_name)
+             updated_df.to_csv(output_path, index=False)
+             print(f"Appended {current_year} row and saved to {output_path}")
+             continue
+
+         # Identify date column
+         date_col = None
+         for col in df.columns:
+             if col in possible_date_cols:
+                 date_col = col
+                 break
+
+         if date_col is None:
+             print(f"Could not detect date column in {filename}. Skipping.")
+             continue
+
+         # Parse dates
+         if filename in ["SFWMM_Daily_Outputs.csv", "Water_dmd.csv"]:
+             df[date_col] = pd.to_datetime(
+                 df[date_col],
+                 format="%d-%b-%y",
+                 errors="coerce"
+             )
+         else:
+             df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
+         df = df.dropna(subset=[date_col])
+         df["month_day"] = df[date_col].dt.strftime("%m-%d")
+
+         predictions_list = []
+
+         # Check if special handling is needed for the boolean file
+         if filename == "Estuary_needs_water_Input.csv":
+             bool_col = "Estuary Needs Water?"
+
+             if bool_col not in df.columns:
+                 print(f"Column '{bool_col}' not found in {filename}. Skipping.")
+                 continue
+
+             # Convert string "True"/"False" to boolean if necessary
+             if df[bool_col].dtype == object:
+                 df[bool_col] = df[bool_col].map({"True": True, "False": False}).fillna(df[bool_col])
+
+             # Compute mode (most frequent value) for each day for each boolean column
+             mode_series = df.groupby("month_day")[bool_col].agg(
+                 lambda x: x.mode().iloc[0] if not x.mode().empty else None
+             )
+             for i in range(1, forecast_days + 1):
+                 future_date = today + timedelta(days=i)
+                 mmdd = future_date.strftime("%m-%d")
+
+                 if mmdd in mode_series.index:
+                     pred_value = mode_series.loc[mmdd]
+                 else:
+                     print(f"No historical data for {mmdd} in {filename}. Skipping that day.")
+                     pred_value = None
+
+                 predictions_list.append({
+                     date_col: future_date,
+                     bool_col: pred_value
+                 })
+
+             pred_df = pd.DataFrame(predictions_list)
+             pred_df = pred_df[[date_col, bool_col]]
+
+         else:
+             # Numeric file handling
+             numeric_cols = df.select_dtypes(include='number').columns.tolist()
+             if not numeric_cols:
+                 print(f"No numeric columns in {filename}. Skipping.")
+                 continue
+
+             historical_means = df.groupby("month_day")[numeric_cols].mean()
+
+             for i in range(0, forecast_days + 1):
+                 future_date = (today + timedelta(days=i)).date()
+                 mmdd = future_date.strftime("%m-%d")
+                 if mmdd in historical_means.index:
+                     row = historical_means.loc[mmdd].copy()
+                     row[date_col] = future_date
+                     predictions_list.append(row)
+                 else:
+                     print(f"No historical data for {mmdd} in {filename}. Skipping that day.")
+
+             if predictions_list:
+                 pred_df = pd.DataFrame(predictions_list)
+                 pred_df = pred_df[[date_col] + [col for col in pred_df.columns if col != date_col]]
+             else:
+                 print(f"No predictions generated for {filename}.")
+                 continue
+
+         # Save predictions
+         output_name = filename.replace(".csv", f"_forecast.csv")
+         output_path = os.path.join(workspace, output_name)
+         pred_df.to_csv(output_path, index=False)
+         print(f"Predictions saved to {output_path}")
+
+     return
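generate_historical_predictions is a day-of-year climatology: history is grouped on an "MM-DD" key, and each forecast date looks up the average (or, for the boolean file, the mode) recorded on that calendar day in past years. The lookup in isolation, with hypothetical data:

    import pandas as pd

    df = pd.DataFrame({
        "date": pd.to_datetime(["2022-06-01", "2023-06-01", "2023-06-02"]),
        "stage_ft": [13.1, 13.7, 13.5],
    })
    df["month_day"] = df["date"].dt.strftime("%m-%d")
    means = df.groupby("month_day")["stage_ft"].mean()
    print(means.loc["06-01"])  # 13.4, the average of the two June 1 values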
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_wq_predict.py
@@ -0,0 +1,71 @@
+ import pandas as pd
+ import os
+
+ def create_forecasts(workspace):
+     """
+     Reads the four specified CSV files from `workspace`,
+     creates forecast versions using historical daily averages,
+     and writes new CSV files into the same folder.
+
+     The forecast always starts today and goes 16 days forward.
+     """
+
+     # List of filenames
+     files = [
+         'N_OP.csv',
+         'S_OP.csv',
+         'N_DIN.csv',
+         'S_DIN.csv',
+         'LO_DO_Clean_daily.csv'
+     ]
+
+     def forecast_df(df, date_column='date'):
+         # Parse dates
+         df[date_column] = pd.to_datetime(df[date_column])
+
+         # Add month and day columns
+         df['month'] = df[date_column].dt.month
+         df['day'] = df[date_column].dt.day
+
+         # Identify numeric columns to forecast
+         value_columns = df.columns.difference([date_column, 'month', 'day'])
+
+         # Compute historical averages
+         avg = df.groupby(['month', 'day'])[value_columns].mean().reset_index()
+
+         # Create forecast dates: today + next 15 days
+         forecast_dates = pd.date_range(
+             start=pd.Timestamp.today().normalize(),
+             periods=16,
+             freq='D'
+         )
+
+         forecast_df = pd.DataFrame({date_column: forecast_dates})
+         forecast_df['month'] = forecast_df[date_column].dt.month
+         forecast_df['day'] = forecast_df[date_column].dt.day
+
+         # Merge with historical averages
+         forecast_df = forecast_df.merge(avg, on=['month', 'day'], how='left')
+
+         # Drop helper columns
+         forecast_df.drop(columns=['month', 'day'], inplace=True)
+
+         return forecast_df
+
+     # Process each file
+     for filename in files:
+         # Read file
+         file_path = os.path.join(workspace, filename)
+         df = pd.read_csv(file_path)
+
+         # Build forecast
+         forecast = forecast_df(df, date_column='date')
+
+         # Save new file
+         forecast_filename = filename.replace('.csv', '_forecast.csv')
+         forecast_path = os.path.join(workspace, forecast_filename)
+         forecast.to_csv(forecast_path, index=False)
+
+     print("Forecast files created successfully.")
+
+
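A hedged invocation sketch (the workspace path is a placeholder): the folder must already hold the five CSVs listed in files, each with a date column, and the call writes a *_forecast.csv next to each one.

    from loone_data_prep.forecast_scripts.loone_wq_predict import create_forecasts

    create_forecasts("/data/loone/workspace")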
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/trib_cond.py
@@ -50,7 +50,15 @@ def create_trib_cond (weather_data, net_inflows, main_tributary, PI, output, ens

      # Calculate NetRF and NetInf
      Trib_Cond_Wkly['NetRF'] = Net_RF_Weekly['tp_corrected'].values - Net_RF_Weekly['evapotranspiration'].values
-     Trib_Cond_Wkly['NetInf'] = Net_Inflow_Weekly['Net_Inflows'].values
+     # First, reset index so that 'date' becomes a column in Net_Inflow_Weekly
+     Net_Inflow_Weekly_reset = Net_Inflow_Weekly.reset_index()
+
+     # Merge the dataframes on 'date'
+     Trib_Cond_Wkly = Trib_Cond_Wkly.merge(Net_Inflow_Weekly_reset[['date', 'Net_Inflows']], on='date', how='left')
+
+     # Now Trib_Cond_Wkly will have a new 'Net_Inflows' column aligned by date
+     Trib_Cond_Wkly.rename(columns={'Net_Inflows': 'NetInf'}, inplace=True)
+

      # Select only the desired ensemble column and rename it
      S65E_selected = S65E_Weekly[[ensemble_col]].rename(columns={ensemble_col: "S65E"})
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/utils.py
@@ -466,12 +466,15 @@ def wind_induced_waves(
      lo_stage_in: str = "LO_Stg_Sto_SA_2008-2023.csv",
      wind_shear_stress_out: str = "WindShearStress.csv",
      current_shear_stress_out: str = "Current_ShearStress.csv",
+     forecast: bool = False,
  ):
      # Read Mean Wind Speed in LO
      LO_WS = pd.read_csv(os.path.join(f"{input_dir}/", wind_speed_in))
      LO_WS["WS_mps"] = LO_WS["LO_Avg_WS_MPH"] * 0.44704  # MPH to m/s
      # Read LO Stage to consider water depth changes
      LO_Stage = pd.read_csv(os.path.join(f"{input_dir}/", lo_stage_in))
+     if forecast:
+         LO_Stage["Stage_ft"] = LO_Stage["Stage"].astype(float)
      LO_Stage["Stage_m"] = LO_Stage["Stage_ft"] * 0.3048
      Bottom_Elev = 0.5  # m (Karl E. Havens • Alan D. Steinman 2013)
      LO_Wd = LO_Stage["Stage_m"] - Bottom_Elev
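The new forecast flag covers stage files whose column is named Stage rather than Stage_ft; the values are copied across (cast to float) so the existing feet-to-meters conversion still applies. A minimal sketch, assuming a hypothetical forecast file that stores stage as strings:

    import pandas as pd

    LO_Stage = pd.DataFrame({"Stage": ["13.5", "13.6"]})  # hypothetical forecast input
    forecast = True
    if forecast:
        LO_Stage["Stage_ft"] = LO_Stage["Stage"].astype(float)
    LO_Stage["Stage_m"] = LO_Stage["Stage_ft"] * 0.3048  # ft -> m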
@@ -998,7 +1001,15 @@ def get_synthetic_data(date_start: str, df: pd.DataFrame):

      # Group by the month and day, then calculate the average for each group
      average_values = filtered_data.groupby('month_day')['Data'].mean()
-
+     # Interpolate in case there are missing values:
+     start_date = pd.to_datetime('2001-' + start_month_day)
+     end_date = pd.to_datetime('2001-' + end_month_day)
+
+     full_dates = pd.date_range(start=start_date, end=end_date)
+     full_index = full_dates.strftime('%m-%d')
+
+     average_values = average_values.reindex(full_index)
+     average_values = average_values.interpolate(method='linear')
      average_values_df = pd.DataFrame({
          'date': pd.date_range(start=date_start, end=date_end),
          'Data': average_values.values
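The added block reindexes the month-day means onto the full calendar span (using 2001 as a dummy year) before interpolating, so calendar days with no historical observations no longer drop out of the synthetic series. The mechanism in isolation, with hypothetical values:

    import pandas as pd

    avg = pd.Series([1.0, 3.0], index=["06-01", "06-03"])  # June 2 has no samples
    full = pd.date_range("2001-06-01", "2001-06-04").strftime("%m-%d")
    filled = avg.reindex(full).interpolate(method="linear")
    print(filled.tolist())  # [1.0, 2.0, 3.0, 3.0] -- the trailing gap holds the last value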
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loone_data_prep
- Version: 1.1.2
+ Version: 1.2.1
  Summary: Prepare data to run the LOONE model.
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/SOURCES.txt
@@ -18,8 +18,13 @@ loone_data_prep/flow_data/get_forecast_flows.py
  loone_data_prep/flow_data/get_inflows.py
  loone_data_prep/flow_data/get_outflows.py
  loone_data_prep/flow_data/hydro.py
+ loone_data_prep/forecast_scripts/Chla_merged.py
  loone_data_prep/forecast_scripts/create_forecast_LOWs.py
  loone_data_prep/forecast_scripts/forecast_stages.py
+ loone_data_prep/forecast_scripts/get_Chla_predicted.py
+ loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py
+ loone_data_prep/forecast_scripts/loone_q_predict.py
+ loone_data_prep/forecast_scripts/loone_wq_predict.py
  loone_data_prep/forecast_scripts/predict_PI.py
  loone_data_prep/forecast_scripts/trib_cond.py
  loone_data_prep/forecast_scripts/weather_forecast.py
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "loone_data_prep"
- version = "1.1.2"
+ version = "1.2.1"
  description = "Prepare data to run the LOONE model."
  readme = "README.md"
  license = { file = "LICENSE" }
The remaining files (15-42 in the list above, each +0 -0) have no content changes.