loone-data-prep 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry. It is provided for informational purposes only.
@@ -45,10 +45,11 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
45
45
  LO_Stage = pd.read_csv(f"{input_dir}/LO_Stage.csv")
46
46
  # Create Column (EOD Stg(ft, NGVD)) in File (SFWMM_Daily_Outputs)
47
47
  LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
48
+ LO_Stage.index = LO_Stage["date"]
48
49
  # Calculate average
49
50
  if "Average_Stage" not in LO_Stage.columns:
50
51
  LO_Stage = LO_Stage.loc[:, ~LO_Stage.columns.str.contains("^Unnamed")]
51
- LO_Stage["Average_Stage"] = LO_Stage.mean(axis=1)
52
+ LO_Stage["Average_Stage"] = LO_Stage.drop(columns=['date']).mean(axis=1)
52
53
  LO_Stage.to_csv(f"{input_dir}/LO_Stage.csv", index=False)
53
54
  LO_Storage = stg2sto(f"{input_dir}/StgSto_data.csv", LO_Stage["Average_Stage"], 0)
54
55
  LO_SA = stg2ar(f"{input_dir}/Stgar_data.csv", LO_Stage["Average_Stage"], 0)
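
The hunk above indexes LO_Stage by its date column and drops that column before the row-wise mean, so Average_Stage is computed over the stage columns only. A minimal sketch of the same pattern on synthetic data (column names invented, not the package's):

    import pandas as pd

    df = pd.DataFrame({
        "date": pd.date_range("2024-01-01", periods=3, freq="D"),
        "stage_a": [14.1, 14.3, 14.2],
        "stage_b": [14.0, 14.4, 14.1],
    })
    df.index = df["date"]
    # Drop the non-numeric "date" column first; leaving it in makes
    # mean(axis=1) raise or silently skip it, depending on the pandas version.
    df["Average_Stage"] = df.drop(columns=["date"]).mean(axis=1)
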
@@ -580,6 +580,18 @@ def main(input_dir: str, output_dir: str) -> None:
580
580
  LO_OP_data_Inter['Mean_OP'] = LO_OP_data_Inter.mean(axis=1, numeric_only=True)
581
581
  LO_OP_data_Inter = DF_Date_Range(LO_OP_data_Inter, St_Yr, St_M, St_D, En_Yr, En_M, En_D)
582
582
  LO_OP_data_Inter.to_csv(f'{output_dir}/LO_OP.csv', index=False)
583
+
584
+ # Create File (N_OP) (L001, L005, L008)
585
+ n_op = LO_OP_data_Inter[['date', 'Data_L001_OP_Inter', 'Data_L005_OP_Inter', 'Data_L008_OP_Inter']]
586
+ n_op['OP'] = n_op.mean(axis=1, numeric_only=True) * 1000 # mg/L to mg/m3
587
+ n_op.drop(['Data_L001_OP_Inter', 'Data_L005_OP_Inter', 'Data_L008_OP_Inter'], axis=1, inplace=True)
588
+ n_op.to_csv(f'{output_dir}/N_OP.csv', index=False)
589
+
590
+ # Create File (S_OP) (L004, L006, L007, L008, and LZ40)
591
+ s_op = LO_OP_data_Inter[['date', 'Data_L004_OP_Inter', 'Data_L006_OP_Inter', 'Data_L007_OP_Inter', 'Data_L008_OP_Inter', 'Data_LZ40_OP_Inter']]
592
+ s_op['OP'] = s_op.mean(axis=1, numeric_only=True) * 1000 # mg/L to mg/m3
593
+ s_op.drop(['Data_L004_OP_Inter', 'Data_L006_OP_Inter', 'Data_L007_OP_Inter', 'Data_L008_OP_Inter', 'Data_LZ40_OP_Inter'], axis=1, inplace=True)
594
+ s_op.to_csv(f'{output_dir}/S_OP.csv', index=False)
583
595
 
584
596
  # Interpolated NH4 Observations in Lake
585
597
  # Create File (LO_Avg_NH4)
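
The new N_OP/S_OP blocks average the ortho-phosphate columns for the north stations (L001, L005, L008) and the south stations (L004, L006, L007, L008, LZ40) and scale mg/L to mg/m3. Because n_op and s_op are column slices of LO_OP_data_Inter, the in-place assignments can trigger pandas' SettingWithCopyWarning; a sketch of the same computation on an explicit copy (illustrative only, not the released code):

    n_op = LO_OP_data_Inter[["date", "Data_L001_OP_Inter", "Data_L005_OP_Inter", "Data_L008_OP_Inter"]].copy()
    n_op["OP"] = n_op.mean(axis=1, numeric_only=True) * 1000  # mg/L -> mg/m3
    n_op = n_op[["date", "OP"]]
    n_op.to_csv(f"{output_dir}/N_OP.csv", index=False)
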
@@ -662,6 +674,22 @@ def main(input_dir: str, output_dir: str) -> None:
662
674
  LO_DIN['NO'] = LO_NO_Clean_Inter['Mean_NO'].values
663
675
  LO_DIN['DIN_mg/m3'] = LO_DIN[['NH4', 'NO']].sum(axis=1)*1000
664
676
  LO_DIN.to_csv(f'{output_dir}/LO_DIN.csv', index=False)
677
+
678
+ # Create File (N_DIN) (L001, L005, L008)
679
+ n_din = pd.DataFrame(date_DIN, columns=['date'])
680
+ n_din.set_index('date', inplace=True)
681
+ n_din['NH4'] = LO_NH4_Clean_Inter[['date', 'Data_L001_NH4_Inter', 'Data_L005_NH4_Inter', 'Data_L008_NH4_Inter']].mean(axis=1, numeric_only=True)
682
+ n_din['NO'] = LO_NO_Clean_Inter[['date', 'Data_L001_NO_Inter', 'Data_L005_NO_Inter', 'Data_L008_NO_Inter']].mean(axis=1, numeric_only=True)*1000 # mg/L to mg/m3
683
+ n_din['DIN'] = n_din[['NH4', 'NO']].sum(axis=1)*1000 # mg/L to mg/m3
684
+ n_din.to_csv(f'{output_dir}/N_DIN.csv')
685
+
686
+ # Create File (S_DIN) (L004, L006, L007, L008, LZ40)
687
+ s_din = pd.DataFrame(date_DIN, columns=['date'])
688
+ s_din.set_index('date', inplace=True)
689
+ s_din['NH4'] = LO_NH4_Clean_Inter[['date', 'Data_L004_NH4_Inter', 'Data_L006_NH4_Inter', 'Data_L007_NH4_Inter', 'Data_L008_NH4_Inter', 'Data_LZ40_NH4_Inter']].mean(axis=1, numeric_only=True)
690
+ s_din['NO'] = LO_NO_Clean_Inter[['date', 'Data_L004_NO_Inter', 'Data_L006_NO_Inter', 'Data_L007_NO_Inter', 'Data_L008_NO_Inter', 'Data_LZ40_NO_Inter']].mean(axis=1, numeric_only=True)*1000 # mg/L to mg/m3
691
+ s_din['DIN'] = s_din[['NH4', 'NO']].sum(axis=1)*1000 # mg/L to mg/m3
692
+ s_din.to_csv(f'{output_dir}/S_DIN.csv')
665
693
 
666
694
  # Interpolated DO Observations in Lake
667
695
  # Create File (LO_Avg_DO)
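
The N_DIN/S_DIN blocks mirror the OP blocks, averaging the same north and south station groups from LO_NH4_Clean_Inter and LO_NO_Clean_Inter; the "date" entry in each column list is skipped by mean(..., numeric_only=True). Unlike N_OP/S_OP, these frames are indexed by date before to_csv, so the date is written as the index column rather than suppressed with index=False.
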
@@ -821,6 +849,93 @@ def main(input_dir: str, output_dir: str) -> None:
821
849
  LO_Chla_Merge_Monthly_Inter = LO_Chla_Merge.resample('M').mean()
822
850
  LO_Chla_Merge_Monthly_Inter.to_csv(f'{output_dir}/LO_Chla_Merge_Monthly_Inter.csv')
823
851
 
852
+ # Create files (LO_Chla_Obs.csv, N_Merged_Chla.csv, and S_Merged_Chla.csv)
853
+ L001_Chla = pd.read_csv(f'{input_dir}/water_quality_L001_CHLOROPHYLL-A, CORRECTED.csv')
854
+ L001_Chla.drop(columns=['days'], inplace=True)
855
+ L004_Chla = pd.read_csv(f'{input_dir}/water_quality_L004_CHLOROPHYLL-A, CORRECTED.csv')
856
+ L004_Chla.drop(columns=['days'], inplace=True)
857
+ L005_Chla = pd.read_csv(f'{input_dir}/water_quality_L005_CHLOROPHYLL-A, CORRECTED.csv')
858
+ L005_Chla.drop(columns=['days'], inplace=True)
859
+ L006_Chla = pd.read_csv(f'{input_dir}/water_quality_L006_CHLOROPHYLL-A, CORRECTED.csv')
860
+ L006_Chla.drop(columns=['days'], inplace=True)
861
+ L007_Chla = pd.read_csv(f'{input_dir}/water_quality_L007_CHLOROPHYLL-A, CORRECTED.csv')
862
+ L007_Chla.drop(columns=['days'], inplace=True)
863
+ L008_Chla = pd.read_csv(f'{input_dir}/water_quality_L008_CHLOROPHYLL-A, CORRECTED.csv')
864
+ L008_Chla.drop(columns=['days'], inplace=True)
865
+ LZ40_Chla = pd.read_csv(f'{input_dir}/water_quality_LZ40_CHLOROPHYLL-A, CORRECTED.csv')
866
+ LZ40_Chla.drop(columns=['days'], inplace=True)
867
+ L001_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L001_CHLOROPHYLL-A(LC).csv')
868
+ L001_Chla_LC.drop(columns=['days'], inplace=True)
869
+ L004_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L004_CHLOROPHYLL-A(LC).csv')
870
+ L004_Chla_LC.drop(columns=['days'], inplace=True)
871
+ L005_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L005_CHLOROPHYLL-A(LC).csv')
872
+ L005_Chla_LC.drop(columns=['days'], inplace=True)
873
+ L006_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L006_CHLOROPHYLL-A(LC).csv')
874
+ L006_Chla_LC.drop(columns=['days'], inplace=True)
875
+ L007_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L007_CHLOROPHYLL-A(LC).csv')
876
+ L007_Chla_LC.drop(columns=['days'], inplace=True)
877
+ L008_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L008_CHLOROPHYLL-A(LC).csv')
878
+ L008_Chla_LC.drop(columns=['days'], inplace=True)
879
+ LZ40_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_LZ40_CHLOROPHYLL-A(LC).csv')
880
+ LZ40_Chla_LC.drop(columns=['days'], inplace=True)
881
+
882
+ LO_Chla = pd.merge(L001_Chla, L004_Chla, how='left', on='date')
883
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
884
+ LO_Chla = pd.merge(LO_Chla, L005_Chla, how='left', on='date')
885
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
886
+ LO_Chla = pd.merge(LO_Chla, L006_Chla, how='left', on='date')
887
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
888
+ LO_Chla = pd.merge(LO_Chla, L007_Chla, how='left', on='date')
889
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
890
+ LO_Chla = pd.merge(LO_Chla, L008_Chla, how='left', on='date')
891
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
892
+ LO_Chla = pd.merge(LO_Chla, LZ40_Chla, how='left', on='date')
893
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
894
+ LO_Chla = LO_Chla.set_index('date')
895
+ LO_Chla['Mean_Chla'] = LO_Chla.mean(axis=1)
896
+ LO_Chla = LO_Chla.reset_index()
897
+ LO_Chla_N_cols = ['L001_CHLOROPHYLL-A, CORRECTED_ug/L', 'L005_CHLOROPHYLL-A, CORRECTED_ug/L', 'L008_CHLOROPHYLL-A, CORRECTED_ug/L']
898
+ LO_Chla['Chla_North'] = LO_Chla[LO_Chla_N_cols].mean(axis=1)
899
+ LO_Chla_S_cols = ['L004_CHLOROPHYLL-A, CORRECTED_ug/L', 'L006_CHLOROPHYLL-A, CORRECTED_ug/L', 'L007_CHLOROPHYLL-A, CORRECTED_ug/L','L008_CHLOROPHYLL-A, CORRECTED_ug/L','LZ40_CHLOROPHYLL-A, CORRECTED_ug/L']
900
+ LO_Chla['Chla_South'] = LO_Chla[LO_Chla_S_cols].mean(axis=1)
901
+
902
+ LO_Chla_LC = pd.merge(L001_Chla_LC, L004_Chla_LC, how='left', on='date')
903
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
904
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L005_Chla_LC, how='left', on='date')
905
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
906
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L006_Chla_LC, how='left', on='date')
907
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
908
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L007_Chla_LC, how='left', on='date')
909
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
910
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L008_Chla_LC, how='left', on='date')
911
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
912
+ LO_Chla_LC = pd.merge(LO_Chla_LC, LZ40_Chla_LC, how='left', on='date')
913
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
914
+ LO_Chla_LC = LO_Chla_LC.set_index('date')
915
+ LO_Chla_LC['Mean_Chla'] = LO_Chla_LC.mean(axis=1)
916
+ LO_Chla_LC = LO_Chla_LC.reset_index()
917
+ LO_Chla_LC_N_cols = ['L001_CHLOROPHYLL-A(LC)_ug/L', 'L005_CHLOROPHYLL-A(LC)_ug/L', 'L008_CHLOROPHYLL-A(LC)_ug/L']
918
+ LO_Chla_LC['Chla_North'] = LO_Chla_LC[LO_Chla_LC_N_cols].mean(axis=1)
919
+ LO_Chla_LC_S_cols = ['L004_CHLOROPHYLL-A(LC)_ug/L', 'L006_CHLOROPHYLL-A(LC)_ug/L', 'L007_CHLOROPHYLL-A(LC)_ug/L','L008_CHLOROPHYLL-A(LC)_ug/L','LZ40_CHLOROPHYLL-A(LC)_ug/L']
920
+ LO_Chla_LC['Chla_South'] = LO_Chla_LC[LO_Chla_LC_S_cols].mean(axis=1)
921
+
922
+ LO_Chla = DF_Date_Range(LO_Chla, 2008, 1, 1, 2010, 10, 19)
923
+ LO_Chla_df = pd.DataFrame(LO_Chla['date'], columns=['date'])
924
+ LO_Chla_df['Chla'] = LO_Chla['Mean_Chla']
925
+ LO_Chla_df['Chla_N'] = LO_Chla['Chla_North']
926
+ LO_Chla_df['Chla_S'] = LO_Chla['Chla_South']
927
+
928
+ LO_Chla_LC = DF_Date_Range(LO_Chla_LC, 2010, 10, 20, 2023, 6, 30)
929
+ LO_Chla_LC_df = pd.DataFrame(LO_Chla_LC['date'], columns=['date'])
930
+ LO_Chla_LC_df['Chla'] = LO_Chla_LC['Mean_Chla']
931
+ LO_Chla_LC_df['Chla_N'] = LO_Chla_LC['Chla_North']
932
+ LO_Chla_LC_df['Chla_S'] = LO_Chla_LC['Chla_South']
933
+
934
+ LO_Chla_Merge = pd.concat([LO_Chla_df, LO_Chla_LC_df]).reset_index(drop=True)
935
+ LO_Chla_Merge.to_csv(f'{output_dir}/LO_Chla_Obs.csv')
936
+ LO_Chla_Merge[['date', 'Chla_N']].rename(columns={'Chla_N': 'Chla'}).to_csv(f'{output_dir}/N_Merged_Chla.csv', index=False)
937
+ LO_Chla_Merge[['date', 'Chla_S']].rename(columns={'Chla_S': 'Chla'}).to_csv(f'{output_dir}/S_Merged_Chla.csv', index=False)
938
+
824
939
  # Create Files S65E_Avg_Chla
825
940
  S65E_Chla_Inter = pd.read_csv(f'{input_dir}/water_quality_S65E_CHLOROPHYLL-A, CORRECTED_Interpolated.csv')
826
941
  S65E_Chla_LC_Inter = pd.read_csv(f'{input_dir}/water_quality_S65E_CHLOROPHYLL-A(LC)_Interpolated.csv')
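
The chlorophyll-a block merges the per-station "CHLOROPHYLL-A, CORRECTED" and "CHLOROPHYLL-A(LC)" files on date, computes a lake-wide mean plus north (L001, L005, L008) and south (L004, L006, L007, L008, LZ40) means, and then stitches the two products by date range: the CORRECTED record covers 2008-01-01 through 2010-10-19 and the (LC) record 2010-10-20 through 2023-06-30. The concatenated series is written to LO_Chla_Obs.csv, and the north and south means are renamed to a single Chla column in N_Merged_Chla.csv and S_Merged_Chla.csv.
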
@@ -1,7 +1,10 @@
1
1
  import sys
2
2
  import os
3
+ import math
4
+ import numpy as np
3
5
  import pandas as pd
4
6
  import geoglows
7
+ from scipy import interpolate
5
8
 
6
9
 
7
10
  SECONDS_IN_DAY = 86400
@@ -37,29 +40,42 @@ def get_bias_corrected_data(
37
40
 
38
41
  # Get the historical simulation data for the given reach ID
39
42
  historical_data = None
40
-
43
+
41
44
  if cache_path is None:
42
45
  historical_data = geoglows.streamflow.historic_simulation(reach_id)
43
46
  else:
44
47
  # Create the geoglows cache directory if it doesn't exist
45
- geoglows_cache_path = os.path.join(cache_path, 'geoglows_cache')
48
+ geoglows_cache_path = os.path.join(cache_path, "geoglows_cache")
46
49
  if not os.path.exists(geoglows_cache_path):
47
50
  os.makedirs(geoglows_cache_path)
48
-
51
+
49
52
  # Check if the historical simulation data is already cached
50
- if os.path.exists(os.path.join(geoglows_cache_path, f'{reach_id}_historic_simulation.csv')):
51
- historical_data = pd.read_csv(os.path.join(geoglows_cache_path, f'{reach_id}_historic_simulation.csv'), index_col=0)
53
+ if os.path.exists(
54
+ os.path.join(
55
+ geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
56
+ )
57
+ ):
58
+ historical_data = pd.read_csv(
59
+ os.path.join(
60
+ geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
61
+ ),
62
+ index_col=0,
63
+ )
52
64
  historical_data.index = pd.to_datetime(historical_data.index)
53
65
  else:
54
66
  historical_data = geoglows.streamflow.historic_simulation(reach_id)
55
- historical_data.to_csv(os.path.join(geoglows_cache_path, f'{reach_id}_historic_simulation.csv'))
67
+ historical_data.to_csv(
68
+ os.path.join(
69
+ geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
70
+ )
71
+ )
56
72
 
57
73
  # Correct the forecast bias in the station ensembles
58
- station_ensembles = geoglows.bias.correct_forecast(
74
+ station_ensembles = bias_correct_forecast(
59
75
  station_ensembles, historical_data, prepared_od
60
76
  )
61
77
  # Correct the forecast bias in the station stats
62
- station_stats = geoglows.bias.correct_forecast(
78
+ station_stats = bias_correct_forecast(
63
79
  station_stats, historical_data, prepared_od
64
80
  )
65
81
 
@@ -92,6 +108,175 @@ def prep_observed_data(observed_data: pd.DataFrame) -> pd.DataFrame:
92
108
  return observed_data
93
109
 
94
110
 
111
+ def bias_correct_historical(
112
+ simulated_data: pd.DataFrame, observed_data: pd.DataFrame
113
+ ) -> pd.DataFrame:
114
+ """
115
+ Accepts a historically simulated flow timeseries and observed flow timeseries and attempts to correct biases in the
116
+ simulation on a monthly basis.
117
+
118
+ Args:
119
+ simulated_data: A dataframe with a datetime index and a single column of streamflow values
120
+ observed_data: A dataframe with a datetime index and a single column of streamflow values
121
+
122
+ Returns:
123
+ pandas DataFrame with a datetime index and a single column of streamflow values
124
+ """
125
+ # list of the unique months in the historical simulation. should always be 1->12 but just in case...
126
+ unique_simulation_months = sorted(set(simulated_data.index.strftime("%m")))
127
+ dates = []
128
+ values = []
129
+
130
+ for month in unique_simulation_months:
131
+ # filter historic data to only be current month
132
+ monthly_simulated = simulated_data[
133
+ simulated_data.index.month == int(month)
134
+ ].dropna()
135
+ to_prob = _flow_and_probability_mapper(
136
+ monthly_simulated, to_probability=True
137
+ )
138
+ # filter the observations to current month
139
+ monthly_observed = observed_data[
140
+ observed_data.index.month == int(month)
141
+ ].dropna()
142
+ to_flow = _flow_and_probability_mapper(monthly_observed, to_flow=True)
143
+
144
+ dates += monthly_simulated.index.to_list()
145
+ value = to_flow(to_prob(monthly_simulated.values))
146
+ values += value.tolist()
147
+
148
+ corrected = pd.DataFrame(
149
+ data=values, index=dates, columns=["Corrected Simulated Streamflow"]
150
+ )
151
+ corrected.sort_index(inplace=True)
152
+ return corrected
153
+
154
+
155
+ def bias_correct_forecast(
156
+ forecast_data: pd.DataFrame,
157
+ simulated_data: pd.DataFrame,
158
+ observed_data: pd.DataFrame,
159
+ use_month: int = 0,
160
+ ) -> pd.DataFrame:
161
+ """
162
+ Accepts a short term forecast of streamflow, simulated historical flow, and observed flow timeseries and attempts
163
+ to correct biases in the forecasted data
164
+
165
+ Args:
166
+ forecast_data: A dataframe with a datetime index and any number of columns of forecasted flow. Compatible with
167
+ forecast_stats, forecast_ensembles, forecast_records
168
+ simulated_data: A dataframe with a datetime index and a single column of streamflow values
169
+ observed_data: A dataframe with a datetime index and a single column of streamflow values
170
+ use_month: Optional: either 0 for correct the forecast based on the first month of the forecast data or -1 if
171
+ you want to correct based on the ending month of the forecast data
172
+
173
+ Returns:
174
+ pandas DataFrame with a copy of forecasted data with values updated in each column
175
+ """
176
+ # make a copy of the forecasts which we update and return so the original data is not changed
177
+ forecast_copy = forecast_data.copy()
178
+
179
+ # make the flow and probability interpolation functions
180
+ monthly_simulated = simulated_data[
181
+ simulated_data.index.month == forecast_copy.index[use_month].month
182
+ ].dropna()
183
+ monthly_observed = observed_data[
184
+ observed_data.index.month == forecast_copy.index[use_month].month
185
+ ].dropna()
186
+ to_prob = _flow_and_probability_mapper(
187
+ monthly_simulated, to_probability=True, extrapolate=True
188
+ )
189
+ to_flow = _flow_and_probability_mapper(
190
+ monthly_observed, to_flow=True, extrapolate=True
191
+ )
192
+
193
+ # for each column of forecast data, make the interpolation function and update the dataframe
194
+ for column in forecast_copy.columns:
195
+ tmp = forecast_copy[column].dropna()
196
+ forecast_copy.update(
197
+ pd.DataFrame(
198
+ to_flow(to_prob(tmp.values)), index=tmp.index, columns=[column]
199
+ )
200
+ )
201
+
202
+ return forecast_copy
203
+
204
+
205
+ def _flow_and_probability_mapper(
206
+ monthly_data: pd.DataFrame,
207
+ to_probability: bool = False,
208
+ to_flow: bool = False,
209
+ extrapolate: bool = False,
210
+ ) -> interpolate.interp1d:
211
+ if not to_flow and not to_probability:
212
+ raise ValueError(
213
+ "You need to specify either to_probability or to_flow as True"
214
+ )
215
+
216
+ # get maximum value to bound histogram
217
+ max_val = math.ceil(np.max(monthly_data.max()))
218
+ min_val = math.floor(np.min(monthly_data.min()))
219
+
220
+ if max_val == min_val:
221
+ max_val += 0.1
222
+
223
+ # determine number of histograms bins needed
224
+ number_of_points = len(monthly_data.values)
225
+ number_of_classes = math.ceil(1 + (3.322 * math.log10(number_of_points)))
226
+
227
+ # specify the bin width for histogram (in m3/s)
228
+ step_width = (max_val - min_val) / number_of_classes
229
+
230
+ # specify histogram bins
231
+ bins = np.arange(
232
+ -np.min(step_width),
233
+ max_val + 2 * np.min(step_width),
234
+ np.min(step_width),
235
+ )
236
+
237
+ if bins[0] == 0:
238
+ bins = np.concatenate((-bins[1], bins))
239
+ elif bins[0] > 0:
240
+ bins = np.concatenate((-bins[0], bins))
241
+
242
+ # make the histogram
243
+ counts, bin_edges = np.histogram(monthly_data, bins=bins)
244
+
245
+ # adjust the bins to be the center
246
+ bin_edges = bin_edges[1:]
247
+
248
+ # normalize the histograms
249
+ counts = counts.astype(float) / monthly_data.size
250
+
251
+ # calculate the cdfs
252
+ cdf = np.cumsum(counts)
253
+
254
+ # Identify indices where consecutive values are the same
255
+ duplicate_indices = np.where(np.diff(cdf) == 0)[0]
256
+
257
+ # Adjust duplicate value to be an extrapolation of the previous value
258
+ for idx in duplicate_indices:
259
+ if idx > 0:
260
+ cdf[idx] = cdf[idx - 1] + (cdf[idx + 1] - cdf[idx - 1]) / 2
261
+
262
+ # interpolated function to convert simulated streamflow to prob
263
+ if to_probability:
264
+ if extrapolate:
265
+ func = interpolate.interp1d(
266
+ bin_edges, cdf, fill_value="extrapolate"
267
+ )
268
+ else:
269
+ func = interpolate.interp1d(bin_edges, cdf)
270
+ return lambda x: np.clip(func(x), 0, 1)
271
+ # interpolated function to convert simulated prob to observed streamflow
272
+ elif to_flow:
273
+ if extrapolate:
274
+ return interpolate.interp1d(
275
+ cdf, bin_edges, fill_value="extrapolate"
276
+ )
277
+ return interpolate.interp1d(cdf, bin_edges)
278
+
279
+
95
280
  if __name__ == "__main__":
96
281
  station_id = sys.argv[1]
97
282
  reach_id = sys.argv[2]
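
Taken together, the hunks above cache the geoglows historic simulation under {cache_path}/geoglows_cache/{reach_id}_historic_simulation.csv and replace geoglows.bias.correct_forecast with local bias_correct_forecast/bias_correct_historical helpers (hence the new math, numpy, and scipy.interpolate imports). A minimal usage sketch of the forecast correction, assuming the helpers above are importable and using synthetic data; all three frames need a DatetimeIndex because the helpers filter by .index.month:

    import numpy as np
    import pandas as pd

    days = pd.date_range("2015-01-01", "2019-12-31", freq="D")
    simulated = pd.DataFrame({"flow_m3s": np.random.gamma(2.0, 50.0, len(days))}, index=days)
    observed = pd.DataFrame({"flow_m3s": np.random.gamma(2.0, 40.0, len(days))}, index=days)

    horizon = pd.date_range("2020-01-05", periods=10, freq="D")
    forecast = pd.DataFrame(
        {f"ensemble_{i:02d}": np.random.gamma(2.0, 45.0, len(horizon)) for i in range(1, 4)},
        index=horizon,
    )

    # Maps each ensemble member through the simulated-flow CDF and back out of the
    # observed-flow CDF for the forecast's starting month; bias_correct_historical
    # applies the same idea to the historical series itself.
    corrected = bias_correct_forecast(forecast, simulated, observed)
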
loone_data_prep/utils.py CHANGED
@@ -11,7 +11,10 @@ from retry import retry
11
11
  from scipy.optimize import fsolve
12
12
  from scipy import interpolate
13
13
  from rpy2.robjects import r
14
- from rpy2.robjects.vectors import StrVector as rpy2StrVector, DataFrame as rpy2DataFrame
14
+ from rpy2.robjects.vectors import (
15
+ StrVector as rpy2StrVector,
16
+ DataFrame as rpy2DataFrame,
17
+ )
15
18
  from rpy2.rinterface_lib.embedded import RRuntimeError
16
19
 
17
20
 
@@ -44,7 +47,15 @@ INTERP_DICT = {
44
47
  },
45
48
  "PHOSPHATE, ORTHO AS P": {
46
49
  "units": "mg/L",
47
- "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
50
+ "station_ids": [
51
+ "L001",
52
+ "L004",
53
+ "L005",
54
+ "L006",
55
+ "L007",
56
+ "L008",
57
+ "LZ40",
58
+ ],
48
59
  },
49
60
  "NITRATE+NITRITE-N": {
50
61
  "units": "mg/L",
@@ -146,9 +157,26 @@ INTERP_DICT = {
146
157
  "LZ40",
147
158
  ],
148
159
  },
149
- "DISSOLVED OXYGEN": {"units": "mg/L", "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
150
- "RADP": {"units": "MICROMOLE/m^2/s", "station_ids": ["L001", "L005", "L006", "LZ40"]},
151
- "RADT": {"units": "kW/m^2", "station_ids": ["L001", "L005", "L006", "LZ40"]},
160
+ "DISSOLVED OXYGEN": {
161
+ "units": "mg/L",
162
+ "station_ids": [
163
+ "L001",
164
+ "L004",
165
+ "L005",
166
+ "L006",
167
+ "L007",
168
+ "L008",
169
+ "LZ40",
170
+ ],
171
+ },
172
+ "RADP": {
173
+ "units": "MICROMOLE/m^2/s",
174
+ "station_ids": ["L001", "L005", "L006", "LZ40"],
175
+ },
176
+ "RADT": {
177
+ "units": "kW/m^2",
178
+ "station_ids": ["L001", "L005", "L006", "LZ40"],
179
+ },
152
180
  }
153
181
  DEFAULT_PREDICTION_STATIONS_IDS = [
154
182
  "S65E_S",
@@ -263,14 +291,18 @@ def data_interpolations(
263
291
  Data_In = Data_In.set_index(["date"])
264
292
  Data_In.index = pd.to_datetime(Data_In.index, unit="ns")
265
293
  Data_df = Data_In.resample("D").mean()
266
- Data_df = Data_df.dropna(subset=["%s_%s_%s" % (station, parameter, units)])
294
+ Data_df = Data_df.dropna(
295
+ subset=["%s_%s_%s" % (station, parameter, units)]
296
+ )
267
297
  Data_df = Data_df.reset_index()
268
298
  Data_df["Yr_M"] = pd.to_datetime(Data_df["date"]).dt.to_period("M")
269
299
  start_date = Data_df["date"].iloc[0]
270
300
  end_date = Data_df["date"].iloc[-1]
271
301
  date_rng = pd.date_range(start=start_date, end=end_date, freq="M")
272
302
  Monthly_df = pd.DataFrame(date_rng, columns=["date"])
273
- Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period("M")
303
+ Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period(
304
+ "M"
305
+ )
274
306
  New_date = []
275
307
  New_data = []
276
308
  Days = []
@@ -282,13 +314,27 @@ def data_interpolations(
282
314
  if i in Data_df.index:
283
315
  if type(Data_df.loc[i]["date"]) == pd.Timestamp:
284
316
  New_date.append(Data_df.loc[i]["date"])
285
- New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)])
317
+ New_data.append(
318
+ Data_df.loc[i][
319
+ "%s_%s_%s" % (station, parameter, units)
320
+ ]
321
+ )
286
322
  else:
287
323
  for j in range(len(Data_df.loc[i]["date"])):
288
324
  New_date.append(Data_df.loc[i]["date"][j])
289
- New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)][j])
325
+ New_data.append(
326
+ Data_df.loc[i][
327
+ "%s_%s_%s" % (station, parameter, units)
328
+ ][j]
329
+ )
290
330
  elif i not in Data_df.index:
291
- New_date.append(datetime.datetime(Monthly_df.loc[i]["date"].year, Monthly_df.loc[i]["date"].month, 1))
331
+ New_date.append(
332
+ datetime.datetime(
333
+ Monthly_df.loc[i]["date"].year,
334
+ Monthly_df.loc[i]["date"].month,
335
+ 1,
336
+ )
337
+ )
292
338
  New_data.append(np.NaN)
293
339
 
294
340
  New_date = pd.to_datetime(New_date, format="%Y-%m-%d")
@@ -302,7 +348,9 @@ def data_interpolations(
302
348
  Days_cum.append(
303
349
  Days_cum[i - 1]
304
350
  + Days[i]
305
- + monthrange(New_date[i - 1].year, New_date[i - 1].month)[1]
351
+ + monthrange(New_date[i - 1].year, New_date[i - 1].month)[
352
+ 1
353
+ ]
306
354
  - Days[i - 1]
307
355
  )
308
356
  Final_df = pd.DataFrame()
@@ -316,7 +364,9 @@ def data_interpolations(
316
364
  Final_df["date"] = pd.to_datetime(Final_df["date"], format="%Y-%m-%d")
317
365
  start_date = Final_df["date"].iloc[0]
318
366
  end_date = Final_df["date"].iloc[-1]
319
- date_rng_TSS_1 = pd.date_range(start=start_date, end=end_date, freq="D")
367
+ date_rng_TSS_1 = pd.date_range(
368
+ start=start_date, end=end_date, freq="D"
369
+ )
320
370
  # Create a data frame with a date column
321
371
  Data_df = pd.DataFrame(date_rng_TSS_1, columns=["date"])
322
372
  Data_len = len(Data_df.index)
@@ -328,7 +378,9 @@ def data_interpolations(
328
378
  for i in range(1, Data_len):
329
379
  Cum_days[i] = Cum_days[i - 1] + 1
330
380
  # Data_daily[i] = interpolate.interp1d(Final_df['Days'], Final_df['TSS'] , kind = 'linear')(Cum_days[i])
331
- Data_daily[i] = np.interp(Cum_days[i], Final_df["Days_cum"], Final_df["Data"])
381
+ Data_daily[i] = np.interp(
382
+ Cum_days[i], Final_df["Days_cum"], Final_df["Data"]
383
+ )
332
384
  Data_df["Data"] = Data_daily
333
385
  Data_df.to_csv(f"{workspace}/{name}_Interpolated.csv", index=False)
334
386
 
@@ -341,11 +393,17 @@ def interpolate_all(workspace: str, d: dict = INTERP_DICT) -> None:
341
393
  d (dict, optional): Dict with parameter key, units, and station IDs. Defaults to INTERP_DICT.
342
394
  """
343
395
  for param, values in d.items():
344
- print(f"Interpolating parameter: {param} for station IDs: {values['station_ids']}.")
345
- data_interpolations(workspace, param, values["units"], values["station_ids"])
396
+ print(
397
+ f"Interpolating parameter: {param} for station IDs: {values['station_ids']}."
398
+ )
399
+ data_interpolations(
400
+ workspace, param, values["units"], values["station_ids"]
401
+ )
346
402
 
347
403
 
348
- def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str = "nu.csv"):
404
+ def kinematic_viscosity(
405
+ workspace: str, in_file_name: str, out_file_name: str = "nu.csv"
406
+ ):
349
407
  # Read Mean H2O_T in LO
350
408
  LO_Temp = pd.read_csv(os.path.join(workspace, in_file_name))
351
409
  LO_T = LO_Temp["Water_T"]
@@ -354,13 +412,23 @@ def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str =
354
412
 
355
413
  class nu_Func:
356
414
  def nu(T):
357
- nu20 = 1.0034 / 1e6 # m2/s (kinematic viscosity of water at T = 20 C)
415
+ nu20 = (
416
+ 1.0034 / 1e6
417
+ ) # m2/s (kinematic viscosity of water at T = 20 C)
358
418
 
359
419
  def func(x):
360
420
  # return[log(x[0]/nu20)-((20-T)/(T+96))*(1.2364-1.37E-3*(20-T)+5.7E-6*(20-T)**2)]
361
421
  return [
362
422
  (x[0] / nu20)
363
- - 10 ** (((20 - T) / (T + 96)) * (1.2364 - 1.37e-3 * (20 - T) + 5.7e-6 * (20 - T) ** 2))
423
+ - 10
424
+ ** (
425
+ ((20 - T) / (T + 96))
426
+ * (
427
+ 1.2364
428
+ - 1.37e-3 * (20 - T)
429
+ + 5.7e-6 * (20 - T) ** 2
430
+ )
431
+ )
364
432
  ]
365
433
 
366
434
  sol = fsolve(func, [9.70238995692062e-07])
@@ -407,7 +475,11 @@ def wind_induced_waves(
407
475
  (
408
476
  0.283
409
477
  * np.tanh(0.53 * (g * d / WS**2) ** 0.75)
410
- * np.tanh(0.00565 * (g * F / WS**2) ** 0.5 / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8)))
478
+ * np.tanh(
479
+ 0.00565
480
+ * (g * F / WS**2) ** 0.5
481
+ / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8))
482
+ )
411
483
  )
412
484
  * WS**2
413
485
  / g
@@ -419,7 +491,11 @@ def wind_induced_waves(
419
491
  (
420
492
  7.54
421
493
  * np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
422
- * np.tanh(0.0379 * (g * F / WS**2) ** 0.5 / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8)))
494
+ * np.tanh(
495
+ 0.0379
496
+ * (g * F / WS**2) ** 0.5
497
+ / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
498
+ )
423
499
  )
424
500
  * WS
425
501
  / g
@@ -428,7 +504,10 @@ def wind_induced_waves(
428
504
 
429
505
  def L(g, d, T):
430
506
  def func(x):
431
- return [(g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0]) - x[0]]
507
+ return [
508
+ (g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0])
509
+ - x[0]
510
+ ]
432
511
 
433
512
  sol = fsolve(func, [1])
434
513
  L = sol[0]
@@ -443,12 +522,18 @@ def wind_induced_waves(
443
522
  W_T[i] = Wind_Func.T(g, LO_Wd[i], F, LO_WS["WS_mps"].iloc[i])
444
523
  W_L[i] = Wind_Func.L(g, LO_Wd[i], W_T[i])
445
524
  W_ShearStress[i] = (
446
- W_H[i] * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5) / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
525
+ W_H[i]
526
+ * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5)
527
+ / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
447
528
  )
448
529
 
449
530
  Wind_ShearStress = pd.DataFrame(LO_WS["date"], columns=["date"])
450
- Wind_ShearStress["ShearStress"] = W_ShearStress * 10 # Convert N/m2 to Dyne/cm2
451
- Wind_ShearStress.to_csv(os.path.join(output_dir, wind_shear_stress_out), index=False)
531
+ Wind_ShearStress["ShearStress"] = (
532
+ W_ShearStress * 10
533
+ ) # Convert N/m2 to Dyne/cm2
534
+ Wind_ShearStress.to_csv(
535
+ os.path.join(output_dir, wind_shear_stress_out), index=False
536
+ )
452
537
 
453
538
  # # Monthly
454
539
  # Wind_ShearStress['Date'] = pd.to_datetime(Wind_ShearStress['Date'])
@@ -484,8 +569,12 @@ def wind_induced_waves(
484
569
  Current_Stress[i] = Current_bottom_shear_stress(ru, Wind_Stress[i])
485
570
 
486
571
  Current_ShearStress_df = pd.DataFrame(LO_WS["date"], columns=["date"])
487
- Current_ShearStress_df["Current_Stress"] = Current_Stress * 10 # Convert N/m2 to Dyne/cm2
488
- Current_ShearStress_df["Wind_Stress"] = Wind_Stress * 10 # Convert N/m2 to Dyne/cm2
572
+ Current_ShearStress_df["Current_Stress"] = (
573
+ Current_Stress * 10
574
+ ) # Convert N/m2 to Dyne/cm2
575
+ Current_ShearStress_df["Wind_Stress"] = (
576
+ Wind_Stress * 10
577
+ ) # Convert N/m2 to Dyne/cm2
489
578
  Current_ShearStress_df["Wind_Speed_m/s"] = LO_WS["WS_mps"]
490
579
 
491
580
  def Current_bottom_shear_stress_2(u, k, nu, ks, z, ru):
@@ -500,7 +589,10 @@ def wind_induced_waves(
500
589
  sol2 = fsolve(func2, [1])
501
590
 
502
591
  def func3(u_str3):
503
- return [u_str3[0] - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))]
592
+ return [
593
+ u_str3[0]
594
+ - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))
595
+ ]
504
596
 
505
597
  sol3 = fsolve(func3, [1])
506
598
  if sol1[0] * ks / nu <= 5:
@@ -514,7 +606,9 @@ def wind_induced_waves(
514
606
 
515
607
  def Current_bottom_shear_stress_3(u, k, nu, ks, z, ru):
516
608
  def func1(u_str1):
517
- return [u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))]
609
+ return [
610
+ u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))
611
+ ]
518
612
 
519
613
  sol1 = fsolve(func1, [1])
520
614
 
@@ -524,7 +618,12 @@ def wind_induced_waves(
524
618
  sol2 = fsolve(func2, [1])
525
619
 
526
620
  def func3(u_str3):
527
- return [u_str3[0] - u * k * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))]
621
+ return [
622
+ u_str3[0]
623
+ - u
624
+ * k
625
+ * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))
626
+ ]
528
627
 
529
628
  sol3 = fsolve(func3, [1])
530
629
  if sol1[0] * ks / nu <= 5:
@@ -541,22 +640,34 @@ def wind_induced_waves(
541
640
  ks = 5.27e-4 # m
542
641
  current_stress_3 = np.zeros(n, dtype=object)
543
642
  for i in range(n):
544
- current_stress_3[i] = Current_bottom_shear_stress_3(0.05, 0.41, nu, ks, LO_Wd[i], ru)
545
- Current_ShearStress_df["Current_Stress_3"] = current_stress_3 * 10 # Convert N/m2 to Dyne/cm2
546
- Current_ShearStress_df.to_csv(os.path.join(output_dir, current_shear_stress_out), index=False)
643
+ current_stress_3[i] = Current_bottom_shear_stress_3(
644
+ 0.05, 0.41, nu, ks, LO_Wd[i], ru
645
+ )
646
+ Current_ShearStress_df["Current_Stress_3"] = (
647
+ current_stress_3 * 10
648
+ ) # Convert N/m2 to Dyne/cm2
649
+ Current_ShearStress_df.to_csv(
650
+ os.path.join(output_dir, current_shear_stress_out), index=False
651
+ )
547
652
 
548
653
 
549
- def stg2sto(stg_sto_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
654
+ def stg2sto(
655
+ stg_sto_data_path: str, v: pd.Series, i: int
656
+ ) -> interpolate.interp1d:
550
657
  stgsto_data = pd.read_csv(stg_sto_data_path)
551
658
  # NOTE: We Can use cubic interpolation instead of linear
552
659
  x = stgsto_data["Stage"]
553
660
  y = stgsto_data["Storage"]
554
661
  if i == 0:
555
662
  # return storage given stage
556
- return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
663
+ return interpolate.interp1d(
664
+ x, y, fill_value="extrapolate", kind="linear"
665
+ )(v)
557
666
  else:
558
667
  # return stage given storage
559
- return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
668
+ return interpolate.interp1d(
669
+ y, x, fill_value="extrapolate", kind="linear"
670
+ )(v)
560
671
 
561
672
 
562
673
  def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
@@ -569,10 +680,14 @@ def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
569
680
  y = stgar_data["Surf_Area"]
570
681
  if i == 0:
571
682
  # return surface area given stage
572
- return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
683
+ return interpolate.interp1d(
684
+ x, y, fill_value="extrapolate", kind="linear"
685
+ )(v)
573
686
  else:
574
687
  # return stage given surface area
575
- return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
688
+ return interpolate.interp1d(
689
+ y, x, fill_value="extrapolate", kind="linear"
690
+ )(v)
576
691
 
577
692
 
578
693
  @retry(Exception, tries=3, delay=15, backoff=2)
@@ -580,20 +695,27 @@ def get_pi(workspace: str) -> None:
580
695
  # Weekly data is downloaded from:
581
696
  # https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv
582
697
  # State:Florida Division:4.South Central
583
- df = pd.read_csv("https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv")
698
+ df = pd.read_csv(
699
+ "https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv"
700
+ )
584
701
  df.to_csv(os.path.join(workspace, "PI.csv"))
585
702
 
586
703
 
587
704
  def nutrient_prediction(
588
- input_dir: str, output_dir: str, station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS, constants: dict = DEFAULT_EXPFUNC_CONSTANTS
705
+ input_dir: str,
706
+ output_dir: str,
707
+ station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS,
708
+ constants: dict = DEFAULT_EXPFUNC_CONSTANTS,
589
709
  ) -> None:
590
710
  for station in station_ids:
591
711
  print(f"Predicting nutrient loads for station: {station}.")
592
712
  # Construct paths for flow file
593
- flow_file_path = ''
713
+ flow_file_path = ""
594
714
  flow_file_path_exists = True
595
715
  try:
596
- flow_file_path = glob(os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv"))[0]
716
+ flow_file_path = glob(
717
+ os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv")
718
+ )[0]
597
719
  except Exception as e:
598
720
  flow_file_path_exists = False
599
721
 
@@ -603,7 +725,9 @@ def nutrient_prediction(
603
725
  flow = pd.read_csv(flow_file_path)
604
726
  else:
605
727
  # If it doesn't exist, skip to the next iteration of the loop
606
- print(f'Skipping nutrient prediction for station: {station}. Flow file does not exist.')
728
+ print(
729
+ f"Skipping nutrient prediction for station: {station}. Flow file does not exist."
730
+ )
607
731
  continue
608
732
 
609
733
  # Create structures to hold resulting data
@@ -615,6 +739,7 @@ def nutrient_prediction(
615
739
  if "ensemble" not in column_name:
616
740
  continue
617
741
  import warnings
742
+
618
743
  warnings.filterwarnings("error")
619
744
 
620
745
  try:
@@ -623,16 +748,22 @@ def nutrient_prediction(
623
748
 
624
749
  # Calculate the logarithm of the flow data
625
750
 
626
- Q_Log = np.log(flow_column + 1e-8) # Add a small number to prevent log(0) errors
751
+ Q_Log = np.log(
752
+ flow_column + 1e-8
753
+ ) # Add a small number to prevent log(0) errors
627
754
 
628
755
  # Calculate the predicted TP loads using the logarithm of the flow data
629
- TP_Loads_Predicted_Log = constants[station]["a"] * Q_Log ** constants[station]["b"]
756
+ TP_Loads_Predicted_Log = (
757
+ constants[station]["a"] * Q_Log ** constants[station]["b"]
758
+ )
630
759
 
631
760
  # Calculate the predicted TP loads using the exponential of the predicted TP loads logarithm
632
761
  predicted_column = np.exp(TP_Loads_Predicted_Log)
633
762
 
634
763
  # Store prediction data in a pandas DataFrame (So we can concat all ensemble data into one dataframe)
635
- predicted_column = pd.DataFrame(predicted_column.tolist(), index=flow["date"].copy())
764
+ predicted_column = pd.DataFrame(
765
+ predicted_column.tolist(), index=flow["date"].copy()
766
+ )
636
767
  predicted_column.columns = [column_name]
637
768
 
638
769
  prediction_columns.append(predicted_column)
@@ -642,31 +773,99 @@ def nutrient_prediction(
642
773
 
643
774
  # Concat individual ensemble columns together into one pandas DataFrame
644
775
  out_dataframe = pd.concat(objs=prediction_columns, axis="columns")
645
-
646
- column_mean = out_dataframe.mean(axis='columns')
647
- column_percentile_25 = out_dataframe.quantile(q=0.25, axis='columns')
648
- column_percentile_75 = out_dataframe.quantile(q=0.75, axis='columns')
649
- column_median = out_dataframe.median(axis='columns')
650
- column_std = out_dataframe.std(axis='columns')
651
-
652
- out_dataframe['mean'] = column_mean
653
- out_dataframe['percentile_25'] = column_percentile_25
654
- out_dataframe['percentile_75'] = column_percentile_75
655
- out_dataframe['median'] = column_median
656
- out_dataframe['standard_deviation'] = column_std
776
+
777
+ column_mean = out_dataframe.mean(axis="columns")
778
+ column_percentile_25 = out_dataframe.quantile(q=0.25, axis="columns")
779
+ column_percentile_75 = out_dataframe.quantile(q=0.75, axis="columns")
780
+ column_median = out_dataframe.median(axis="columns")
781
+ column_std = out_dataframe.std(axis="columns")
782
+
783
+ out_dataframe["mean"] = column_mean
784
+ out_dataframe["percentile_25"] = column_percentile_25
785
+ out_dataframe["percentile_75"] = column_percentile_75
786
+ out_dataframe["median"] = column_median
787
+ out_dataframe["standard_deviation"] = column_std
657
788
 
658
789
  # Save the predicted TP loads to a CSV file
659
- out_dataframe.to_csv(os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv"))
660
-
790
+ out_dataframe.to_csv(
791
+ os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv")
792
+ )
793
+
661
794
  # Save the predicted TP loads to a CSV file (in input_dir)
662
795
  # Output is needed in input_dir by GEOGLOWS_LOONE_DATA_PREP.py and in output_dir for graph visualization in the app
663
- out_dataframe.to_csv(os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv"))
796
+ out_dataframe.to_csv(
797
+ os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv")
798
+ )
799
+
800
+
801
+ def photo_period(
802
+ workspace: str,
803
+ phi: float = 26.982052,
804
+ doy: np.ndarray = np.arange(1, 365),
805
+ verbose: bool = False,
806
+ ):
807
+ """Generate PhotoPeriod.csv file for the given latitude and days of the year.
808
+
809
+ Args:
810
+ workspace (str): A path to the directory where the file will be generated.
811
+ phi (float, optional): Latitude of the location. Defaults to 26.982052.
812
+ doy (np.ndarray, optional): An array holding the days of the year that you want the photo period for. Defaults to np.arange(1,365).
813
+ verbose (bool, optional): Print results of each computation. Defaults to False.
814
+ """
815
+ phi = np.radians(phi) # Convert to radians
816
+ light_intensity = 2.206 * 10**-3
817
+
818
+ C = np.sin(np.radians(23.44)) # sin of the obliquity of 23.44 degrees.
819
+ B = -4.76 - 1.03 * np.log(
820
+ light_intensity
821
+ ) # Eq. [5]. Angle of the sun below the horizon. Civil twilight is -4.76 degrees.
822
+
823
+ # Calculations
824
+ alpha = np.radians(90 + B) # Eq. [6]. Value at sunrise and sunset.
825
+ M = 0.9856 * doy - 3.251 # Eq. [4].
826
+ lmd = (
827
+ M
828
+ + 1.916 * np.sin(np.radians(M))
829
+ + 0.020 * np.sin(np.radians(2 * M))
830
+ + 282.565
831
+ ) # Eq. [3]. Lambda
832
+ delta = np.arcsin(C * np.sin(np.radians(lmd))) # Eq. [2].
833
+
834
+ # Defining sec(x) = 1/cos(x)
835
+ P = (
836
+ 2
837
+ / 15
838
+ * np.degrees(
839
+ np.arccos(
840
+ np.cos(alpha) * (1 / np.cos(phi)) * (1 / np.cos(delta))
841
+ - np.tan(phi) * np.tan(delta)
842
+ )
843
+ )
844
+ ) # Eq. [1].
845
+
846
+ # Print results in order for each computation to match example in paper
847
+ if verbose:
848
+ print("Input latitude =", np.degrees(phi))
849
+ print("[Eq 5] B =", B)
850
+ print("[Eq 6] alpha =", np.degrees(alpha))
851
+ print("[Eq 4] M =", M[0])
852
+ print("[Eq 3] Lambda =", lmd[0])
853
+ print("[Eq 2] delta=", np.degrees(delta[0]))
854
+ print("[Eq 1] Daylength =", P[0])
855
+
856
+ photo_period_df = pd.DataFrame()
857
+ photo_period_df["Day"] = doy
858
+ photo_period_df["Data"] = P
859
+
860
+ photo_period_df.to_csv(
861
+ os.path.join(workspace, "PhotoPeriod.csv"), index=False
862
+ )
664
863
 
665
864
 
666
865
  def find_last_date_in_csv(workspace: str, file_name: str) -> str:
667
866
  """
668
867
  Gets the most recent date from the last line of a .csv file.
669
- Assumes the file is formatted as a .csv file, encoded in UTF-8,
868
+ Assumes the file is formatted as a .csv file, encoded in UTF-8,
670
869
  and the rows in the file are sorted by date in ascending order.
671
870
 
672
871
  Args:
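
A quick example of calling the new photo_period helper added in the hunk above (the workspace path is illustrative):

    import numpy as np

    # Writes PhotoPeriod.csv (columns Day, Data) into the workspace directory.
    photo_period("/path/to/workspace", phi=26.982052, doy=np.arange(1, 366), verbose=True)
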
@@ -676,40 +875,41 @@ def find_last_date_in_csv(workspace: str, file_name: str) -> str:
676
875
  Returns:
677
876
  str: The most recent date as a string in YYYY-MM-DD format, or None if the file does not exist or the date cannot be found.
678
877
  """
878
+
679
879
  # Helper Functions
680
880
  def is_valid_date(date_string):
681
881
  try:
682
- datetime.datetime.strptime(date_string, '%Y-%m-%d')
882
+ datetime.datetime.strptime(date_string, "%Y-%m-%d")
683
883
  return True
684
884
  except ValueError:
685
885
  return False
686
-
886
+
687
887
  # Check that file exists
688
888
  file_path = os.path.join(workspace, file_name)
689
889
  if not os.path.exists(file_path):
690
890
  return None
691
-
891
+
692
892
  # Attempt to extract the date of the last line in the file
693
893
  try:
694
- with open(file_path, 'rb') as file:
894
+ with open(file_path, "rb") as file:
695
895
  # Go to the end of the file
696
896
  file.seek(-2, os.SEEK_END)
697
-
897
+
698
898
  # Loop backwards until you find the first newline character
699
- while file.read(1) != b'\n':
899
+ while file.read(1) != b"\n":
700
900
  file.seek(-2, os.SEEK_CUR)
701
-
901
+
702
902
  # Read the last line
703
903
  last_line = file.readline().decode()
704
-
904
+
705
905
  # Extract the date from the last line
706
906
  date = None
707
-
708
- for value in last_line.split(','):
907
+
908
+ for value in last_line.split(","):
709
909
  if is_valid_date(value):
710
910
  date = value
711
911
  break
712
-
912
+
713
913
  # Return date
714
914
  return date
715
915
  except OSError as e:
@@ -721,20 +921,26 @@ def dbhydro_data_is_latest(date_latest: str):
721
921
  """
722
922
  Checks whether the given date is the most recent date possible to get data from dbhydro.
723
923
  Can be used to check whether dbhydro data is up-to-date.
724
-
924
+
725
925
  Args:
726
926
  date_latest (str): The date of the most recent data of the dbhydro data you have
727
-
927
+
728
928
  Returns:
729
929
  bool: True if the date_latest is the most recent date possible to get data from dbhydro, False otherwise
730
930
  """
731
- date_latest_object = datetime.datetime.strptime(date_latest, "%Y-%m-%d").date()
732
- return date_latest_object == (datetime.datetime.now().date() - datetime.timedelta(days=1))
931
+ date_latest_object = datetime.datetime.strptime(
932
+ date_latest, "%Y-%m-%d"
933
+ ).date()
934
+ return date_latest_object == (
935
+ datetime.datetime.now().date() - datetime.timedelta(days=1)
936
+ )
733
937
 
734
938
 
735
939
  if __name__ == "__main__":
736
940
  if sys.argv[1] == "get_dbkeys":
737
- get_dbkeys(sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:])
941
+ get_dbkeys(
942
+ sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:]
943
+ )
738
944
  elif sys.argv[1] == "data_interp":
739
945
  interp_args = [x for x in sys.argv[2:]]
740
946
  interp_args[0] = interp_args[0].rstrip("/")
@@ -746,7 +952,9 @@ if __name__ == "__main__":
746
952
  elif sys.argv[1] == "kinematic_viscosity":
747
953
  kinematic_viscosity(sys.argv[2].rstrip("/"), *sys.argv[3:])
748
954
  elif sys.argv[1] == "wind_induced_waves":
749
- wind_induced_waves(sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:])
955
+ wind_induced_waves(
956
+ sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:]
957
+ )
750
958
  elif sys.argv[1] == "get_pi":
751
959
  get_pi(sys.argv[2].rstrip("/"))
752
960
  elif sys.argv[1] == "nutrient_prediction":
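
The __main__ block of utils.py dispatches on the first CLI argument (e.g. get_dbkeys, data_interp, kinematic_viscosity, wind_induced_waves, get_pi, nutrient_prediction), stripping trailing slashes from path arguments before calling the matching helper, e.g. python loone_data_prep/utils.py get_pi /path/to/workspace.
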
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: loone_data_prep
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Prepare data to run the LOONE model.
5
5
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
6
6
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
@@ -20,6 +20,7 @@ Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: rpy2
22
22
  Requires-Dist: retry
23
+ Requires-Dist: numpy <2
23
24
  Requires-Dist: pandas
24
25
  Requires-Dist: scipy
25
26
  Requires-Dist: geoglows ==0.27.1
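
Besides the version bump, the only metadata change is the new "numpy <2" requirement. The diff itself does not state the motivation, but the pin is consistent with the np.NaN spelling still used in utils.py (see the data_interpolations hunk above), which NumPy 2.0 removed in favor of np.nan.
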
@@ -1,11 +1,11 @@
1
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=loaMvDU1IgLsz7_eHAVJMtk_pgW_CTHiZE43a0_mZZE,35394
2
- loone_data_prep/LOONE_DATA_PREP.py,sha256=mI0qC03v7LnK56NAWziMjqM8Hc9clYk0auY3kJ7TinQ,59477
1
+ loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=wstZQwb_e2Z117dhvuLPrqyln6Bpb3ZTL0RfnOTvET4,35456
2
+ loone_data_prep/LOONE_DATA_PREP.py,sha256=osaLYlrfTwwUGLwXGypy61BOYBlXnoTPDp09O4Am1ZE,67761
3
3
  loone_data_prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  loone_data_prep/data_analyses_fns.py,sha256=BZ7famrSKoUfExQvZfbl72CyADHLb-zzgdWZ-kLJxcQ,4603
5
- loone_data_prep/utils.py,sha256=dpaOjtnRStf0wK5CbAkrWb8KoSKfJhDIxwU2Hc0ESC4,27532
5
+ loone_data_prep/utils.py,sha256=Jsa08iaD04C-BqK0K5BHgRFZEOqp6f_dcJSjPgcz1zA,31575
6
6
  loone_data_prep/flow_data/S65E_total.py,sha256=szNUfj0EyyyDzuKNhTGAZtWc5owiOpxYS55YTt4u19k,2835
7
7
  loone_data_prep/flow_data/__init__.py,sha256=u7fENFUZsJjyl13Bc9ZE47sHMKmjxtqXhV9t7vDTm7Y,93
8
- loone_data_prep/flow_data/forecast_bias_correction.py,sha256=pABmNWWF96JDfjl3u314ORSskGbWaPgz8ZgM8FdEwvE,3752
8
+ loone_data_prep/flow_data/forecast_bias_correction.py,sha256=ydoZ0UmDZvsPLHsO7cpCFN9Pmj7w_tKjMDy9RK5EoiM,10146
9
9
  loone_data_prep/flow_data/get_forecast_flows.py,sha256=-nPkscE9UZbRzGZ_dk0zhKiNM2hOINx21HgSeQrFjaU,14462
10
10
  loone_data_prep/flow_data/get_inflows.py,sha256=xKuSyJBdPrpjqMdRiyNDyxwdhYVIgLhiTP0k_1I1uWI,6456
11
11
  loone_data_prep/flow_data/get_outflows.py,sha256=x7aisIkbXoTkcubFQLDghX-P8lztPq-tU0dQzoVRTtQ,5620
@@ -20,8 +20,8 @@ loone_data_prep/water_quality_data/wq.py,sha256=sl6G3iDCk6QUzpHTXPHpRZNMBG0-wHuc
20
20
  loone_data_prep/weather_data/__init__.py,sha256=TX58EPgGRzEK_LmLze79lC4L7kU_j3yZf5_iC4nOIP4,45
21
21
  loone_data_prep/weather_data/get_all.py,sha256=aCufuxORU51XhXt7LN9wN_V4qtjNt1qRC1UKlI2b3Ko,6918
22
22
  loone_data_prep/weather_data/weather.py,sha256=hvceksrGSnDkCjheBVBuPgY1DrdR0ZAtrFB-K2tYTtk,12043
23
- loone_data_prep-0.1.7.dist-info/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
24
- loone_data_prep-0.1.7.dist-info/METADATA,sha256=p8nGiYP4g4D6q2v0i9cHrP0nTsnFaJxJHXjSY_9n9s8,4098
25
- loone_data_prep-0.1.7.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
26
- loone_data_prep-0.1.7.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
27
- loone_data_prep-0.1.7.dist-info/RECORD,,
23
+ loone_data_prep-0.1.8.dist-info/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
24
+ loone_data_prep-0.1.8.dist-info/METADATA,sha256=WB5Nk0uuAtv55-zdjaLRZjn9qbMg1H34Yp5Qe2LpKbc,4122
25
+ loone_data_prep-0.1.8.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
26
+ loone_data_prep-0.1.8.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
27
+ loone_data_prep-0.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.3.0)
2
+ Generator: setuptools (75.5.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5