loone-data-prep 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry. It is provided for informational purposes only.
@@ -45,10 +45,11 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
45
45
  LO_Stage = pd.read_csv(f"{input_dir}/LO_Stage.csv")
46
46
  # Create Column (EOD Stg(ft, NGVD)) in File (SFWMM_Daily_Outputs)
47
47
  LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
48
+ LO_Stage.index = LO_Stage["date"]
48
49
  # Calculate average
49
50
  if "Average_Stage" not in LO_Stage.columns:
50
51
  LO_Stage = LO_Stage.loc[:, ~LO_Stage.columns.str.contains("^Unnamed")]
51
- LO_Stage["Average_Stage"] = LO_Stage.mean(axis=1)
52
+ LO_Stage["Average_Stage"] = LO_Stage.drop(columns=['date']).mean(axis=1)
52
53
  LO_Stage.to_csv(f"{input_dir}/LO_Stage.csv", index=False)
53
54
  LO_Storage = stg2sto(f"{input_dir}/StgSto_data.csv", LO_Stage["Average_Stage"], 0)
54
55
  LO_SA = stg2ar(f"{input_dir}/Stgar_data.csv", LO_Stage["Average_Stage"], 0)
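
The hunk above indexes LO_Stage by its date column and drops that column before the row-wise mean, so Average_Stage is computed over the stage columns only. A minimal sketch of the same pattern on synthetic data (column names invented, not the package's):

    import pandas as pd

    df = pd.DataFrame({
        "date": pd.date_range("2024-01-01", periods=3, freq="D"),
        "stage_a": [14.1, 14.3, 14.2],
        "stage_b": [14.0, 14.4, 14.1],
    })
    df.index = df["date"]
    # Drop the non-numeric "date" column first; leaving it in makes
    # mean(axis=1) raise or silently skip it, depending on the pandas version.
    df["Average_Stage"] = df.drop(columns=["date"]).mean(axis=1)
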
@@ -580,6 +580,18 @@ def main(input_dir: str, output_dir: str) -> None:
580
580
  LO_OP_data_Inter['Mean_OP'] = LO_OP_data_Inter.mean(axis=1, numeric_only=True)
581
581
  LO_OP_data_Inter = DF_Date_Range(LO_OP_data_Inter, St_Yr, St_M, St_D, En_Yr, En_M, En_D)
582
582
  LO_OP_data_Inter.to_csv(f'{output_dir}/LO_OP.csv', index=False)
583
+
584
+ # Create File (N_OP) (L001, L005, L008)
585
+ n_op = LO_OP_data_Inter[['date', 'Data_L001_OP_Inter', 'Data_L005_OP_Inter', 'Data_L008_OP_Inter']]
586
+ n_op['OP'] = n_op.mean(axis=1, numeric_only=True) * 1000 # mg/L to mg/m3
587
+ n_op.drop(['Data_L001_OP_Inter', 'Data_L005_OP_Inter', 'Data_L008_OP_Inter'], axis=1, inplace=True)
588
+ n_op.to_csv(f'{output_dir}/N_OP.csv', index=False)
589
+
590
+ # Create File (S_OP) (L004, L006, L007, L008, and LZ40)
591
+ s_op = LO_OP_data_Inter[['date', 'Data_L004_OP_Inter', 'Data_L006_OP_Inter', 'Data_L007_OP_Inter', 'Data_L008_OP_Inter', 'Data_LZ40_OP_Inter']]
592
+ s_op['OP'] = s_op.mean(axis=1, numeric_only=True) * 1000 # mg/L to mg/m3
593
+ s_op.drop(['Data_L004_OP_Inter', 'Data_L006_OP_Inter', 'Data_L007_OP_Inter', 'Data_L008_OP_Inter', 'Data_LZ40_OP_Inter'], axis=1, inplace=True)
594
+ s_op.to_csv(f'{output_dir}/S_OP.csv', index=False)
583
595
 
584
596
  # Interpolated NH4 Observations in Lake
585
597
  # Create File (LO_Avg_NH4)
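
The new N_OP/S_OP blocks average the ortho-phosphate columns for the north stations (L001, L005, L008) and the south stations (L004, L006, L007, L008, LZ40) and scale mg/L to mg/m3. Because n_op and s_op are column slices of LO_OP_data_Inter, the in-place assignments can trigger pandas' SettingWithCopyWarning; a sketch of the same computation on an explicit copy (illustrative only, not the released code):

    n_op = LO_OP_data_Inter[["date", "Data_L001_OP_Inter", "Data_L005_OP_Inter", "Data_L008_OP_Inter"]].copy()
    n_op["OP"] = n_op.mean(axis=1, numeric_only=True) * 1000  # mg/L -> mg/m3
    n_op = n_op[["date", "OP"]]
    n_op.to_csv(f"{output_dir}/N_OP.csv", index=False)
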
@@ -662,6 +674,22 @@ def main(input_dir: str, output_dir: str) -> None:
662
674
  LO_DIN['NO'] = LO_NO_Clean_Inter['Mean_NO'].values
663
675
  LO_DIN['DIN_mg/m3'] = LO_DIN[['NH4', 'NO']].sum(axis=1)*1000
664
676
  LO_DIN.to_csv(f'{output_dir}/LO_DIN.csv', index=False)
677
+
678
+ # Create File (N_DIN) (L001, L005, L008)
679
+ n_din = pd.DataFrame(date_DIN, columns=['date'])
680
+ n_din.set_index('date', inplace=True)
681
+ n_din['NH4'] = LO_NH4_Clean_Inter[['date', 'Data_L001_NH4_Inter', 'Data_L005_NH4_Inter', 'Data_L008_NH4_Inter']].mean(axis=1, numeric_only=True)
682
+ n_din['NO'] = LO_NO_Clean_Inter[['date', 'Data_L001_NO_Inter', 'Data_L005_NO_Inter', 'Data_L008_NO_Inter']].mean(axis=1, numeric_only=True)*1000 # mg/L to mg/m3
683
+ n_din['DIN'] = n_din[['NH4', 'NO']].sum(axis=1)*1000 # mg/L to mg/m3
684
+ n_din.to_csv(f'{output_dir}/N_DIN.csv')
685
+
686
+ # Create File (S_DIN) (L004, L006, L007, L008, LZ40)
687
+ s_din = pd.DataFrame(date_DIN, columns=['date'])
688
+ s_din.set_index('date', inplace=True)
689
+ s_din['NH4'] = LO_NH4_Clean_Inter[['date', 'Data_L004_NH4_Inter', 'Data_L006_NH4_Inter', 'Data_L007_NH4_Inter', 'Data_L008_NH4_Inter', 'Data_LZ40_NH4_Inter']].mean(axis=1, numeric_only=True)
690
+ s_din['NO'] = LO_NO_Clean_Inter[['date', 'Data_L004_NO_Inter', 'Data_L006_NO_Inter', 'Data_L007_NO_Inter', 'Data_L008_NO_Inter', 'Data_LZ40_NO_Inter']].mean(axis=1, numeric_only=True)*1000 # mg/L to mg/m3
691
+ s_din['DIN'] = s_din[['NH4', 'NO']].sum(axis=1)*1000 # mg/L to mg/m3
692
+ s_din.to_csv(f'{output_dir}/S_DIN.csv')
665
693
 
666
694
  # Interpolated DO Observations in Lake
667
695
  # Create File (LO_Avg_DO)
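
The N_DIN/S_DIN blocks mirror the OP blocks, averaging the same north and south station groups from LO_NH4_Clean_Inter and LO_NO_Clean_Inter; the "date" entry in each column list is skipped by mean(..., numeric_only=True). Unlike N_OP/S_OP, these frames are indexed by date before to_csv, so the date is written as the index column rather than suppressed with index=False.
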
@@ -821,6 +849,93 @@ def main(input_dir: str, output_dir: str) -> None:
821
849
  LO_Chla_Merge_Monthly_Inter = LO_Chla_Merge.resample('M').mean()
822
850
  LO_Chla_Merge_Monthly_Inter.to_csv(f'{output_dir}/LO_Chla_Merge_Monthly_Inter.csv')
823
851
 
852
+ # Create files (LO_Chla_Obs.csv, N_Merged_Chla.csv, and S_Merged_Chla.csv)
853
+ L001_Chla = pd.read_csv(f'{input_dir}/water_quality_L001_CHLOROPHYLL-A, CORRECTED.csv')
854
+ L001_Chla.drop(columns=['days'], inplace=True)
855
+ L004_Chla = pd.read_csv(f'{input_dir}/water_quality_L004_CHLOROPHYLL-A, CORRECTED.csv')
856
+ L004_Chla.drop(columns=['days'], inplace=True)
857
+ L005_Chla = pd.read_csv(f'{input_dir}/water_quality_L005_CHLOROPHYLL-A, CORRECTED.csv')
858
+ L005_Chla.drop(columns=['days'], inplace=True)
859
+ L006_Chla = pd.read_csv(f'{input_dir}/water_quality_L006_CHLOROPHYLL-A, CORRECTED.csv')
860
+ L006_Chla.drop(columns=['days'], inplace=True)
861
+ L007_Chla = pd.read_csv(f'{input_dir}/water_quality_L007_CHLOROPHYLL-A, CORRECTED.csv')
862
+ L007_Chla.drop(columns=['days'], inplace=True)
863
+ L008_Chla = pd.read_csv(f'{input_dir}/water_quality_L008_CHLOROPHYLL-A, CORRECTED.csv')
864
+ L008_Chla.drop(columns=['days'], inplace=True)
865
+ LZ40_Chla = pd.read_csv(f'{input_dir}/water_quality_LZ40_CHLOROPHYLL-A, CORRECTED.csv')
866
+ LZ40_Chla.drop(columns=['days'], inplace=True)
867
+ L001_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L001_CHLOROPHYLL-A(LC).csv')
868
+ L001_Chla_LC.drop(columns=['days'], inplace=True)
869
+ L004_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L004_CHLOROPHYLL-A(LC).csv')
870
+ L004_Chla_LC.drop(columns=['days'], inplace=True)
871
+ L005_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L005_CHLOROPHYLL-A(LC).csv')
872
+ L005_Chla_LC.drop(columns=['days'], inplace=True)
873
+ L006_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L006_CHLOROPHYLL-A(LC).csv')
874
+ L006_Chla_LC.drop(columns=['days'], inplace=True)
875
+ L007_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L007_CHLOROPHYLL-A(LC).csv')
876
+ L007_Chla_LC.drop(columns=['days'], inplace=True)
877
+ L008_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_L008_CHLOROPHYLL-A(LC).csv')
878
+ L008_Chla_LC.drop(columns=['days'], inplace=True)
879
+ LZ40_Chla_LC = pd.read_csv(f'{input_dir}/water_quality_LZ40_CHLOROPHYLL-A(LC).csv')
880
+ LZ40_Chla_LC.drop(columns=['days'], inplace=True)
881
+
882
+ LO_Chla = pd.merge(L001_Chla, L004_Chla, how='left', on='date')
883
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
884
+ LO_Chla = pd.merge(LO_Chla, L005_Chla, how='left', on='date')
885
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
886
+ LO_Chla = pd.merge(LO_Chla, L006_Chla, how='left', on='date')
887
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
888
+ LO_Chla = pd.merge(LO_Chla, L007_Chla, how='left', on='date')
889
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
890
+ LO_Chla = pd.merge(LO_Chla, L008_Chla, how='left', on='date')
891
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
892
+ LO_Chla = pd.merge(LO_Chla, LZ40_Chla, how='left', on='date')
893
+ LO_Chla = LO_Chla.loc[:, ~LO_Chla.columns.str.startswith('Unnamed')]
894
+ LO_Chla = LO_Chla.set_index('date')
895
+ LO_Chla['Mean_Chla'] = LO_Chla.mean(axis=1)
896
+ LO_Chla = LO_Chla.reset_index()
897
+ LO_Chla_N_cols = ['L001_CHLOROPHYLL-A, CORRECTED_ug/L', 'L005_CHLOROPHYLL-A, CORRECTED_ug/L', 'L008_CHLOROPHYLL-A, CORRECTED_ug/L']
898
+ LO_Chla['Chla_North'] = LO_Chla[LO_Chla_N_cols].mean(axis=1)
899
+ LO_Chla_S_cols = ['L004_CHLOROPHYLL-A, CORRECTED_ug/L', 'L006_CHLOROPHYLL-A, CORRECTED_ug/L', 'L007_CHLOROPHYLL-A, CORRECTED_ug/L','L008_CHLOROPHYLL-A, CORRECTED_ug/L','LZ40_CHLOROPHYLL-A, CORRECTED_ug/L']
900
+ LO_Chla['Chla_South'] = LO_Chla[LO_Chla_S_cols].mean(axis=1)
901
+
902
+ LO_Chla_LC = pd.merge(L001_Chla_LC, L004_Chla_LC, how='left', on='date')
903
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
904
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L005_Chla_LC, how='left', on='date')
905
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
906
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L006_Chla_LC, how='left', on='date')
907
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
908
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L007_Chla_LC, how='left', on='date')
909
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
910
+ LO_Chla_LC = pd.merge(LO_Chla_LC, L008_Chla_LC, how='left', on='date')
911
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
912
+ LO_Chla_LC = pd.merge(LO_Chla_LC, LZ40_Chla_LC, how='left', on='date')
913
+ LO_Chla_LC = LO_Chla_LC.loc[:, ~LO_Chla_LC.columns.str.startswith('Unnamed')]
914
+ LO_Chla_LC = LO_Chla_LC.set_index('date')
915
+ LO_Chla_LC['Mean_Chla'] = LO_Chla_LC.mean(axis=1)
916
+ LO_Chla_LC = LO_Chla_LC.reset_index()
917
+ LO_Chla_LC_N_cols = ['L001_CHLOROPHYLL-A(LC)_ug/L', 'L005_CHLOROPHYLL-A(LC)_ug/L', 'L008_CHLOROPHYLL-A(LC)_ug/L']
918
+ LO_Chla_LC['Chla_North'] = LO_Chla_LC[LO_Chla_LC_N_cols].mean(axis=1)
919
+ LO_Chla_LC_S_cols = ['L004_CHLOROPHYLL-A(LC)_ug/L', 'L006_CHLOROPHYLL-A(LC)_ug/L', 'L007_CHLOROPHYLL-A(LC)_ug/L','L008_CHLOROPHYLL-A(LC)_ug/L','LZ40_CHLOROPHYLL-A(LC)_ug/L']
920
+ LO_Chla_LC['Chla_South'] = LO_Chla_LC[LO_Chla_LC_S_cols].mean(axis=1)
921
+
922
+ LO_Chla = DF_Date_Range(LO_Chla, 2008, 1, 1, 2010, 10, 19)
923
+ LO_Chla_df = pd.DataFrame(LO_Chla['date'], columns=['date'])
924
+ LO_Chla_df['Chla'] = LO_Chla['Mean_Chla']
925
+ LO_Chla_df['Chla_N'] = LO_Chla['Chla_North']
926
+ LO_Chla_df['Chla_S'] = LO_Chla['Chla_South']
927
+
928
+ LO_Chla_LC = DF_Date_Range(LO_Chla_LC, 2010, 10, 20, 2023, 6, 30)
929
+ LO_Chla_LC_df = pd.DataFrame(LO_Chla_LC['date'], columns=['date'])
930
+ LO_Chla_LC_df['Chla'] = LO_Chla_LC['Mean_Chla']
931
+ LO_Chla_LC_df['Chla_N'] = LO_Chla_LC['Chla_North']
932
+ LO_Chla_LC_df['Chla_S'] = LO_Chla_LC['Chla_South']
933
+
934
+ LO_Chla_Merge = pd.concat([LO_Chla_df, LO_Chla_LC_df]).reset_index(drop=True)
935
+ LO_Chla_Merge.to_csv(f'{output_dir}/LO_Chla_Obs.csv')
936
+ LO_Chla_Merge[['date', 'Chla_N']].rename(columns={'Chla_N': 'Chla'}).to_csv(f'{output_dir}/N_Merged_Chla.csv', index=False)
937
+ LO_Chla_Merge[['date', 'Chla_S']].rename(columns={'Chla_S': 'Chla'}).to_csv(f'{output_dir}/S_Merged_Chla.csv', index=False)
938
+
824
939
  # Create Files S65E_Avg_Chla
825
940
  S65E_Chla_Inter = pd.read_csv(f'{input_dir}/water_quality_S65E_CHLOROPHYLL-A, CORRECTED_Interpolated.csv')
826
941
  S65E_Chla_LC_Inter = pd.read_csv(f'{input_dir}/water_quality_S65E_CHLOROPHYLL-A(LC)_Interpolated.csv')
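
The chlorophyll-a block merges the per-station "CHLOROPHYLL-A, CORRECTED" and "CHLOROPHYLL-A(LC)" files on date, computes a lake-wide mean plus north (L001, L005, L008) and south (L004, L006, L007, L008, LZ40) means, and then stitches the two products by date range: the CORRECTED record covers 2008-01-01 through 2010-10-19 and the (LC) record 2010-10-20 through 2023-06-30. The concatenated series is written to LO_Chla_Obs.csv, and the north and south means are renamed to a single Chla column in N_Merged_Chla.csv and S_Merged_Chla.csv.
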
@@ -1,7 +1,10 @@
1
1
  import sys
2
2
  import os
3
+ import math
4
+ import numpy as np
3
5
  import pandas as pd
4
6
  import geoglows
7
+ from scipy import interpolate
5
8
 
6
9
 
7
10
  SECONDS_IN_DAY = 86400
@@ -37,29 +40,42 @@ def get_bias_corrected_data(
37
40
 
38
41
  # Get the historical simulation data for the given reach ID
39
42
  historical_data = None
40
-
43
+
41
44
  if cache_path is None:
42
45
  historical_data = geoglows.streamflow.historic_simulation(reach_id)
43
46
  else:
44
47
  # Create the geoglows cache directory if it doesn't exist
45
- geoglows_cache_path = os.path.join(cache_path, 'geoglows_cache')
48
+ geoglows_cache_path = os.path.join(cache_path, "geoglows_cache")
46
49
  if not os.path.exists(geoglows_cache_path):
47
50
  os.makedirs(geoglows_cache_path)
48
-
51
+
49
52
  # Check if the historical simulation data is already cached
50
- if os.path.exists(os.path.join(geoglows_cache_path, f'{reach_id}_historic_simulation.csv')):
51
- historical_data = pd.read_csv(os.path.join(geoglows_cache_path, f'{reach_id}_historic_simulation.csv'), index_col=0)
53
+ if os.path.exists(
54
+ os.path.join(
55
+ geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
56
+ )
57
+ ):
58
+ historical_data = pd.read_csv(
59
+ os.path.join(
60
+ geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
61
+ ),
62
+ index_col=0,
63
+ )
52
64
  historical_data.index = pd.to_datetime(historical_data.index)
53
65
  else:
54
66
  historical_data = geoglows.streamflow.historic_simulation(reach_id)
55
- historical_data.to_csv(os.path.join(geoglows_cache_path, f'{reach_id}_historic_simulation.csv'))
67
+ historical_data.to_csv(
68
+ os.path.join(
69
+ geoglows_cache_path, f"{reach_id}_historic_simulation.csv"
70
+ )
71
+ )
56
72
 
57
73
  # Correct the forecast bias in the station ensembles
58
- station_ensembles = geoglows.bias.correct_forecast(
74
+ station_ensembles = bias_correct_forecast(
59
75
  station_ensembles, historical_data, prepared_od
60
76
  )
61
77
  # Correct the forecast bias in the station stats
62
- station_stats = geoglows.bias.correct_forecast(
78
+ station_stats = bias_correct_forecast(
63
79
  station_stats, historical_data, prepared_od
64
80
  )
65
81
 
@@ -92,6 +108,175 @@ def prep_observed_data(observed_data: pd.DataFrame) -> pd.DataFrame:
92
108
  return observed_data
93
109
 
94
110
 
111
+ def bias_correct_historical(
112
+ simulated_data: pd.DataFrame, observed_data: pd.DataFrame
113
+ ) -> pd.DataFrame:
114
+ """
115
+ Accepts a historically simulated flow timeseries and observed flow timeseries and attempts to correct biases in the
116
+ simulation on a monthly basis.
117
+
118
+ Args:
119
+ simulated_data: A dataframe with a datetime index and a single column of streamflow values
120
+ observed_data: A dataframe with a datetime index and a single column of streamflow values
121
+
122
+ Returns:
123
+ pandas DataFrame with a datetime index and a single column of streamflow values
124
+ """
125
+ # list of the unique months in the historical simulation. should always be 1->12 but just in case...
126
+ unique_simulation_months = sorted(set(simulated_data.index.strftime("%m")))
127
+ dates = []
128
+ values = []
129
+
130
+ for month in unique_simulation_months:
131
+ # filter historic data to only be current month
132
+ monthly_simulated = simulated_data[
133
+ simulated_data.index.month == int(month)
134
+ ].dropna()
135
+ to_prob = _flow_and_probability_mapper(
136
+ monthly_simulated, to_probability=True
137
+ )
138
+ # filter the observations to current month
139
+ monthly_observed = observed_data[
140
+ observed_data.index.month == int(month)
141
+ ].dropna()
142
+ to_flow = _flow_and_probability_mapper(monthly_observed, to_flow=True)
143
+
144
+ dates += monthly_simulated.index.to_list()
145
+ value = to_flow(to_prob(monthly_simulated.values))
146
+ values += value.tolist()
147
+
148
+ corrected = pd.DataFrame(
149
+ data=values, index=dates, columns=["Corrected Simulated Streamflow"]
150
+ )
151
+ corrected.sort_index(inplace=True)
152
+ return corrected
153
+
154
+
155
+ def bias_correct_forecast(
156
+ forecast_data: pd.DataFrame,
157
+ simulated_data: pd.DataFrame,
158
+ observed_data: pd.DataFrame,
159
+ use_month: int = 0,
160
+ ) -> pd.DataFrame:
161
+ """
162
+ Accepts a short term forecast of streamflow, simulated historical flow, and observed flow timeseries and attempts
163
+ to correct biases in the forecasted data
164
+
165
+ Args:
166
+ forecast_data: A dataframe with a datetime index and any number of columns of forecasted flow. Compatible with
167
+ forecast_stats, forecast_ensembles, forecast_records
168
+ simulated_data: A dataframe with a datetime index and a single column of streamflow values
169
+ observed_data: A dataframe with a datetime index and a single column of streamflow values
170
+ use_month: Optional: either 0 for correct the forecast based on the first month of the forecast data or -1 if
171
+ you want to correct based on the ending month of the forecast data
172
+
173
+ Returns:
174
+ pandas DataFrame with a copy of forecasted data with values updated in each column
175
+ """
176
+ # make a copy of the forecasts which we update and return so the original data is not changed
177
+ forecast_copy = forecast_data.copy()
178
+
179
+ # make the flow and probability interpolation functions
180
+ monthly_simulated = simulated_data[
181
+ simulated_data.index.month == forecast_copy.index[use_month].month
182
+ ].dropna()
183
+ monthly_observed = observed_data[
184
+ observed_data.index.month == forecast_copy.index[use_month].month
185
+ ].dropna()
186
+ to_prob = _flow_and_probability_mapper(
187
+ monthly_simulated, to_probability=True, extrapolate=True
188
+ )
189
+ to_flow = _flow_and_probability_mapper(
190
+ monthly_observed, to_flow=True, extrapolate=True
191
+ )
192
+
193
+ # for each column of forecast data, make the interpolation function and update the dataframe
194
+ for column in forecast_copy.columns:
195
+ tmp = forecast_copy[column].dropna()
196
+ forecast_copy.update(
197
+ pd.DataFrame(
198
+ to_flow(to_prob(tmp.values)), index=tmp.index, columns=[column]
199
+ )
200
+ )
201
+
202
+ return forecast_copy
203
+
204
+
205
+ def _flow_and_probability_mapper(
206
+ monthly_data: pd.DataFrame,
207
+ to_probability: bool = False,
208
+ to_flow: bool = False,
209
+ extrapolate: bool = False,
210
+ ) -> interpolate.interp1d:
211
+ if not to_flow and not to_probability:
212
+ raise ValueError(
213
+ "You need to specify either to_probability or to_flow as True"
214
+ )
215
+
216
+ # get maximum value to bound histogram
217
+ max_val = math.ceil(np.max(monthly_data.max()))
218
+ min_val = math.floor(np.min(monthly_data.min()))
219
+
220
+ if max_val == min_val:
221
+ max_val += 0.1
222
+
223
+ # determine number of histograms bins needed
224
+ number_of_points = len(monthly_data.values)
225
+ number_of_classes = math.ceil(1 + (3.322 * math.log10(number_of_points)))
226
+
227
+ # specify the bin width for histogram (in m3/s)
228
+ step_width = (max_val - min_val) / number_of_classes
229
+
230
+ # specify histogram bins
231
+ bins = np.arange(
232
+ -np.min(step_width),
233
+ max_val + 2 * np.min(step_width),
234
+ np.min(step_width),
235
+ )
236
+
237
+ if bins[0] == 0:
238
+ bins = np.concatenate((-bins[1], bins))
239
+ elif bins[0] > 0:
240
+ bins = np.concatenate((-bins[0], bins))
241
+
242
+ # make the histogram
243
+ counts, bin_edges = np.histogram(monthly_data, bins=bins)
244
+
245
+ # adjust the bins to be the center
246
+ bin_edges = bin_edges[1:]
247
+
248
+ # normalize the histograms
249
+ counts = counts.astype(float) / monthly_data.size
250
+
251
+ # calculate the cdfs
252
+ cdf = np.cumsum(counts)
253
+
254
+ # Identify indices where consecutive values are the same
255
+ duplicate_indices = np.where(np.diff(cdf) == 0)[0]
256
+
257
+ # Adjust duplicate value to be an extrapolation of the previous value
258
+ for idx in duplicate_indices:
259
+ if idx > 0:
260
+ cdf[idx] = cdf[idx - 1] + (cdf[idx + 1] - cdf[idx - 1]) / 2
261
+
262
+ # interpolated function to convert simulated streamflow to prob
263
+ if to_probability:
264
+ if extrapolate:
265
+ func = interpolate.interp1d(
266
+ bin_edges, cdf, fill_value="extrapolate"
267
+ )
268
+ else:
269
+ func = interpolate.interp1d(bin_edges, cdf)
270
+ return lambda x: np.clip(func(x), 0, 1)
271
+ # interpolated function to convert simulated prob to observed streamflow
272
+ elif to_flow:
273
+ if extrapolate:
274
+ return interpolate.interp1d(
275
+ cdf, bin_edges, fill_value="extrapolate"
276
+ )
277
+ return interpolate.interp1d(cdf, bin_edges)
278
+
279
+
95
280
  if __name__ == "__main__":
96
281
  station_id = sys.argv[1]
97
282
  reach_id = sys.argv[2]
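
Taken together, the hunks above cache the geoglows historic simulation under {cache_path}/geoglows_cache/{reach_id}_historic_simulation.csv and replace geoglows.bias.correct_forecast with local bias_correct_forecast/bias_correct_historical helpers (hence the new math, numpy, and scipy.interpolate imports). A minimal usage sketch of the forecast correction, assuming the helpers above are importable and using synthetic data; all three frames need a DatetimeIndex because the helpers filter by .index.month:

    import numpy as np
    import pandas as pd

    days = pd.date_range("2015-01-01", "2019-12-31", freq="D")
    simulated = pd.DataFrame({"flow_m3s": np.random.gamma(2.0, 50.0, len(days))}, index=days)
    observed = pd.DataFrame({"flow_m3s": np.random.gamma(2.0, 40.0, len(days))}, index=days)

    horizon = pd.date_range("2020-01-05", periods=10, freq="D")
    forecast = pd.DataFrame(
        {f"ensemble_{i:02d}": np.random.gamma(2.0, 45.0, len(horizon)) for i in range(1, 4)},
        index=horizon,
    )

    # Maps each ensemble member through the simulated-flow CDF and back out of the
    # observed-flow CDF for the forecast's starting month; bias_correct_historical
    # applies the same idea to the historical series itself.
    corrected = bias_correct_forecast(forecast, simulated, observed)
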
loone_data_prep/utils.py CHANGED
@@ -11,7 +11,10 @@ from retry import retry
11
11
  from scipy.optimize import fsolve
12
12
  from scipy import interpolate
13
13
  from rpy2.robjects import r
14
- from rpy2.robjects.vectors import StrVector as rpy2StrVector, DataFrame as rpy2DataFrame
14
+ from rpy2.robjects.vectors import (
15
+ StrVector as rpy2StrVector,
16
+ DataFrame as rpy2DataFrame,
17
+ )
15
18
  from rpy2.rinterface_lib.embedded import RRuntimeError
16
19
 
17
20
 
@@ -44,7 +47,15 @@ INTERP_DICT = {
44
47
  },
45
48
  "PHOSPHATE, ORTHO AS P": {
46
49
  "units": "mg/L",
47
- "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
50
+ "station_ids": [
51
+ "L001",
52
+ "L004",
53
+ "L005",
54
+ "L006",
55
+ "L007",
56
+ "L008",
57
+ "LZ40",
58
+ ],
48
59
  },
49
60
  "NITRATE+NITRITE-N": {
50
61
  "units": "mg/L",
@@ -146,9 +157,26 @@ INTERP_DICT = {
146
157
  "LZ40",
147
158
  ],
148
159
  },
149
- "DISSOLVED OXYGEN": {"units": "mg/L", "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
150
- "RADP": {"units": "MICROMOLE/m^2/s", "station_ids": ["L001", "L005", "L006", "LZ40"]},
151
- "RADT": {"units": "kW/m^2", "station_ids": ["L001", "L005", "L006", "LZ40"]},
160
+ "DISSOLVED OXYGEN": {
161
+ "units": "mg/L",
162
+ "station_ids": [
163
+ "L001",
164
+ "L004",
165
+ "L005",
166
+ "L006",
167
+ "L007",
168
+ "L008",
169
+ "LZ40",
170
+ ],
171
+ },
172
+ "RADP": {
173
+ "units": "MICROMOLE/m^2/s",
174
+ "station_ids": ["L001", "L005", "L006", "LZ40"],
175
+ },
176
+ "RADT": {
177
+ "units": "kW/m^2",
178
+ "station_ids": ["L001", "L005", "L006", "LZ40"],
179
+ },
152
180
  }
153
181
  DEFAULT_PREDICTION_STATIONS_IDS = [
154
182
  "S65E_S",
@@ -263,14 +291,18 @@ def data_interpolations(
263
291
  Data_In = Data_In.set_index(["date"])
264
292
  Data_In.index = pd.to_datetime(Data_In.index, unit="ns")
265
293
  Data_df = Data_In.resample("D").mean()
266
- Data_df = Data_df.dropna(subset=["%s_%s_%s" % (station, parameter, units)])
294
+ Data_df = Data_df.dropna(
295
+ subset=["%s_%s_%s" % (station, parameter, units)]
296
+ )
267
297
  Data_df = Data_df.reset_index()
268
298
  Data_df["Yr_M"] = pd.to_datetime(Data_df["date"]).dt.to_period("M")
269
299
  start_date = Data_df["date"].iloc[0]
270
300
  end_date = Data_df["date"].iloc[-1]
271
301
  date_rng = pd.date_range(start=start_date, end=end_date, freq="M")
272
302
  Monthly_df = pd.DataFrame(date_rng, columns=["date"])
273
- Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period("M")
303
+ Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period(
304
+ "M"
305
+ )
274
306
  New_date = []
275
307
  New_data = []
276
308
  Days = []
@@ -282,13 +314,27 @@ def data_interpolations(
282
314
  if i in Data_df.index:
283
315
  if type(Data_df.loc[i]["date"]) == pd.Timestamp:
284
316
  New_date.append(Data_df.loc[i]["date"])
285
- New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)])
317
+ New_data.append(
318
+ Data_df.loc[i][
319
+ "%s_%s_%s" % (station, parameter, units)
320
+ ]
321
+ )
286
322
  else:
287
323
  for j in range(len(Data_df.loc[i]["date"])):
288
324
  New_date.append(Data_df.loc[i]["date"][j])
289
- New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)][j])
325
+ New_data.append(
326
+ Data_df.loc[i][
327
+ "%s_%s_%s" % (station, parameter, units)
328
+ ][j]
329
+ )
290
330
  elif i not in Data_df.index:
291
- New_date.append(datetime.datetime(Monthly_df.loc[i]["date"].year, Monthly_df.loc[i]["date"].month, 1))
331
+ New_date.append(
332
+ datetime.datetime(
333
+ Monthly_df.loc[i]["date"].year,
334
+ Monthly_df.loc[i]["date"].month,
335
+ 1,
336
+ )
337
+ )
292
338
  New_data.append(np.NaN)
293
339
 
294
340
  New_date = pd.to_datetime(New_date, format="%Y-%m-%d")
@@ -302,7 +348,9 @@ def data_interpolations(
302
348
  Days_cum.append(
303
349
  Days_cum[i - 1]
304
350
  + Days[i]
305
- + monthrange(New_date[i - 1].year, New_date[i - 1].month)[1]
351
+ + monthrange(New_date[i - 1].year, New_date[i - 1].month)[
352
+ 1
353
+ ]
306
354
  - Days[i - 1]
307
355
  )
308
356
  Final_df = pd.DataFrame()
@@ -316,7 +364,9 @@ def data_interpolations(
316
364
  Final_df["date"] = pd.to_datetime(Final_df["date"], format="%Y-%m-%d")
317
365
  start_date = Final_df["date"].iloc[0]
318
366
  end_date = Final_df["date"].iloc[-1]
319
- date_rng_TSS_1 = pd.date_range(start=start_date, end=end_date, freq="D")
367
+ date_rng_TSS_1 = pd.date_range(
368
+ start=start_date, end=end_date, freq="D"
369
+ )
320
370
  # Create a data frame with a date column
321
371
  Data_df = pd.DataFrame(date_rng_TSS_1, columns=["date"])
322
372
  Data_len = len(Data_df.index)
@@ -328,7 +378,9 @@ def data_interpolations(
328
378
  for i in range(1, Data_len):
329
379
  Cum_days[i] = Cum_days[i - 1] + 1
330
380
  # Data_daily[i] = interpolate.interp1d(Final_df['Days'], Final_df['TSS'] , kind = 'linear')(Cum_days[i])
331
- Data_daily[i] = np.interp(Cum_days[i], Final_df["Days_cum"], Final_df["Data"])
381
+ Data_daily[i] = np.interp(
382
+ Cum_days[i], Final_df["Days_cum"], Final_df["Data"]
383
+ )
332
384
  Data_df["Data"] = Data_daily
333
385
  Data_df.to_csv(f"{workspace}/{name}_Interpolated.csv", index=False)
334
386
 
@@ -341,11 +393,17 @@ def interpolate_all(workspace: str, d: dict = INTERP_DICT) -> None:
341
393
  d (dict, optional): Dict with parameter key, units, and station IDs. Defaults to INTERP_DICT.
342
394
  """
343
395
  for param, values in d.items():
344
- print(f"Interpolating parameter: {param} for station IDs: {values['station_ids']}.")
345
- data_interpolations(workspace, param, values["units"], values["station_ids"])
396
+ print(
397
+ f"Interpolating parameter: {param} for station IDs: {values['station_ids']}."
398
+ )
399
+ data_interpolations(
400
+ workspace, param, values["units"], values["station_ids"]
401
+ )
346
402
 
347
403
 
348
- def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str = "nu.csv"):
404
+ def kinematic_viscosity(
405
+ workspace: str, in_file_name: str, out_file_name: str = "nu.csv"
406
+ ):
349
407
  # Read Mean H2O_T in LO
350
408
  LO_Temp = pd.read_csv(os.path.join(workspace, in_file_name))
351
409
  LO_T = LO_Temp["Water_T"]
@@ -354,13 +412,23 @@ def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str =
354
412
 
355
413
  class nu_Func:
356
414
  def nu(T):
357
- nu20 = 1.0034 / 1e6 # m2/s (kinematic viscosity of water at T = 20 C)
415
+ nu20 = (
416
+ 1.0034 / 1e6
417
+ ) # m2/s (kinematic viscosity of water at T = 20 C)
358
418
 
359
419
  def func(x):
360
420
  # return[log(x[0]/nu20)-((20-T)/(T+96))*(1.2364-1.37E-3*(20-T)+5.7E-6*(20-T)**2)]
361
421
  return [
362
422
  (x[0] / nu20)
363
- - 10 ** (((20 - T) / (T + 96)) * (1.2364 - 1.37e-3 * (20 - T) + 5.7e-6 * (20 - T) ** 2))
423
+ - 10
424
+ ** (
425
+ ((20 - T) / (T + 96))
426
+ * (
427
+ 1.2364
428
+ - 1.37e-3 * (20 - T)
429
+ + 5.7e-6 * (20 - T) ** 2
430
+ )
431
+ )
364
432
  ]
365
433
 
366
434
  sol = fsolve(func, [9.70238995692062e-07])
@@ -407,7 +475,11 @@ def wind_induced_waves(
407
475
  (
408
476
  0.283
409
477
  * np.tanh(0.53 * (g * d / WS**2) ** 0.75)
410
- * np.tanh(0.00565 * (g * F / WS**2) ** 0.5 / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8)))
478
+ * np.tanh(
479
+ 0.00565
480
+ * (g * F / WS**2) ** 0.5
481
+ / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8))
482
+ )
411
483
  )
412
484
  * WS**2
413
485
  / g
@@ -419,7 +491,11 @@ def wind_induced_waves(
419
491
  (
420
492
  7.54
421
493
  * np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
422
- * np.tanh(0.0379 * (g * F / WS**2) ** 0.5 / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8)))
494
+ * np.tanh(
495
+ 0.0379
496
+ * (g * F / WS**2) ** 0.5
497
+ / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
498
+ )
423
499
  )
424
500
  * WS
425
501
  / g
@@ -428,7 +504,10 @@ def wind_induced_waves(
428
504
 
429
505
  def L(g, d, T):
430
506
  def func(x):
431
- return [(g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0]) - x[0]]
507
+ return [
508
+ (g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0])
509
+ - x[0]
510
+ ]
432
511
 
433
512
  sol = fsolve(func, [1])
434
513
  L = sol[0]
@@ -443,12 +522,18 @@ def wind_induced_waves(
443
522
  W_T[i] = Wind_Func.T(g, LO_Wd[i], F, LO_WS["WS_mps"].iloc[i])
444
523
  W_L[i] = Wind_Func.L(g, LO_Wd[i], W_T[i])
445
524
  W_ShearStress[i] = (
446
- W_H[i] * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5) / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
525
+ W_H[i]
526
+ * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5)
527
+ / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
447
528
  )
448
529
 
449
530
  Wind_ShearStress = pd.DataFrame(LO_WS["date"], columns=["date"])
450
- Wind_ShearStress["ShearStress"] = W_ShearStress * 10 # Convert N/m2 to Dyne/cm2
451
- Wind_ShearStress.to_csv(os.path.join(output_dir, wind_shear_stress_out), index=False)
531
+ Wind_ShearStress["ShearStress"] = (
532
+ W_ShearStress * 10
533
+ ) # Convert N/m2 to Dyne/cm2
534
+ Wind_ShearStress.to_csv(
535
+ os.path.join(output_dir, wind_shear_stress_out), index=False
536
+ )
452
537
 
453
538
  # # Monthly
454
539
  # Wind_ShearStress['Date'] = pd.to_datetime(Wind_ShearStress['Date'])
@@ -484,8 +569,12 @@ def wind_induced_waves(
484
569
  Current_Stress[i] = Current_bottom_shear_stress(ru, Wind_Stress[i])
485
570
 
486
571
  Current_ShearStress_df = pd.DataFrame(LO_WS["date"], columns=["date"])
487
- Current_ShearStress_df["Current_Stress"] = Current_Stress * 10 # Convert N/m2 to Dyne/cm2
488
- Current_ShearStress_df["Wind_Stress"] = Wind_Stress * 10 # Convert N/m2 to Dyne/cm2
572
+ Current_ShearStress_df["Current_Stress"] = (
573
+ Current_Stress * 10
574
+ ) # Convert N/m2 to Dyne/cm2
575
+ Current_ShearStress_df["Wind_Stress"] = (
576
+ Wind_Stress * 10
577
+ ) # Convert N/m2 to Dyne/cm2
489
578
  Current_ShearStress_df["Wind_Speed_m/s"] = LO_WS["WS_mps"]
490
579
 
491
580
  def Current_bottom_shear_stress_2(u, k, nu, ks, z, ru):
@@ -500,7 +589,10 @@ def wind_induced_waves(
500
589
  sol2 = fsolve(func2, [1])
501
590
 
502
591
  def func3(u_str3):
503
- return [u_str3[0] - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))]
592
+ return [
593
+ u_str3[0]
594
+ - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))
595
+ ]
504
596
 
505
597
  sol3 = fsolve(func3, [1])
506
598
  if sol1[0] * ks / nu <= 5:
@@ -514,7 +606,9 @@ def wind_induced_waves(
514
606
 
515
607
  def Current_bottom_shear_stress_3(u, k, nu, ks, z, ru):
516
608
  def func1(u_str1):
517
- return [u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))]
609
+ return [
610
+ u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))
611
+ ]
518
612
 
519
613
  sol1 = fsolve(func1, [1])
520
614
 
@@ -524,7 +618,12 @@ def wind_induced_waves(
524
618
  sol2 = fsolve(func2, [1])
525
619
 
526
620
  def func3(u_str3):
527
- return [u_str3[0] - u * k * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))]
621
+ return [
622
+ u_str3[0]
623
+ - u
624
+ * k
625
+ * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))
626
+ ]
528
627
 
529
628
  sol3 = fsolve(func3, [1])
530
629
  if sol1[0] * ks / nu <= 5:
@@ -541,22 +640,34 @@ def wind_induced_waves(
541
640
  ks = 5.27e-4 # m
542
641
  current_stress_3 = np.zeros(n, dtype=object)
543
642
  for i in range(n):
544
- current_stress_3[i] = Current_bottom_shear_stress_3(0.05, 0.41, nu, ks, LO_Wd[i], ru)
545
- Current_ShearStress_df["Current_Stress_3"] = current_stress_3 * 10 # Convert N/m2 to Dyne/cm2
546
- Current_ShearStress_df.to_csv(os.path.join(output_dir, current_shear_stress_out), index=False)
643
+ current_stress_3[i] = Current_bottom_shear_stress_3(
644
+ 0.05, 0.41, nu, ks, LO_Wd[i], ru
645
+ )
646
+ Current_ShearStress_df["Current_Stress_3"] = (
647
+ current_stress_3 * 10
648
+ ) # Convert N/m2 to Dyne/cm2
649
+ Current_ShearStress_df.to_csv(
650
+ os.path.join(output_dir, current_shear_stress_out), index=False
651
+ )
547
652
 
548
653
 
549
- def stg2sto(stg_sto_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
654
+ def stg2sto(
655
+ stg_sto_data_path: str, v: pd.Series, i: int
656
+ ) -> interpolate.interp1d:
550
657
  stgsto_data = pd.read_csv(stg_sto_data_path)
551
658
  # NOTE: We Can use cubic interpolation instead of linear
552
659
  x = stgsto_data["Stage"]
553
660
  y = stgsto_data["Storage"]
554
661
  if i == 0:
555
662
  # return storage given stage
556
- return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
663
+ return interpolate.interp1d(
664
+ x, y, fill_value="extrapolate", kind="linear"
665
+ )(v)
557
666
  else:
558
667
  # return stage given storage
559
- return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
668
+ return interpolate.interp1d(
669
+ y, x, fill_value="extrapolate", kind="linear"
670
+ )(v)
560
671
 
561
672
 
562
673
  def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
@@ -569,10 +680,14 @@ def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
569
680
  y = stgar_data["Surf_Area"]
570
681
  if i == 0:
571
682
  # return surface area given stage
572
- return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
683
+ return interpolate.interp1d(
684
+ x, y, fill_value="extrapolate", kind="linear"
685
+ )(v)
573
686
  else:
574
687
  # return stage given surface area
575
- return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
688
+ return interpolate.interp1d(
689
+ y, x, fill_value="extrapolate", kind="linear"
690
+ )(v)
576
691
 
577
692
 
578
693
  @retry(Exception, tries=3, delay=15, backoff=2)
@@ -580,20 +695,27 @@ def get_pi(workspace: str) -> None:
580
695
  # Weekly data is downloaded from:
581
696
  # https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv
582
697
  # State:Florida Division:4.South Central
583
- df = pd.read_csv("https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv")
698
+ df = pd.read_csv(
699
+ "https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv"
700
+ )
584
701
  df.to_csv(os.path.join(workspace, "PI.csv"))
585
702
 
586
703
 
587
704
  def nutrient_prediction(
588
- input_dir: str, output_dir: str, station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS, constants: dict = DEFAULT_EXPFUNC_CONSTANTS
705
+ input_dir: str,
706
+ output_dir: str,
707
+ station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS,
708
+ constants: dict = DEFAULT_EXPFUNC_CONSTANTS,
589
709
  ) -> None:
590
710
  for station in station_ids:
591
711
  print(f"Predicting nutrient loads for station: {station}.")
592
712
  # Construct paths for flow file
593
- flow_file_path = ''
713
+ flow_file_path = ""
594
714
  flow_file_path_exists = True
595
715
  try:
596
- flow_file_path = glob(os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv"))[0]
716
+ flow_file_path = glob(
717
+ os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv")
718
+ )[0]
597
719
  except Exception as e:
598
720
  flow_file_path_exists = False
599
721
 
@@ -603,7 +725,9 @@ def nutrient_prediction(
603
725
  flow = pd.read_csv(flow_file_path)
604
726
  else:
605
727
  # If it doesn't exist, skip to the next iteration of the loop
606
- print(f'Skipping nutrient prediction for station: {station}. Flow file does not exist.')
728
+ print(
729
+ f"Skipping nutrient prediction for station: {station}. Flow file does not exist."
730
+ )
607
731
  continue
608
732
 
609
733
  # Create structures to hold resulting data
@@ -615,6 +739,7 @@ def nutrient_prediction(
615
739
  if "ensemble" not in column_name:
616
740
  continue
617
741
  import warnings
742
+
618
743
  warnings.filterwarnings("error")
619
744
 
620
745
  try:
@@ -623,16 +748,22 @@ def nutrient_prediction(
623
748
 
624
749
  # Calculate the logarithm of the flow data
625
750
 
626
- Q_Log = np.log(flow_column + 1e-8) # Add a small number to prevent log(0) errors
751
+ Q_Log = np.log(
752
+ flow_column + 1e-8
753
+ ) # Add a small number to prevent log(0) errors
627
754
 
628
755
  # Calculate the predicted TP loads using the logarithm of the flow data
629
- TP_Loads_Predicted_Log = constants[station]["a"] * Q_Log ** constants[station]["b"]
756
+ TP_Loads_Predicted_Log = (
757
+ constants[station]["a"] * Q_Log ** constants[station]["b"]
758
+ )
630
759
 
631
760
  # Calculate the predicted TP loads using the exponential of the predicted TP loads logarithm
632
761
  predicted_column = np.exp(TP_Loads_Predicted_Log)
633
762
 
634
763
  # Store prediction data in a pandas DataFrame (So we can concat all ensemble data into one dataframe)
635
- predicted_column = pd.DataFrame(predicted_column.tolist(), index=flow["date"].copy())
764
+ predicted_column = pd.DataFrame(
765
+ predicted_column.tolist(), index=flow["date"].copy()
766
+ )
636
767
  predicted_column.columns = [column_name]
637
768
 
638
769
  prediction_columns.append(predicted_column)
@@ -642,31 +773,99 @@ def nutrient_prediction(
642
773
 
643
774
  # Concat individual ensemble columns together into one pandas DataFrame
644
775
  out_dataframe = pd.concat(objs=prediction_columns, axis="columns")
645
-
646
- column_mean = out_dataframe.mean(axis='columns')
647
- column_percentile_25 = out_dataframe.quantile(q=0.25, axis='columns')
648
- column_percentile_75 = out_dataframe.quantile(q=0.75, axis='columns')
649
- column_median = out_dataframe.median(axis='columns')
650
- column_std = out_dataframe.std(axis='columns')
651
-
652
- out_dataframe['mean'] = column_mean
653
- out_dataframe['percentile_25'] = column_percentile_25
654
- out_dataframe['percentile_75'] = column_percentile_75
655
- out_dataframe['median'] = column_median
656
- out_dataframe['standard_deviation'] = column_std
776
+
777
+ column_mean = out_dataframe.mean(axis="columns")
778
+ column_percentile_25 = out_dataframe.quantile(q=0.25, axis="columns")
779
+ column_percentile_75 = out_dataframe.quantile(q=0.75, axis="columns")
780
+ column_median = out_dataframe.median(axis="columns")
781
+ column_std = out_dataframe.std(axis="columns")
782
+
783
+ out_dataframe["mean"] = column_mean
784
+ out_dataframe["percentile_25"] = column_percentile_25
785
+ out_dataframe["percentile_75"] = column_percentile_75
786
+ out_dataframe["median"] = column_median
787
+ out_dataframe["standard_deviation"] = column_std
657
788
 
658
789
  # Save the predicted TP loads to a CSV file
659
- out_dataframe.to_csv(os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv"))
660
-
790
+ out_dataframe.to_csv(
791
+ os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv")
792
+ )
793
+
661
794
  # Save the predicted TP loads to a CSV file (in input_dir)
662
795
  # Output is needed in input_dir by GEOGLOWS_LOONE_DATA_PREP.py and in output_dir for graph visualization in the app
663
- out_dataframe.to_csv(os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv"))
796
+ out_dataframe.to_csv(
797
+ os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv")
798
+ )
799
+
800
+
801
+ def photo_period(
802
+ workspace: str,
803
+ phi: float = 26.982052,
804
+ doy: np.ndarray = np.arange(1, 365),
805
+ verbose: bool = False,
806
+ ):
807
+ """Generate PhotoPeriod.csv file for the given latitude and days of the year.
808
+
809
+ Args:
810
+ workspace (str): A path to the directory where the file will be generated.
811
+ phi (float, optional): Latitude of the location. Defaults to 26.982052.
812
+ doy (np.ndarray, optional): An array holding the days of the year that you want the photo period for. Defaults to np.arange(1,365).
813
+ verbose (bool, optional): Print results of each computation. Defaults to False.
814
+ """
815
+ phi = np.radians(phi) # Convert to radians
816
+ light_intensity = 2.206 * 10**-3
817
+
818
+ C = np.sin(np.radians(23.44)) # sin of the obliquity of 23.44 degrees.
819
+ B = -4.76 - 1.03 * np.log(
820
+ light_intensity
821
+ ) # Eq. [5]. Angle of the sun below the horizon. Civil twilight is -4.76 degrees.
822
+
823
+ # Calculations
824
+ alpha = np.radians(90 + B) # Eq. [6]. Value at sunrise and sunset.
825
+ M = 0.9856 * doy - 3.251 # Eq. [4].
826
+ lmd = (
827
+ M
828
+ + 1.916 * np.sin(np.radians(M))
829
+ + 0.020 * np.sin(np.radians(2 * M))
830
+ + 282.565
831
+ ) # Eq. [3]. Lambda
832
+ delta = np.arcsin(C * np.sin(np.radians(lmd))) # Eq. [2].
833
+
834
+ # Defining sec(x) = 1/cos(x)
835
+ P = (
836
+ 2
837
+ / 15
838
+ * np.degrees(
839
+ np.arccos(
840
+ np.cos(alpha) * (1 / np.cos(phi)) * (1 / np.cos(delta))
841
+ - np.tan(phi) * np.tan(delta)
842
+ )
843
+ )
844
+ ) # Eq. [1].
845
+
846
+ # Print results in order for each computation to match example in paper
847
+ if verbose:
848
+ print("Input latitude =", np.degrees(phi))
849
+ print("[Eq 5] B =", B)
850
+ print("[Eq 6] alpha =", np.degrees(alpha))
851
+ print("[Eq 4] M =", M[0])
852
+ print("[Eq 3] Lambda =", lmd[0])
853
+ print("[Eq 2] delta=", np.degrees(delta[0]))
854
+ print("[Eq 1] Daylength =", P[0])
855
+
856
+ photo_period_df = pd.DataFrame()
857
+ photo_period_df["Day"] = doy
858
+ photo_period_df["Data"] = P
859
+
860
+ photo_period_df.to_csv(
861
+ os.path.join(workspace, "PhotoPeriod.csv"), index=False
862
+ )
664
863
 
665
864
 
666
865
  def find_last_date_in_csv(workspace: str, file_name: str) -> str:
667
866
  """
668
867
  Gets the most recent date from the last line of a .csv file.
669
- Assumes the file is formatted as a .csv file, encoded in UTF-8,
868
+ Assumes the file is formatted as a .csv file, encoded in UTF-8,
670
869
  and the rows in the file are sorted by date in ascending order.
671
870
 
672
871
  Args:
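
A quick example of calling the new photo_period helper added in the hunk above (the workspace path is illustrative):

    import numpy as np

    # Writes PhotoPeriod.csv (columns Day, Data) into the workspace directory.
    photo_period("/path/to/workspace", phi=26.982052, doy=np.arange(1, 366), verbose=True)
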
@@ -676,40 +875,41 @@ def find_last_date_in_csv(workspace: str, file_name: str) -> str:
676
875
  Returns:
677
876
  str: The most recent date as a string in YYYY-MM-DD format, or None if the file does not exist or the date cannot be found.
678
877
  """
878
+
679
879
  # Helper Functions
680
880
  def is_valid_date(date_string):
681
881
  try:
682
- datetime.datetime.strptime(date_string, '%Y-%m-%d')
882
+ datetime.datetime.strptime(date_string, "%Y-%m-%d")
683
883
  return True
684
884
  except ValueError:
685
885
  return False
686
-
886
+
687
887
  # Check that file exists
688
888
  file_path = os.path.join(workspace, file_name)
689
889
  if not os.path.exists(file_path):
690
890
  return None
691
-
891
+
692
892
  # Attempt to extract the date of the last line in the file
693
893
  try:
694
- with open(file_path, 'rb') as file:
894
+ with open(file_path, "rb") as file:
695
895
  # Go to the end of the file
696
896
  file.seek(-2, os.SEEK_END)
697
-
897
+
698
898
  # Loop backwards until you find the first newline character
699
- while file.read(1) != b'\n':
899
+ while file.read(1) != b"\n":
700
900
  file.seek(-2, os.SEEK_CUR)
701
-
901
+
702
902
  # Read the last line
703
903
  last_line = file.readline().decode()
704
-
904
+
705
905
  # Extract the date from the last line
706
906
  date = None
707
-
708
- for value in last_line.split(','):
907
+
908
+ for value in last_line.split(","):
709
909
  if is_valid_date(value):
710
910
  date = value
711
911
  break
712
-
912
+
713
913
  # Return date
714
914
  return date
715
915
  except OSError as e:
@@ -721,20 +921,26 @@ def dbhydro_data_is_latest(date_latest: str):
721
921
  """
722
922
  Checks whether the given date is the most recent date possible to get data from dbhydro.
723
923
  Can be used to check whether dbhydro data is up-to-date.
724
-
924
+
725
925
  Args:
726
926
  date_latest (str): The date of the most recent data of the dbhydro data you have
727
-
927
+
728
928
  Returns:
729
929
  bool: True if the date_latest is the most recent date possible to get data from dbhydro, False otherwise
730
930
  """
731
- date_latest_object = datetime.datetime.strptime(date_latest, "%Y-%m-%d").date()
732
- return date_latest_object == (datetime.datetime.now().date() - datetime.timedelta(days=1))
931
+ date_latest_object = datetime.datetime.strptime(
932
+ date_latest, "%Y-%m-%d"
933
+ ).date()
934
+ return date_latest_object == (
935
+ datetime.datetime.now().date() - datetime.timedelta(days=1)
936
+ )
733
937
 
734
938
 
735
939
  if __name__ == "__main__":
736
940
  if sys.argv[1] == "get_dbkeys":
737
- get_dbkeys(sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:])
941
+ get_dbkeys(
942
+ sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:]
943
+ )
738
944
  elif sys.argv[1] == "data_interp":
739
945
  interp_args = [x for x in sys.argv[2:]]
740
946
  interp_args[0] = interp_args[0].rstrip("/")
@@ -746,7 +952,9 @@ if __name__ == "__main__":
746
952
  elif sys.argv[1] == "kinematic_viscosity":
747
953
  kinematic_viscosity(sys.argv[2].rstrip("/"), *sys.argv[3:])
748
954
  elif sys.argv[1] == "wind_induced_waves":
749
- wind_induced_waves(sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:])
955
+ wind_induced_waves(
956
+ sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:]
957
+ )
750
958
  elif sys.argv[1] == "get_pi":
751
959
  get_pi(sys.argv[2].rstrip("/"))
752
960
  elif sys.argv[1] == "nutrient_prediction":
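
The __main__ block of utils.py dispatches on the first CLI argument (e.g. get_dbkeys, data_interp, kinematic_viscosity, wind_induced_waves, get_pi, nutrient_prediction), stripping trailing slashes from path arguments before calling the matching helper, e.g. python loone_data_prep/utils.py get_pi /path/to/workspace.
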
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: loone_data_prep
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Prepare data to run the LOONE model.
5
5
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
6
6
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
@@ -20,6 +20,7 @@ Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: rpy2
22
22
  Requires-Dist: retry
23
+ Requires-Dist: numpy <2
23
24
  Requires-Dist: pandas
24
25
  Requires-Dist: scipy
25
26
  Requires-Dist: geoglows ==0.27.1
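
Besides the version bump, the only metadata change is the new "numpy <2" requirement. The diff itself does not state the motivation, but the pin is consistent with the np.NaN spelling still used in utils.py (see the data_interpolations hunk above), which NumPy 2.0 removed in favor of np.nan.
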
@@ -1,11 +1,11 @@
1
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=loaMvDU1IgLsz7_eHAVJMtk_pgW_CTHiZE43a0_mZZE,35394
2
- loone_data_prep/LOONE_DATA_PREP.py,sha256=mI0qC03v7LnK56NAWziMjqM8Hc9clYk0auY3kJ7TinQ,59477
1
+ loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py,sha256=wstZQwb_e2Z117dhvuLPrqyln6Bpb3ZTL0RfnOTvET4,35456
2
+ loone_data_prep/LOONE_DATA_PREP.py,sha256=osaLYlrfTwwUGLwXGypy61BOYBlXnoTPDp09O4Am1ZE,67761
3
3
  loone_data_prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  loone_data_prep/data_analyses_fns.py,sha256=BZ7famrSKoUfExQvZfbl72CyADHLb-zzgdWZ-kLJxcQ,4603
5
- loone_data_prep/utils.py,sha256=dpaOjtnRStf0wK5CbAkrWb8KoSKfJhDIxwU2Hc0ESC4,27532
5
+ loone_data_prep/utils.py,sha256=Jsa08iaD04C-BqK0K5BHgRFZEOqp6f_dcJSjPgcz1zA,31575
6
6
  loone_data_prep/flow_data/S65E_total.py,sha256=szNUfj0EyyyDzuKNhTGAZtWc5owiOpxYS55YTt4u19k,2835
7
7
  loone_data_prep/flow_data/__init__.py,sha256=u7fENFUZsJjyl13Bc9ZE47sHMKmjxtqXhV9t7vDTm7Y,93
8
- loone_data_prep/flow_data/forecast_bias_correction.py,sha256=pABmNWWF96JDfjl3u314ORSskGbWaPgz8ZgM8FdEwvE,3752
8
+ loone_data_prep/flow_data/forecast_bias_correction.py,sha256=ydoZ0UmDZvsPLHsO7cpCFN9Pmj7w_tKjMDy9RK5EoiM,10146
9
9
  loone_data_prep/flow_data/get_forecast_flows.py,sha256=-nPkscE9UZbRzGZ_dk0zhKiNM2hOINx21HgSeQrFjaU,14462
10
10
  loone_data_prep/flow_data/get_inflows.py,sha256=xKuSyJBdPrpjqMdRiyNDyxwdhYVIgLhiTP0k_1I1uWI,6456
11
11
  loone_data_prep/flow_data/get_outflows.py,sha256=x7aisIkbXoTkcubFQLDghX-P8lztPq-tU0dQzoVRTtQ,5620
@@ -20,8 +20,8 @@ loone_data_prep/water_quality_data/wq.py,sha256=sl6G3iDCk6QUzpHTXPHpRZNMBG0-wHuc
20
20
  loone_data_prep/weather_data/__init__.py,sha256=TX58EPgGRzEK_LmLze79lC4L7kU_j3yZf5_iC4nOIP4,45
21
21
  loone_data_prep/weather_data/get_all.py,sha256=aCufuxORU51XhXt7LN9wN_V4qtjNt1qRC1UKlI2b3Ko,6918
22
22
  loone_data_prep/weather_data/weather.py,sha256=hvceksrGSnDkCjheBVBuPgY1DrdR0ZAtrFB-K2tYTtk,12043
23
- loone_data_prep-0.1.7.dist-info/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
24
- loone_data_prep-0.1.7.dist-info/METADATA,sha256=p8nGiYP4g4D6q2v0i9cHrP0nTsnFaJxJHXjSY_9n9s8,4098
25
- loone_data_prep-0.1.7.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
26
- loone_data_prep-0.1.7.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
27
- loone_data_prep-0.1.7.dist-info/RECORD,,
23
+ loone_data_prep-0.1.8.dist-info/LICENSE,sha256=rR1QKggtQUbAoYu2SW1ouI5xPqt9g4jvRRpZ0ZfnuqQ,1497
24
+ loone_data_prep-0.1.8.dist-info/METADATA,sha256=WB5Nk0uuAtv55-zdjaLRZjn9qbMg1H34Yp5Qe2LpKbc,4122
25
+ loone_data_prep-0.1.8.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
26
+ loone_data_prep-0.1.8.dist-info/top_level.txt,sha256=wDyJMJiCO5huTAuNmvxpjFxtvGaq_8Tr4hFFcXf8jLE,16
27
+ loone_data_prep-0.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.3.0)
2
+ Generator: setuptools (75.5.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5