loone-data-prep 1.1.2.tar.gz → 1.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/PKG-INFO +1 -1
  2. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +3 -2
  3. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/LOONE_DATA_PREP.py +23 -5
  4. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_forecast_flows.py +6 -7
  5. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/Chla_merged.py +27 -0
  6. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_Chla_predicted.py +109 -0
  7. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +97 -0
  8. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_q_predict.py +159 -0
  9. loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_wq_predict.py +71 -0
  10. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/trib_cond.py +9 -1
  11. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/utils.py +12 -1
  12. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/PKG-INFO +1 -1
  13. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/SOURCES.txt +5 -0
  14. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/pyproject.toml +1 -1
  15. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/LICENSE +0 -0
  16. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/README.md +0 -0
  17. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/__init__.py +0 -0
  18. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/data_analyses_fns.py +0 -0
  19. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/S65E_total.py +0 -0
  20. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/__init__.py +0 -0
  21. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/forecast_bias_correction.py +0 -0
  22. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_inflows.py +0 -0
  23. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_outflows.py +0 -0
  24. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/hydro.py +0 -0
  25. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -0
  26. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/forecast_stages.py +0 -0
  27. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/predict_PI.py +0 -0
  28. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/weather_forecast.py +0 -0
  29. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_level_data/__init__.py +0 -0
  30. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_level_data/get_all.py +0 -0
  31. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_level_data/hydro.py +0 -0
  32. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/__init__.py +0 -0
  33. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/get_inflows.py +0 -0
  34. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/get_lake_wq.py +0 -0
  35. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/water_quality_data/wq.py +0 -0
  36. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/weather_data/__init__.py +0 -0
  37. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/weather_data/get_all.py +0 -0
  38. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/weather_data/weather.py +0 -0
  39. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/dependency_links.txt +0 -0
  40. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/requires.txt +0 -0
  41. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/top_level.txt +0 -0
  42. {loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/setup.cfg +0 -0
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loone_data_prep
- Version: 1.1.2
+ Version: 1.2.1
  Summary: Prepare data to run the LOONE model.
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py
@@ -44,7 +44,7 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      # Read LO Average Stage (ft)
      LO_Stage = pd.read_csv(f"{input_dir}/LO_Stage.csv")
      # Create Column (EOD Stg(ft, NGVD)) in File (SFWMM_Daily_Outputs)
-     LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
+     # LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
      LO_Stage.index = LO_Stage["date"]
      # Calculate average
      if "Average_Stage" not in LO_Stage.columns:
@@ -457,6 +457,7 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      LOWS["LZ40WS"] = LZ40WS["LZ40_WNDS_MPH"]
      LOWS = LOWS.set_index("date")
      LOWS["LO_Avg_WS_MPH"] = LOWS.mean(axis=1)
+     LOWS = LOWS.resample("D").mean()
      LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")

      # # RFVol acft
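The added resample line collapses the predicted wind records to one row per calendar day, which is why the hunk sets "date" as the index just above. A minimal sketch of the step in isolation, with hypothetical sub-daily data (resample requires a DatetimeIndex):

    import pandas as pd

    # Two readings on Jan 1 and one on Jan 2 (hypothetical values)
    idx = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 12:00", "2024-01-02 00:00"])
    lows = pd.DataFrame({"LO_Avg_WS_MPH": [8.0, 10.0, 6.0]}, index=idx)
    daily = lows.resample("D").mean()
    # 2024-01-01 -> 9.0, 2024-01-02 -> 6.0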
@@ -592,7 +593,7 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi

      # Write Data into csv files
      # write Avg Stage (ft, m) Storage (acft, m3) SA (acres) to csv
-     LO_Stg_Sto_SA_df.to_csv(f"{output_dir}/Average_LO_Storage_3MLag_{ensemble_number}.csv", index=False)
+     # LO_Stg_Sto_SA_df.to_csv(f"{output_dir}/Average_LO_Storage_3MLag_{ensemble_number}.csv", index=False)
      # Write S65 TP concentrations (mg/L)
      S65_total_TP.to_csv(f"{output_dir}/S65_TP_3MLag_{ensemble_number}.csv", index=False)
      # TP External Loads 3 Months Lag (mg)
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/LOONE_DATA_PREP.py
@@ -34,7 +34,7 @@ def main(input_dir: str, output_dir: str) -> None:
      # Read LO Average Stage (ft)
      LO_Stage = pd.read_csv(f'{input_dir}/LO_Stage.csv')
      # Create Column (EOD Stg(ft, NGVD)) in File (SFWMM_Daily_Outputs)
-     LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
+     # LO_Stage = DF_Date_Range(LO_Stage, M3_Yr, M3_M, M3_D, En_Yr, En_M, En_D)
      # Calculate average
      if "Average_Stage" not in LO_Stage.columns:
          LO_Stage = LO_Stage.loc[:, ~LO_Stage.columns.str.contains('^Unnamed')]
@@ -386,8 +386,17 @@ def main(input_dir: str, output_dir: str) -> None:

      # RFVol acft
      # Create File (RF_Volume)
-     RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
-     RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df['SA_acres'].values
+     # Merge the DataFrames on date to ensure matching rows
+     RF_data_copy = RF_data.copy()
+     LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+     RF_data_copy['date'] = pd.to_datetime(RF_data_copy['date'])
+     LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+     merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+                             LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+                             on='date', how='inner')
+
+     RFVol = pd.DataFrame(merged_rf_sa['date'], columns=['date'])
+     RFVol['RFVol_acft'] = (merged_rf_sa['average_rainfall'].values/12) * merged_rf_sa['SA_acres'].values
      date_reference = RFVol['date'].iloc[0]
      date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
      df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
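The replaced lines multiplied two .values arrays positionally, which misaligns (or raises) whenever RF_data and LO_Stg_Sto_SA_df cover different dates; the inner merge instead keys every rainfall value to the surface area of the same day. The ETVol hunk below applies the identical pattern. A stripped-down sketch with hypothetical mismatched inputs:

    import pandas as pd

    rf = pd.DataFrame({"date": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
                       "average_rainfall": [0.12, 0.00, 0.30]})
    sa = pd.DataFrame({"date": pd.to_datetime(["2024-01-02", "2024-01-03"]),
                       "SA_acres": [448000.0, 449500.0]})

    merged = pd.merge(rf, sa, on="date", how="inner")  # keeps only the shared dates
    # rainfall / 12 (apparently inches -> feet), times acres -> acre-feet
    merged["RFVol_acft"] = (merged["average_rainfall"] / 12) * merged["SA_acres"]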
@@ -396,8 +405,17 @@ def main(input_dir: str, output_dir: str) -> None:

      # ETVol acft
      # Create File (ETVol)
-     ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
-     ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df['SA_acres'].values
+     # Merge the DataFrames on date to ensure matching rows
+     ET_data_copy = ET_data.copy()
+     LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+     ET_data_copy['date'] = pd.to_datetime(ET_data_copy['date'])
+     LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+     merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+                             LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+                             on='date', how='inner')
+
+     ETVol = pd.DataFrame(merged_et_sa['date'], columns=['date'])
+     ETVol['ETVol_acft'] = (merged_et_sa['average_ETPI'].values/12) * merged_et_sa['SA_acres'].values
      date_reference = ETVol['date'].iloc[0]
      date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
      df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/flow_data/get_forecast_flows.py
@@ -392,10 +392,10 @@ def main(

      # Get the flow data for each station
      stations_inflow_by_comid = {
-         750072741: "S65E_S",  # TODO: Should this be S65E_total or S65E_S? - this is a station we definitely want
-         750069782: "S84_S",  #
-         # 750053211: "S129_C",  # TODO: Should this be S129_C or S129_PMP_P? - Also right now it is all 0s
-         # 750035446: "S133_P",  # TODO: Should this be S133_P or S133_C? - Also right now it is all 0s
+         750072741: "S65E_S",
+         750069782: "S84_S",
+         # 750053211: "S129_C",
+         # 750035446: "S133_P",
          750064453: "S154_C",  # This is primarily 0s
      }
@@ -444,8 +444,7 @@ def main(
      for reach_id in MATCHED_IDS:
          stations_matched_by_comid = {
              750068601: "S71_S",
-             750052624: "S135_C",  # TODO: Should this be S135_C or S135_P?
-             # 750052624: "S308",  # NOTE: Same COMID as S135 — only one key allowed!
+             750052624: "S135_C",
              750053213: "FISHP",
              750038416: "S77_S",
              750050259: "S79_TOT",
@@ -454,7 +453,7 @@ def main(
              750051428: "S49_S",
              # 750038427: "S40",
              750057357: "S191_S",
-             750028935: "S127_C",  #TODO: Should this be S127_C or S127_P?
+             750028935: "S127_C",
          }

          station_ensembles = get_flow_forecast_ensembles(
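The deleted NOTE above records a real constraint: S135 and S308 resolve to the same COMID, and a Python dict literal silently keeps only the last value for a duplicated key, so the mapping can carry one station per COMID. In isolation:

    # Duplicate keys in a dict literal do not raise; the last one wins.
    stations = {
        750052624: "S135_C",
        750052624: "S308",  # silently overwrites the entry above
    }
    print(stations)  # {750052624: 'S308'}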
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/Chla_merged.py
@@ -0,0 +1,27 @@
+ import pandas as pd
+ def loads_predicted(input_dir, output_dir):
+     """
+     Calculate Chlorophyll-a loads based on inflows and chlorophyll-a data.
+
+     input_dir: Directory where the input files are located.
+     output_dir: Directory where the output files will be saved.
+     St_Yr, St_M, St_D: Start date (year, month, day).
+     En_Yr, En_M, En_D: End date (year, month, day).
+     """
+
+     # Read forecast inflow file
+     # TODO: Should this be an average/median of all of the ensembles? worst case?
+     Flow_df = pd.read_csv(f"{input_dir}/geoglows_flow_df_ens_01_predicted.csv")
+     Flow_df['date'] = pd.to_datetime(Flow_df['date'])
+
+
+     # Read S65E Chlorophyll-a data
+     S65E_Chla = pd.read_csv(f'{output_dir}/S65E_Chla_Merged_forecast.csv')
+     S65E_Chla['date'] = pd.to_datetime(S65E_Chla['date'])  # Ensure date column is datetime
+     # Merge on date
+     merged = pd.merge(Flow_df[['date', 'Inflows']], S65E_Chla[['date', 'Data']], on='date', how='inner')
+     # Calculate Chlorophyll-a loads
+     merged['Chla_Loads'] = merged['Inflows'] * merged['Data']
+     # Save results
+     Chla_Loads_In = merged[['date', 'Chla_Loads']]
+     Chla_Loads_In.to_csv(f'{output_dir}/Chla_Loads_In_forecast.csv', index=False)
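A hedged usage sketch for the new module (the directory paths are placeholders): loads_predicted expects geoglows_flow_df_ens_01_predicted.csv (with an Inflows column) under input_dir and S65E_Chla_Merged_forecast.csv (with a Data column) under output_dir, and writes Chla_Loads_In_forecast.csv back to output_dir.

    from loone_data_prep.forecast_scripts.Chla_merged import loads_predicted

    loads_predicted("/data/loone/inputs", "/data/loone/outputs")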
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_Chla_predicted.py
@@ -0,0 +1,109 @@
+ import os
+ import pandas as pd
+ import datetime
+ from loone_data_prep.utils import get_synthetic_data
+
+ def get_Chla_predicted(input_dir, output_dir):
+     """
+     input_dir: Directory where the input files are located.
+     output_dir: Directory where the output files will be saved.
+     """
+     # Read forecast inflow file and get overall date range
+     # TODO: Should this be an average/median of all of the ensembles? worst case?
+     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
+     Q_in['date'] = pd.to_datetime(Q_in['date'])
+     date_start = Q_in['date'].min()
+     date_end = Q_in['date'].max()
+
+     # Define stations
+     stations = {
+         "L001": True,
+         "L004": True,
+         "L005": True,
+         "L006": True,
+         "L007": True,
+         "L008": True,
+         "LZ40": True
+     }
+
+     def load_and_check_forecast(station, suffix, start_date, end_date, forecast_suffix="_forecast"):
+         fname = f"water_quality_{station}_CHLOROPHYLL-A{suffix}.csv"
+         fpath = os.path.join(input_dir, fname)
+         df_full = pd.read_csv(fpath).drop(columns=["days"], errors="ignore")
+         df_full['date'] = pd.to_datetime(df_full['date'])
+         # Rename the specific column if it exists
+         possible_cols = [
+             f"{station}_CHLOROPHYLL-A, CORRECTED_ug/L",
+             f"{station}_CHLOROPHYLL-A(LC)_ug/L"
+         ]
+
+         original_col_name = None
+         for col in possible_cols:
+             if col in df_full.columns:
+                 df_full.rename(columns={col: "Data"}, inplace=True)
+                 original_col_name = col
+                 break
+
+         # Filter df to only rows between start_date and end_date
+         df_filtered = df_full[(df_full['date'] >= start_date) & (df_full['date'] <= end_date)]
+
+         # Check if full date range is covered; if not, fill with synthetic data
+         missing_dates = pd.date_range(start_date, end_date).difference(df_filtered['date'])
+         if len(missing_dates) > 0:
+             # Pass the original full historical df_full to get_synthetic_data, along with the forecast start_date
+             synthetic_df = get_synthetic_data(start_date, df_full)
+
+             # Rename "Data" back to original column name before saving
+             if original_col_name is not None:
+                 synthetic_df.rename(columns={"Data": original_col_name}, inplace=True)
+
+             # Save synthetic forecast file
+             forecast_fname = f"water_quality_{station}_CHLOROPHYLL-A{suffix}{forecast_suffix}.csv"
+             synthetic_df.to_csv(os.path.join(input_dir, forecast_fname), index=False)
+
+             return synthetic_df
+
+         return df_filtered
+
+     # Load data for all stations and both suffix types
+     chla_data = {}
+     chla_data_lc = {}
+
+     for station in stations:
+         chla_data[station] = load_and_check_forecast(station, ", CORRECTED", date_start, date_end)
+         chla_data_lc[station] = load_and_check_forecast(station, "(LC)", date_start, date_end)
+
+     # Merge function
+     def merge_chla_sources(chla_dict):
+         merged = None
+         for df in chla_dict.values():
+             if merged is None:
+                 merged = df
+             else:
+                 merged = pd.merge(merged, df, on="date", how="left")
+         merged = merged.loc[:, ~merged.columns.str.startswith("Unnamed")]
+         return merged
+
+     # Calculate aggregates
+     def calculate_chla_aggregates(df, suffix=""):
+         df = df.set_index("date")
+         df["Mean_Chla"] = df.mean(axis=1)
+         df["Chla_North"] = df[[col for col in df.columns if any(site in col for site in ["L001", "L005", "L008"])]].mean(axis=1)
+         df["Chla_South"] = df[[col for col in df.columns if any(site in col for site in ["L004", "L006", "L007", "L008", "LZ40"])]].mean(axis=1)
+         df = df.reset_index()
+         return df[["date", "Mean_Chla", "Chla_North", "Chla_South"]].rename(
+             columns={"Mean_Chla": f"Chla{suffix}", "Chla_North": f"Chla_N{suffix}", "Chla_South": f"Chla_S{suffix}"}
+         )
+
+     # Process and merge
+     LO_Chla = calculate_chla_aggregates(merge_chla_sources(chla_data))
+     LO_Chla_LC = calculate_chla_aggregates(merge_chla_sources(chla_data_lc))
+
+     # Merge the two dataframes (no date slicing here since all are limited by Q_in dates)
+     LO_Chla_Merge = pd.concat([LO_Chla, LO_Chla_LC]).reset_index(drop=True)
+
+     # Export
+     LO_Chla_Merge.to_csv(os.path.join(output_dir, "LO_Chla_Obs_predicted.csv"), index=False)
+     LO_Chla_Merge[["date", "Chla_N"]].rename(columns={"Chla_N": "Chla"}).to_csv(os.path.join(output_dir, "N_Merged_Chla_predicted.csv"), index=False)
+     LO_Chla_Merge[["date", "Chla_S"]].rename(columns={"Chla_S": "Chla"}).to_csv(os.path.join(output_dir, "S_Merged_Chla_predicted.csv"), index=False)
+     return
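The coverage check in load_and_check_forecast hinges on DatetimeIndex.difference, which returns the forecast dates missing from the station record; only when that set is non-empty does the function fall back to get_synthetic_data. The check in isolation, with hypothetical dates:

    import pandas as pd

    have = pd.to_datetime(["2024-06-01", "2024-06-03"])   # dates on file
    need = pd.date_range("2024-06-01", "2024-06-04")      # forecast window
    missing = need.difference(have)
    print(list(missing.strftime("%Y-%m-%d")))  # ['2024-06-02', '2024-06-04']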
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py
@@ -0,0 +1,97 @@
+ import os
+ import pandas as pd
+ import datetime
+ from loone_data_prep.utils import photo_period, get_synthetic_data
+
+ def get_NO_Loads_predicted(input_dir, output_dir):
+     """
+     input_dir: Directory where the input files are located.
+     output_dir: Directory where the output files will be saved.
+     This function reads the forecast inflow file, retrieves nitrate data for specified stations,
+     """
+     # TODO: Should this be an average/median of all of the ensembles? worst case?
+     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
+
+     datetime_str = Q_in['date'].iloc[0]
+     date_start = datetime.datetime.strptime(datetime_str, '%Y-%m-%d')
+     stations = [
+         "S65E", "S71", "S72", "S84", "S127", "S133",
+         "S154", "S191", "S308C", "FECSR78", "CULV10A", "S4"
+     ]
+
+     station_alias_map = {
+         "S65E": "S65_NO",
+         "S71": "S71_NO",
+         "S72": "S72_NO",
+         "S84": "S84_NO",
+         "S127": "S127_NO",
+         "S133": "S133_NO",
+         "S154": "S154_NO",
+         "S191": "S191_NO",
+         "S308C": "S308_NO",
+         "FECSR78": "FISHP_NO",
+         "CULV10A": "L8_NO",
+         "S4": "S4_NO"
+     }
+
+     NO_list = {}
+     NO_names = []
+
+     for station, alias in station_alias_map.items():
+         filename = f'water_quality_{station}_NITRATE+NITRITE-N_Interpolated.csv'
+         file_path = os.path.join(input_dir, filename)
+
+         try:
+             df = pd.read_csv(file_path)
+         except FileNotFoundError:
+             print(f"{filename} not found.")
+             continue
+
+         # Forecast if needed
+         if datetime_str not in df['date'].values:
+             df = get_synthetic_data(date_start, df)
+             df.to_csv(os.path.join(input_dir, f'water_quality_{station}_NITRATE+NITRITE-N_Interpolated_forecast.csv'), index=False)
+
+         NO_list[alias] = df
+         NO_names.append(alias)
+
+     # date_NO = pd.date_range(start='1/1/2008', end='3/31/2023', freq='D')
+     # Because of the flow df, I think this will be generated for every single ensemble member
+     for ensemble in range(1, 52):
+         Flow_df = pd.read_csv(f"{input_dir}/geoglows_flow_df_ens_{ensemble:02d}_predicted.csv")
+         Flow_df['date'] = pd.to_datetime(Flow_df['date'])
+
+         # Use Flow_df as the base for merging nitrate data
+         NO_df = Flow_df[['date']].copy()
+
+         for name in NO_names:
+             y = NO_list[name]
+             y.rename(columns={y.columns[-1]: name}, inplace=True)
+             NO_df = pd.merge(NO_df, y[['date', name]], on='date', how='left')
+
+         # Flow_df = DF_Date_Range(Flow_df, St_Yr, St_M, St_D, En_Yr, En_M, En_D)

+         NO_df['date'] = pd.to_datetime(NO_df['date'])
+
+         merged = pd.merge(NO_df, Flow_df, on='date', how='inner')
+
+         NO_Loads_In = merged[['date']].copy()
+
+         # Compute individual loads (edit flow variable names if needed)
+         NO_Loads_In['S65_NO_Ld'] = merged['S65_Q'] * merged['S65_NO'] * 1000
+         NO_Loads_In['S71_NO_Ld'] = merged['S71_Q'] * merged['S71_NO'] * 1000
+         # NO_Loads_In['S72_NO_Ld'] = merged['S72_Q'] * merged['S72_NO'] * 1000  # No RFS forecast data
+         NO_Loads_In['S84_NO_Ld'] = merged['S84_Q'] * merged['S84_NO'] * 1000
+         # NO_Loads_In['S127_NO_Ld'] = merged['S127_In'] * merged['S127_NO'] * 1000  # This should be in here, figure out where it went
+         NO_Loads_In['S133_NO_Ld'] = merged['S133_P_Q'] * merged['S133_NO'] * 1000
+         NO_Loads_In['S154_NO_Ld'] = merged['S154_Q'] * merged['S154_NO'] * 1000
+         # NO_Loads_In['S191_NO_Ld'] = merged['S191_Q'] * merged['S191_NO'] * 1000  # This should be in here, figure out where it went
+         NO_Loads_In['S308_NO_Ld'] = merged['S308_In'] * merged['S308_NO'] * 1000
+         NO_Loads_In['FISHP_NO_Ld'] = merged['FISHP_Q'] * merged['FISHP_NO'] * 1000
+         # NO_Loads_In['L8_NO_Ld'] = merged['L8_In'] * merged['L8_NO'] * 1000  # No RFS forecast data
+         # NO_Loads_In['S4_NO_Ld'] = merged['S4_P_Q'] * merged['S4_NO'] * 1000  # No RFS Forecast data
+
+         NO_Loads_In['External_NO_Ld_mg'] = NO_Loads_In.sum(axis=1, numeric_only=True)
+
+         NO_Loads_In.to_csv(f'{output_dir}/LO_External_Loadings_NO_ens_{ensemble:02d}_predicted.csv', index=False)
+     return
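Each active load line above follows the same shape: an element-wise flow-times-concentration product per date, with a constant 1000 as a unit conversion the diff does not spell out. A minimal sketch with hypothetical numbers, including the numeric_only row sum that produces External_NO_Ld_mg:

    import pandas as pd

    merged = pd.DataFrame({
        "date": pd.to_datetime(["2024-06-01", "2024-06-02"]),
        "S65_Q": [120.0, 95.5],    # flow (hypothetical values)
        "S65_NO": [0.031, 0.028],  # nitrate concentration (hypothetical values)
    })
    NO_Loads_In = merged[["date"]].copy()
    NO_Loads_In["S65_NO_Ld"] = merged["S65_Q"] * merged["S65_NO"] * 1000
    # sum(axis=1, numeric_only=True) totals the *_Ld columns, skipping 'date'
    NO_Loads_In["External_NO_Ld_mg"] = NO_Loads_In.sum(axis=1, numeric_only=True)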
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_q_predict.py
@@ -0,0 +1,159 @@
+ import os
+ import pandas as pd
+ from datetime import datetime, timedelta
+
+ def generate_historical_predictions(workspace, forecast_days=16):
+     """
+     Generate predictions for the next `forecast_days` days using historical daily averages
+     from the same calendar dates across previous years. Includes all the files for loone_q.
+
+     Args:
+         workspace : str
+             Path to the folder containing the CSV files.
+
+         forecast_days : int
+             Number of future days to predict (default = 16).
+
+     """
+
+     file_list = [
+         "Estuary_needs_water_Input.csv",
+         "Multi_Seasonal_LONINO.csv",
+         "Seasonal_LONINO.csv",
+         "SFWMM_Daily_Outputs.csv",
+         "Water_dmd.csv",
+         "EAA_MIA_RUNOFF_Inputs.csv",
+     ]
+
+     possible_date_cols = ['date', 'Date']
+     today = datetime.today()
+     current_year = today.year
+
+     for filename in file_list:
+         path = os.path.join(workspace, filename)
+
+         try:
+             df = pd.read_csv(path)
+         except Exception as e:
+             print(f"Could not read {filename}. Error: {e}")
+             continue
+
+         if filename in ["Multi_Seasonal_LONINO.csv", "Seasonal_LONINO.csv"]:
+             if "Year" not in df.columns:
+                 print(f"No 'Year' column in {filename}. Skipping.")
+                 continue
+
+             # Skip if current year already exists
+             if current_year in df["Year"].values:
+                 print(f"{current_year} already present in {filename}. No changes made.")
+                 continue
+
+             # Otherwise calculate averages and append
+             month_cols = [col for col in df.columns if col != "Year"]
+             monthly_means = df[month_cols].mean()
+
+             new_row = {"Year": current_year}
+             new_row.update(monthly_means.to_dict())
+
+             updated_df = pd.concat(
+                 [df, pd.DataFrame([new_row])],
+                 ignore_index=True
+             )
+             output_name = filename.replace(".csv", f"_forecast.csv")
+             output_path = os.path.join(workspace, output_name)
+             updated_df.to_csv(output_path, index=False)
+             print(f"Appended {current_year} row and saved to {output_path}")
+             continue
+
+         # Identify date column
+         date_col = None
+         for col in df.columns:
+             if col in possible_date_cols:
+                 date_col = col
+                 break
+
+         if date_col is None:
+             print(f"Could not detect date column in {filename}. Skipping.")
+             continue
+
+         # Parse dates
+         if filename in ["SFWMM_Daily_Outputs.csv", "Water_dmd.csv"]:
+             df[date_col] = pd.to_datetime(
+                 df[date_col],
+                 format="%d-%b-%y",
+                 errors="coerce"
+             )
+         else:
+             df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
+         df = df.dropna(subset=[date_col])
+         df["month_day"] = df[date_col].dt.strftime("%m-%d")
+
+         predictions_list = []
+
+         # Check if special handling is needed for the boolean file
+         if filename == "Estuary_needs_water_Input.csv":
+             bool_col = "Estuary Needs Water?"
+
+             if bool_col not in df.columns:
+                 print(f"Column '{bool_col}' not found in {filename}. Skipping.")
+                 continue
+
+             # Convert string "True"/"False" to boolean if necessary
+             if df[bool_col].dtype == object:
+                 df[bool_col] = df[bool_col].map({"True": True, "False": False}).fillna(df[bool_col])
+
+             # Compute mode (most frequent value) for each day for each boolean column
+             mode_series = df.groupby("month_day")[bool_col].agg(
+                 lambda x: x.mode().iloc[0] if not x.mode().empty else None
+             )
+             for i in range(1, forecast_days + 1):
+                 future_date = today + timedelta(days=i)
+                 mmdd = future_date.strftime("%m-%d")
+
+                 if mmdd in mode_series.index:
+                     pred_value = mode_series.loc[mmdd]
+                 else:
+                     print(f"No historical data for {mmdd} in {filename}. Skipping that day.")
+                     pred_value = None
+
+                 predictions_list.append({
+                     date_col: future_date,
+                     bool_col: pred_value
+                 })
+
+             pred_df = pd.DataFrame(predictions_list)
+             pred_df = pred_df[[date_col, bool_col]]
+
+         else:
+             # Numeric file handling
+             numeric_cols = df.select_dtypes(include='number').columns.tolist()
+             if not numeric_cols:
+                 print(f"No numeric columns in {filename}. Skipping.")
+                 continue
+
+             historical_means = df.groupby("month_day")[numeric_cols].mean()
+
+             for i in range(0, forecast_days + 1):
+                 future_date = (today + timedelta(days=i)).date()
+                 mmdd = future_date.strftime("%m-%d")
+                 if mmdd in historical_means.index:
+                     row = historical_means.loc[mmdd].copy()
+                     row[date_col] = future_date
+                     predictions_list.append(row)
+                 else:
+                     print(f"No historical data for {mmdd} in {filename}. Skipping that day.")
+
+             if predictions_list:
+                 pred_df = pd.DataFrame(predictions_list)
+                 pred_df = pred_df[[date_col] + [col for col in pred_df.columns if col != date_col]]
+             else:
+                 print(f"No predictions generated for {filename}.")
+                 continue
+
+         # Save predictions
+         output_name = filename.replace(".csv", f"_forecast.csv")
+         output_path = os.path.join(workspace, output_name)
+         pred_df.to_csv(output_path, index=False)
+         print(f"Predictions saved to {output_path}")
+
+     return
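generate_historical_predictions is a day-of-year climatology: history is grouped on an "MM-DD" key, and each forecast date looks up the average (or, for the boolean file, the mode) recorded on that calendar day in past years. The lookup in isolation, with hypothetical data:

    import pandas as pd

    df = pd.DataFrame({
        "date": pd.to_datetime(["2022-06-01", "2023-06-01", "2023-06-02"]),
        "stage_ft": [13.1, 13.7, 13.5],
    })
    df["month_day"] = df["date"].dt.strftime("%m-%d")
    means = df.groupby("month_day")["stage_ft"].mean()
    print(means.loc["06-01"])  # 13.4, the average of the two June 1 values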
loone_data_prep-1.2.1/loone_data_prep/forecast_scripts/loone_wq_predict.py
@@ -0,0 +1,71 @@
+ import pandas as pd
+ import os
+
+ def create_forecasts(workspace):
+     """
+     Reads the four specified CSV files from `workspace`,
+     creates forecast versions using historical daily averages,
+     and writes new CSV files into the same folder.
+
+     The forecast always starts today and goes 16 days forward.
+     """
+
+     # List of filenames
+     files = [
+         'N_OP.csv',
+         'S_OP.csv',
+         'N_DIN.csv',
+         'S_DIN.csv',
+         'LO_DO_Clean_daily.csv'
+     ]
+
+     def forecast_df(df, date_column='date'):
+         # Parse dates
+         df[date_column] = pd.to_datetime(df[date_column])
+
+         # Add month and day columns
+         df['month'] = df[date_column].dt.month
+         df['day'] = df[date_column].dt.day
+
+         # Identify numeric columns to forecast
+         value_columns = df.columns.difference([date_column, 'month', 'day'])
+
+         # Compute historical averages
+         avg = df.groupby(['month', 'day'])[value_columns].mean().reset_index()
+
+         # Create forecast dates: today + next 15 days
+         forecast_dates = pd.date_range(
+             start=pd.Timestamp.today().normalize(),
+             periods=16,
+             freq='D'
+         )
+
+         forecast_df = pd.DataFrame({date_column: forecast_dates})
+         forecast_df['month'] = forecast_df[date_column].dt.month
+         forecast_df['day'] = forecast_df[date_column].dt.day
+
+         # Merge with historical averages
+         forecast_df = forecast_df.merge(avg, on=['month', 'day'], how='left')
+
+         # Drop helper columns
+         forecast_df.drop(columns=['month', 'day'], inplace=True)
+
+         return forecast_df
+
+     # Process each file
+     for filename in files:
+         # Read file
+         file_path = os.path.join(workspace, filename)
+         df = pd.read_csv(file_path)
+
+         # Build forecast
+         forecast = forecast_df(df, date_column='date')
+
+         # Save new file
+         forecast_filename = filename.replace('.csv', '_forecast.csv')
+         forecast_path = os.path.join(workspace, forecast_filename)
+         forecast.to_csv(forecast_path, index=False)
+
+     print("Forecast files created successfully.")
+
+
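A hedged invocation sketch (the workspace path is a placeholder): the folder must already hold the five CSVs listed in files, each with a date column, and the call writes a *_forecast.csv next to each one.

    from loone_data_prep.forecast_scripts.loone_wq_predict import create_forecasts

    create_forecasts("/data/loone/workspace")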
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/forecast_scripts/trib_cond.py
@@ -50,7 +50,15 @@ def create_trib_cond (weather_data, net_inflows, main_tributary, PI, output, ens

      # Calculate NetRF and NetInf
      Trib_Cond_Wkly['NetRF'] = Net_RF_Weekly['tp_corrected'].values - Net_RF_Weekly['evapotranspiration'].values
-     Trib_Cond_Wkly['NetInf'] = Net_Inflow_Weekly['Net_Inflows'].values
+     # First, reset index so that 'date' becomes a column in Net_Inflow_Weekly
+     Net_Inflow_Weekly_reset = Net_Inflow_Weekly.reset_index()
+
+     # Merge the dataframes on 'date'
+     Trib_Cond_Wkly = Trib_Cond_Wkly.merge(Net_Inflow_Weekly_reset[['date', 'Net_Inflows']], on='date', how='left')
+
+     # Now Trib_Cond_Wkly will have a new 'Net_Inflows' column aligned by date
+     Trib_Cond_Wkly.rename(columns={'Net_Inflows': 'NetInf'}, inplace=True)
+

      # Select only the desired ensemble column and rename it
      S65E_selected = S65E_Weekly[[ensemble_col]].rename(columns={ensemble_col: "S65E"})
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep/utils.py
@@ -466,12 +466,15 @@ def wind_induced_waves(
      lo_stage_in: str = "LO_Stg_Sto_SA_2008-2023.csv",
      wind_shear_stress_out: str = "WindShearStress.csv",
      current_shear_stress_out: str = "Current_ShearStress.csv",
+     forecast: bool = False,
  ):
      # Read Mean Wind Speed in LO
      LO_WS = pd.read_csv(os.path.join(f"{input_dir}/", wind_speed_in))
      LO_WS["WS_mps"] = LO_WS["LO_Avg_WS_MPH"] * 0.44704  # MPH to m/s
      # Read LO Stage to consider water depth changes
      LO_Stage = pd.read_csv(os.path.join(f"{input_dir}/", lo_stage_in))
+     if forecast:
+         LO_Stage["Stage_ft"] = LO_Stage["Stage"].astype(float)
      LO_Stage["Stage_m"] = LO_Stage["Stage_ft"] * 0.3048
      Bottom_Elev = 0.5  # m (Karl E. Havens • Alan D. Steinman 2013)
      LO_Wd = LO_Stage["Stage_m"] - Bottom_Elev
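The new forecast flag covers stage files whose column is named Stage rather than Stage_ft; the values are copied across (cast to float) so the existing feet-to-meters conversion still applies. A minimal sketch, assuming a hypothetical forecast file that stores stage as strings:

    import pandas as pd

    LO_Stage = pd.DataFrame({"Stage": ["13.5", "13.6"]})  # hypothetical forecast input
    forecast = True
    if forecast:
        LO_Stage["Stage_ft"] = LO_Stage["Stage"].astype(float)
    LO_Stage["Stage_m"] = LO_Stage["Stage_ft"] * 0.3048  # ft -> m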
@@ -998,7 +1001,15 @@ def get_synthetic_data(date_start: str, df: pd.DataFrame):

      # Group by the month and day, then calculate the average for each group
      average_values = filtered_data.groupby('month_day')['Data'].mean()
-
+     # Interpolate in case there are missing values:
+     start_date = pd.to_datetime('2001-' + start_month_day)
+     end_date = pd.to_datetime('2001-' + end_month_day)
+
+     full_dates = pd.date_range(start=start_date, end=end_date)
+     full_index = full_dates.strftime('%m-%d')
+
+     average_values = average_values.reindex(full_index)
+     average_values = average_values.interpolate(method='linear')
      average_values_df = pd.DataFrame({
          'date': pd.date_range(start=date_start, end=date_end),
          'Data': average_values.values
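The added block reindexes the month-day means onto the full calendar span (using 2001 as a dummy year) before interpolating, so calendar days with no historical observations no longer drop out of the synthetic series. The mechanism in isolation, with hypothetical values:

    import pandas as pd

    avg = pd.Series([1.0, 3.0], index=["06-01", "06-03"])  # June 2 has no samples
    full = pd.date_range("2001-06-01", "2001-06-04").strftime("%m-%d")
    filled = avg.reindex(full).interpolate(method="linear")
    print(filled.tolist())  # [1.0, 2.0, 3.0, 3.0] -- the trailing gap holds the last value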
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loone_data_prep
- Version: 1.1.2
+ Version: 1.2.1
  Summary: Prepare data to run the LOONE model.
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/loone_data_prep.egg-info/SOURCES.txt
@@ -18,8 +18,13 @@ loone_data_prep/flow_data/get_forecast_flows.py
  loone_data_prep/flow_data/get_inflows.py
  loone_data_prep/flow_data/get_outflows.py
  loone_data_prep/flow_data/hydro.py
+ loone_data_prep/forecast_scripts/Chla_merged.py
  loone_data_prep/forecast_scripts/create_forecast_LOWs.py
  loone_data_prep/forecast_scripts/forecast_stages.py
+ loone_data_prep/forecast_scripts/get_Chla_predicted.py
+ loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py
+ loone_data_prep/forecast_scripts/loone_q_predict.py
+ loone_data_prep/forecast_scripts/loone_wq_predict.py
  loone_data_prep/forecast_scripts/predict_PI.py
  loone_data_prep/forecast_scripts/trib_cond.py
  loone_data_prep/forecast_scripts/weather_forecast.py
{loone_data_prep-1.1.2 → loone_data_prep-1.2.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "loone_data_prep"
- version = "1.1.2"
+ version = "1.2.1"
  description = "Prepare data to run the LOONE model."
  readme = "README.md"
  license = { file = "LICENSE" }
The remaining files (15-42 in the list above, each +0 -0) have no content changes.