loone-data-prep 1.2.3__tar.gz → 1.3.0__tar.gz
This diff shows the changes between publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/PKG-INFO +1 -1
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/LOONE_DATA_PREP.py +0 -1
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
- loone_data_prep-1.3.0/loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
- loone_data_prep-1.3.0/loone_data_prep/herbie_utils.py +29 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/utils.py +19 -2
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/PKG-INFO +1 -1
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/SOURCES.txt +2 -2
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/pyproject.toml +1 -1
- loone_data_prep-1.2.3/loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -127
- loone_data_prep-1.2.3/loone_data_prep/forecast_scripts/weather_forecast.py +0 -155
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/LICENSE +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/README.md +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/__init__.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/data_analyses_fns.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/S65E_total.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/__init__.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/forecast_bias_correction.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/get_forecast_flows.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/get_inflows.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/get_outflows.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/flow_data/hydro.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/Chla_merged.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/forecast_stages.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/loone_q_predict.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/loone_wq_predict.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/predict_PI.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/trib_cond.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_level_data/__init__.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_level_data/get_all.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_level_data/hydro.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_quality_data/__init__.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_quality_data/get_inflows.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_quality_data/get_lake_wq.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/water_quality_data/wq.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/weather_data/__init__.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/weather_data/get_all.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/weather_data/weather.py +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/dependency_links.txt +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/requires.txt +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/top_level.txt +0 -0
- {loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/setup.cfg +0 -0
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loone_data_prep
-Version: 1.2.3
+Version: 1.3.0
 Summary: Prepare data to run the LOONE model.
 Author-email: Osama Tarabih <osamatarabih@usf.edu>
 Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py

@@ -15,7 +15,7 @@ from loone_data_prep.utils import stg2sto, stg2ar
 import datetime
 
 START_DATE = datetime.datetime.now()
-END_DATE = START_DATE + datetime.timedelta(days=
+END_DATE = START_DATE + datetime.timedelta(days=14)
 
 M3_Yr = 2008
 M3_M = 1
@@ -373,8 +373,8 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None:  # , hi
     C44RO_df['C44RO_cmd'] = C44RO
     C43RO_df['C43RO'] = C43RO_df['C43RO_cmd']/(0.0283168466 * 86400)
     C44RO_df['C44RO'] = C44RO_df['C44RO_cmd']/(0.0283168466 * 86400)
-    C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv'
-    C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv'
+    C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv')
+    C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv')
     C43RO_df.index = pd.to_datetime(C43RO_df["date"])
     C43RO_df = C43RO_df.drop(columns="date")
 
@@ -384,13 +384,13 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None:  # , hi
     C43Mon = C43RO_df.resample('ME').mean()
     C44Mon = C44RO_df.resample('ME').mean()
 
-    C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv'
-    C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv'
+    C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv')
+    C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv')
     Basin_RO = pd.DataFrame(C44Mon.index, columns=['date'])
     # Basin_RO['SLTRIB'] = SLTRIBMon['SLTRIB_cfs'].values * 1.9835  # cfs to acft
     Basin_RO['C44RO'] = C44Mon['C44RO'].values * 86400
     Basin_RO['C43RO'] = C43Mon['C43RO'].values * 86400
-    Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv'
+    Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv')
 
     # # Get monthly C43RO and C44RO from historical run
     # shutil.copyfile(os.path.join(historical_files_src, "C43RO_Monthly.csv"), os.path.join(output_dir, 'C43RO_Monthly.csv'))
@@ -461,16 +461,47 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None:  # , hi
     LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")
 
     # # RFVol acft
-
-    #
-    #
-
-
-    #
-
-
-    #
-    #
+    RF_data = pd.read_csv(f'{input_dir}/LAKE_RAINFALL_DATA_FORECAST.csv')
+    # RF_data_copy = RF_data.copy()
+    # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+    RF_data['date'] = pd.to_datetime(RF_data['date'])
+    # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+    # LO_Stg_Sto_SA_df_copy.index.name = None
+
+
+    # merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+    #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+    #                         on='date', how='inner')
+    #I am just using the most recent SA_acres value for all forecast dates since we do not have forecasted surface area
+    RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
+    RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+
+    date_reference = RFVol['date'].iloc[0]
+    date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+    df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
+    RFVol = pd.concat([df_insert, RFVol])
+    RFVol.to_csv(f'{output_dir}/RFVol_Forecast.csv', index=False)
+
+    # ETVol acft
+    # Create File (ETVol)
+    # Merge the DataFrames on date to ensure matching rows
+    ET_data = pd.read_csv(f'{input_dir}/LOONE_AVERAGE_ETPI_DATA_FORECAST.csv')
+    # ET_data_copy = ET_data.copy()
+    # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+    ET_data['date'] = pd.to_datetime(ET_data['date'])
+    # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+    # merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+    #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+    #                         on='date', how='inner')
+
+    ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
+    ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+    date_reference = ETVol['date'].iloc[0]
+    date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+    df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
+    ETVol = pd.concat([df_insert, ETVol])
+    ETVol.to_csv(f'{output_dir}/ETVol_forecast.csv', index=False)
+
 
     # # WCA Stages
     # # Create File (WCA_Stages_Inputs)
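Note on the RFVol/ETVol math added above: dividing a depth in inches by 12 converts it to feet, and multiplying by the lake surface area in acres yields acre-feet; the last observed SA_acres value stands in for all forecast dates. A minimal standalone sketch of the same conversion (the rainfall values and surface-area figure below are made up for illustration):

import pandas as pd

# Toy forecast rainfall in inches/day; the real script reads
# LAKE_RAINFALL_DATA_FORECAST.csv and takes the surface area from
# LO_Stg_Sto_SA_df["SA_acres"].iloc[-1].
rf = pd.DataFrame({
    "date": pd.date_range("2024-06-01", periods=3),
    "average_rainfall": [0.10, 0.25, 0.00],
})
sa_acres = 450_000.0  # hypothetical surface area in acres

# inches -> feet (divide by 12), then feet x acres -> acre-feet
rf["RFVol_acft"] = (rf["average_rainfall"] / 12) * sa_acres
print(rf)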
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/LOONE_DATA_PREP.py

@@ -351,7 +351,6 @@ def main(input_dir: str, output_dir: str) -> None:
     S65E.index = pd.to_datetime(S65E.index, unit='ns')
     S65E_Weekly = S65E.resample('W-FRI').mean()
     # PI
-    # TODO
     # This is prepared manually
     # Weekly data is downloaded from https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/time-series/0804
     # State:Florida Division:4.South Central
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/get_Chla_predicted.py

@@ -9,7 +9,7 @@ def get_Chla_predicted(input_dir, output_dir):
         output_dir: Directory where the output files will be saved.
     """
     # Read forecast inflow file and get overall date range
-    #
+    # We are only taking the dates, so it is okay to just use one ensemble because they all have the same dates
     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
     Q_in['date'] = pd.to_datetime(Q_in['date'])
     date_start = Q_in['date'].min()
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py

@@ -9,7 +9,7 @@ def get_NO_Loads_predicted(input_dir, output_dir):
         output_dir: Directory where the output files will be saved.
     This function reads the forecast inflow file, retrieves nitrate data for specified stations,
     """
-    #
+    # It is okay to use just one ensemble because they all have the same dates and we only use the dates
     Q_in = pd.read_csv(os.path.join(input_dir, 'LO_Inflows_BK_forecast_01.csv'))
 
     datetime_str = Q_in['date'].iloc[0]
loone_data_prep-1.3.0/loone_data_prep/forecast_scripts/new_combined_weather_forecast.py (new file)

@@ -0,0 +1,220 @@
+import os
+import warnings
+import pandas as pd
+from datetime import datetime
+from retry import retry
+from loone_data_prep.herbie_utils import get_fast_herbie_object
+from herbie import FastHerbie
+import openmeteo_requests
+from retry_requests import retry as retry_requests
+import requests_cache
+
+warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
+
+POINTS = pd.DataFrame({
+    "station": ["L001", "L005", "L006", "LZ40"],
+    "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
+    "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
+})
+
+WIND_FILE_MAP = {
+    "L001": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
+    "L005": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
+    "L006": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
+    "LZ40": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
+}
+
+AIRT_FILE_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius_forecast.csv",
+    "L005": "L005_AIRT_Degrees Celsius_forecast.csv",
+    "L006": "L006_AIRT_Degrees Celsius_forecast.csv",
+    "LZ40": "LZ40_AIRT_Degrees Celsius_forecast.csv"
+}
+
+AIRT_COLUMN_MAP = {
+    "L001": "L001_AIRT_Degrees Celsius",
+    "L005": "L005_AIRT_Degrees Celsius",
+    "L006": "L006_AIRT_Degrees Celsius",
+    "LZ40": "LZ40_AIRT_Degrees Celsius"
+}
+
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+def download_herbie_variable(FH, variable_key, variable_name, point_df):
+    """Download a Herbie variable for a given point and return a DataFrame."""
+    FH.download(f":{variable_key}")
+    ds = FH.xarray(f":{variable_key}", backend_kwargs={"decode_timedelta": True})
+    dsi = ds.herbie.pick_points(point_df, method="nearest")
+
+    var_name = {
+        "10u": "u10",
+        "10v": "v10",
+        "2t": "t2m"
+    }.get(variable_name, variable_name)
+
+    ts = dsi[var_name].squeeze()
+    df = ts.to_dataframe().reset_index()
+    if "valid_time" in df.columns:
+        df.rename(columns={"valid_time": "datetime"}, inplace=True)
+    elif "time" in df.columns:
+        df.rename(columns={"time": "datetime"}, inplace=True)
+
+    df = df[["datetime", var_name]].drop_duplicates()
+    ds.close()
+    dsi.close()
+    del ds, dsi, ts
+    return df
+
+# Download ET from Open-Meteo
+def download_hourly_et(lat, lon):
+    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
+    retry_session = retry_requests(cache_session, retries=5, backoff_factor=0.2)
+    client = openmeteo_requests.Client(session=retry_session)
+
+    url = "https://api.open-meteo.com/v1/forecast"
+    params = {
+        "latitude": lat,
+        "longitude": lon,
+        "hourly": "evapotranspiration",
+        "forecast_days": 16,
+        "models": "gfs_seamless"
+    }
+    responses = client.weather_api(url, params=params)
+    response = responses[0]
+
+    hourly = response.Hourly()
+    hourly_evap = hourly.Variables(0).ValuesAsNumpy()
+    hourly_data = {"date": pd.date_range(
+        start=pd.to_datetime(hourly.Time(), unit="s"),
+        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
+        freq=pd.Timedelta(seconds=hourly.Interval()),
+        inclusive="left"
+    )}
+    hourly_data["evapotranspiration"] = hourly_evap
+    return pd.DataFrame(hourly_data)
+
+# Main generation function
+def generate_all_outputs(output_dir):
+    os.makedirs(output_dir, exist_ok=True)
+    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
+    FH = get_fast_herbie_object(today_str)
+
+    # Forecasted weather data (single point)
+    point_df = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
+    forecast_vars = ["10u", "10v", "2t", "tp", "ssrd"]
+    data = {var: download_herbie_variable(FH, var, var, point_df) for var in forecast_vars}
+
+    merged = data["10u"].merge(data["10v"], on="datetime")
+    merged = merged.merge(data["2t"], on="datetime")
+    merged = merged.merge(data["tp"], on="datetime")
+    merged = merged.merge(data["ssrd"], on="datetime")
+
+    # Derived columns
+    merged["wind_speed"] = (merged["u10"]**2 + merged["v10"]**2)**0.5  # wind speed in m/s
+    merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
+    merged["tp_inc_m"] = merged["tp"].diff().clip(lower=0)
+    # Convert incremental meters → mm
+    merged["tp_inc_mm"] = merged["tp_inc_m"] * 1000.0
+    # Apply bias correction (in mm)
+    merged["tp_corrected_mm"] = 0.7247 * merged["tp_inc_mm"] + 0.1853
+    # convert to inches
+    merged["tp_corrected"] = merged["tp_corrected_mm"] * 0.0393701
+
+    merged["ssrd_kwm2"] = merged["ssrd"].diff() / merged["datetime"].diff().dt.total_seconds() / 1000
+    merged["ssrd_corrected"] = (1.0530 * merged["ssrd_kwm2"] - 0.0347).clip(lower=0)
+    merged = merged[[
+        "datetime",
+        "wind_speed_corrected",
+        "tp_corrected",
+        "ssrd_corrected"
+    ]]
+
+    # ET for main point
+    df_et = download_hourly_et(26.9690, -80.7976)
+    merged = merged.merge(df_et, left_on="datetime", right_on="date", how="left").drop(columns=["date"])
+    merged.to_csv(os.path.join(output_dir, "forecasted_weather_data.csv"), index=False)
+
+    # 4-point wind and air temp CSVs
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Wind
+        df_u = download_herbie_variable(FH, "10u", "10u", point_df)
+        df_v = download_herbie_variable(FH, "10v", "10v", point_df)
+        merged_ws = df_u.merge(df_v, on="datetime")
+        merged_ws["wind_speed"] = (merged_ws["u10"]**2 + merged_ws["v10"]**2)**0.5
+        merged_ws["wind_speed_corrected"] = 0.4167 * merged_ws["wind_speed"] + 4.1868
+
+        filename, new_col = WIND_FILE_MAP[station]
+        merged_ws[["datetime", "wind_speed_corrected"]].rename(
+            columns={"datetime": "date", "wind_speed_corrected": new_col}
+        ).to_csv(os.path.join(output_dir, filename), index=False)
+
+        # Air temp
+        df_t = download_herbie_variable(FH, "2t", "2t", point_df)
+        df_t["t2m"] = df_t["t2m"] - 273.15
+        df_t.rename(columns={"datetime": "date", "t2m": AIRT_COLUMN_MAP[station]}).to_csv(
+            os.path.join(output_dir, AIRT_FILE_MAP[station]), index=False
+        )
+
+    # Rainfall, ET, and SSRD 4-point CSVs
+    rainfall_dfs, et_dfs, ssrd_dfs = [], [], []
+
+    for idx, row in POINTS.iterrows():
+        station = row["station"]
+        point_df = pd.DataFrame({"longitude": [row.longitude], "latitude": [row.latitude]})
+
+        # Rainfall
+        df_tp = download_herbie_variable(FH, "tp", "tp", point_df)
+        # Convert cumulative meters → incremental meters
+        df_tp["tp_inc_m"] = df_tp["tp"].diff().clip(lower=0)
+        # Convert incremental meters → millimeters
+        df_tp["tp_inc_mm"] = df_tp["tp_inc_m"] * 1000.0
+        df_tp["date_only"] = df_tp["datetime"].dt.date
+        # Sum incremental precipitation per day
+        df_daily = df_tp.groupby("date_only")["tp_inc_mm"].sum().reset_index()
+        # Apply bias correction on daily totals (in mm)
+        df_daily["tp_corrected_mm"] = 0.7247 * df_daily["tp_inc_mm"] + 0.1853
+        # Convert corrected mm → inches
+        df_daily["tp_corrected_in"] = df_daily["tp_corrected_mm"] * 0.0393701
+        df_daily = df_daily.rename(columns={"date_only": "date", "tp_corrected_in": station})
+        rainfall_dfs.append(df_daily[["date", station]])
+
+        # ET
+        df_et_point = download_hourly_et(row.latitude, row.longitude)
+        df_et_point.rename(columns={"evapotranspiration": station}, inplace=True)
+        et_dfs.append(df_et_point)
+
+        # SSRD
+        df_ssrd = download_herbie_variable(FH, "ssrd", "ssrd", point_df)
+        df_ssrd["ssrd_kwm2"] = df_ssrd["ssrd"].diff() / df_ssrd["datetime"].diff().dt.total_seconds() / 1000
+        df_ssrd["ssrd_corrected"] = (1.0530 * df_ssrd["ssrd_kwm2"] - 0.0347).clip(lower=0)
+        df_ssrd = df_ssrd[["datetime", "ssrd_corrected"]].rename(columns={"datetime": "date", "ssrd_corrected": station})
+        ssrd_dfs.append(df_ssrd)
+
+    # Merge rainfall
+    rainfall_df = pd.concat(rainfall_dfs, axis=0).groupby("date").first().reset_index()
+    rainfall_df["average_rainfall"] = rainfall_df[POINTS["station"]].mean(axis=1)
+    rainfall_df.to_csv(os.path.join(output_dir, "LAKE_RAINFALL_DATA_FORECAST.csv"), index=False)
+
+    # Merge ET
+    et_df_all = pd.concat(et_dfs, axis=0).groupby("date").first().reset_index()
+    et_df_all["average_ETPI"] = et_df_all[POINTS["station"]].mean(axis=1)
+    et_df_all.to_csv(os.path.join(output_dir, "LOONE_AVERAGE_ETPI_DATA_FORECAST.csv"), index=False)
+
+    # Combine all SSRD DataFrames
+    ssrd_df_all = pd.concat(ssrd_dfs, axis=0)
+    ssrd_df_all["date"] = pd.to_datetime(ssrd_df_all["date"])
+
+    # Compute the daily mean for each station
+    daily_ssrd = (
+        ssrd_df_all.groupby(ssrd_df_all["date"].dt.date)[POINTS["station"]]
+        .mean()
+        .reset_index()
+    )
+
+    daily_ssrd = daily_ssrd.rename(columns={"date": "date"})
+    daily_ssrd["Mean_RADT"] = daily_ssrd[POINTS["station"]].mean(axis=1)
+    daily_ssrd.to_csv(os.path.join(output_dir, "LO_RADT_data_forecast.csv"), index=False)
+
+    print("All outputs generated successfully.")
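For orientation, the new module's entry point is generate_all_outputs, which produces every CSV named above in one pass. A minimal usage sketch (the output path is hypothetical):

from loone_data_prep.forecast_scripts.new_combined_weather_forecast import generate_all_outputs

# Writes forecasted_weather_data.csv, the per-station wind and air-temperature
# CSVs, LAKE_RAINFALL_DATA_FORECAST.csv, LOONE_AVERAGE_ETPI_DATA_FORECAST.csv,
# and LO_RADT_data_forecast.csv into the given directory.
generate_all_outputs("/path/to/forecast_outputs")  # hypothetical path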
loone_data_prep-1.3.0/loone_data_prep/herbie_utils.py (new file)

@@ -0,0 +1,29 @@
+from retry import retry
+from herbie import FastHerbie
+
+
+class NoGribFilesFoundError(Exception):
+    """Raised when no GRIB files are found for the specified date/model run."""
+    pass
+
+
+@retry(NoGribFilesFoundError, tries=5, delay=15, max_delay=60, backoff=2)
+def get_fast_herbie_object(date: str) -> FastHerbie:
+    """
+    Get a FastHerbie object for the specified date. Raises an exception when no GRIB files are found.
+
+    Args:
+        date: pandas-parsable datetime string
+
+    Returns:
+        A FastHerbie object configured for the specified date.
+
+    Raises:
+        NoGribFilesFoundError: If no GRIB files are found for the specified date.
+    """
+    fast_herbie = FastHerbie([date], model="ifs", fxx=range(0, 360, 3))
+
+    if len(fast_herbie.file_exists) == 0:
+        raise NoGribFilesFoundError(f"No GRIB files found for the specified date {date}.")
+
+    return fast_herbie
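The @retry decorator here retries only on NoGribFilesFoundError, waiting 15 s, 30 s, 60 s, then 60 s (capped by max_delay) between the five attempts, so a model run that has not been published yet gets a grace period before the error propagates. A usage sketch (the date string is illustrative):

from loone_data_prep.herbie_utils import NoGribFilesFoundError, get_fast_herbie_object

try:
    # 00Z IFS run, 3-hourly lead times out to 360 hours (15 days)
    FH = get_fast_herbie_object("2025-01-15 00:00")  # any pandas-parsable date
    FH.download(":10u")  # e.g. fetch 10 m u-wind, as the forecast scripts do
except NoGribFilesFoundError:
    # raised only after all 5 tries are exhausted
    print("IFS GRIB files are not available yet for this date")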
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep/utils.py

@@ -996,14 +996,31 @@ def get_synthetic_data(date_start: str, df: pd.DataFrame):
     end_month_day = date_end.strftime('%m-%d')
 
     # Filter the DataFrame to include only rows between date_start and date_end for all previous years
-    mask = (df['month_day'] >= start_month_day) & (df['month_day'] <= end_month_day)
+    # (handle year wrap, e.g., Dec -> Jan)
+    wraps_year = start_month_day > end_month_day
+
+    if wraps_year:
+        mask = (
+            (df['month_day'] >= start_month_day) |
+            (df['month_day'] <= end_month_day)
+        )
+    else:
+        mask = (
+            (df['month_day'] >= start_month_day) &
+            (df['month_day'] <= end_month_day)
+        )
+
     filtered_data = df.loc[mask]
 
     # Group by the month and day, then calculate the average for each group
     average_values = filtered_data.groupby('month_day')['Data'].mean()
     # Interpolate in case there are missing values:
     start_date = pd.to_datetime('2001-' + start_month_day)
-    end_date = pd.to_datetime('2001-' + end_month_day)
+
+    if wraps_year:
+        end_date = pd.to_datetime('2002-' + end_month_day)
+    else:
+        end_date = pd.to_datetime('2001-' + end_month_day)
 
     full_dates = pd.date_range(start=start_date, end=end_date)
     full_index = full_dates.strftime('%m-%d')
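The wrap fix above matters because month-day strings compare lexicographically: a window entirely inside one calendar year can use a single AND, but a window like 12-20 through 01-03 needs the OR form. A small self-contained check (data values made up):

import pandas as pd

df = pd.DataFrame({
    "month_day": ["12-19", "12-25", "01-02", "01-10"],
    "Data": [1.0, 2.0, 3.0, 4.0],
})

start_month_day, end_month_day = "12-20", "01-03"
wraps_year = start_month_day > end_month_day  # True: the window crosses Dec 31

if wraps_year:
    mask = (df["month_day"] >= start_month_day) | (df["month_day"] <= end_month_day)
else:
    mask = (df["month_day"] >= start_month_day) & (df["month_day"] <= end_month_day)

print(df.loc[mask])  # keeps 12-25 and 01-02; drops 12-19 and 01-10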
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loone_data_prep
-Version: 1.2.3
+Version: 1.3.0
 Summary: Prepare data to run the LOONE model.
 Author-email: Osama Tarabih <osamatarabih@usf.edu>
 Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
{loone_data_prep-1.2.3 → loone_data_prep-1.3.0}/loone_data_prep.egg-info/SOURCES.txt

@@ -5,6 +5,7 @@ loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py
 loone_data_prep/LOONE_DATA_PREP.py
 loone_data_prep/__init__.py
 loone_data_prep/data_analyses_fns.py
+loone_data_prep/herbie_utils.py
 loone_data_prep/utils.py
 loone_data_prep.egg-info/PKG-INFO
 loone_data_prep.egg-info/SOURCES.txt
@@ -19,15 +20,14 @@ loone_data_prep/flow_data/get_inflows.py
 loone_data_prep/flow_data/get_outflows.py
 loone_data_prep/flow_data/hydro.py
 loone_data_prep/forecast_scripts/Chla_merged.py
-loone_data_prep/forecast_scripts/create_forecast_LOWs.py
 loone_data_prep/forecast_scripts/forecast_stages.py
 loone_data_prep/forecast_scripts/get_Chla_predicted.py
 loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py
 loone_data_prep/forecast_scripts/loone_q_predict.py
 loone_data_prep/forecast_scripts/loone_wq_predict.py
+loone_data_prep/forecast_scripts/new_combined_weather_forecast.py
 loone_data_prep/forecast_scripts/predict_PI.py
 loone_data_prep/forecast_scripts/trib_cond.py
-loone_data_prep/forecast_scripts/weather_forecast.py
 loone_data_prep/water_level_data/__init__.py
 loone_data_prep/water_level_data/get_all.py
 loone_data_prep/water_level_data/hydro.py
loone_data_prep-1.2.3/loone_data_prep/forecast_scripts/create_forecast_LOWs.py (deleted)

@@ -1,127 +0,0 @@
-import os
-from herbie import FastHerbie
-from datetime import datetime
-import pandas as pd
-from retry_requests import retry
-import warnings
-
-
-def generate_wind_forecasts(output_dir):
-    # Ensure output directory exists
-    warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
-    os.makedirs(output_dir, exist_ok=True)
-
-    # Define points of interest
-    points = pd.DataFrame({
-        "longitude": [-80.7934, -80.9724, -80.7828, -80.7890],
-        "latitude": [27.1389, 26.9567, 26.8226, 26.9018]
-    })
-
-    # Station-specific file and column names
-    file_map = {
-        "Point_1": ("L001_WNDS_MPH_predicted.csv", "L001_WNDS_MPH"),
-        "Point_2": ("L005_WNDS_MPH_predicted.csv", "L005_WNDS_MPH"),
-        "Point_3": ("L006_WNDS_MPH_predicted.csv", "L006_WNDS_MPH"),
-        "Point_4": ("LZ40_WNDS_MPH_predicted.csv", "LZ40_WNDS_MPH")
-    }
-
-    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
-    FH = FastHerbie([today_str], model="ifs", fxx=range(0, 360, 3))
-    dfs = []
-
-    variables = {
-        "10u": "10u",
-        "10v": "10v",
-        "2t": "2t",
-
-    }
-
-    # Loop through points and extract data
-    for index, point in points.iterrows():
-        print(f"\nProcessing Point {index + 1}: ({point.latitude}, {point.longitude})")
-
-        point_df = pd.DataFrame({
-            "longitude": [point.longitude],
-            "latitude": [point.latitude]
-        })
-
-        for var_key, var_name in variables.items():
-            print(f"  Variable: {var_key}")
-
-            # Download and load dataset
-            FH.download(f":{var_key}")
-            ds = FH.xarray(f":{var_key}", backend_kwargs={"decode_timedelta": True})
-
-            # Extract point data
-            dsi = ds.herbie.pick_points(point_df, method="nearest")
-
-            # Get actual variable name
-            if var_name == "10u":
-                var_name_actual = "u10"  # Map 10u to u10
-            elif var_name == "10v":
-                var_name_actual = "v10"  # Map 10v to v10
-            elif var_name == "2t":
-                var_name_actual = "t2m"  #TODO: check that this is correct
-
-            # Convert to DataFrame
-            time_series = dsi[var_name_actual].squeeze()
-            df = time_series.to_dataframe().reset_index()
-
-            # Handle datetime columns
-            if "valid_time" in df.columns:
-                df = df.rename(columns={"valid_time": "datetime"})
-            elif "step" in df.columns and "time" in dsi.coords:
-                df["datetime"] = dsi.time.values[0] + df["step"]
-
-            # Retain necessary columns
-            df = df[["datetime", var_name_actual]].drop_duplicates()
-            dfs.append((index, var_name_actual, df))
-
-    # Merge and process data per point
-    results = {}
-    for point_index in range(len(points)):
-        u_df = [df for idx, name, df in dfs if idx == point_index and name == "u10"][0]
-        v_df = [df for idx, name, df in dfs if idx == point_index and name == "v10"][0]
-        merged = u_df.merge(v_df, on="datetime", how="outer")
-
-        # Compute wind speed and correction
-        merged["wind_speed"] = (merged["u10"] ** 2 + merged["v10"] ** 2) ** 0.5
-        merged["wind_speed_corrected"] = 0.4167 * merged["wind_speed"] + 4.1868
-        merged["wind_speed_corrected"] = merged["wind_speed_corrected"] * 2.23694  # m/s to mph
-
-        results[f"Point_{point_index + 1}"] = merged
-
-    # Save outputs with station-specific column names
-    for key, (filename, new_col_name) in file_map.items():
-        df = results[key].copy()
-        df = df[["datetime", "wind_speed_corrected"]].rename(columns={
-            "wind_speed_corrected": new_col_name,
-            "datetime": "date"
-        })
-        filepath = os.path.join(output_dir, filename)
-        df.to_csv(filepath, index=False)
-    # Save 2-meter air temperature data
-    airt_file_map = {
-        "Point_1": "L001_AIRT_Degrees Celsius_forecast.csv",
-        "Point_2": "L005_AIRT_Degrees Celsius_forecast.csv",
-        "Point_3": "L006_AIRT_Degrees Celsius_forecast.csv",
-        "Point_4": "LZ40_AIRT_Degrees Celsius_forecast.csv"
-    }
-    airt_column_map = {
-        "Point_1": "L001_AIRT_Degrees Celsius",
-        "Point_2": "L005_AIRT_Degrees Celsius",
-        "Point_3": "L006_AIRT_Degrees Celsius",
-        "Point_4": "LZ40_AIRT_Degrees Celsius"
-    }
-
-    for key in airt_file_map:
-        point_index = int(key.split("_")[1]) - 1
-        df_airt = [df for idx, name, df in dfs if idx == point_index and name == "t2m"][0].copy()
-        df_airt["t2m"] = df_airt["t2m"] - 273.15  # Convert from Kelvin to Celsius
-        df_airt = df_airt.rename(columns={
-            "datetime": "date",
-            "t2m": airt_column_map[key]
-        })
-        filepath = os.path.join(output_dir, airt_file_map[key])
-        df_airt.to_csv(filepath, index=False)
-
loone_data_prep-1.2.3/loone_data_prep/forecast_scripts/weather_forecast.py (deleted)

@@ -1,155 +0,0 @@
-from herbie import FastHerbie
-from datetime import datetime
-import pandas as pd
-import openmeteo_requests
-import argparse
-import requests_cache
-from retry_requests import retry
-import warnings
-
-warnings.filterwarnings("ignore", message="Will not remove GRIB file because it previously existed.")
-
-
-def download_weather_forecast (file_path):
-    # Get today's date in the required format
-    today_str = datetime.today().strftime('%Y-%m-%d 00:00')
-
-    # Define variables to download and extract
-    variables = {
-        "10u": "10u",
-        "ssrd": "ssrd",
-        "tp": "tp",
-        "10v": "10v",
-    }
-
-    # Define point of interest
-    points = pd.DataFrame({"longitude": [-80.7976], "latitude": [26.9690]})
-
-    # Initialize FastHerbie
-    FH = FastHerbie([today_str], model="ifs", fxx=range(0, 360, 3))
-    dfs = []
-
-    for var_key, var_name in variables.items():
-        print(f"Processing {var_key}...")
-
-        # Download and load the dataset
-        FH.download(f":{var_key}")
-        ds = FH.xarray(f":{var_key}", backend_kwargs={"decode_timedelta": True})
-
-        # Extract point data
-        dsi = ds.herbie.pick_points(points, method="nearest")
-
-        # Extract the correct variable name dynamically
-        if var_name == "10u":
-            var_name_actual = "u10"  # Map 10u to u10
-        elif var_name == "10v":
-            var_name_actual = "v10"  # Map 10v to v10
-        else:
-            var_name_actual = var_name  # For ssrd and tp, use the same name
-
-        # Extract time series
-        time_series = dsi[var_name_actual].squeeze()
-
-        # Convert to DataFrame
-        df = time_series.to_dataframe().reset_index()
-
-        # Convert `valid_time` to datetime
-        if "valid_time" in df.columns:
-            df = df.rename(columns={"valid_time": "datetime"})
-        elif "step" in df.columns and "time" in dsi.coords:
-            df["datetime"] = dsi.time.values[0] + df["step"]
-
-        # Keep only datetime and variable of interest
-        df = df[["datetime", var_name_actual]].drop_duplicates()
-
-        # Append to list
-        dfs.append(df)
-
-        # Print extracted data
-        # print(df)
-
-    # Merge all variables into a single DataFrame
-    final_df = dfs[0]
-    for df in dfs[1:]:
-        final_df = final_df.merge(df, on="datetime", how="outer")
-    print(final_df)
-    # Calculate wind speed
-    final_df["wind_speed"] = (final_df["u10"] ** 2 + final_df["v10"] ** 2) ** 0.5
-
-    #rainfall corrected: OLS Regression Equation: Corrected Forecast = 0.7247 * Forecast + 0.1853
-    final_df["tp_corrected"] = 0.7247 * final_df["tp"] + 0.1853
-
-    #wind speed correction: Corrected Forecast = 0.4167 * Forecast + 4.1868
-    final_df["wind_speed_corrected"] = 0.4167 * final_df["wind_speed"] + 4.1868
-
-    #radiation correction will need to be fixed because it was done on fdir instead of ssdr
-    #radiation corrected: Corrected Forecast = 0.0553 * Forecast - 0.0081
-    final_df["ssrd_corrected"] = 0.0553 * final_df["ssrd"] - 0.0081
-
-    # Setup the Open-Meteo API client with cache and retry on error
-    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
-    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
-    openmeteo = openmeteo_requests.Client(session = retry_session)
-
-    # Make sure all required weather variables are listed here
-    # The order of variables in hourly or daily is important to assign them correctly below
-    url = "https://api.open-meteo.com/v1/forecast"
-    params = {
-        "latitude": 26.9690,
-        "longitude": -80.7976,
-        "hourly": "evapotranspiration",
-        "forecast_days": 16,
-        "models": "gfs_seamless"
-    }
-    responses = openmeteo.weather_api(url, params=params)
-
-
-    # Process first location. Add a for-loop for multiple locations or weather models
-    response = responses[0]
-
-    hourly = response.Hourly()
-    hourly_evapotranspiration = hourly.Variables(0).ValuesAsNumpy()
-
-    hourly_data = {"date": pd.date_range(
-        start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
-        end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
-        freq = pd.Timedelta(seconds = hourly.Interval()),
-        inclusive = "left"
-    )}
-
-    hourly_data["evapotranspiration"] = hourly_evapotranspiration
-
-    hourly_dataframe = pd.DataFrame(data = hourly_data)
-
-    # Convert datetime to date for merging
-    final_df['date'] = final_df['datetime']
-    # Ensure final_df['date'] is timezone-aware (convert to UTC)
-    final_df['date'] = pd.to_datetime(final_df['date'], utc=True)
-
-    # Ensure hourly_dataframe['date'] is also timezone-aware (convert to UTC)
-    hourly_dataframe['date'] = pd.to_datetime(hourly_dataframe['date'], utc=True)
-
-    # Merge while keeping only matching dates from final_df
-    merged_df = final_df.merge(hourly_dataframe, on='date', how='left')
-
-    # Print final combined DataFrame
-    merged_df.drop(columns=['date'], inplace=True)
-    # print(merged_df)
-
-    merged_df.to_csv(file_path, index=False)
-
-
-def main():
-    # Set up command-line argument parsing
-    parser = argparse.ArgumentParser(description="Download and process weather forecast data.")
-    parser.add_argument("file_path", help="Path to save the resulting CSV file.")
-
-    # Parse the arguments
-    args = parser.parse_args()
-
-    # Call the function with the provided file path
-    download_weather_forecast(args.file_path)
-
-
-if __name__ == "__main__":
-    main()