loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
+ """
+ Utilities for interacting with the DBHYDRO Insights database services.
+
+ This module provides functions for fetching data from endpoints used
+ by the South Florida Water Management District's DBHYDRO Insights app.
+ """
+
+ from datetime import datetime
+ import requests
+ from typing import Literal, Tuple
+
+
+ def get_dbhydro_station_metadata(station_id: str) -> dict | None:
+     """
+     Fetches metadata for a specific station from the DBHYDRO_SiteStation service.
+
+     Args:
+         station_id (str): The ID of the station for which to fetch metadata. Examples: 'FISHP', 'L OKEE', etc.
+
+     Returns:
+         dict: A dictionary containing the metadata of the station, or None if the request fails.
+     """
+     # Build the request URL with the provided station ID
+     request_url = 'https://geoweb.sfwmd.gov/agsext2/rest/services/MonitoringLocations/DBHYDRO_SiteStation/MapServer/4/query'
+
+     params = {
+         'f': 'json',
+         'outFields': '*',
+         'spatialRel': 'esriSpatialRelIntersects',
+         'where': f"(STATION = '{station_id}')"
+     }
+
+     # Send the GET request to the specified URL with the parameters
+     try:
+         response = requests.get(request_url, params=params)
+     except requests.exceptions.RequestException:
+         return None
+
+     # Successful Request
+     if response.status_code == 200:
+         # Parse the JSON response
+         json = response.json()
+
+         # No data given back for given station ID
+         if not json['features']:
+             return None
+
+         # Data given back, return the JSON response
+         return json
+
+     # Failure
+     return None
+
+
+ def get_dbhydro_continuous_timeseries_metadata(
+     station_ids: list[str],
+     categories: list[str] | None = ['ALL'],
+     parameters: list[str] | None = ['ALL'],
+     statistics: list[str] | None = ['ALL'],
+     recorders: list[str] | None = ['ALL'],
+     frequencies: list[str] | None = ['ALL']
+ ) -> dict | None:
+     """Fetches metadata for continuous time series data from the DBHYDRO Insights service.
+
+     Args:
+         station_ids (list[str]): List of station IDs to query.
+         categories (list[str] | None): List of categories to filter by. Defaults to ['ALL'].
+         parameters (list[str] | None): List of parameters to filter by. Defaults to ['ALL'].
+         statistics (list[str] | None): List of statistics to filter by. Defaults to ['ALL'].
+         recorders (list[str] | None): List of recorders to filter by. Defaults to ['ALL'].
+         frequencies (list[str] | None): List of frequencies to filter by. Defaults to ['ALL'].
+
+     Returns:
+         dict | None: The JSON response from the API if successful, otherwise None.
+
+     Raises:
+         Exception: If the request fails.
+     """
+     # Build the request URL
+     request_url = 'https://insightsdata.api.sfwmd.gov/v1/insights-data/cont/ts'
+
+     # Build the locations list
+     locations = []
+
+     for station_id in station_ids:
+         # Build the location dictionary for this station_id
+         location = {
+             'name': station_id,
+             'type': 'STATION',
+         }
+
+         # Add location to the locations list
+         locations.append(location)
+
+     # Build the data payload
+     data = {
+         'query': {
+             'locations': locations,
+             'parameters': parameters,
+             'category': categories,
+             'statistic': statistics,
+             'recorder': recorders,
+             'frequency': frequencies,
+             'dbkeys': ['ALL'],
+         }
+     }
+
+     # Send the POST request to the specified URL with the parameters
+     response = requests.post(request_url, json=data)
+
+     # Successful Request
+     if response.status_code == 200:
+         # Parse the JSON response
+         json = response.json()
+
+         # No data given back for given station ID
+         if not json['results']:
+             return None
+
+         # Data given back, return the JSON response
+         return json
+
+     # Failure
+     raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
+
+
+ def get_dbhydro_water_quality_metadata(stations: list[Tuple[str, Literal['SITE', 'STATION']]], test_numbers: list[int]) -> dict | None:
+     """Fetches metadata for water quality data from the DBHYDRO Insights service.
+
+     Args:
+         stations (list[Tuple[str, Literal['SITE', 'STATION']]]): List of tuples containing station names and station types ('SITE' or 'STATION') to get water quality metadata for.
+         test_numbers (list[int]): List of test numbers to get data for. Test numbers map to parameters. Example: 25 maps to 'PHOSPHATE, TOTAL AS P'.
+
+     Returns:
+         dict | None: The JSON response from the API if successful, otherwise None.
+
+     Raises:
+         Exception: If the request fails.
+     """
+     # Build the request URL
+     request_url = 'https://insightsdata.api.sfwmd.gov/v1/insights-data/chem/ts'
+
+     # Build the locations list
+     locations = []
+
+     for station in stations:
+         # Build the location dictionary for this station/site
+         location = {
+             'name': station[0],
+             'type': station[1],
+         }
+
+         # Add location to the locations list
+         locations.append(location)
+
+     # Build the query parameters
+     query_parameters = {
+         'offset': 0,
+         'limit': 1000,
+         'sort': 'project,location,parameterDesc,matrix,method',
+         'startDate': '19000101',
+         'endDate': datetime.now().strftime("%Y%m%d"),
+         'period': '',
+     }
+
+     # Build the data payload
+     payload = {
+         'query': {
+             'locations': locations,
+             'matrices': ['ALL'],
+             'methods': ['ALL'],
+             'paramGroups': ['ALL'],
+             'parameters': [str(num) for num in test_numbers],
+             'projects': ['ALL'],
+             'sampleTypes': ['ALL'],
+         }
+     }
+
+     # Send the POST request to the specified URL with the parameters
+     response = requests.post(request_url, params=query_parameters, json=payload)
+
+     # Successful Request
+     if response.status_code == 200:
+         # Parse the JSON response
+         json = response.json()
+
+         # No data given back for given station ID
+         if not json['results']:
+             return None
+
+         # Data given back, return the JSON response
+         return json
+
+     # Failure
+     raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
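
For orientation, a minimal usage sketch of the three helpers added above. The import path is assumed for illustration (this diff does not show where the new module sits inside loone_data_prep); the function names, arguments, and example station IDs come from the code and docstrings above.

    # Assumed module path -- adjust to the file's actual location in the package.
    from loone_data_prep.dbhydro_insights import (
        get_dbhydro_station_metadata,
        get_dbhydro_continuous_timeseries_metadata,
        get_dbhydro_water_quality_metadata,
    )

    # Station metadata from the DBHYDRO_SiteStation map service (None on failure or no match).
    station_meta = get_dbhydro_station_metadata('FISHP')

    # Continuous time-series metadata; all filter lists default to ['ALL'].
    ts_meta = get_dbhydro_continuous_timeseries_metadata(['FISHP', 'L OKEE'])

    # Water-quality metadata; per the docstring, test number 25 maps to 'PHOSPHATE, TOTAL AS P'.
    wq_meta = get_dbhydro_water_quality_metadata([('L OKEE', 'STATION')], [25])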
@@ -1,74 +1,73 @@
  import sys
  from retry import retry
- from rpy2.robjects import r
- from rpy2.rinterface_lib.embedded import RRuntimeError
  import pandas as pd
+ from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api


- @retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
+ @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
  def get(
      workspace,
      date_min: str = "1972-01-01",
      date_max: str = "2023-06-30"
  ) -> None:
-     r(
-         f"""
-         # Load the required libraries
-         library(dbhydroR)
-         library(dplyr)
-
-         # Helper Functions
-         retrieve_data <- function(dbkey, date_min, date_max)
-         {{
-             # Get the data from dbhydro
-             df = get_hydro(dbkey = dbkey, date_min = date_min, date_max = date_max, raw = TRUE)
-
-             # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-             colnames(df) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-             # Add a type and units column to data so it can be cleaned using the clean_hydro function
-             df$type <- "FLOW"
-             df$units <- "cfs"
-
-             # Clean the data.frame
-             df <- clean_hydro(df)
-
-             # Drop the " _FLOW_cfs" column
-             df <- df %>% select(-` _FLOW_cfs`)
-
-             # Convert Flow rate from cfs to m³/day
-             df[, -1] <- df[, -1] * (0.0283168466 * 86400)
-
-             # Return resulting data.frame
-             return(df)
-         }}
-
-         # S65E_S
-         S65E_S <- retrieve_data(dbkey = "91656", date_min = "{date_min}", date_max = "{date_max}")
-
-         # Wait five seconds before next request to avoid "too many requests" error
-         Sys.sleep(5)
-
-         # S65EX1_S
-         S65EX1_S <- retrieve_data(dbkey = "AL760", date_min = "{date_min}", date_max = "{date_max}")
-
-         # Merge the data from each dbkey
-         result <- merge(S65E_S, S65EX1_S, by = "date", all = TRUE)
-
-         # Write the data to a file
-         write.csv(result, file = '{workspace}/S65E_total.csv')
-         """
-     )
-
-     _reformat_s65e_total_file(workspace)
+     """Retrieve total flow data for S65E structure (S65E_S + S65EX1_S) and save to CSV.
+
+     Args:
+         workspace (str): Path to workspace where data will be downloaded.
+         date_min (str): Minimum date for data retrieval in 'YYYY-MM-DD' format.
+         date_max (str): Maximum date for data retrieval in 'YYYY-MM-DD' format.
+     """
+     # Get a DbHydroApi instance
+     api = get_dbhydro_api()
+
+     # S65E_S
+     s65e_s = api.get_daily_data(['91656'], 'id', date_min, date_max, 'NGVD29', False)
+
+     if not s65e_s.has_data():
+         return
+
+     df_s65e_s = s65e_s.to_dataframe(True)
+     df_s65e_s = df_replace_missing_with_nan(df_s65e_s) # Replace flagged 0 values and -99999.0 with NaN
+     df_s65e_s.reset_index(inplace=True) # Reset index so datetime is a column
+     df_s65e_s['value'] = df_s65e_s['value'] * (0.0283168466 * 86400) # Convert flow from cfs to cmd
+     df_s65e_s = df_s65e_s[['datetime', 'value']].copy() # Grab only the columns we need
+     df_s65e_s.rename(columns={'datetime': 'date', 'value': f'S65E_S_FLOW_cfs'}, inplace=True) # Rename columns to expected names
+
+     # S65EX1_S
+     s65ex1_s = api.get_daily_data(['AL760'], 'id', date_min, date_max, 'NGVD29', False)
+
+     if not s65ex1_s.has_data():
+         return
+
+     df_s65ex1_s = s65ex1_s.to_dataframe(True)
+     df_s65ex1_s = df_replace_missing_with_nan(df_s65ex1_s) # Replace flagged 0 values and -99999.0 with NaN
+     df_s65ex1_s.reset_index(inplace=True) # Reset index so datetime is a column
+     df_s65ex1_s['value'] = df_s65ex1_s['value'] * (0.0283168466 * 86400) # Convert flow from cfs to cmd
+     df_s65ex1_s = df_s65ex1_s[['datetime', 'value']].copy() # Grab only the columns we need
+     df_s65ex1_s.rename(columns={'datetime': 'date', 'value': f'S65EX1_S_FLOW_cfs'}, inplace=True) # Rename columns to expected names
+
+     # Combine the data from both stations into a single dataframe
+     df = pd.merge(df_s65e_s, df_s65ex1_s, on='date', how='outer', suffixes=('_S65E_S', '_S65EX1_S'))
+
+     # Reformat the data to the expected layout
+     df = _reformat_s65e_total_df(df)
+
+     # Write the data to a file
+     df.to_csv(f"{workspace}/S65E_total.csv")
+

  def _reformat_s65e_total_file(workspace: str):
      # Read in the data
      df = pd.read_csv(f"{workspace}/S65E_total.csv")

-     # Drop unused columns
-     df.drop('Unnamed: 0', axis=1, inplace=True)
+     # Reformat the data
+     df = _reformat_s65e_total_df(df)

+     # Write the updated data back to the file
+     df.to_csv(f"{workspace}/S65E_total.csv")
+
+
+ def _reformat_s65e_total_df(df: pd.DataFrame) -> pd.DataFrame:
      # Convert date column to datetime
      df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')

@@ -81,8 +80,9 @@ def _reformat_s65e_total_file(workspace: str):
      # Drop rows that are missing all their values
      df.dropna(how='all', inplace=True)

-     # Write the updated data back to the file
-     df.to_csv(f"{workspace}/S65E_total.csv")
+     # Return the reformatted dataframe
+     return df
+

  if __name__ == "__main__":
      workspace = sys.argv[1].rstrip("/")
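
For reference, the conversion factor used in both station branches above: 1 cfs = 0.0283168466 m³/s, and there are 86,400 seconds in a day, so each flow value is multiplied by 0.0283168466 × 86400 ≈ 2,446.58 to go from cfs to m³/day (the "cmd" mentioned in the inline comments).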
@@ -38,7 +38,7 @@ def get_bias_corrected_data(
      # Prepare the observed data by filling NaN values with the 10yr average
      prepared_od = prep_observed_data(observed_data)
      historical_data = geoglows.data.retro_daily(reach_id)
-     # Get the historical simulation data for the given reach ID - TODO: Do we for sure want to cache the historical data?
+     # Get the historical simulation data for the given reach ID
      # I am reading the observed data that we queried earlier instead of caching it
      # historical_data = None

@@ -1,11 +1,8 @@
  import os
  import sys
  import pandas as pd
- import rpy2.robjects as ro
- from rpy2.robjects import pandas2ri
  import geoglows
  import datetime
- from loone_data_prep.utils import get_dbkeys
  from loone_data_prep.flow_data.forecast_bias_correction import (
      get_bias_corrected_data,
  )
@@ -63,51 +60,6 @@ FORECAST_DATE = (datetime.datetime.now()).strftime("%Y%m%d")
  GEOGLOWS_ENDPOINT = "https://geoglows.ecmwf.int/api/"


- def get_stations_latitude_longitude(station_ids: list[str]):
-     """Gets the latitudes and longitudes of the given stations.
-
-     Args:
-         station_ids (list[str]): The ids of the stations to get the
-             latitudes/longitudes of
-
-     Returns:
-         (dict[str, tuple[numpy.float64, numpy.float64]]): A dictionary of
-             format dict<station_id:(latitude,longitude)>
-
-     If a station's latitude/longitude fails to download then its station_id
-     won't be a key in the returned dictionary.
-     """
-     # The dict that holds the data that gets returned
-     station_data = {}
-
-     # Get the station/dbkey data
-     r_dataframe = get_dbkeys(
-         station_ids=station_ids,
-         category="SW",
-         param="",
-         stat="",
-         recorder="",
-         detail_level="full",
-     )
-
-     # Convert the r dataframe to a pandas dataframe
-     with (ro.default_converter + pandas2ri.converter).context():
-         pd_dataframe = ro.conversion.get_conversion().rpy2py(r_dataframe)
-
-     # Filter out extra rows for each station from the dataframe
-     pd_dataframe.drop_duplicates(subset="Station", keep="first", inplace=True)
-
-     # Get latitude/longitude of each station
-     for index in pd_dataframe.index:
-         station = pd_dataframe["Station"][index]
-         latitude = pd_dataframe["Latitude"][index]
-         longitude = pd_dataframe["Longitude"][index]
-
-         station_data[station] = latitude, longitude
-
-     return station_data
-
-

  def get_reach_id(latitude: float, longitude: float):
      """Gets the reach id for the given latitude/longitude.
@@ -273,70 +225,32 @@ def _format_stats_DataFrame(dataframe: pd.core.frame.DataFrame):
      dataframe.index = dataframe.index.normalize()

      # Convert m^3/s data to m^3/h
-     dataframe = dataframe.transform(lambda x: x * SECONDS_IN_HOUR)
+     dataframe = dataframe * SECONDS_IN_HOUR

      # Make negative values 0
      dataframe.clip(0, inplace=True)

-     # Max Column (Max)
-     column_max = dataframe[["flow_max"]].copy()
-     column_max = column_max.groupby([column_max.index]).max()
-
-     # 75th Percentile Column (Average)
-     column_75percentile = dataframe[["flow_75p"]].copy()
-     column_75percentile = column_75percentile.groupby(
-         [column_75percentile.index]
-     ).mean()
-
-     # Average Column (Weighted Average)
-     column_average = dataframe[["flow_avg"]].copy()
-     column_average.transform(lambda x: x / 8)
-     column_average = column_average.groupby([column_average.index]).sum()
-
-     # 25th Percentile Column (Average)
-     column_25percentile = dataframe[["flow_25p"]].copy()
-     column_25percentile = column_25percentile.groupby(
-         [column_25percentile.index]
-     ).mean()
-
-     # Min Column (Min)
-     column_min = dataframe[["flow_min"]].copy()
-     column_min = column_min.groupby([column_min.index]).min()
-
-     # Convert values in each column from m^3/h to m^3/d
-     column_max = column_max.transform(lambda x: x * HOURS_IN_DAY)
-     column_75percentile = column_75percentile.transform(
-         lambda x: x * HOURS_IN_DAY
-     )
-     column_average = column_average.transform(lambda x: x * HOURS_IN_DAY)
-     column_25percentile = column_25percentile.transform(
-         lambda x: x * HOURS_IN_DAY
+     grouped = dataframe.groupby(dataframe.index).mean()
+     # Convert from m^3/h → m^3/d
+     grouped = grouped * HOURS_IN_DAY
+
+     # Rename columns
+     grouped = grouped.rename(
+         columns={
+             "flow_max": "flow_max_m^3/d",
+             "flow_75p": "flow_75%_m^3/d",
+             "flow_avg": "flow_avg_m^3/d",
+             "flow_med": "flow_med_m^3/d",
+             "flow_25p": "flow_25%_m^3/d",
+             "flow_min": "flow_min_m^3/d",
+         }
      )
-     column_min = column_min.transform(lambda x: x * HOURS_IN_DAY)
-
-     # Append modified columns into one pandas DataFrame
-     dataframe_result = pd.DataFrame()
-     dataframe_result.index = dataframe.groupby([dataframe.index]).mean().index
-     dataframe_result["flow_max_m^3/d"] = column_max["flow_max"].tolist()
-     dataframe_result["flow_75%_m^3/d"] = column_75percentile[
-         "flow_75p"
-     ].tolist()
-     dataframe_result["flow_avg_m^3/d"] = column_average[
-         "flow_avg"
-     ].tolist()
-     dataframe_result["flow_25%_m^3/d"] = column_25percentile[
-         "flow_25p"
-     ].tolist()
-     dataframe_result["flow_min_m^3/d"] = column_min["flow_min"].tolist()

-     # Format datetimes to just dates
-     dataframe_result.index = dataframe_result.index.strftime("%Y-%m-%d")
+     # Format index as date string and rename
+     grouped.index = grouped.index.strftime("%Y-%m-%d")
+     grouped.index.name = "date"

-     # Rename index from datetimes to date
-     dataframe_result.rename_axis("date", inplace=True)
-
-     # Return resulting DataFrame
-     return dataframe_result
+     return grouped


  def main(
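
A small, self-contained sketch of what the refactored aggregation above now does, using synthetic values. SECONDS_IN_HOUR and HOURS_IN_DAY are the module's constants and are assumed here to be 3600 and 24; the column names come from the diff.

    import pandas as pd

    SECONDS_IN_HOUR = 3600  # assumed value of the module constant
    HOURS_IN_DAY = 24       # assumed value of the module constant

    # Two forecast values on the same (normalized) date, in m^3/s
    idx = pd.to_datetime(["2024-01-01", "2024-01-01"])
    df = pd.DataFrame({"flow_max": [10.0, 12.0], "flow_min": [1.0, -0.5]}, index=idx)

    df = df * SECONDS_IN_HOUR              # m^3/s -> m^3/h
    df = df.clip(0)                        # floor negative flows at zero
    grouped = df.groupby(df.index).mean()  # one row per date (mean of the members)
    grouped = grouped * HOURS_IN_DAY       # m^3/h -> m^3/d
    grouped = grouped.rename(columns={"flow_max": "flow_max_m^3/d", "flow_min": "flow_min_m^3/d"})
    grouped.index = grouped.index.strftime("%Y-%m-%d")
    grouped.index.name = "date"
    print(grouped)  # 2024-01-01: flow_max_m^3/d = 950400.0, flow_min_m^3/d = 43200.0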
@@ -45,20 +45,22 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
      Returns:
          dict: Success or error message
      """
+     # Make a copy of the dbkeys dictionary because key value pairs will be removed as they are successfully downloaded
+     dbkeys = dbkeys.copy()

      # Retrieve inflow data
      for dbkey, station in dbkeys.copy().items():
-         file_name = f"{station}_FLOW_cmd.csv"
+         file_name = f"{station.replace(' ', '_')}_FLOW_cmd.csv"
          date_latest = find_last_date_in_csv(workspace, file_name)

          # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
          if date_latest is None:
              # Download all the data
              print(f'Downloading all inflow data for {station}')
-             hydro.get(workspace, dbkey)
+             hydro.get(workspace=workspace, dbkey=dbkey, station=station)
          else:
              # Check whether the latest data is already up to date.
-             if dbhydro_data_is_latest(date_latest):
+             if dbhydro_data_is_latest(date_latest, dbkey):
                  # Notify that the data is already up to date
                  print(f'Downloading of new inflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')

@@ -67,8 +69,15 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
                  continue

              # Download only the new data
-             print(f'Downloading new inflow data for {station} starting from date {date_latest}')
-             hydro.get(workspace, dbkey, date_latest)
+             date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+             print(f'Downloading new inflow data for {station} starting from date {date_next}')
+             hydro.get(workspace=workspace, dbkey=dbkey, date_min=date_next, station=station)
+
+             # Check if the station name contains a space
+             if ' ' in station:
+                 # Replace space with underscore in the station name
+                 station_previous = station
+                 station = station.replace(' ', '_')

              # Make sure both our original data and newly downloaded data exist
              df_original_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
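
The incremental-download branches above (and the analogous ones later in this diff) resume one day after the last date already on disk, so the last stored row is not fetched twice. A minimal sketch of that computation, with a hypothetical last date:

    import pandas as pd

    date_latest = "2024-05-31"  # hypothetical last date found in the existing CSV
    date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    assert date_next == "2024-06-01"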
@@ -94,7 +103,7 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
          S65E_total.get(workspace, date_max=datetime.now().strftime("%Y-%m-%d"))
      else:
          # Check whether the latest data is already up to date.
-         if dbhydro_data_is_latest(date_latest):
+         if dbhydro_data_is_latest(date_latest, '91656') and dbhydro_data_is_latest(date_latest, 'AL760'):
              # Notify that the data is already up to date
              print(f'Downloading of new inflow data skipped for S65E_total. Data is already up to date.')
          else:
@@ -104,8 +113,9 @@ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:

          try:
              # Download only the new data
-             print(f'Downloading new S65E_total data starting from date {date_latest}')
-             S65E_total.get(workspace, date_min=date_latest, date_max=datetime.now().strftime("%Y-%m-%d"))
+             date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+             print(f'Downloading new S65E_total data starting from date {date_next}')
+             S65E_total.get(workspace, date_min=date_next, date_max=datetime.now().strftime("%Y-%m-%d"))

              # Merge the new data with the original data
              df_original = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
@@ -56,8 +56,8 @@ def _get_outflow_data_from_station_ids(workspace: str, station_ids: list) -> dic
          dict: Success or error message
      """
      # Get dbkeys from station ids
-     dbkeys = list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF", detail_level="dbkey"))
-     dbkeys.extend(list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV", detail_level="dbkey")))
+     dbkeys = get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF")
+     dbkeys.extend(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV"))

      for dbkey in dbkeys:
          hydro.get(workspace, dbkey, "2000-01-01")
@@ -94,6 +94,8 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
      Returns:
          dict: Success or error message
      """
+     # Make a copy of the dbkeys dictionary because key value pairs will be removed as they are successfully downloaded
+     dbkeys = dbkeys.copy()

      # No dbkeys given, attempt to get data from station ids
      if dbkeys is None:
@@ -102,16 +104,16 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
      # Get outflow data from dbkeys
      for dbkey, station in dbkeys.copy().items():
          # Get the date of the latest data in the csv file (if any)
-         date_latest = find_last_date_in_csv(workspace, f"{station}_FLOW_cmd.csv")
+         date_latest = find_last_date_in_csv(workspace, f"{station.replace(' ', '_')}_FLOW_cmd.csv")

          # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
          if date_latest is None:
              # Download all data
              print(f'Downloading all outflow data for {station}')
-             hydro.get(workspace, dbkey, "2000-01-01")
+             hydro.get(workspace=workspace, dbkey=dbkey, date_min="2000-01-01", station=station)
          else:
              # Check whether the latest data is already up to date.
-             if dbhydro_data_is_latest(date_latest):
+             if dbhydro_data_is_latest(date_latest, dbkey):
                  # Notify that the data is already up to date
                  print(f'Downloading of new outflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')

@@ -120,8 +122,15 @@ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS)
                  continue

              # Download only the new data
-             print(f'Downloading new outflow data for {station} starting from date {date_latest}')
-             hydro.get(workspace, dbkey, date_latest)
+             date_next = (pd.to_datetime(date_latest) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+             print(f'Downloading new outflow data for {station} starting from date {date_next}')
+             hydro.get(workspace=workspace, dbkey=dbkey, date_min=date_next, station=station)
+
+             # Check if the station name contains a space
+             if ' ' in station:
+                 # Replace space with underscore in the station name
+                 station_previous = station
+                 station = station.replace(' ', '_')

              # Make sure both our original data and newly downloaded data exist
              df_old_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
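
The space-to-underscore handling above keeps the in-memory station name consistent with the on-disk file name, which is built with spaces replaced by underscores. A tiny sketch with a hypothetical station name:

    station = "L OKEE"  # hypothetical station name containing a space
    file_name = f"{station.replace(' ', '_')}_FLOW_cmd.csv"
    assert file_name == "L_OKEE_FLOW_cmd.csv"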