loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,7 @@ import requests
 import uuid
 from datetime import datetime
 from loone_data_prep.water_level_data import hydro
-from loone_data_prep.flow_data.get_forecast_flows import get_stations_latitude_longitude
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest, get_stations_latitude_longitude
 import pandas as pd

 DATE_NOW = datetime.now().date().strftime("%Y-%m-%d")
@@ -13,11 +12,11 @@ DATE_NOW = datetime.now().date().strftime("%Y-%m-%d")
 D = {
     "LO_Stage": {"dbkeys": ["16022", "12509", "12519", "16265", "15611"], "datum": "NGVD29"},
     "LO_Stage_2": {"dbkeys": ["94832"], "date_min": "2024-04-30", "datum": "NAVD88"},
-    "Stg_3ANW": {"dbkeys": ["LA369"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_2A17": {"dbkeys": ["16531"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A3": {"dbkeys": ["16532"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A4": {"dbkeys": ["16537"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A28": {"dbkeys": ["16538"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"}
+    "Stg_3ANW": {"dbkeys": ["LA369"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"G3ANW": "3A-NW"}},
+    "Stg_2A17": {"dbkeys": ["16531"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"2A-17": "2-17"}},
+    "Stg_3A3": {"dbkeys": ["16532"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-3": "3-63"}},
+    "Stg_3A4": {"dbkeys": ["16537"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-4": "3-64"}},
+    "Stg_3A28": {"dbkeys": ["16538"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-28": "3-65"}},
 }

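Note: The five "Stg_*" entries now carry an "override_site_codes" mapping from the site code DBHYDRO reports to the column name the downstream files expect. The mapping is applied when hydro.get renames the pivoted columns (see the second file in this diff). A minimal sketch of that renaming, using an invented two-row frame:

    import pandas as pd

    # Hypothetical pivoted data: one column per DBHYDRO site code.
    df = pd.DataFrame({"G3ANW": [10.2, 10.3]},
                      index=pd.to_datetime(["2023-04-29", "2023-04-30"]))

    override_site_codes = {"G3ANW": "3A-NW"}  # from the "Stg_3ANW" entry above
    data_type, units = "STG", "ft NGVD29"

    # Apply the override before composing the "<site>_<type>_<units>" header.
    df = df.rename(columns={c: f"{override_site_codes.get(c, c)}_{data_type}_{units}"
                            for c in df.columns})
    print(df.columns.tolist())  # ['3A-NW_STG_ft NGVD29']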
@@ -25,9 +24,6 @@ def main(workspace: str, d: dict = D) -> dict:
     missing_files = []
     failed_downloads = []  # List of file names that the script failed to get the latest data for (but the files still exist)

-    # Get the date of the latest data in LO_Stage_2.csv
-    date_latest_lo_stage_2 = find_last_date_in_csv(workspace, "LO_Stage_2.csv")
-
     for name, params in d.items():
         # Get the date of the latest data in the csv file
         date_latest = find_last_date_in_csv(workspace, f"{name}.csv")
@@ -35,10 +31,18 @@ def main(workspace: str, d: dict = D) -> dict:
         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
         if date_latest is None:
             print(f"Getting all water level data for {name}.")
+            params['date_max'] = DATE_NOW
             hydro.get(workspace, name, **params)
         else:
             # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            requires_data_download = False
+            for dbkey in params['dbkeys']:
+                if not dbhydro_data_is_latest(date_latest, dbkey):
+                    requires_data_download = True
+                    break
+
+            # Data is already up to date
+            if not requires_data_download:
                 # Notify that the data is already up to date
                 print(f'Downloading of new water level data skipped for {name}. Data is already up to date.')
                 continue
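Note: dbhydro_data_is_latest now takes the dbkey as well, and the check runs per dbkey because keys grouped under one name can end on different dates. An equivalent formulation with any(), assuming the same boolean return value:

    # Equivalent to the requires_data_download loop above.
    requires_data_download = any(
        not dbhydro_data_is_latest(date_latest, dbkey)
        for dbkey in params['dbkeys']
    )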
@@ -50,21 +54,23 @@

             try:
                 # Download only the new data
-                print(f'Downloading new water level data for {name} starting from date {date_latest}')
-                hydro.get(workspace, name, dbkeys=params['dbkeys'], date_min=date_latest, date_max=DATE_NOW, datum=params['datum'])
+                date_next = (datetime.strptime(date_latest, "%Y-%m-%d") + pd.DateOffset(days=1)).date().strftime("%Y-%m-%d")
+                print(f'Downloading new water level data for {name} starting from date {date_next}')
+                kwargs = {}
+                if 'override_site_codes' in params:
+                    kwargs['override_site_codes'] = params['override_site_codes']
+                hydro.get(workspace, name, dbkeys=params['dbkeys'], date_min=date_next, date_max=DATE_NOW, datum=params['datum'], **kwargs)

                 # Read in the original data and the newly downloaded data
-                df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
-                df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-
-                # For get_hydro() calls with multiple dbkeys, remove the row corresponding to the latest date from the downloaded data.
-                # When get_hydro() is given multiple keys its returned data starts from the date given instead of the day after like it
-                # does when given a single key.
-                if len(params['dbkeys']) > 1:
-                    df_new = df_new[df_new['date'] != date_latest]
+                df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col='date')
+                df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col='date')

                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Ensure an integer index (for backwards compatibility)
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

                 # Write out the merged data
                 df_merged.to_csv(os.path.join(workspace, original_file_name))
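Note: Requesting data from the day after date_latest, and indexing both CSVs on 'date', is what lets the old multi-dbkey special case (dropping the duplicated boundary row) be deleted. The date arithmetic, worked through:

    from datetime import datetime
    import pandas as pd

    date_latest = "2024-04-30"
    date_next = (datetime.strptime(date_latest, "%Y-%m-%d")
                 + pd.DateOffset(days=1)).date().strftime("%Y-%m-%d")
    print(date_next)  # 2024-05-01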
@@ -102,6 +108,10 @@ def main(workspace: str, d: dict = D) -> dict:
     lat_long_map = get_stations_latitude_longitude(["L OKEE"])
     latitude, longitude = lat_long_map["L OKEE"]

+    # Load the LO_Stage.csv file
+    df_lo_stage = pd.read_csv(os.path.join(workspace, "LO_Stage.csv"), index_col="date")
+    df_lo_stage.index = pd.to_datetime(df_lo_stage.index)
+
     # Load the LO_Stage_2.csv file
     df_lo_stage_2 = pd.read_csv(os.path.join(workspace, "LO_Stage_2.csv"), index_col="date")
     df_lo_stage_2.index = pd.to_datetime(df_lo_stage_2.index)
@@ -109,21 +119,24 @@ def main(workspace: str, d: dict = D) -> dict:
     # Output Progress
     print("Converting NAVD88 to NGVD29 for 'L OKEE's new dbkey...\n")

-    # Use only the data that is not already in the LO_Stage.csv file
-    if date_latest_lo_stage_2 is not None:
-        date_start = datetime.strptime(date_latest_lo_stage_2, "%Y-%m-%d") + pd.DateOffset(days=1)
-        df_lo_stage_2 = df_lo_stage_2.loc[date_start:]
+    # Use only the data that is not already in the LO_Stage.csv file and exists in the LO_Stage_2.csv file
+    common_dates = df_lo_stage.index.intersection(df_lo_stage_2.index)

-    # Convert the stage values from NAVD88 to NGVD29
-    lo_stage_2_dates = df_lo_stage_2.index.tolist()
-    lo_stage_2_values_navd88 = df_lo_stage_2["L OKEE_STG_ft NGVD29"].tolist()
-    lo_stage_2_values_ngvd29 = []
+    missing_mask = (
+        df_lo_stage.loc[common_dates, "L OKEE_STG_ft NGVD29"].isna() &
+        df_lo_stage_2.loc[common_dates, "L OKEE_STG_ft NGVD29"].notna()
+    )

-    for i in range(0, len(lo_stage_2_values_navd88)):
-        date = lo_stage_2_dates[i]
-        value = lo_stage_2_values_navd88[i]
+    missing_dates: pd.DatetimeIndex = common_dates[missing_mask]
+    missing_dates = missing_dates.to_list()
+
+    # Convert the stage values from NAVD88 to NGVD29 for the missing dates
+    converted_values = {}
+    for date in missing_dates:
         try:
-            lo_stage_2_values_ngvd29.append(_convert_navd88_to_ngvd29(latitude, longitude, value, date.year))
+            navd88_value = df_lo_stage_2.at[date, "L OKEE_STG_ft NGVD29"]
+            ngvd29_value = _convert_navd88_to_ngvd29(latitude, longitude, navd88_value, date.year)
+            converted_values[date] = ngvd29_value
         except Exception as e:
             convert_failure = True
             print(str(e))
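Note: Instead of slicing LO_Stage_2 after a remembered date, the rewrite fills only dates where LO_Stage is missing a value and LO_Stage_2 has one. A self-contained sketch of the mask (column name from the source; values invented):

    import pandas as pd

    col = "L OKEE_STG_ft NGVD29"
    df_lo_stage = pd.DataFrame({col: [14.1, None]},
                               index=pd.to_datetime(["2024-05-01", "2024-05-02"]))
    df_lo_stage_2 = pd.DataFrame({col: [12.7, 12.8]},
                                 index=pd.to_datetime(["2024-05-01", "2024-05-02"]))

    common_dates = df_lo_stage.index.intersection(df_lo_stage_2.index)
    missing_mask = (df_lo_stage.loc[common_dates, col].isna()
                    & df_lo_stage_2.loc[common_dates, col].notna())
    print(common_dates[missing_mask])  # only 2024-05-02 needs conversion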
@@ -132,20 +145,15 @@ def main(workspace: str, d: dict = D) -> dict:
     # Check for conversion failure
     if not convert_failure:
         # Update the LO_Stage.csv file with the converted values
-        df_lo_stage = pd.read_csv(os.path.join(workspace, "LO_Stage.csv"), index_col="date")
-        df_lo_stage.index = pd.to_datetime(df_lo_stage.index)
-
-        for i in range(0, len(lo_stage_2_values_ngvd29)):
-            # Get the current date and value
-            date = lo_stage_2_dates[i]
-            value = lo_stage_2_values_ngvd29[i]
-
-            # Update the value in the LO_Stage dataframe
+        for date, value in converted_values.items():
             df_lo_stage.at[date, "L OKEE_STG_ft NGVD29"] = value

         # Reset the index
         df_lo_stage.reset_index(inplace=True)
-        df_lo_stage.drop(columns=["Unnamed: 0"], inplace=True)
+
+        # Drop Unnamed: 0 column that might have been added
+        if "Unnamed: 0" in df_lo_stage.columns:
+            df_lo_stage.drop(columns=["Unnamed: 0"], inplace=True)

         # Save the updated LO_Stage.csv file
         df_lo_stage.to_csv(os.path.join(workspace, "LO_Stage.csv"))
@@ -1,15 +1,14 @@
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
 import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api

 DEFAULT_DBKEYS = ["16022", "12509", "12519", "16265", "15611"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")


-@retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
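Note: Dropping rpy2 removes RRuntimeError, so the decorator now retries on any Exception. With these parameters the retry package makes up to five attempts, sleeping 15 s, 30 s, 60 s, 60 s between them (the delay doubles per backoff, capped at max_delay); illustrated:

    from retry import retry

    @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
    def fetch():
        # Any exception raised here triggers a retry; after the fifth
        # failed attempt the exception propagates to the caller.
        ...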
@@ -17,8 +16,19 @@ def get(
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     datum: str = "",
-    **kwargs: str | list
+    **kwargs: str | list | dict
 ) -> None:
+    """Fetches daily water level data from DBHYDRO and saves it as a CSV file.
+
+    Args:
+        workspace (str): The directory where the CSV file will be saved.
+        name (str): The name of the output CSV file (without extension).
+        dbkeys (list): List of DBHYDRO dbkeys to fetch data for. Defaults to DEFAULT_DBKEYS.
+        date_min (str): The start date for data retrieval in 'YYYY-MM-DD' format. Defaults to '1950-01-01'.
+        date_max (str): The end date for data retrieval in 'YYYY-MM-DD' format. Defaults to the current date.
+        datum (str): The datum to use for the water level data, one of 'NGVD29' or 'NAVD88'. Defaults to an empty string.
+        **kwargs: Additional keyword arguments. Can include 'override_site_codes' (dict) to rename site codes in the output.
+    """
     # Get the type and units for the station
     data_type = "STG"
     units = "ft NGVD29"
@@ -27,77 +37,48 @@
         data_type = "GAGHT"
         units = "feet"

-    dbkeys_str = "\"" + "\", \"".join(dbkeys) + "\""
-    r(
-        f"""
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        library(dplyr)
-
-        # Stage Data
-        if ("{datum}" == "")
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-        }}
-
-        if (nchar("{datum}") > 0)
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE, datum = "{datum}")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames({name}) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow({name}) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkeys", "{dbkeys}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkeys}"))
-        }}
-
-        # Get the station
-        station <- {name}$station[1]
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        {name}$type <- "{data_type}"
-        {name}$units <- "{units}"
-
-        # Clean the data.frame
-        {name} <- clean_hydro({name})
-
-        # Drop the " _STG_ft NGVD29" column
-        {name} <- {name} %>% select(-` _{data_type}_{units}`)
-
-        # Write the data to a csv file
-        write.csv({name},file ='{workspace}/{name}.csv')
-        """
-    )
+    # Retrieve the data
+    api = get_dbhydro_api()
+    response = api.get_daily_data(dbkeys, 'id', date_min, date_max, datum, False)

-    _reformat_water_level_file(workspace, name)
-
-def _reformat_water_level_file(workspace: str, name: str):
-    # Read in the data
-    df = pd.read_csv(f"{workspace}/{name}.csv")
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)

-    # Drop the "Unnamed: 0" column
-    df.drop(columns=['Unnamed: 0'], inplace=True)
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)

-    # Convert date column to datetime
-    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+    # Make sure datetime exists as a column
+    if 'datetime' not in df.columns:
+        df.reset_index(inplace=True)
+
+    # Pivot the data so that each site_code is a column
+    df = df.pivot(index='datetime', columns='site_code', values='value')

-    # Sort the data by date
-    df.sort_values('date', inplace=True)
+    # Get the current column names in df and the names to rename them to
+    column_names = {'datetime': 'date'}
+    override_site_codes = kwargs.get("override_site_codes", None)
+    for column in df.columns:
+        if override_site_codes and column in override_site_codes:
+            column_names[column] = f"{override_site_codes[column]}_{data_type}_{units}"
+        else:
+            column_names[column] = f"{column}_{data_type}_{units}"

-    # Renumber the index
-    df.reset_index(drop=True, inplace=True)
+    # Reset the index to turn the datetime index into a column
+    df.reset_index(inplace=True)

-    # Drop rows that are missing all their values
-    df.dropna(how='all', inplace=True)
+    # Rename the columns
+    df.rename(columns=column_names, inplace=True)

-    # Write the updated data back to the file
-    df.to_csv(f"{workspace}/{name}.csv")
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'])
+
+    # Drop the "Unnamed: 0" column if it exists
+    if 'Unnamed: 0' in df.columns:
+        df.drop(columns=['Unnamed: 0'], inplace=True)
+
+    # Write the data to a csv file
+    df.to_csv(f"{workspace}/{name}.csv", index=True)
+

 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
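Note: The R/dbhydroR pipeline is replaced by a pure-Python path: fetch, replace sentinel values with NaN, pivot the long records to one column per site code, then rename to the legacy "<site>_<type>_<units>" headers. A toy illustration of the pivot step (rows invented):

    import pandas as pd

    long_df = pd.DataFrame({
        "datetime": pd.to_datetime(["2024-05-01", "2024-05-01",
                                    "2024-05-02", "2024-05-02"]),
        "site_code": ["S65E", "S71", "S65E", "S71"],
        "value": [13.9, 14.2, 13.8, 14.1],
    })

    wide = long_df.pivot(index="datetime", columns="site_code", values="value")
    print(wide.columns.tolist())  # ['S65E', 'S71'], later suffixed with '_STG_ft NGVD29'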
@@ -1,27 +1,50 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest


 D = {
-    "PHOSPHATE, TOTAL AS P": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                              'CULV10A', 'S133', 'S127', 'S135']},
-    "AMMONIA-N": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                  'CULV10A', 'S133', 'S127', 'S135']},
-    "NITRATE+NITRITE-N": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                          'CULV10A', 'S133', 'S127', 'S135']},
-    "TOTAL NITROGEN": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                       'CULV10A', 'S133', 'S127', 'S135']},
-    "CHLOROPHYLL-A": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133',
-                                      'S127', 'S135', 'S191']},
-    "CHLOROPHYLL-A(LC)": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A',
-                                          'S133', 'S127', 'S135', 'S191']},
-    "CHLOROPHYLL-A, CORRECTED": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                                 'CULV10A', 'S133', 'S127', 'S135', 'S191']}
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ['S65E', 'S154', 'S4', 'S308C', 'CULV10A', 'S133', 'S127', 'S191'],
+        "station_types": {'S65E': 'SITE', 'S154': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S191': 'SITE'}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
 }

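Note: Each parameter now carries its DBHYDRO test_number plus a per-station type ('SITE' or 'STATION'), both consumed by main() below. Since params["station_types"][station_id] raises KeyError on any station missing from the map, a quick consistency check is cheap; a sketch, not part of the package:

    for name, params in D.items():
        missing = set(params["station_ids"]) - set(params["station_types"])
        assert not missing, f"{name}: no station_type for {sorted(missing)}"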
@@ -34,6 +57,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")

+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
         # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -41,16 +67,19 @@ def main(workspace: str, d: dict = D) -> dict:

         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if dbhydro_data_is_latest(date_latest):
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {station} skipped. Data is already up to date.')
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue

             # Temporarily rename current data file so it isn't over written
@@ -61,8 +90,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)

                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
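Note: The date format gains a time component, which implies find_last_date_in_csv now yields full timestamps for water quality files; the day-increment must therefore parse and emit "%Y-%m-%d %H:%M:%S". Worked through:

    from datetime import datetime, timedelta

    date_latest = "2024-05-01 00:00:00"
    date_min = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S")
                + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
    print(date_min)  # 2024-05-02 00:00:00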
@@ -71,25 +100,38 @@ def main(workspace: str, d: dict = D) -> dict:

                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)

-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                df_new.reset_index(inplace=True)
+
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)

                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)

                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)

                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()

                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
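Note: Casting the merged index to str before writing with csv.QUOTE_NONNUMERIC forces quotes around the row numbers, matching the 1.3.0 layout; since QUOTE_NONNUMERIC quotes every non-numeric field, the dates come out quoted too, which is what rewrite_water_quality_file_without_date_quotes then undoes. Demonstrated with invented values:

    import csv
    import pandas as pd

    df = pd.DataFrame({"date": ["2024-05-01"], "value": [0.12]})
    df.index = (df.index + 1).astype(str)  # "1", "2", ... as in the merged file
    print(df.to_csv(quoting=csv.QUOTE_NONNUMERIC))
    # "","date","value"
    # "1","2024-05-01",0.12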