loone-data-prep 1.2.4-py3-none-any.whl → 1.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
- loone_data_prep/LOONE_DATA_PREP.py +0 -1
- loone_data_prep/dbhydro_insights.py +195 -0
- loone_data_prep/flow_data/S65E_total.py +57 -57
- loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
- loone_data_prep/flow_data/get_forecast_flows.py +19 -105
- loone_data_prep/flow_data/get_inflows.py +18 -8
- loone_data_prep/flow_data/get_outflows.py +16 -7
- loone_data_prep/flow_data/hydro.py +62 -91
- loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
- loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
- loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
- loone_data_prep/utils.py +262 -32
- loone_data_prep/water_level_data/get_all.py +52 -44
- loone_data_prep/water_level_data/hydro.py +49 -68
- loone_data_prep/water_quality_data/get_inflows.py +69 -27
- loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
- loone_data_prep/water_quality_data/wq.py +114 -88
- loone_data_prep/weather_data/get_all.py +5 -3
- loone_data_prep/weather_data/weather.py +117 -180
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
- loone_data_prep-1.3.1.dist-info/RECORD +38 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
- loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
- loone_data_prep-1.2.4.dist-info/RECORD +0 -38
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
loone_data_prep/water_level_data/hydro.py

```diff
@@ -1,15 +1,14 @@
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
 import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
 
 DEFAULT_DBKEYS = ["16022", "12509", "12519", "16265", "15611"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")
 
 
-@retry(
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
@@ -17,8 +16,19 @@ def get(
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     datum: str = "",
-    **kwargs: str | list
+    **kwargs: str | list | dict
 ) -> None:
+    """Fetches daily water level data from DBHYDRO and saves it as a CSV file.
+
+    Args:
+        workspace (str): The directory where the CSV file will be saved.
+        name (str): The name of the output CSV file (without extension).
+        dbkeys (list): List of DBHYDRO dbkeys to fetch data for. Defaults to DEFAULT_DBKEYS.
+        date_min (str): The start date for data retrieval in 'YYYY-MM-DD' format. Defaults to '1950-01-01'.
+        date_max (str): The end date for data retrieval in 'YYYY-MM-DD' format. Defaults to current date.
+        datum (str): The datum to use for the water level data. Defaults to an empty string. One of 'NGVD29', or 'NAVD88'.
+        **kwargs: Additional keyword arguments. Can include 'override_site_codes' (dict) to rename site codes in the output.
+    """
     # Get the type and units for the station
     data_type = "STG"
     units = "ft NGVD29"
@@ -27,77 +37,48 @@ def get(
         data_type = "GAGHT"
         units = "feet"
 
-
-
-
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        library(dplyr)
-
-        # Stage Data
-        if ("{datum}" == "")
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-        }}
-
-        if (nchar("{datum}") > 0)
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE, datum = "{datum}")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames({name}) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow({name}) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkeys", "{dbkeys}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkeys}"))
-        }}
-
-        # Get the station
-        station <- {name}$station[1]
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        {name}$type <- "{data_type}"
-        {name}$units <- "{units}"
-
-        # Clean the data.frame
-        {name} <- clean_hydro({name})
-
-        # Drop the " _STG_ft NGVD29" column
-        {name} <- {name} %>% select(-` _{data_type}_{units}`)
-
-        # Write the data to a csv file
-        write.csv({name},file ='{workspace}/{name}.csv')
-        """
-    )
+    # Retrieve the data
+    api = get_dbhydro_api()
+    response = api.get_daily_data(dbkeys, 'id', date_min, date_max, datum, False)
 
-
-
-def _reformat_water_level_file(workspace: str, name: str):
-    # Read in the data
-    df = pd.read_csv(f"{workspace}/{name}.csv")
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)
 
-    #
-    df
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)
 
-    #
-
+    # Make sure datetime exists as a column
+    if 'datetime' not in df.columns:
+        df.reset_index(inplace=True)
+
+    # Pivot the data so that each site_code is a column
+    df = df.pivot(index='datetime', columns='site_code', values='value')
 
-    #
-
+    # Get the current column names in df and the names to rename them to
+    column_names = {'datetime': 'date'}
+    override_site_codes = kwargs.get("override_site_codes", None)
+    for column in df.columns:
+        if override_site_codes and column in override_site_codes:
+            column_names[column] = f"{override_site_codes[column]}_{data_type}_{units}"
+        else:
+            column_names[column] = f"{column}_{data_type}_{units}"
 
-    #
-    df.reset_index(
+    # Reset the index to turn the datetime index into a column
+    df.reset_index(inplace=True)
 
-    #
-    df.
+    # Rename the columns
+    df.rename(columns=column_names, inplace=True)
 
-    #
-    df.
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'])
+
+    # Drop the "Unnamed: 0" column if it exists
+    if 'Unnamed: 0' in df.columns:
+        df.drop(columns=['Unnamed: 0'], inplace=True)
+
+    # Write the data to a csv file
+    df.to_csv(f"{workspace}/{name}.csv", index=True)
+
 
 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
```
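The rewrite above swaps the embedded R/dbhydroR script for the package's own DBHYDRO API client and a pure-pandas reshape, and the bare `@retry(` decorator becomes `@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)`, i.e. up to five attempts with a 15-second delay that doubles between tries, capped at 60 seconds. A minimal sketch of the new reshape on synthetic data — the `datetime`/`site_code`/`value` layout is what the diff implies `response.to_dataframe(True)` returns, and the `override_site_codes` mapping here is a made-up illustration of the new kwarg:

```python
import pandas as pd

# Synthetic stand-in for the long-format frame; in the package it comes from
# get_dbhydro_api().get_daily_data(...) followed by response.to_dataframe(True).
df = pd.DataFrame({
    "datetime": pd.to_datetime(["2024-01-01", "2024-01-01", "2024-01-02", "2024-01-02"]),
    "site_code": ["L001", "LZ40", "L001", "LZ40"],
    "value": [12.1, 11.8, 12.0, 11.9],
})

data_type = "STG"
units = "ft NGVD29"
override_site_codes = {"LZ40": "LZ40X"}  # hypothetical rename, as kwargs would carry

# Pivot so each site_code becomes its own column, as in the new get()
df = df.pivot(index="datetime", columns="site_code", values="value")

# Build the rename map, honoring any site-code overrides
column_names = {"datetime": "date"}
for column in df.columns:
    label = override_site_codes.get(column, column)
    column_names[column] = f"{label}_{data_type}_{units}"

# Turn the datetime index back into a column, then rename
df.reset_index(inplace=True)
df.rename(columns=column_names, inplace=True)

print(df.columns.tolist())
# ['date', 'L001_STG_ft NGVD29', 'LZ40X_STG_ft NGVD29']
```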
loone_data_prep/water_quality_data/get_inflows.py

```diff
@@ -1,27 +1,50 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv,
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {
-
-
-
-
-
-
-
-
-
-    "
-
-
-
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ['S65E', 'S154', 'S4', 'S308C', 'CULV10A', 'S133', 'S127', 'S191'],
+        "station_types": {'S65E': 'SITE', 'S154': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S191': 'SITE'}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
 }
 
 
@@ -34,6 +57,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
        # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -41,16 +67,19 @@ def main(workspace: str, d: dict = D) -> dict:
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
         # Temporarily rename current data file so it isn't over written
@@ -61,8 +90,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -71,25 +100,38 @@ def main(workspace: str, d: dict = D) -> dict:
                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-
+
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)
 
                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()
 
                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
```
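The merge block above deliberately reproduces the legacy file layout: rows are renumbered from 1 and the index is cast to `str` so that `csv.QUOTE_NONNUMERIC` quotes row labels and dates while leaving measurements bare; `wq.rewrite_water_quality_file_without_date_quotes` then strips the quotes from the dates. A self-contained sketch of the quoting behavior, using synthetic frames in place of the downloaded data:

```python
import csv
import io

import pandas as pd

# Synthetic stand-ins for the original file and the newly downloaded rows
df_original = pd.DataFrame({"date": ["2024-01-01 00:00:00"], "value": [0.12]})
df_new = pd.DataFrame({"date": ["2024-01-02 00:00:00"], "value": [0.15]})

# Concatenate without ignoring the index, then renumber, as the diff does
df_merged = pd.concat([df_original, df_new], ignore_index=False)
df_merged.reset_index(inplace=True)
df_merged.drop(['index'], axis=1, inplace=True)

# Start the index at 1 and cast it to str so QUOTE_NONNUMERIC will quote it
df_merged.index = df_merged.index + 1
df_merged.index = df_merged.index.astype(str)

buf = io.StringIO()
df_merged.to_csv(buf, index=True, quoting=csv.QUOTE_NONNUMERIC)
print(buf.getvalue())
# "","date","value"
# "1","2024-01-01 00:00:00",0.12
# "2","2024-01-02 00:00:00",0.15
```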
loone_data_prep/water_quality_data/get_lake_wq.py

```diff
@@ -1,32 +1,110 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv,
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {
-
-
-
-
-    "
-
-
-
-
-    "
-
-
-
-
-    "
-
-
-
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "PHOSPHATE, ORTHO AS P": {
+        "test_number": 23,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HILR": {
+        "test_number": 1023,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HTYR": {
+        "test_number": 1022,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LA": {
+        "test_number": 1005,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LF": {
+        "test_number": 1006,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LR": {
+        "test_number": 1007,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LW": {
+        "test_number": 1008,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LY": {
+        "test_number": 1009,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN RR": {
+        "test_number": 1010,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN WR": {
+        "test_number": 1011,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN YR": {
+        "test_number": 1012,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "DISSOLVED OXYGEN": {
+        "test_number": 8,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    }
 }
 
 
@@ -36,6 +114,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
         # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -43,16 +124,19 @@ def main(workspace: str, d: dict = D) -> dict:
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
         # Temporarily rename current data file so it isn't over written
@@ -63,8 +147,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -73,25 +157,38 @@ def main(workspace: str, d: dict = D) -> dict:
                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                df_new.reset_index(inplace=True)
 
-                #
-
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
+                # Merge the new data with the original data
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)
+
                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()
 
                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
```
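Both water-quality scripts now share the same control flow: each entry in `D` carries an explicit DBHYDRO `test_number` and a per-station `station_type`, downloads are skipped when the local CSV is already current, and incremental fetches resume one day after the last stored timestamp (note the format change from `%Y-%m-%d` to `%Y-%m-%d %H:%M:%S`). A condensed sketch of that loop, with injected callables standing in for the package's `find_last_date_in_csv`, `dbhydro_water_quality_data_is_latest`, and `wq.get` — the stubs at the bottom are illustrative only:

```python
from datetime import datetime, timedelta
from typing import Callable, Optional

D = {
    "DISSOLVED OXYGEN": {
        "test_number": 8,
        "station_ids": ["L001", "LZ40"],
        "station_types": {"L001": "SITE", "LZ40": "SITE"},
    },
}


def update(
    d: dict,
    last_date: Callable[[str, str], Optional[str]],   # stands in for find_last_date_in_csv
    is_latest: Callable[[str, str, str, int], bool],  # stands in for dbhydro_water_quality_data_is_latest
    fetch: Callable[..., None],                       # stands in for wq.get
) -> None:
    for name, params in d.items():
        test_number = params["test_number"]
        for station_id in params["station_ids"]:
            station_type = params["station_types"][station_id]
            date_latest = last_date(name, station_id)
            if date_latest is None:
                # No local file yet: fetch the full history
                fetch(name, test_number, [station_id])
            elif is_latest(date_latest, station_id, station_type, test_number):
                # Local CSV is already current: skip the download
                continue
            else:
                # Resume one day after the last stored timestamp
                start = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S")
                         + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
                fetch(name, test_number, [station_id], date_min=start)


# Example wiring with stubs:
update(
    D,
    last_date=lambda name, sid: "2024-01-02 00:00:00",
    is_latest=lambda date, sid, stype, tn: False,
    fetch=lambda *args, **kwargs: print("fetch:", args, kwargs),
)
```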