PyPI - loone-data-prep - Versions diffs - 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl - Mend

loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

loone_data_prep/dbhydro_insights.py +195 -0
loone_data_prep/flow_data/S65E_total.py +57 -57
loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
loone_data_prep/flow_data/get_forecast_flows.py +19 -105
loone_data_prep/flow_data/get_inflows.py +18 -8
loone_data_prep/flow_data/get_outflows.py +16 -7
loone_data_prep/flow_data/hydro.py +62 -91
loone_data_prep/utils.py +243 -30
loone_data_prep/water_level_data/get_all.py +52 -44
loone_data_prep/water_level_data/hydro.py +49 -68
loone_data_prep/water_quality_data/get_inflows.py +69 -27
loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
loone_data_prep/water_quality_data/wq.py +114 -88
loone_data_prep/weather_data/get_all.py +5 -3
loone_data_prep/weather_data/weather.py +117 -180
{loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
{loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/RECORD +20 -19
{loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
{loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
{loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0

loone_data_prep/water_quality_data/get_lake_wq.py CHANGED Viewed

@@ -1,32 +1,110 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv,  dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv,  dbhydro_water_quality_data_is_latest
 D = {
-    "PHOSPHATE, TOTAL AS P": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "PHOSPHATE, ORTHO AS P": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "AMMONIA-N": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "NITRATE+NITRITE-N": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "TOTAL NITROGEN": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN HILR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN HTYR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LA": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LF": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LW": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LY": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN RR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN WR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN YR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "CHLOROPHYLL-A": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "CHLOROPHYLL-A(LC)": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "CHLOROPHYLL-A, CORRECTED": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "DISSOLVED OXYGEN": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]}
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "PHOSPHATE, ORTHO AS P": {
+        "test_number": 23,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HILR": {
+        "test_number": 1023,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HTYR": {
+        "test_number": 1022,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LA": {
+        "test_number": 1005,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LF": {
+        "test_number": 1006,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LR": {
+        "test_number": 1007,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LW": {
+        "test_number": 1008,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LY": {
+        "test_number": 1009,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN RR": {
+        "test_number": 1010,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN WR": {
+        "test_number": 1011,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN YR": {
+        "test_number": 1012,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "DISSOLVED OXYGEN": {
+        "test_number": 8,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    }
 }
@@ -36,6 +114,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
         # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -43,16 +124,19 @@ def main(workspace: str, d: dict = D) -> dict:
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if dbhydro_data_is_latest(date_latest):
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {station} skipped. Data is already up to date.')
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
                 # Temporarily rename current data file so it isn't over written
@@ -63,8 +147,8 @@ def main(workspace: str, d: dict = D) -> dict:
                 try:
                     # Get only the water quality data that is newer than the latest data in the csv file
                     print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                    wq.get(workspace, name, [station_id], date_min=date_latest)
+                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                    wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
                     # Data failed to download - It's possible the data's end date has been reached
                     if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -73,25 +157,38 @@ def main(workspace: str, d: dict = D) -> dict:
                     # Read in the original data
                     df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
-                    # Calculate the days column for the newly downloaded data
-                    df_original_date_min = df_original['date'].min()
-                    wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
                     # Read in the newly downloaded data
                     df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                    df_new.reset_index(inplace=True)
-                    # Merge the new data with the original data
-                    df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                    # Calculate the days column for the newly downloaded data
+                    df_original_date_min = df_original['date'].min()
+                    df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
+                    # Merge the new data with the original data
+                    df_merged = pd.concat([df_original, df_new], ignore_index=False)
+                    # Re-number the index
+                    df_merged.reset_index(inplace=True)
+                    df_merged.drop(['index'], axis=1, inplace=True)
+                    # Start index at 1 instead of 0 (for backwards compatibility)
+                    df_merged.index = df_merged.index + 1
+                    # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                    df_merged.index = df_merged.index.astype(str)
                     # Write out the merged data
-                    df_merged.to_csv(os.path.join(workspace, original_file_name))
+                    df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+                    # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                    wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
                     # Remove the original renamed data file
                     os.remove(os.path.join(workspace, original_file_name_temp))
                 except Exception as e:
                     # Notify of the error
                     print(f"Error occurred while downloading new water quality data: {e}")
+                    traceback.print_exc()
                     # Remove the newly downloaded data file if it exists
                     if os.path.exists(os.path.join(workspace, original_file_name)):

loone_data_prep/water_quality_data/wq.py CHANGED Viewed

@@ -1,117 +1,143 @@
+import csv
+import os
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
+import pandas as pd
+from loone_data_prep.utils import get_dbhydro_api
 DEFAULT_STATION_IDS = ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")
-@retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
+    test_number: int,
     station_ids: list = DEFAULT_STATION_IDS,
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     **kwargs: str | list
 ) -> None:
-    station_ids_str = "\"" + "\", \"".join(station_ids) + "\""
-    r(
-        f"""
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        # Specify the station IDs, date range, and test names
-        station_ids <- c({station_ids_str})
-        date_min <- "{date_min}"
-        date_max <- "{date_max}"
-        test_names <- c("{name}")
-        # Loop over the station IDs
-        for (station_id in station_ids) {{
-            # Retrieve water quality data for the current station ID
-            water_quality_data <- tryCatch(
-                get_wq(
-                    station_id = station_id,
-                    date_min = date_min,
-                    date_max = date_max,
-                    test_name = test_names
-                ),
-                error = function(e) NULL
-            )
-            # Check if data is available for the current station ID and test name
-            if (!is.null(water_quality_data) && nrow(water_quality_data) > 0) {{
-                # Convert the vector to a data frame
-                water_quality_data <- as.data.frame(water_quality_data)
-                # Calculate the number of days from the minimum date plus 8
-                water_quality_data$days <- as.integer(difftime(water_quality_data$date, min(water_quality_data$date), units = "days")) + as.integer(format(min(water_quality_data$date), "%d"))
-                # Generate the filename based on the station ID
-                filename <- paste0("{workspace}/water_quality_", station_id, "_", test_names, ".csv")
-                # Save data to a CSV file
-                write.csv(water_quality_data, file = filename)
-                # Print a message indicating the file has been saved
-                cat("CSV file", filename, "has been saved.\n")
-            }} else {{
-                # Print a message indicating no data was found for the current station ID and test name
-                cat("No data found for station ID", station_id, "and test name", test_names, "\n")
-            }}
-            Sys.sleep(1) # Wait for 1 seconds before the next iteration
-        }}
-        """  # noqa: E501
-    )
+    """Fetch water quality data from DBHydro API and save it as CSV files in the specified workspace.
+    Args:
+        workspace (str): The directory where the CSV files will be saved.
+        name (str): The name of the water quality parameter. Example: 'PHOSPHATE, TOTAL AS P'
+        test_number (int): The DBHydro test number for the water quality parameter.
+        station_ids (list, optional): List of station IDs to fetch data for. Defaults to DEFAULT_STATION_IDS.
+        date_min (str, optional): The start date for fetching data in YYYY-MM-DD format. Defaults to "1950-01-01".
+        date_max (str, optional): The end date for fetching data in YYYY-MM-DD format. Defaults to the current date.
+        **kwargs: Additional keyword arguments.
+    Returns:
+        None
+    """
+    # Initialize the DBHydro API
+    api = get_dbhydro_api()
+    # Fetch water quality data
+    response = api.get_water_quality(stations=station_ids, test_numbers=[test_number], date_start=date_min, date_end=date_max, exclude_flagged_results=False)
+    df = response.to_dataframe(include_metadata=True)
+    # Process and save data for each station
+    for station in station_ids:
+        # Get a copy of the data frame for this station
+        df_station = df[df['station'] == station].copy()
+        # Check if the data frame is empty
+        if df_station.empty:
+            print(f'No data found for station ID {station} and test number {test_number}.')
+            continue
+        # Get the units of the data
+        units = df_station['units'].iloc[0] if 'units' in df_station.columns else ''
+        # Drop unwanted columns
+        df_station = df_station[['date_collected_str', 'sig_fig_value']].copy()
+        # Convert string sig_fig_value to numeric
+        df_station['sig_fig_value'] = pd.to_numeric(df_station['sig_fig_value'], errors='coerce')
+        # Calculate daily average values
+        df_station['date_collected_str'] = pd.to_datetime(df_station['date_collected_str'])
+        df_station["date_only"] = df_station["date_collected_str"].dt.date
+        df_station = df_station.groupby("date_only")["sig_fig_value"].mean().reset_index()
+        df_station.rename(columns={"date_only": "date_collected_str"}, inplace=True)
+        # Format dataframe to expected layout
+        df_station['date_collected_str'] = pd.to_datetime(df_station['date_collected_str'])                                     # Convert date_collected_str column to datetime
+        df_station.sort_values('date_collected_str', inplace=True)                                                              # Sort df by date_collected_str
+        df_station.rename(columns={'date_collected_str': 'date', 'sig_fig_value': f'{station}_{name}_{units}'}, inplace=True)   # Rename columns
+        # Calculate the days column
+        df_station['days'] = (df_station['date'] - df_station['date'].min()).dt.days + df_station['date'].min().day
+        # Make sure the integer index is written out (for backwards compatibility)
+        df_station.reset_index(inplace=True, drop=True)
+        # Start index at 1 instead of 0 (for backwards compatibility)
+        df_station.index = df_station.index + 1
+        # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+        df_station.index = df_station.index.astype(str)
+        # Make sure the date column includes time information at midnight (for backwards compatibility)
+        df_station['date'] = df_station['date'].dt.strftime('%Y-%m-%d 00:00:00')
+        # Write out the data frame to a CSV file
+        df_station.to_csv(os.path.join(workspace, f'water_quality_{station}_{name}.csv'), index=True, quoting=csv.QUOTE_NONNUMERIC)
+        # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+        rewrite_water_quality_file_without_date_quotes(workspace, f'water_quality_{station}_{name}.csv')
-def _calculate_days_column(workspace: str, file_name: str, date_min: str):
+def _calculate_days_column(workspace: str, df: pd.DataFrame, date_min: str):
     """
     Calculates the values that should be in the "days" column of the water quality data CSV file
     based on the given date_min and writes the updated data frame back to the CSV file.
     Args:
         workspace (str): The path to the workspace directory.
-        file_name (str): The name of the water quality data CSV file.
+        df (pd.DataFrame): The water quality data dataframe.
         date_min (str): The minimum date that the "days" column values should be calculated from. Should be in format "YYYY-MM-DD".
     """
-    r(
-        f"""
-        # Import necessary libraries
-        library(lubridate)
-        # Read the CSV file
-        df <- read.csv("{workspace}/{file_name}", check.names = FALSE)
-        # Drop the "X" column that R adds when reading CSV files
-        df <- df[,-1]
-        # Get date_min as an object with the correct timezone
-        date_min_object <- as.POSIXct("{date_min}", tz = "UTC")
-        date_min_tz <- format(with_tz(date_min_object, tzone = "America/New_York"), "%Z")
-        date_min_object <- as.POSIXct("{date_min}", tz = date_min_tz)
-        # Calculate each value in the days column based on the date_min
-        for(i in 1:nrow(df))
-        {{
-            # Get the current row's date as an object with the correct timezone
-            date <- as.POSIXct(df$date[i], tz = "UTC")
-            date_tz <- format(with_tz(date, tzone = "America/New_York"), "%Z")
-            date <- as.POSIXct(df$date[i], tz = date_tz)
-            # Calculate the number of days from the minimum date to the row's date plus the number of days in date_min
-            df$days[i] <- as.integer(difftime(date, date_min_object, units = "days")) + as.integer(format(date_min_object, "%d"))
-        }}
-        # Write the updated data frame back to the CSV file
-        write.csv(df, file = "{workspace}/{file_name}", row.names = FALSE)
-        """ # noqa: E501
-      )
+    # Ensure df['date'] is a pandas datetime Series
+    df['date'] = pd.to_datetime(df['date'])
+    date_min_object = pd.to_datetime(date_min)
+    # Calculate days column for all rows
+    df['days'] = (df['date'] - date_min_object).dt.days + date_min_object.day
+    return df
+def rewrite_water_quality_file_without_date_quotes(workspace: str, file_name: str) -> None:
+    """
+    Rewrites the given water quality CSV file so that the dates don't have double quotes around them (for backwards compatibility).
+    Args:
+        workspace (str): The path to the workspace directory.
+        file_name (str): The name of the water quality CSV file.
+    """
+    # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+    file_path = os.path.join(workspace, file_name)
+    lines = []
+    with open(file_path, 'r') as file:
+        lines = file.readlines()
+    with open(file_path, 'w', newline='') as file:
+        line_number = 0
+        for line in lines:
+            if line_number != 0:
+                line_split = line.split(',')
+                line_split[1] = line_split[1].replace('"', '')  # Remove quotes around dates (2nd column)
+                line = ','.join(line_split)
+            file.write(line)
+            line_number += 1
 if __name__ == "__main__":

loone_data_prep/weather_data/get_all.py CHANGED Viewed

@@ -88,7 +88,7 @@ def main(workspace: str, d: dict = D, dbkey_stations: dict = DBKEY_STATIONS) ->
                 continue
             # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            if dbhydro_data_is_latest(date_latest, dbkey):
                 # Notify that the data is already up to date
                 print(f'Downloading of new {name} data skipped for dbkey {dbkey}. Data is already up to date.')
                 continue
@@ -99,8 +99,10 @@ def main(workspace: str, d: dict = D, dbkey_stations: dict = DBKEY_STATIONS) ->
             try:
                 # Download only the new data
-                print(f'Downloading new {name} data for dbkey {dbkey} starting from date {date_latest}')
-                weather.get(workspace, name, dbkeys=[dbkey], date_min=date_latest)
+                date_start = pd.to_datetime(date_latest) + pd.Timedelta(days=1)
+                date_start = date_start.strftime('%Y-%m-%d')
+                print(f'Downloading new {name} data for dbkey {dbkey} starting from date {date_start}')
+                weather.get(workspace, name, dbkeys=[dbkey], date_min=date_start)
                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):

loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl