loone-data-prep 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
 import sys
 import os
+import uuid
+from datetime import datetime, timedelta
+import pandas as pd
 from loone_data_prep.water_quality_data import wq
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
 
 
 D = {
@@ -21,18 +25,99 @@ D = {
 }
 
 
+DEFAULT_DATE_MIN = "1950-01-01"
+
+
 def main(workspace: str, d: dict = D) -> dict:
     missing_files = []
+    failed_downloads = []  # List of file names that the script failed to get the latest data for (but the files still exist)
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
-        wq.get(workspace, name, **params)
+
+        # Get the date of the latest data in the csv file for each station id
+        station_date_latest = {}
+        for station_id in params["station_ids"]:
+            station_date_latest[station_id] = find_last_date_in_csv(workspace, f"water_quality_{station_id}_{name}.csv")
+
+        # Get the water quality data
+        for station_id, date_latest in station_date_latest.items():
+            # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
+            if date_latest is None:
+                # Get all the water quality data for the name/station combination
+                print(f"Getting all {name} data for station ID: {station_id}.")
+                wq.get(workspace, name, [station_id])
+            else:
+                # Check whether we already have the latest data
+                if dbhydro_data_is_latest(date_latest):
+                    # Notify that the data is already up to date
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
+                    continue
+
+                # Temporarily rename the current data file so it isn't overwritten
+                original_file_name = f"water_quality_{station_id}_{name}.csv"
+                original_file_name_temp = f"water_quality_{station_id}_{name}_{uuid.uuid4()}.csv"
+                os.rename(os.path.join(workspace, original_file_name), os.path.join(workspace, original_file_name_temp))
+
+                try:
+                    # Get only the water quality data that is newer than the latest data in the csv file
+                    print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
+                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
+                    wq.get(workspace, name, [station_id], date_min=date_latest)
+
+                    # Data failed to download - it's possible the data's end date has been reached
+                    if not os.path.exists(os.path.join(workspace, original_file_name)):
+                        raise Exception(f"It's possible that the data for test name: {name} station ID: {station_id} has reached its end date.")
+
+                    # Read in the original data
+                    df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
+
+                    # Calculate the days column for the newly downloaded data
+                    df_original_date_min = df_original['date'].min()
+                    wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
+
+                    # Read in the newly downloaded data
+                    df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
+                    df_new.reset_index(inplace=True)
+
+                    # Merge the new data with the original data
+                    df_merged = pd.concat([df_original, df_new], ignore_index=True)
+
+                    # Write out the merged data
+                    df_merged.to_csv(os.path.join(workspace, original_file_name))
+
+                    # Remove the renamed original data file
+                    os.remove(os.path.join(workspace, original_file_name_temp))
+                except Exception as e:
+                    # Notify of the error
+                    print(f"Error occurred while downloading new water quality data: {e}")
+
+                    # Remove the newly downloaded data file if it exists
+                    if os.path.exists(os.path.join(workspace, original_file_name)):
+                        os.remove(os.path.join(workspace, original_file_name))
+
+                    # Rename the renamed file back to its original name
+                    if os.path.exists(os.path.join(workspace, original_file_name_temp)):
+                        os.rename(os.path.join(workspace, original_file_name_temp), os.path.join(workspace, original_file_name))
+
+                    # Add the file name to the list of failed downloads
+                    failed_downloads.append(original_file_name)
+
+        # Check for missing files
         for station in params["station_ids"]:
             if not os.path.exists(os.path.join(workspace, f"water_quality_{station}_{name}.csv")):
                 missing_files.append(f"water_quality_{station}_{name}.csv")
                 print(f"{name} station ID: {station} could not be downloaded after various tries.")
 
-    if missing_files:
-        return {"error": f"The following files could not be downloaded: {missing_files}"}
+    if missing_files or failed_downloads:
+        error_string = ""
+
+        if missing_files:
+            error_string += f"The following files could not be downloaded: {missing_files}"
+
+        if failed_downloads:
+            error_string += f"\nThe following files could not be updated: {failed_downloads}"
+
+        return {"error": error_string}
 
     return {"success": "Completed water quality data download."}
 
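The incremental-update logic above hinges on two helpers imported from `loone_data_prep.utils` that this diff does not show. As context, here is a minimal sketch of what `find_last_date_in_csv` plausibly does, inferred from how the callers use it (an assumption for illustration, not the package's actual implementation): it returns the newest date in the file's date column as a "YYYY-MM-DD" string, or None when the file is missing or unreadable, which the callers treat as "download the full history".

```python
# Hypothetical sketch of find_last_date_in_csv; the real helper in
# loone_data_prep.utils may differ in details.
import os
from typing import Optional

import pandas as pd


def find_last_date_in_csv(workspace: str, file_name: str) -> Optional[str]:
    """Return the newest date ("YYYY-MM-DD") in the file's date column, or None."""
    path = os.path.join(workspace, file_name)
    if not os.path.exists(path):
        return None  # No file yet: the caller downloads everything
    try:
        dates = pd.to_datetime(pd.read_csv(path)["date"])
        return dates.max().strftime("%Y-%m-%d")
    except Exception:
        return None  # An unreadable or empty file is treated like a missing one
```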
@@ -1,6 +1,10 @@
 import sys
 import os
+import uuid
+from datetime import datetime, timedelta
+import pandas as pd
 from loone_data_prep.water_quality_data import wq
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
 
 
 D = {
@@ -28,16 +32,94 @@ D = {
 
 def main(workspace: str, d: dict = D) -> dict:
     missing_files = []
+    failed_downloads = []  # List of file names that the script failed to get the latest data for (but the files still exist)
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
-        wq.get(workspace, name, **params)
+
+        # Get the date of the latest data in the csv file for each station id
+        station_date_latest = {}
+        for station_id in params["station_ids"]:
+            station_date_latest[station_id] = find_last_date_in_csv(workspace, f"water_quality_{station_id}_{name}.csv")
+
+        # Get the water quality data
+        for station_id, date_latest in station_date_latest.items():
+            # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
+            if date_latest is None:
+                # Get all the water quality data for the name/station combination
+                print(f"Getting all {name} data for station ID: {station_id}.")
+                wq.get(workspace, name, [station_id])
+            else:
+                # Check whether we already have the latest data
+                if dbhydro_data_is_latest(date_latest):
+                    # Notify that the data is already up to date
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
+                    continue
+
+                # Temporarily rename the current data file so it isn't overwritten
+                original_file_name = f"water_quality_{station_id}_{name}.csv"
+                original_file_name_temp = f"water_quality_{station_id}_{name}_{uuid.uuid4()}.csv"
+                os.rename(os.path.join(workspace, original_file_name), os.path.join(workspace, original_file_name_temp))
+
+                try:
+                    # Get only the water quality data that is newer than the latest data in the csv file
+                    print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
+                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
+                    wq.get(workspace, name, [station_id], date_min=date_latest)
+
+                    # Data failed to download - it's possible the data's end date has been reached
+                    if not os.path.exists(os.path.join(workspace, original_file_name)):
+                        raise Exception(f"It's possible that the data for test name: {name} station ID: {station_id} has reached its end date.")
+
+                    # Read in the original data
+                    df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
+
+                    # Calculate the days column for the newly downloaded data
+                    df_original_date_min = df_original['date'].min()
+                    wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
+
+                    # Read in the newly downloaded data
+                    df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
+                    df_new.reset_index(inplace=True)
+
+                    # Merge the new data with the original data
+                    df_merged = pd.concat([df_original, df_new], ignore_index=True)
+
+                    # Write out the merged data
+                    df_merged.to_csv(os.path.join(workspace, original_file_name))
+
+                    # Remove the renamed original data file
+                    os.remove(os.path.join(workspace, original_file_name_temp))
+                except Exception as e:
+                    # Notify of the error
+                    print(f"Error occurred while downloading new water quality data: {e}")
+
+                    # Remove the newly downloaded data file if it exists
+                    if os.path.exists(os.path.join(workspace, original_file_name)):
+                        os.remove(os.path.join(workspace, original_file_name))
+
+                    # Rename the renamed file back to its original name
+                    if os.path.exists(os.path.join(workspace, original_file_name_temp)):
+                        os.rename(os.path.join(workspace, original_file_name_temp), os.path.join(workspace, original_file_name))
+
+                    # Add the file name to the list of failed downloads
+                    failed_downloads.append(original_file_name)
+
+        # Check for missing files
         for station in params["station_ids"]:
             if not os.path.exists(os.path.join(workspace, f"water_quality_{station}_{name}.csv")):
                 missing_files.append(f"water_quality_{station}_{name}.csv")
                 print(f"{name} station ID: {station} could not be downloaded after various tries.")
 
-    if missing_files:
-        return {"error": f"The following files could not be downloaded: {missing_files}"}
+    if missing_files or failed_downloads:
+        error_string = ""
+
+        if missing_files:
+            error_string += f"The following files could not be downloaded: {missing_files}"
+
+        if failed_downloads:
+            error_string += f"\nThe following files could not be updated: {failed_downloads}"
+
+        return {"error": error_string}
 
     return {"success": "Completed water quality data download."}
 
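For reference, both water quality scripts signal failure through the returned dict rather than by raising: files that never downloaded and files that exist but could not be updated are folded into one error string. A short usage sketch; the workspace path is a placeholder:

```python
# Usage sketch - the workspace path below is a placeholder.
result = main("/path/to/workspace")

if "error" in result:
    # Missing files and failed updates are reported in the same string
    print(result["error"])
else:
    print(result["success"])  # "Completed water quality data download."
```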
@@ -70,6 +70,50 @@ def get(
     )
 
 
+def _calculate_days_column(workspace: str, file_name: str, date_min: str):
+    """
+    Calculates the values that should be in the "days" column of the water quality data CSV file
+    based on the given date_min and writes the updated data frame back to the CSV file.
+
+    Args:
+        workspace (str): The path to the workspace directory.
+        file_name (str): The name of the water quality data CSV file.
+        date_min (str): The minimum date that the "days" column values should be calculated from. Should be in format "YYYY-MM-DD".
+    """
+    r(
+        f"""
+        # Import necessary libraries
+        library(lubridate)
+
+        # Read the CSV file
+        df <- read.csv("{workspace}/{file_name}", check.names = FALSE)
+
+        # Drop the "X" column that R adds when reading CSV files
+        df <- df[,-1]
+
+        # Get date_min as an object with the correct timezone
+        date_min_object <- as.POSIXct("{date_min}", tz = "UTC")
+        date_min_tz <- format(with_tz(date_min_object, tzone = "America/New_York"), "%Z")
+        date_min_object <- as.POSIXct("{date_min}", tz = date_min_tz)
+
+        # Calculate each value in the days column based on the date_min
+        for(i in 1:nrow(df))
+        {{
+            # Get the current row's date as an object with the correct timezone
+            date <- as.POSIXct(df$date[i], tz = "UTC")
+            date_tz <- format(with_tz(date, tzone = "America/New_York"), "%Z")
+            date <- as.POSIXct(df$date[i], tz = date_tz)
+
+            # Days from date_min to the row's date, plus date_min's day of the month
+            df$days[i] <- as.integer(difftime(date, date_min_object, units = "days")) + as.integer(format(date_min_object, "%d"))
+        }}
+
+        # Write the updated data frame back to the CSV file
+        write.csv(df, file = "{workspace}/{file_name}", row.names = FALSE)
+        """ # noqa: E501
+    )
+
+
 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
     if len(sys.argv) >= 4:
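The "days" arithmetic in that R snippet reduces to: whole days from date_min to each row's date, plus date_min's day of the month. For readers who don't work in R, a pandas equivalent of just that computation (illustrative only; it skips the R code's America/New_York timezone normalization):

```python
import pandas as pd


def calculate_days_column(df: pd.DataFrame, date_min: str) -> pd.DataFrame:
    """Illustrative pandas version of the R 'days' computation above."""
    date_min_ts = pd.Timestamp(date_min)  # date_min is "YYYY-MM-DD"
    dates = pd.to_datetime(df["date"])
    # Whole days from date_min to each row's date, plus date_min's day of the month
    df["days"] = (dates - date_min_ts).dt.days + date_min_ts.day
    return df
```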
@@ -1,6 +1,10 @@
 import sys
 from glob import glob
+import uuid
+import os
+import pandas as pd
 from loone_data_prep.weather_data import weather
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
 
 
 D = {
@@ -14,14 +18,133 @@ D = {
 }
 
 
-def main(workspace: str, d: dict = D) -> dict:
+DBKEY_STATIONS = {
+    "16021": "L001",
+    "12515": "L005",
+    "12524": "L006",
+    "13081": "LZ40",
+    "UT736": "L001",
+    "VM675": "L005",
+    "UT743": "L006",
+    "UT748": "LZ40",
+    "16031": "L001",
+    "12518": "L005",
+    "12527": "L006",
+    "16267": "LZ40",
+    "16025": "L001",
+    "12516": "L005",
+    "12525": "L006",
+    "15649": "LZ40",
+    "16024": "L001",
+    "12512": "L005",
+    "12522": "L006",
+    "13080": "LZ40",
+    "16027": "L001",
+    "12514": "L005",
+    "12911": "L006",
+    "13078": "LZ40",
+    "16023": "L001",
+    "12510": "L005",
+    "12520": "L006",
+    "13076": "LZ40",
+}
+
+def main(workspace: str, d: dict = D, dbkey_stations: dict = DBKEY_STATIONS) -> dict:
+    """
+    Retrieves all weather data used by LOONE. When the dbkey_stations argument is provided,
+    the function downloads only the latest data it doesn't already have for the dbkeys in the d and dbkey_stations arguments.
+    Otherwise, it downloads all the data for the dbkeys in the d argument.
+
+    Args:
+        workspace (str): Path to the workspace where data will be downloaded.
+        d (dict): A dictionary of data type keys and dict values that hold keyword arguments to be used with weather_data.weather.get().
+            Valid keys are 'RAIN', 'ETPI', 'H2OT', 'RADP', 'RADT', 'AIRT', and 'WNDS'.
+        dbkey_stations (dict): Dictionary of dbkeys mapped to their station's name.
+    """
     missing_files = []
+    failed_downloads = []  # List of (data type name, file name) tuples that the script failed to get the latest data for (but the files still exist)
+
+    # Get the data for each data type
     for name, params in d.items():
-        print(f"Getting {name} for the following dbkeys: {params['dbkeys']}.")
-        weather.get(workspace, name, **params)
+
+        # Get the data for each dbkey individually for this data type
+        for dbkey in params['dbkeys']:
+            # Get the file name of the current file being downloaded
+            station = dbkey_stations[dbkey]
+            date_units_file, _ = weather._get_file_header_data_units(name)
+            original_file_name = ""
+            if name in ['RADP', 'RADT']:
+                original_file_name = f"{station}_{name}.csv"
+            else:
+                original_file_name = f"{station}_{name}_{date_units_file}.csv"
+
+            # Get the date of the latest data in the csv file
+            date_latest = find_last_date_in_csv(workspace, original_file_name)
+
+            # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
+            if date_latest is None:
+                print(f"Getting all {name} data for the following dbkey: {dbkey}.")
+                weather.get(workspace, name, dbkeys=[dbkey])
+                continue
+
+            # Check whether the latest data is already up to date.
+            if dbhydro_data_is_latest(date_latest):
+                # Notify that the data is already up to date
+                print(f'Downloading of new {name} data skipped for dbkey {dbkey}. Data is already up to date.')
+                continue
+
+            # Temporarily rename the current data file so it isn't overwritten
+            original_file_name_temp = original_file_name.replace(".csv", f"_{uuid.uuid4()}.csv")
+            os.rename(os.path.join(workspace, original_file_name), os.path.join(workspace, original_file_name_temp))
+
+            try:
+                # Download only the new data
+                print(f'Downloading new {name} data for dbkey {dbkey} starting from date {date_latest}.')
+                weather.get(workspace, name, dbkeys=[dbkey], date_min=date_latest)
+
+                # Data failed to download - it's possible the data's end date has been reached
+                if not os.path.exists(os.path.join(workspace, original_file_name)):
+                    raise Exception(f"It's possible that the data for {name} dbkey {dbkey} has reached its end date.")
+
+                # Read in the original data and the newly downloaded data
+                df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
+                df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
+
+                # Merge the new data with the original data
+                df_merged = pd.concat([df_original, df_new], ignore_index=True)
+
+                # Write out the merged data
+                df_merged.to_csv(os.path.join(workspace, original_file_name))
+
+                # Remove the renamed original data file
+                os.remove(os.path.join(workspace, original_file_name_temp))
+            except Exception as e:
+                # Notify of the error
+                print(f"Error occurred while downloading new weather data: {e}")
+
+                # Remove the newly downloaded data file if it exists
+                if os.path.exists(os.path.join(workspace, original_file_name)):
+                    os.remove(os.path.join(workspace, original_file_name))
+
+                # Rename the renamed file back to its original name
+                if os.path.exists(os.path.join(workspace, original_file_name_temp)):
+                    os.rename(os.path.join(workspace, original_file_name_temp), os.path.join(workspace, original_file_name))
+
+                # Add the file name to the list of failed downloads
+                failed_downloads.append((name, original_file_name))
+
+        # Check if all the files were downloaded
         if len(glob(f"{workspace}/*{name}*.csv")) < len(params["dbkeys"]):
             missing_files.append(True)
             print(f"After various tries, files are still missing for {name}.")
+
+    # Check if any files failed to update
+    if len(failed_downloads) > 0:
+        print(f"Failed to update the following files: {failed_downloads}")
+
+    # Create LAKE_RAINFALL_DATA.csv and LOONE_AVERAGE_ETPI_DATA.csv
+    weather.merge_data(workspace, 'RAIN')
+    weather.merge_data(workspace, 'ETPI')
 
     if True in missing_files:
         return {"error": "Missing files."}
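The skip logic in both scripts also depends on `dbhydro_data_is_latest` from `loone_data_prep.utils`, which this diff does not include. A plausible reading of its contract, sketched below as an assumption (the real helper may apply a different cutoff or consult DBHYDRO directly): given the newest date already on disk, report whether there is nothing newer worth fetching.

```python
# Hypothetical sketch of dbhydro_data_is_latest; the actual helper in
# loone_data_prep.utils may use a different cutoff.
from datetime import datetime, timedelta


def dbhydro_data_is_latest(date_latest: str) -> bool:
    """Return True when the newest on-disk date is at or past yesterday."""
    latest = datetime.strptime(date_latest, "%Y-%m-%d").date()
    yesterday = (datetime.now() - timedelta(days=1)).date()
    return latest >= yesterday
```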
@@ -3,6 +3,7 @@ from datetime import datetime
 from retry import retry
 from rpy2.robjects import r
 from rpy2.rinterface_lib.embedded import RRuntimeError
+import pandas as pd
 
 
 DEFAULT_DBKEYS = ["16021", "12515", "12524", "13081"]
@@ -20,40 +21,112 @@ def get(
 ) -> None:
     dbkeys_str = "\"" + "\", \"".join(dbkeys) + "\""
 
-    r(
-        f"""
-        library(dbhydroR)
-        library(dplyr)
+    data_type = param
+    data_units_file = None
+    data_units_header = None
+
+    # Get the units for the file name and column header based on the type of data
+    data_units_file, data_units_header = _get_file_header_data_units(data_type)
+
+    r_str = f"""
+    download_weather_data <- function()  # workspace, dbkeys, date_min, date_max, data_type, data_units_file, data_units_header
+    {{
+        library(dbhydroR)
+        library(dplyr)
 
-        dbkeys <- c({dbkeys_str})
+        dbkeys <- c({dbkeys_str})
+        successful_stations <- list()
+
+        for (i in dbkeys)
+        {{
+            # Retrieve data for the dbkey
+            data <- get_hydro(dbkey = i, date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
+
+            # Give the data.frame correct column names so it can be cleaned using the clean_hydro function
+            column_names <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
+            colnames(data) <- column_names
+
+            # Check if the data.frame has any rows
+            if (nrow(data) > 0)
+            {{
+                # Get the station
+                station <- data$station[1]
+
+                # Add type and units columns to data so it can be cleaned using the clean_hydro function
+                data$type <- "{data_type}"
+                data$units <- "{data_units_header}"
+
+                # Clean the data.frame
+                data <- clean_hydro(data)
+
+                # Get the filename of the output file
+                filename <- ""
+
+                if ("{param}" %in% c("RADP", "RADT"))
+                {{
+                    filename <- paste(station, "{data_type}", sep = "_")
+                }}
+                else
+                {{
+                    filename <- paste(station, "{data_type}", "{data_units_file}", sep = "_")
+                }}
+
+                filename <- paste0(filename, ".csv")
+                filename <- paste0("{workspace}/", filename)
 
-    for (i in dbkeys) {{
-        # Retrieve data for the dbkey
-        data <- get_hydro(dbkey = i, date_min = "{date_min}", date_max = "{date_max}")
+                # Save data to a CSV file
+                write.csv(data, file = filename)
 
-        # Extract the column names excluding the date column
-        column_names <- names(data)[-1]
+                # Print a message indicating the file has been saved
+                cat("CSV file", filename, "has been saved.\n")
 
-        # Generate the filename based on the column names
-        if ("{param}" %in% c("RADP", "RADT")) {{
-            filename <- paste0("{workspace}/", gsub(" ", "_", sub("_[^_]*$", "", paste(column_names, collapse = "_"))), ".csv")
-        }} else {{
-            filename <- paste0("{workspace}/", paste(column_names, collapse = "_"), ".csv")
-        }}
-
-        # Save data to a CSV file
-        write.csv(data, file = filename)
-
-        # Print a message indicating the file has been saved
-        cat("CSV file", filename, "has been saved.\n")
+                # Append the station to the list of successful stations
+                successful_stations <- c(successful_stations, station)
+            }}
+            else
+            {{
+                # No data given back; it's possible that the dbkey has reached its end date.
+                print(paste("Empty data.frame returned for dbkey", i, "- it's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
+            }}
 
-        # Add a delay between requests
-        Sys.sleep(2) # Wait for 2 seconds before the next iteration
+            # Add a delay between requests
+            Sys.sleep(2) # Wait for 2 seconds before the next iteration
+        }}
+
+        # Return the successful stations to the Python code
+        return(successful_stations)
     }}
     """ # noqa: E501
-    )
+
+    # Download the weather data
+    r(r_str)
+    result = r.download_weather_data()
+
+    # Get the stations of the dbkeys whose data was successfully downloaded
+    stations = []
+    for value in result:
+        stations.append(value[0])
+
+    # Format files to the expected layout
+    for station in stations:
+        if station in ["L001", "L005", "L006", "LZ40"]:
+            _reformat_weather_file(workspace, station, data_type, data_units_file, data_units_header)
+
+            # Print a message indicating the file has been reformatted
+            print(f"CSV file {workspace}/{station}_{data_type}_{data_units_file}.csv has been reformatted.")
+
 
-    if param == "RAIN":
+def merge_data(workspace: str, data_type: str):
+    """
+    Merge the data files for the different stations to create either the LAKE_RAINFALL_DATA.csv or LOONE_AVERAGE_ETPI_DATA.csv file.
+
+    Args:
+        workspace (str): The path to the workspace directory.
+        data_type (str): The type of data. Either 'RAIN' for LAKE_RAINFALL_DATA.csv or 'ETPI' for LOONE_AVERAGE_ETPI_DATA.csv.
+    """
+
+    # Merge the data files for the different stations (LAKE_RAINFALL_DATA.csv)
+    if data_type == "RAIN":
         r(
             f"""
             L001_RAIN_Inches <- read.csv("{workspace}/L001_RAIN_Inches.csv", colClasses = c("NULL", "character", "numeric"))
@@ -79,7 +152,8 @@ def get(
             """ # noqa: E501
         )
 
-    if param == "ETPI":
+    # Merge the data files for the different stations (LOONE_AVERAGE_ETPI_DATA.csv)
+    if data_type == "ETPI":
         r(
             f"""
             L001_ETPI_Inches <- read.csv("{workspace}/L001_ETPI_Inches.csv", colClasses = c("NULL", "character", "numeric"))
@@ -107,6 +181,90 @@ def get(
         )
 
 
+def _reformat_weather_file(workspace: str, station: str, data_type: str, data_units_file: str, data_units_header: str) -> None:
+    '''
+    Reformats the dbhydro weather file to the layout expected by the rest of the LOONE scripts.
+    This function reads in and writes out a .csv file.
+
+    Args:
+        workspace (str): The path to the workspace directory.
+        station (str): The station name. Ex: L001, L005, L006, LZ40.
+        data_type (str): The type of data. Ex: RAIN, ETPI, H2OT, RADP, RADT, AIRT, WNDS.
+        data_units_file (str): The units for the file name. Ex: Inches, Degrees Celsius, etc.
+        data_units_header (str): The units for the column header. Ex: Inches, Degrees Celsius, etc. Can differ from data_units_file when data_type is either RADP or RADT.
+
+    Returns:
+        None
+    '''
+    # Read in the data
+    df = None
+    if data_type in ['RADP', 'RADT']:
+        df = pd.read_csv(f"{workspace}/{station}_{data_type}.csv")
+    else:
+        df = pd.read_csv(f"{workspace}/{station}_{data_type}_{data_units_file}.csv")
+
+    # Remove unneeded columns
+    df.drop(f' _{data_type}_{data_units_header}', axis=1, inplace=True)
+    df.drop('Unnamed: 0', axis=1, inplace=True)
+
+    # Convert the date column to datetime
+    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+
+    # Sort the data by date
+    df.sort_values('date', inplace=True)
+
+    # Renumber the index
+    df.reset_index(drop=True, inplace=True)
+
+    # Drop rows that are missing all their values
+    df.dropna(how='all', inplace=True)
+
+    # Write the updated data back to the file
+    if data_type in ['RADP', 'RADT']:
+        df.to_csv(f"{workspace}/{station}_{data_type}.csv")
+    else:
+        df.to_csv(f"{workspace}/{station}_{data_type}_{data_units_file}.csv")
+
+
+def _get_file_header_data_units(data_type: str) -> tuple[str, str]:
+    """
+    Retrieves the units of measurement for a given environmental data type, to be used in file names and column headers.
+
+    This function maps a specified environmental data type to its corresponding units of measurement.
+    These units are used for naming files and for the column headers within those files.
+
+    Args:
+        data_type (str): The type of environmental data for which units are being requested. Supported types are "RAIN", "ETPI", "H2OT", "RADP", "RADT", "AIRT", and "WNDS".
+
+    Returns:
+        tuple[str, str]: A tuple of two strings. The first is the unit of measurement for the file name, and the second is the unit of measurement for the column header in the data file.
+    """
+    # Get the units for the file name and column header based on the type of data
+    if data_type == "RAIN":
+        data_units_file = "Inches"
+        data_units_header = "Inches"
+    elif data_type == "ETPI":
+        data_units_file = "Inches"
+        data_units_header = "Inches"
+    elif data_type == "H2OT":
+        data_units_file = "Degrees Celsius"
+        data_units_header = "Degrees Celsius"
+    elif data_type == "RADP":
+        data_units_file = ""
+        data_units_header = "MICROMOLE/m^2/s"
+    elif data_type == "RADT":
+        data_units_file = ""
+        data_units_header = "kW/m^2"
+    elif data_type == "AIRT":
+        data_units_file = "Degrees Celsius"
+        data_units_header = "Degrees Celsius"
+    elif data_type == "WNDS":
+        data_units_file = "MPH"
+        data_units_header = "MPH"
+
+    return data_units_file, data_units_header
+
+
 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
     if len(sys.argv) >= 4:
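A closing note on `_get_file_header_data_units`: for an unsupported data type, the if/elif ladder never assigns the two locals, so the final return raises an UnboundLocalError. A table-driven variant, shown here only as a sketch of an alternative (not how the package implements it), makes both the mapping and the failure mode explicit:

```python
# Sketch of a table-driven alternative to _get_file_header_data_units.
# Each entry is (file-name units, column-header units) for a data type.
DATA_UNITS = {
    "RAIN": ("Inches", "Inches"),
    "ETPI": ("Inches", "Inches"),
    "H2OT": ("Degrees Celsius", "Degrees Celsius"),
    "RADP": ("", "MICROMOLE/m^2/s"),
    "RADT": ("", "kW/m^2"),
    "AIRT": ("Degrees Celsius", "Degrees Celsius"),
    "WNDS": ("MPH", "MPH"),
}


def get_file_header_data_units(data_type: str) -> tuple[str, str]:
    """Return (file-name units, column-header units) for a supported data type."""
    try:
        return DATA_UNITS[data_type]
    except KeyError:
        raise ValueError(f"Unsupported data type: {data_type}") from None
```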