loone-data-prep 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -383,7 +383,6 @@ def main(input_dir: str, output_dir: str) -> None:
      LOWS['LZ40WS'] = LZ40WS['LZ40_WNDS_MPH']
      LOWS['LO_Avg_WS_MPH'] = LOWS.mean(axis=1, numeric_only=True)
      LOWS.to_csv(f'{output_dir}/LOWS.csv', index=False)
-     LOWS.to_csv(f'{input_dir}/LOWS.csv', index=False) # Also needed in temporary directory by utils.py's wind_induced_waves()

      # RFVol acft
      # Create File (RF_Volume)
@@ -897,7 +896,6 @@ def main(input_dir: str, output_dir: str) -> None:
      # Write Data into csv files
      # write Avg Stage (ft, m) Storage (acft, m3) SA (acres) to csv
      LO_Stg_Sto_SA_df.to_csv(f'{output_dir}/Average_LO_Storage_3MLag.csv', index=False)
-     LO_Stg_Sto_SA_df.to_csv(f'{input_dir}/Average_LO_Storage_3MLag.csv', index=False) # Also needed in temporary directory by utils.py's wind_induced_waves()
      # Write S65 TP concentrations (mg/L)
      S65_total_TP.to_csv(f'{output_dir}/S65_TP_3MLag.csv', index=False)
      # TP External Loads 3 Months Lag (mg)
@@ -2,22 +2,87 @@ import sys
  from retry import retry
  from rpy2.robjects import r
  from rpy2.rinterface_lib.embedded import RRuntimeError
+ import pandas as pd


  @retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
- def get(workspace):
+ def get(
+     workspace,
+     date_min: str = "1972-01-01",
+     date_max: str = "2023-06-30"
+ ) -> None:
      r(
          f"""
          # Load the required libraries
          library(dbhydroR)
-
-         #S65E_Total
-         S65E_total = get_hydro(dbkey = c("91656", "AL760"), date_min = "1972-01-01", date_max = "2023-06-30")
-         S65E_total[, -1] <- S65E_total[, -1] * (0.0283168466 * 86400)
-         write.csv(S65E_total,file ='{workspace}/S65E_total.csv')
+         library(dplyr)
+
+         # Helper Functions
+         retrieve_data <- function(dbkey, date_min, date_max)
+         {{
+             # Get the data from dbhydro
+             df = get_hydro(dbkey = dbkey, date_min = date_min, date_max = date_max, raw = TRUE)
+
+             # Give data.frame correct column names so it can be cleaned using the clean_hydro function
+             colnames(df) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
+
+             # Add a type and units column to data so it can be cleaned using the clean_hydro function
+             df$type <- "FLOW"
+             df$units <- "cfs"
+
+             # Clean the data.frame
+             df <- clean_hydro(df)
+
+             # Drop the " _FLOW_cfs" column
+             df <- df %>% select(-` _FLOW_cfs`)
+
+             # Convert Flow rate from cfs to m³/day
+             df[, -1] <- df[, -1] * (0.0283168466 * 86400)
+
+             # Return resulting data.frame
+             return(df)
+         }}
+
+         # S65E_S
+         S65E_S <- retrieve_data(dbkey = "91656", date_min = "{date_min}", date_max = "{date_max}")
+
+         # Wait five seconds before next request to avoid "too many requests" error
+         Sys.sleep(5)
+
+         # S65EX1_S
+         S65EX1_S <- retrieve_data(dbkey = "AL760", date_min = "{date_min}", date_max = "{date_max}")
+
+         # Merge the data from each dbkey
+         result <- merge(S65E_S, S65EX1_S, by = "date", all = TRUE)
+
+         # Write the data to a file
+         write.csv(result, file = '{workspace}/S65E_total.csv')
          """
      )
+
+     _reformat_s65e_total_file(workspace)

+ def _reformat_s65e_total_file(workspace: str):
+     # Read in the data
+     df = pd.read_csv(f"{workspace}/S65E_total.csv")
+
+     # Drop unused columns
+     df.drop('Unnamed: 0', axis=1, inplace=True)
+
+     # Convert date column to datetime
+     df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+
+     # Sort the data by date
+     df.sort_values('date', inplace=True)
+
+     # Renumber the index
+     df.reset_index(drop=True, inplace=True)
+
+     # Drop rows that are missing all their values
+     df.dropna(how='all', inplace=True)
+
+     # Write the updated data back to the file
+     df.to_csv(f"{workspace}/S65E_total.csv")

  if __name__ == "__main__":
      workspace = sys.argv[1].rstrip("/")
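
For reference, the rewritten `get()` keeps the old zero-argument call working while allowing an incremental pull, and with `delay=15, backoff=2, max_delay=60` the `@retry` decorator waits 15 s, 30 s, 60 s, 60 s between its five attempts. A hedged usage sketch; the workspace path is invented for illustration:

```python
from loone_data_prep.flow_data import S65E_total

# Full default window (1972-01-01 through 2023-06-30), as in 0.1.6
S65E_total.get("/tmp/workspace")

# Bounded incremental window on top of an existing S65E_total.csv
S65E_total.get("/tmp/workspace", date_min="2023-07-01", date_max="2024-01-31")
```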
@@ -1,43 +1,128 @@
  import sys
  import os
  from glob import glob
+ from datetime import datetime
+ import uuid
+ import pandas as pd
  from loone_data_prep.flow_data import hydro, S65E_total
+ from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest


- # Database keys for needed inflow data
- DBKEYS = [
-     "91370",
-     "91371",
-     "91373",
-     "91377",
-     "91379",
-     "91401",
-     "91429",
-     "91473",
-     "91508",
-     "91510",
-     "91513",
-     "91599",
-     "91608",
-     "91656",
-     "91668",
-     "91675",
-     "91687",
-     "15627",
-     "15640",
-     "15626",
-     "15642",
-     "15638",
- ]
+ # Database keys for needed inflow data mapped to their stations
+ DBKEYS = {
+     "91370": "S127_C",
+     "91371": "S127_P",
+     "91373": "S129_C",
+     "91377": "S133_P",
+     "91379": "S135_C",
+     "91401": "S154_C",
+     "91429": "S191_S",
+     "91473": "S2_P",
+     "91508": "S351_S",
+     "91510": "S352_S",
+     "91513": "S354_S",
+     "91599": "S3_P",
+     "91608": "S4_P",
+     "91656": "S65E_S",
+     "91668": "S71_S",
+     "91675": "S72_S",
+     "91687": "S84_S",
+     "15627": "FISHP",
+     "15640": "L8.441",
+     "15626": "S308.DS",
+     "15642": "S129 PMP_P",
+     "15638": "S135 PMP_P",
+ }

-
- def main(workspace: str, dbkeys: list = DBKEYS) -> dict:
+ def main(workspace: str, dbkeys: dict = DBKEYS) -> dict:
+     """
+     Retrieve the inflows data used by LOONE.
+
+     Args:
+         workspace (str): Path to workspace where data will be downloaded.
+         dbkeys (dict): Dictionary of dbkeys and corresponding station names.
+
+     Returns:
+         dict: Success or error message
+     """
+
      # Retrieve inflow data
-     for dbkey in dbkeys:
-         hydro.get(workspace, dbkey)
+     for dbkey, station in dbkeys.copy().items():
+         file_name = f"{station}_FLOW_cmd.csv"
+         date_latest = find_last_date_in_csv(workspace, file_name)
+
+         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
+         if date_latest is None:
+             # Download all the data
+             print(f'Downloading all inflow data for {station}')
+             hydro.get(workspace, dbkey)
+         else:
+             # Check whether the latest data is already up to date.
+             if dbhydro_data_is_latest(date_latest):
+                 # Notify that the data is already up to date
+                 print(f'Downloading of new inflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')
+
+                 # Remove dbkey from dbkeys so we know it didn't fail
+                 del dbkeys[dbkey]
+                 continue
+
+             # Download only the new data
+             print(f'Downloading new inflow data for {station} starting from date {date_latest}')
+             hydro.get(workspace, dbkey, date_latest)
+
+             # Make sure both our original data and newly downloaded data exist
+             df_original_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
+             df_new_path = os.path.join(workspace, f"{station}_FLOW_{dbkey}_cmd.csv")
+
+             if os.path.exists(df_original_path) and os.path.exists(df_new_path):
+                 # Merge the new data with the old data
+                 df_original = pd.read_csv(df_original_path, index_col=0)
+                 df_new = pd.read_csv(df_new_path, index_col=0)
+                 df_merged = pd.concat([df_original, df_new], ignore_index=True)
+
+                 # Write the merged data to the new file
+                 df_merged.to_csv(os.path.join(workspace, f"{station}_FLOW_{dbkey}_cmd.csv"))
+
+                 # Remove the old file
+                 os.remove(os.path.join(workspace, f"{station}_FLOW_cmd.csv"))

-     S65E_total.get(workspace)
+     # Download S65E_total.csv Data
+     date_latest = find_last_date_in_csv(workspace, "S65E_total.csv")

+     if date_latest is None:
+         print('Downloading all S65E_total data')
+         S65E_total.get(workspace, date_max=datetime.now().strftime("%Y-%m-%d"))
+     else:
+         # Check whether the latest data is already up to date.
+         if dbhydro_data_is_latest(date_latest):
+             # Notify that the data is already up to date
+             print(f'Downloading of new inflow data skipped for S65E_total. Data is already up to date.')
+         else:
+             # Temporarily rename current data file so it isn't over written
+             original_file_name = F"S65E_total_old_{uuid.uuid4()}.csv"
+             os.rename(os.path.join(workspace, "S65E_total.csv"), os.path.join(workspace, original_file_name))
+
+             try:
+                 # Download only the new data
+                 print(f'Downloading new S65E_total data starting from date {date_latest}')
+                 S65E_total.get(workspace, date_min=date_latest, date_max=datetime.now().strftime("%Y-%m-%d"))
+
+                 # Merge the new data with the original data
+                 df_original = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
+                 df_new = pd.read_csv(os.path.join(workspace, "S65E_total.csv"), index_col=0)
+                 df_merged = pd.concat([df_original, df_new], ignore_index=True)
+
+                 # Write out the merged data
+                 df_merged.to_csv(os.path.join(workspace, original_file_name))
+
+                 # Remove the newly downloaded data file
+                 os.remove(os.path.join(workspace, "S65E_total.csv"))
+             except Exception as e:
+                 print(f"Error occurred while downloading new S65E_total data: {e}")
+             finally:
+                 # Rename the original updated file back to its original name
+                 os.rename(os.path.join(workspace, original_file_name), os.path.join(workspace, "S65E_total.csv"))
+
      # Check if all files were downloaded
      files = glob(f"{workspace}/*FLOW*_cmd.csv")

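The incremental logic above leans on two `loone_data_prep.utils` helpers whose bodies are not part of this diff. A sketch of the behavior the call sites appear to assume — not the package's actual implementation, and the one-day freshness cutoff in particular is a guess:

```python
import os
from datetime import datetime, timedelta
from typing import Optional

import pandas as pd


def find_last_date_in_csv(workspace: str, file_name: str) -> Optional[str]:
    """Last date in the file's date column; None when the file is missing
    or unreadable (the caller treats None as 'download everything')."""
    path = os.path.join(workspace, file_name)
    try:
        df = pd.read_csv(path)
        return str(pd.to_datetime(df["date"]).max().date())
    except Exception:
        return None


def dbhydro_data_is_latest(date_latest: str) -> bool:
    """True when the newest row is recent enough that re-downloading would
    add nothing (sketched here as 'within one day of now')."""
    return pd.to_datetime(date_latest) >= datetime.now() - timedelta(days=1)
```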
@@ -50,17 +135,21 @@ def main(workspace: str, dbkeys: list = DBKEYS) -> dict:
              os.rename(file, new_file_name)

              # Remove dbkey from dbkeys so we know it successfully downloaded
-             dbkeys.remove(file_dbkey)
-
-     if len(dbkeys) > 0:
-         return {
-             "error": (
-                 "The data from the following dbkeys could not be "
-                 f"downloaded: {dbkeys}"
-             )
-         }
-     elif not os.path.exists(f"{workspace}/S65E_total.csv"):
-         return {"error": "S65E_total.csv file could not be downloaded."}
+             del dbkeys[file_dbkey]
+
+     # Check for failed downloads
+     if len(dbkeys) > 0 or not os.path.exists(f"{workspace}/S65E_total.csv"):
+         error_message = ""
+
+         # dbkeys
+         if len(dbkeys) > 0:
+             error_message += f"The data from the following dbkeys could not be downloaded: {list(dbkeys.keys())}\n"
+
+         # S65E_total.csv
+         if not os.path.exists(f"{workspace}/S65E_total.csv"):
+             error_message += "S65E_total.csv file could not be downloaded.\n"
+
+         return {"error": error_message}

      return {"success": "Completed inflow flow data download."}

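Consolidating the two error branches means a caller now receives a single `error` string that can report both failure modes at once. A minimal consumption sketch; the module name `get_inflows` is assumed, since the diff does not name the file:

```python
from loone_data_prep.flow_data import get_inflows  # module name assumed

result = get_inflows.main("/tmp/workspace")
if "error" in result:
    # May describe failed dbkeys, a missing S65E_total.csv, or both
    raise RuntimeError(result["error"])
print(result["success"])
```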
@@ -1,7 +1,8 @@
  import sys
  import os
  from glob import glob
- from loone_data_prep.utils import get_dbkeys
+ import pandas as pd
+ from loone_data_prep.utils import get_dbkeys, find_last_date_in_csv, dbhydro_data_is_latest
  from loone_data_prep.flow_data import hydro

@@ -24,33 +25,39 @@ STATION_IDS = [
  ]


- DBKEYS = [
-     "91370",
-     "91373",
-     "91379",
-     "91508",
-     "91510",
-     "91513",
-     "91677",
-     "15628",
-     "15640",
-     "15626",
-     "00865",
-     "JW224",
-     "00436",
-     "15018",
-     "91606",
-     "JW223",
- ]
-
+ DBKEYS = {
+     "91370": "S127_C",
+     "91373": "S129_C",
+     "91379": "S135_C",
+     "91508": "S351_S",
+     "91510": "S352_S",
+     "91513": "S354_S",
+     "91677": "S77_S",
+     "15628": "INDUST",
+     "15640": "L8.441",
+     "15626": "S308.DS",
+     "00865": "S79_TOT",
+     "JW224": "S80_S",
+     "00436": "S2 NNR",
+     "15018": "S3",
+     "91606": "S48_S",
+     "JW223": "S49_S",
+ }

- def main(workspace: str, dbkeys: list = DBKEYS, station_ids: list = STATION_IDS) -> dict:
-     # Retrieve outflow data

-     if dbkeys is None:
-         # Get dbkeys from station ids
-         dbkeys = list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF", detail_level="dbkey"))
-         dbkeys.extend(list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV", detail_level="dbkey")))
+ def _get_outflow_data_from_station_ids(workspace: str, station_ids: list) -> dict:
+     """Attempt to download outflow data from station ids.
+
+     Args:
+         workspace (str): Path to workspace where data will be downloaded.
+         station_ids (list): List of station ids to download data for.
+
+     Returns:
+         dict: Success or error message
+     """
+     # Get dbkeys from station ids
+     dbkeys = list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "PREF", detail_level="dbkey"))
+     dbkeys.extend(list(get_dbkeys(station_ids, "SW", "FLOW", "MEAN", "DRV", detail_level="dbkey")))

      for dbkey in dbkeys:
          hydro.get(workspace, dbkey, "2000-01-01")
@@ -75,6 +82,83 @@ def main(workspace: str, dbkeys: list = DBKEYS, station_ids: list = STATION_IDS)
      return {"success": "Completed outflow flow data download."}


+ def main(workspace: str, dbkeys: dict = DBKEYS, station_ids: list = STATION_IDS) -> dict:
+     """
+     Retrieve the outflow data used by LOONE.
+
+     Args:
+         workspace (str): Path to workspace where data will be downloaded.
+         dbkeys (dict): Dictionary of dbkeys and corresponding station names.
+         station_ids (list): List of station ids to download data for if the dbkeys argument is not provided.
+
+     Returns:
+         dict: Success or error message
+     """
+
+     # No dbkeys given, attempt to get data from station ids
+     if dbkeys is None:
+         return _get_outflow_data_from_station_ids(workspace, station_ids)
+
+     # Get outflow data from dbkeys
+     for dbkey, station in dbkeys.copy().items():
+         # Get the date of the latest data in the csv file (if any)
+         date_latest = find_last_date_in_csv(workspace, f"{station}_FLOW_cmd.csv")
+
+         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
+         if date_latest is None:
+             # Download all data
+             print(f'Downloading all outflow data for {station}')
+             hydro.get(workspace, dbkey, "2000-01-01")
+         else:
+             # Check whether the latest data is already up to date.
+             if dbhydro_data_is_latest(date_latest):
+                 # Notify that the data is already up to date
+                 print(f'Downloading of new outflow data skipped for Station {station} (dbkey: {dbkey}). Data is already up to date.')
+
+                 # Remove dbkey from dbkeys so we know it didn't fail
+                 del dbkeys[dbkey]
+                 continue
+
+             # Download only the new data
+             print(f'Downloading new outflow data for {station} starting from date {date_latest}')
+             hydro.get(workspace, dbkey, date_latest)
+
+             # Make sure both our original data and newly downloaded data exist
+             df_old_path = os.path.join(workspace, f"{station}_FLOW_cmd.csv")
+             df_new_path = os.path.join(workspace, f"{station}_FLOW_{dbkey}_cmd.csv")
+
+             if os.path.exists(df_old_path) and os.path.exists(df_new_path):
+                 # Merge the new data with the old data
+                 df_original = pd.read_csv(df_old_path, index_col=0)
+                 df_new = pd.read_csv(df_new_path, index_col=0)
+                 df_merged = pd.concat([df_original, df_new], ignore_index=True)
+
+                 # Write the merged data to the new file
+                 df_merged.to_csv(os.path.join(workspace, f"{station}_FLOW_{dbkey}_cmd.csv"))
+
+                 # Remove the old file
+                 os.remove(os.path.join(workspace, f"{station}_FLOW_cmd.csv"))
+
+     # Check if all files were downloaded
+     files = glob(f"{workspace}/*FLOW*_cmd.csv")
+
+     for file in files:
+         file_dbkey = file.split("_")[-2]
+
+         if file_dbkey in dbkeys:
+             # Remove dbkey from file name
+             new_file_name = file.replace(f"_{file_dbkey}", "")
+             os.rename(file, new_file_name)
+
+             # Remove dbkey from dbkeys so we know it successfully downloaded
+             del dbkeys[file_dbkey]
+
+     if len(dbkeys) > 0:
+         return {"error": f"The data from the following dbkeys could not be downloaded: {dbkeys}"}
+
+     return {"success": "Completed outflow flow data download."}
+
+
  if __name__ == "__main__":
      workspace = sys.argv[1].rstrip("/")
      main(workspace)
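
With `main()` reorganized, passing `dbkeys=None` routes through the new `_get_outflow_data_from_station_ids()` helper, while the default dict drives the incremental per-station path. A hedged call sketch; the module name `get_outflows` and the workspace path are assumptions for illustration:

```python
from loone_data_prep.flow_data import get_outflows  # module name assumed

# Default: incremental per-station downloads driven by the DBKEYS dict
result = get_outflows.main("/tmp/workspace")

# Fallback: discover PREF/DRV FLOW dbkeys for STATION_IDS via get_dbkeys()
result = get_outflows.main("/tmp/workspace", dbkeys=None)
```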
@@ -2,6 +2,7 @@ import sys
  from datetime import datetime
  from glob import glob
  from retry import retry
+ import os
  import pandas as pd
  from rpy2.robjects import r
  from rpy2.rinterface_lib.embedded import RRuntimeError
@@ -18,36 +19,92 @@ def get(
      date_max: str = DATE_NOW
  ) -> None:
      r_str = f"""
-     # Load the required libraries
-     library(dbhydroR)
+     download_flow_data <- function(workspace, dbkey, date_min, date_max)
+     {{
+         # Load the required libraries
+         library(dbhydroR)
+         library(dplyr)

-     # Retrieve data for the dbkey
-     data <- get_hydro(dbkey = "{dbkey}", date_min = "{date_min}", date_max = "{date_max}")
+         # Retrieve data for the dbkey
+         data <- get_hydro(dbkey = "{dbkey}", date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
+
+         # Check if data is empty or contains only the "date" column
+         if (ncol(data) <= 1) {{
+             cat("No data found for dbkey", "{dbkey}", "Skipping to the next dbkey.\n")
+         }}
+
+         # Give data.frame correct column names so it can be cleaned using the clean_hydro function
+         colnames(data) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
+
+         # Check if the data.frame has any rows
+         if (nrow(data) == 0)
+         {{
+             # No data given back, It's possible that the dbkey has reached its end date.
+             print(paste("Empty data.frame returned for dbkey", "{dbkey}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
+             return(list(success = FALSE, dbkey = "{dbkey}"))
+         }}
+
+         # Add a type and units column to data so it can be cleaned using the clean_hydro function
+         data$type <- "FLOW"
+         data$units <- "cfs"
+
+         # Get the station
+         station <- data$station[1]
+
+         # Clean the data.frame
+         data <- clean_hydro(data)

-     # Check if data is empty or contains only the "date" column
-     if (ncol(data) <= 1) {{
-         cat("No data found for dbkey", "{dbkey}", "Skipping to the next dbkey.\n")
-     }}
-
-     # Multiply all columns except "date" column by 0.0283168466 * 86400 to convert Flow rate from cfs to m³/day
-     data[, -1] <- data[, -1] * (0.0283168466 * 86400)
-
-     # Extract the column names excluding the date column
-     column_names <- names(data)[-1]
+         # Multiply all columns except "date" column by 0.0283168466 * 86400 to convert Flow rate from cfs to m³/day
+         data[, -1] <- data[, -1] * (0.0283168466 * 86400)
+
+         # Drop the " _FLOW_cfs" column
+         data <- data %>% select(-` _FLOW_cfs`)
+
+         # Sort the data by date
+         data <- data[order(data$date), ]
+
+         # Get the filename for the output CSV file
+         filename <- paste0(station, "_FLOW", "_{dbkey}_cmd.csv")
+
+         # Save data to a CSV file
+         write.csv(data, file = paste0("{workspace}/", filename))

-     # Generate the filename based on the column names
-     filename <- paste0( gsub(" ", "_", sub("_[^_]*$", "", paste(column_names, collapse = "_"))), "_{dbkey}_cmd.csv")
-     # Save data to a CSV file
-     write.csv(data, file = paste0("{workspace}/", filename))
+         # Print a message indicating the file has been saved
+         cat("CSV file", filename, "has been saved.\n")

-     # Print a message indicating the file has been saved
-     cat("CSV file", filename, "has been saved.\n")
-
-     # Add a delay between requests
-     Sys.sleep(1) # Wait for 1 second before the next iteration
+         # Add a delay between requests
+         Sys.sleep(1) # Wait for 1 second before the next iteration
+
+         # Return the station and dbkey to the python code
+         list(success = TRUE, station = station, dbkey = "{dbkey}")
+     }}
      """

      r(r_str)
+
+     # Call the R function to download the flow data
+     result = r.download_flow_data(workspace, dbkey, date_min, date_max)
+
+     # Check for failure
+     success = result.rx2("success")[0]
+
+     if not success:
+         return
+
+     # Get the station name for _reformat_flow_file()
+     station = result.rx2("station")[0]
+
+     # Reformat the flow data file to the expected layout
+     _reformat_flow_file(workspace, station, dbkey)
+
+     # Check if the station name contains a space
+     if " " in station:
+         # Replace space with underscore in the station name
+         station_previous = station
+         station = station.replace(" ", "_")
+
+         # Rename the file
+         os.rename(f"{workspace}/{station_previous}_FLOW_{dbkey}_cmd.csv", f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")

      # column values are converted to cmd in R. This snippet makes sure column names are updated accordingly.
      file = glob(f'{workspace}/*FLOW*{dbkey}_cmd.csv')[0]
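
The Python side now defines an R function once with `r(r_str)`, calls it via attribute lookup on `r`, and unpacks the returned named list with `rx2()`. A self-contained toy version of that rpy2 round-trip; the `describe` function is invented for illustration:

```python
from rpy2.robjects import r

# Evaluating this string defines describe() in the R global environment
r("""
describe <- function(x) {
    list(success = TRUE, label = paste("value:", x))
}
""")

# Attribute lookup on `r` resolves the R function; it is directly callable
result = r.describe(42)

success = result.rx2("success")[0]  # R logical -> Python bool
label = result.rx2("label")[0]      # R character vector -> Python str
print(success, label)
```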
@@ -55,7 +112,44 @@ def get(
      df.columns = df.columns.astype(str).str.replace("_cfs", "_cmd")
      df.to_csv(file, index=False)

- if __name__ == "__main__":
-     workspace = sys.argv[1].rstrip("/")
-     dbkey = sys.argv[2]
-     get(workspace, dbkey)
+
+ def _reformat_flow_file(workspace: str, station: str, dbkey: str):
+     '''
+     Reformat the flow data file to the expected layout.
+     Converts the format of the dates in the file to 'YYYY-MM-DD' then sorts the data by date.
+     Reads and writes to a .CSV file.
+
+     Args:
+         workspace (str): The path to the workspace directory.
+         station (str): The station name.
+         dbkey (str): The dbkey for the station.
+
+     Returns:
+         None
+     '''
+     # Read in the data
+     df = pd.read_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+
+     # Grab only the columns we need
+     df = df[['date', f'{station}_FLOW_cfs']]
+
+     # Convert date column to datetime
+     df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+
+     # Sort the data by date
+     df.sort_values('date', inplace=True)
+
+     # Renumber the index
+     df.reset_index(drop=True, inplace=True)
+
+     # Drop rows that are missing values for both the date and value columns
+     df = df.drop(df[(df['date'].isna()) & (df[f'{station}_FLOW_cfs'].isna())].index)
+
+     # Write the updated data back to the file
+     df.to_csv(f"{workspace}/{station}_FLOW_{dbkey}_cmd.csv")
+
+
+ if __name__ == "__main__":
+     workspace = sys.argv[1].rstrip("/")
+     dbkey = sys.argv[2]
+     get(workspace, dbkey)
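
`_reformat_flow_file()` normalizes DBHYDRO's `%d-%b-%Y` date strings into sortable datetimes and drops fully empty rows before writing the CSV back out. A small illustration of that normalization; the station name and rows are made up:

```python
import pandas as pd

df = pd.DataFrame({
    "date": ["02-Feb-2020", "01-Jan-2020", None],
    "S65E_S_FLOW_cfs": [2.0, 1.0, None],
})

# Parse '%d-%b-%Y' strings into datetimes (missing values become NaT)
df["date"] = pd.to_datetime(df["date"], format="%d-%b-%Y")
df.sort_values("date", inplace=True)
df.reset_index(drop=True, inplace=True)

# Drop rows missing both the date and the value, as the hunk above does
df = df.drop(df[df["date"].isna() & df["S65E_S_FLOW_cfs"].isna()].index)
print(df)
```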