loone-data-prep 1.2.4-py3-none-any.whl → 1.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
  2. loone_data_prep/LOONE_DATA_PREP.py +0 -1
  3. loone_data_prep/dbhydro_insights.py +195 -0
  4. loone_data_prep/flow_data/S65E_total.py +57 -57
  5. loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
  6. loone_data_prep/flow_data/get_forecast_flows.py +19 -105
  7. loone_data_prep/flow_data/get_inflows.py +18 -8
  8. loone_data_prep/flow_data/get_outflows.py +16 -7
  9. loone_data_prep/flow_data/hydro.py +62 -91
  10. loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
  11. loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
  12. loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
  13. loone_data_prep/utils.py +262 -32
  14. loone_data_prep/water_level_data/get_all.py +52 -44
  15. loone_data_prep/water_level_data/hydro.py +49 -68
  16. loone_data_prep/water_quality_data/get_inflows.py +69 -27
  17. loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
  18. loone_data_prep/water_quality_data/wq.py +114 -88
  19. loone_data_prep/weather_data/get_all.py +5 -3
  20. loone_data_prep/weather_data/weather.py +117 -180
  21. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
  22. loone_data_prep-1.3.1.dist-info/RECORD +38 -0
  23. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
  24. loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
  25. loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
  26. loone_data_prep-1.2.4.dist-info/RECORD +0 -38
  27. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
  28. {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
loone_data_prep/water_level_data/hydro.py

@@ -1,15 +1,14 @@
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
 import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
 
 DEFAULT_DBKEYS = ["16022", "12509", "12519", "16265", "15611"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")
 
 
-@retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
@@ -17,8 +16,19 @@ def get(
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     datum: str = "",
-    **kwargs: str | list
+    **kwargs: str | list | dict
 ) -> None:
+    """Fetches daily water level data from DBHYDRO and saves it as a CSV file.
+
+    Args:
+        workspace (str): The directory where the CSV file will be saved.
+        name (str): The name of the output CSV file (without extension).
+        dbkeys (list): List of DBHYDRO dbkeys to fetch data for. Defaults to DEFAULT_DBKEYS.
+        date_min (str): The start date for data retrieval in 'YYYY-MM-DD' format. Defaults to '1950-01-01'.
+        date_max (str): The end date for data retrieval in 'YYYY-MM-DD' format. Defaults to current date.
+        datum (str): The datum to use for the water level data. Defaults to an empty string. One of 'NGVD29', or 'NAVD88'.
+        **kwargs: Additional keyword arguments. Can include 'override_site_codes' (dict) to rename site codes in the output.
+    """
     # Get the type and units for the station
     data_type = "STG"
     units = "ft NGVD29"
@@ -27,77 +37,48 @@
         data_type = "GAGHT"
         units = "feet"
 
-    dbkeys_str = "\"" + "\", \"".join(dbkeys) + "\""
-    r(
-        f"""
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        library(dplyr)
-
-        # Stage Data
-        if ("{datum}" == "")
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-        }}
-
-        if (nchar("{datum}") > 0)
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE, datum = "{datum}")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames({name}) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow({name}) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkeys", "{dbkeys}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkeys}"))
-        }}
-
-        # Get the station
-        station <- {name}$station[1]
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        {name}$type <- "{data_type}"
-        {name}$units <- "{units}"
-
-        # Clean the data.frame
-        {name} <- clean_hydro({name})
-
-        # Drop the " _STG_ft NGVD29" column
-        {name} <- {name} %>% select(-` _{data_type}_{units}`)
-
-        # Write the data to a csv file
-        write.csv({name},file ='{workspace}/{name}.csv')
-        """
-    )
+    # Retrieve the data
+    api = get_dbhydro_api()
+    response = api.get_daily_data(dbkeys, 'id', date_min, date_max, datum, False)
 
-    _reformat_water_level_file(workspace, name)
-
-def _reformat_water_level_file(workspace: str, name: str):
-    # Read in the data
-    df = pd.read_csv(f"{workspace}/{name}.csv")
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)
 
-    # Drop the "Unnamed: 0" column
-    df.drop(columns=['Unnamed: 0'], inplace=True)
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)
 
-    # Convert date column to datetime
-    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+    # Make sure datetime exists as a column
+    if 'datetime' not in df.columns:
+        df.reset_index(inplace=True)
+
+    # Pivot the data so that each site_code is a column
+    df = df.pivot(index='datetime', columns='site_code', values='value')
 
-    # Sort the data by date
-    df.sort_values('date', inplace=True)
+    # Get the current column names in df and the names to rename them to
+    column_names = {'datetime': 'date'}
+    override_site_codes = kwargs.get("override_site_codes", None)
+    for column in df.columns:
+        if override_site_codes and column in override_site_codes:
+            column_names[column] = f"{override_site_codes[column]}_{data_type}_{units}"
+        else:
+            column_names[column] = f"{column}_{data_type}_{units}"
 
-    # Renumber the index
-    df.reset_index(drop=True, inplace=True)
+    # Reset the index to turn the datetime index into a column
+    df.reset_index(inplace=True)
 
-    # Drop rows that are missing all their values
-    df.dropna(how='all', inplace=True)
+    # Rename the columns
+    df.rename(columns=column_names, inplace=True)
 
-    # Write the updated data back to the file
-    df.to_csv(f"{workspace}/{name}.csv")
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'])
+
+    # Drop the "Unnamed: 0" column if it exists
+    if 'Unnamed: 0' in df.columns:
+        df.drop(columns=['Unnamed: 0'], inplace=True)
+
+    # Write the data to a csv file
+    df.to_csv(f"{workspace}/{name}.csv", index=True)
+
 
 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
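
Example (not part of the diff): based on the new signature and docstring above, a call to the rewritten water level fetcher might look like the sketch below. The workspace path, dbkeys, dates, and the site-code override are illustrative placeholders, not values from the package.

    from loone_data_prep.water_level_data import hydro

    hydro.get(
        workspace="/tmp/loone_workspace",   # directory the CSV is written to
        name="lake_stage",                  # output file becomes lake_stage.csv
        dbkeys=["16022", "12509"],          # DBHYDRO dbkeys; defaults to DEFAULT_DBKEYS
        date_min="2000-01-01",
        date_max="2024-12-31",
        datum="NGVD29",                     # or "NAVD88"; "" uses the default
        override_site_codes={"SOME_SITE": "RENAMED_SITE"},  # hypothetical rename applied to output column names
    )

Note that because the decorator was broadened from RRuntimeError to Exception, any failure inside this call (network errors included) is now retried up to 5 times with exponential backoff.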
loone_data_prep/water_quality_data/get_inflows.py

@@ -1,27 +1,50 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                              'CULV10A', 'S133', 'S127', 'S135']},
-    "AMMONIA-N": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                  'CULV10A', 'S133', 'S127', 'S135']},
-    "NITRATE+NITRITE-N": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                          'CULV10A', 'S133', 'S127', 'S135']},
-    "TOTAL NITROGEN": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                       'CULV10A', 'S133', 'S127', 'S135']},
-    "CHLOROPHYLL-A": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133',
-                                      'S127', 'S135', 'S191']},
-    "CHLOROPHYLL-A(LC)": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A',
-                                          'S133', 'S127', 'S135', 'S191']},
-    "CHLOROPHYLL-A, CORRECTED": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                                 'CULV10A', 'S133', 'S127', 'S135', 'S191']}
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ['S65E', 'S154', 'S4', 'S308C', 'CULV10A', 'S133', 'S127', 'S191'],
+        "station_types": {'S65E': 'SITE', 'S154': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S191': 'SITE'}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
 }
 
 
@@ -34,6 +57,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
         # Get the date of the latest data in the csv file for each station id
        station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -41,16 +67,19 @@
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if dbhydro_data_is_latest(date_latest):
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {station} skipped. Data is already up to date.')
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
                 # Temporarily rename current data file so it isn't over written
@@ -61,8 +90,8 @@
                try:
                     # Get only the water quality data that is newer than the latest data in the csv file
                     print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                    wq.get(workspace, name, [station_id], date_min=date_latest)
+                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                    wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                     # Data failed to download - It's possible the data's end date has been reached
                     if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -71,25 +100,38 @@
                     # Read in the original data
                     df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                    # Calculate the days column for the newly downloaded data
-                    df_original_date_min = df_original['date'].min()
-                    wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                     # Read in the newly downloaded data
                     df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                    df_new.reset_index(inplace=True)
+
+                    # Calculate the days column for the newly downloaded data
+                    df_original_date_min = df_original['date'].min()
+                    df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
                     # Merge the new data with the original data
-                    df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                    df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                    # Re-number the index
+                    df_merged.reset_index(inplace=True)
+                    df_merged.drop(['index'], axis=1, inplace=True)
+
+                    # Start index at 1 instead of 0 (for backwards compatibility)
+                    df_merged.index = df_merged.index + 1
+
+                    # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                    df_merged.index = df_merged.index.astype(str)
 
                     # Write out the merged data
-                    df_merged.to_csv(os.path.join(workspace, original_file_name))
+                    df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                    # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                    wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                     # Remove the original renamed data file
                     os.remove(os.path.join(workspace, original_file_name_temp))
                 except Exception as e:
                     # Notify of the error
                     print(f"Error occurred while downloading new water quality data: {e}")
+                    traceback.print_exc()
 
                     # Remove the newly downloaded data file if it exists
                     if os.path.exists(os.path.join(workspace, original_file_name)):
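
Example (not part of the diff): the backwards-compatibility steps in the merge logic above reduce to a small pandas pattern. The sketch below reproduces it on toy data; the column names and values are illustrative.

    import csv
    import pandas as pd

    # Toy stand-ins for the original and the newly downloaded CSV contents
    df_original = pd.DataFrame({"date": ["2024-01-01"], "value": [0.12]})
    df_new = pd.DataFrame({"date": ["2024-01-02"], "value": [0.15]})

    # Merge, then renumber the index from scratch
    df_merged = pd.concat([df_original, df_new], ignore_index=False)
    df_merged.reset_index(drop=True, inplace=True)  # same effect as reset_index() plus dropping the 'index' column

    # Start the index at 1 instead of 0 (matches the old file layout)
    df_merged.index = df_merged.index + 1

    # Casting the index to str makes QUOTE_NONNUMERIC wrap it in double quotes on write
    df_merged.index = df_merged.index.astype(str)

    df_merged.to_csv("merged.csv", index=True, quoting=csv.QUOTE_NONNUMERIC)

QUOTE_NONNUMERIC also quotes the date strings, which is presumably why the script follows up with wq.rewrite_water_quality_file_without_date_quotes() to strip those quotes again.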
loone_data_prep/water_quality_data/get_lake_wq.py

@@ -1,32 +1,110 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "PHOSPHATE, ORTHO AS P": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "AMMONIA-N": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "NITRATE+NITRITE-N": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "TOTAL NITROGEN": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN HILR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN HTYR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LA": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LF": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LW": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN LY": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN RR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN WR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "MICROCYSTIN YR": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "CHLOROPHYLL-A": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "CHLOROPHYLL-A(LC)": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "CHLOROPHYLL-A, CORRECTED": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "DISSOLVED OXYGEN": {"station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]}
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "PHOSPHATE, ORTHO AS P": {
+        "test_number": 23,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HILR": {
+        "test_number": 1023,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HTYR": {
+        "test_number": 1022,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LA": {
+        "test_number": 1005,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LF": {
+        "test_number": 1006,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LR": {
+        "test_number": 1007,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LW": {
+        "test_number": 1008,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LY": {
+        "test_number": 1009,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN RR": {
+        "test_number": 1010,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN WR": {
+        "test_number": 1011,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN YR": {
+        "test_number": 1012,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "DISSOLVED OXYGEN": {
+        "test_number": 8,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    }
 }
 
 
@@ -36,6 +114,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
         # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -43,16 +124,19 @@
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if dbhydro_data_is_latest(date_latest):
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {station} skipped. Data is already up to date.')
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
                 # Temporarily rename current data file so it isn't over written
@@ -63,8 +147,8 @@
                try:
                     # Get only the water quality data that is newer than the latest data in the csv file
                     print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                    wq.get(workspace, name, [station_id], date_min=date_latest)
+                    date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                    wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                     # Data failed to download - It's possible the data's end date has been reached
                     if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -73,25 +157,38 @@
                     # Read in the original data
                     df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                    # Calculate the days column for the newly downloaded data
-                    df_original_date_min = df_original['date'].min()
-                    wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                     # Read in the newly downloaded data
                     df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                    df_new.reset_index(inplace=True)
 
-                    # Merge the new data with the original data
-                    df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                    # Calculate the days column for the newly downloaded data
+                    df_original_date_min = df_original['date'].min()
+                    df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
+                    # Merge the new data with the original data
+                    df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                    # Re-number the index
+                    df_merged.reset_index(inplace=True)
+                    df_merged.drop(['index'], axis=1, inplace=True)
+
+                    # Start index at 1 instead of 0 (for backwards compatibility)
+                    df_merged.index = df_merged.index + 1
+
+                    # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                    df_merged.index = df_merged.index.astype(str)
+
                     # Write out the merged data
-                    df_merged.to_csv(os.path.join(workspace, original_file_name))
+                    df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                    # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                    wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                     # Remove the original renamed data file
                     os.remove(os.path.join(workspace, original_file_name_temp))
                 except Exception as e:
                     # Notify of the error
                     print(f"Error occurred while downloading new water quality data: {e}")
+                    traceback.print_exc()
 
                     # Remove the newly downloaded data file if it exists
                     if os.path.exists(os.path.join(workspace, original_file_name)):
  if os.path.exists(os.path.join(workspace, original_file_name)):