loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,7 @@ import requests
 import uuid
 from datetime import datetime
 from loone_data_prep.water_level_data import hydro
-from loone_data_prep.flow_data.get_forecast_flows import get_stations_latitude_longitude
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest, get_stations_latitude_longitude
 import pandas as pd

 DATE_NOW = datetime.now().date().strftime("%Y-%m-%d")
@@ -13,11 +12,11 @@ DATE_NOW = datetime.now().date().strftime("%Y-%m-%d")
 D = {
     "LO_Stage": {"dbkeys": ["16022", "12509", "12519", "16265", "15611"], "datum": "NGVD29"},
     "LO_Stage_2": {"dbkeys": ["94832"], "date_min": "2024-04-30", "datum": "NAVD88"},
-    "Stg_3ANW": {"dbkeys": ["LA369"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_2A17": {"dbkeys": ["16531"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A3": {"dbkeys": ["16532"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A4": {"dbkeys": ["16537"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A28": {"dbkeys": ["16538"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"}
+    "Stg_3ANW": {"dbkeys": ["LA369"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"G3ANW": "3A-NW"}},
+    "Stg_2A17": {"dbkeys": ["16531"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"2A-17": "2-17"}},
+    "Stg_3A3": {"dbkeys": ["16532"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-3": "3-63"}},
+    "Stg_3A4": {"dbkeys": ["16537"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-4": "3-64"}},
+    "Stg_3A28": {"dbkeys": ["16538"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-28": "3-65"}},
 }

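Note: The five "Stg_*" entries now carry an "override_site_codes" mapping from the site code DBHYDRO reports to the column name the downstream files expect. The mapping is applied when hydro.get renames the pivoted columns (see the second file in this diff). A minimal sketch of that renaming, using an invented two-row frame:

    import pandas as pd

    # Hypothetical pivoted data: one column per DBHYDRO site code.
    df = pd.DataFrame({"G3ANW": [10.2, 10.3]},
                      index=pd.to_datetime(["2023-04-29", "2023-04-30"]))

    override_site_codes = {"G3ANW": "3A-NW"}  # from the "Stg_3ANW" entry above
    data_type, units = "STG", "ft NGVD29"

    # Apply the override before composing the "<site>_<type>_<units>" header.
    df = df.rename(columns={c: f"{override_site_codes.get(c, c)}_{data_type}_{units}"
                            for c in df.columns})
    print(df.columns.tolist())  # ['3A-NW_STG_ft NGVD29']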
@@ -25,9 +24,6 @@ def main(workspace: str, d: dict = D) -> dict:
     missing_files = []
     failed_downloads = []  # List of file names that the script failed to get the latest data for (but the files still exist)

-    # Get the date of the latest data in LO_Stage_2.csv
-    date_latest_lo_stage_2 = find_last_date_in_csv(workspace, "LO_Stage_2.csv")
-
     for name, params in d.items():
         # Get the date of the latest data in the csv file
         date_latest = find_last_date_in_csv(workspace, f"{name}.csv")
@@ -35,10 +31,18 @@ def main(workspace: str, d: dict = D) -> dict:
         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
         if date_latest is None:
             print(f"Getting all water level data for {name}.")
+            params['date_max'] = DATE_NOW
             hydro.get(workspace, name, **params)
         else:
             # Check whether the latest data is already up to date.
-            if dbhydro_data_is_latest(date_latest):
+            requires_data_download = False
+            for dbkey in params['dbkeys']:
+                if not dbhydro_data_is_latest(date_latest, dbkey):
+                    requires_data_download = True
+                    break
+
+            # Data is already up to date
+            if not requires_data_download:
                 # Notify that the data is already up to date
                 print(f'Downloading of new water level data skipped for {name}. Data is already up to date.')
                 continue
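Note: dbhydro_data_is_latest now takes the dbkey as well, and the check runs per dbkey because keys grouped under one name can end on different dates. An equivalent formulation with any(), assuming the same boolean return value:

    # Equivalent to the requires_data_download loop above.
    requires_data_download = any(
        not dbhydro_data_is_latest(date_latest, dbkey)
        for dbkey in params['dbkeys']
    )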
@@ -50,21 +54,23 @@

             try:
                 # Download only the new data
-                print(f'Downloading new water level data for {name} starting from date {date_latest}')
-                hydro.get(workspace, name, dbkeys=params['dbkeys'], date_min=date_latest, date_max=DATE_NOW, datum=params['datum'])
+                date_next = (datetime.strptime(date_latest, "%Y-%m-%d") + pd.DateOffset(days=1)).date().strftime("%Y-%m-%d")
+                print(f'Downloading new water level data for {name} starting from date {date_next}')
+                kwargs = {}
+                if 'override_site_codes' in params:
+                    kwargs['override_site_codes'] = params['override_site_codes']
+                hydro.get(workspace, name, dbkeys=params['dbkeys'], date_min=date_next, date_max=DATE_NOW, datum=params['datum'], **kwargs)

                 # Read in the original data and the newly downloaded data
-                df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
-                df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-
-                # For get_hydro() calls with multiple dbkeys, remove the row corresponding to the latest date from the downloaded data.
-                # When get_hydro() is given multiple keys its returned data starts from the date given instead of the day after like it
-                # does when given a single key.
-                if len(params['dbkeys']) > 1:
-                    df_new = df_new[df_new['date'] != date_latest]
+                df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col='date')
+                df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col='date')

                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Ensure an integer index (for backwards compatibility)
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

                 # Write out the merged data
                 df_merged.to_csv(os.path.join(workspace, original_file_name))
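Note: Requesting data from the day after date_latest, and indexing both CSVs on 'date', is what lets the old multi-dbkey special case (dropping the duplicated boundary row) be deleted. The date arithmetic, worked through:

    from datetime import datetime
    import pandas as pd

    date_latest = "2024-04-30"
    date_next = (datetime.strptime(date_latest, "%Y-%m-%d")
                 + pd.DateOffset(days=1)).date().strftime("%Y-%m-%d")
    print(date_next)  # 2024-05-01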
@@ -102,6 +108,10 @@ def main(workspace: str, d: dict = D) -> dict:
     lat_long_map = get_stations_latitude_longitude(["L OKEE"])
     latitude, longitude = lat_long_map["L OKEE"]

+    # Load the LO_Stage.csv file
+    df_lo_stage = pd.read_csv(os.path.join(workspace, "LO_Stage.csv"), index_col="date")
+    df_lo_stage.index = pd.to_datetime(df_lo_stage.index)
+
     # Load the LO_Stage_2.csv file
     df_lo_stage_2 = pd.read_csv(os.path.join(workspace, "LO_Stage_2.csv"), index_col="date")
     df_lo_stage_2.index = pd.to_datetime(df_lo_stage_2.index)
@@ -109,21 +119,24 @@ def main(workspace: str, d: dict = D) -> dict:
     # Output Progress
     print("Converting NAVD88 to NGVD29 for 'L OKEE's new dbkey...\n")

-    # Use only the data that is not already in the LO_Stage.csv file
-    if date_latest_lo_stage_2 is not None:
-        date_start = datetime.strptime(date_latest_lo_stage_2, "%Y-%m-%d") + pd.DateOffset(days=1)
-        df_lo_stage_2 = df_lo_stage_2.loc[date_start:]
+    # Use only the data that is not already in the LO_Stage.csv file and exists in the LO_Stage_2.csv file
+    common_dates = df_lo_stage.index.intersection(df_lo_stage_2.index)

-    # Convert the stage values from NAVD88 to NGVD29
-    lo_stage_2_dates = df_lo_stage_2.index.tolist()
-    lo_stage_2_values_navd88 = df_lo_stage_2["L OKEE_STG_ft NGVD29"].tolist()
-    lo_stage_2_values_ngvd29 = []
+    missing_mask = (
+        df_lo_stage.loc[common_dates, "L OKEE_STG_ft NGVD29"].isna() &
+        df_lo_stage_2.loc[common_dates, "L OKEE_STG_ft NGVD29"].notna()
+    )

-    for i in range(0, len(lo_stage_2_values_navd88)):
-        date = lo_stage_2_dates[i]
-        value = lo_stage_2_values_navd88[i]
+    missing_dates: pd.DatetimeIndex = common_dates[missing_mask]
+    missing_dates = missing_dates.to_list()
+
+    # Convert the stage values from NAVD88 to NGVD29 for the missing dates
+    converted_values = {}
+    for date in missing_dates:
         try:
-            lo_stage_2_values_ngvd29.append(_convert_navd88_to_ngvd29(latitude, longitude, value, date.year))
+            navd88_value = df_lo_stage_2.at[date, "L OKEE_STG_ft NGVD29"]
+            ngvd29_value = _convert_navd88_to_ngvd29(latitude, longitude, navd88_value, date.year)
+            converted_values[date] = ngvd29_value
         except Exception as e:
             convert_failure = True
             print(str(e))
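Note: Instead of slicing LO_Stage_2 after a remembered date, the rewrite fills only dates where LO_Stage is missing a value and LO_Stage_2 has one. A self-contained sketch of the mask (column name from the source; values invented):

    import pandas as pd

    col = "L OKEE_STG_ft NGVD29"
    df_lo_stage = pd.DataFrame({col: [14.1, None]},
                               index=pd.to_datetime(["2024-05-01", "2024-05-02"]))
    df_lo_stage_2 = pd.DataFrame({col: [12.7, 12.8]},
                                 index=pd.to_datetime(["2024-05-01", "2024-05-02"]))

    common_dates = df_lo_stage.index.intersection(df_lo_stage_2.index)
    missing_mask = (df_lo_stage.loc[common_dates, col].isna()
                    & df_lo_stage_2.loc[common_dates, col].notna())
    print(common_dates[missing_mask])  # only 2024-05-02 needs conversion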
@@ -132,20 +145,15 @@ def main(workspace: str, d: dict = D) -> dict:
     # Check for conversion failure
     if not convert_failure:
         # Update the LO_Stage.csv file with the converted values
-        df_lo_stage = pd.read_csv(os.path.join(workspace, "LO_Stage.csv"), index_col="date")
-        df_lo_stage.index = pd.to_datetime(df_lo_stage.index)
-
-        for i in range(0, len(lo_stage_2_values_ngvd29)):
-            # Get the current date and value
-            date = lo_stage_2_dates[i]
-            value = lo_stage_2_values_ngvd29[i]
-
-            # Update the value in the LO_Stage dataframe
+        for date, value in converted_values.items():
             df_lo_stage.at[date, "L OKEE_STG_ft NGVD29"] = value

         # Reset the index
         df_lo_stage.reset_index(inplace=True)
-        df_lo_stage.drop(columns=["Unnamed: 0"], inplace=True)
+
+        # Drop Unnamed: 0 column that might have been added
+        if "Unnamed: 0" in df_lo_stage.columns:
+            df_lo_stage.drop(columns=["Unnamed: 0"], inplace=True)

         # Save the updated LO_Stage.csv file
         df_lo_stage.to_csv(os.path.join(workspace, "LO_Stage.csv"))
@@ -1,15 +1,14 @@
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
 import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api

 DEFAULT_DBKEYS = ["16022", "12509", "12519", "16265", "15611"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")


-@retry(RRuntimeError, tries=5, delay=15, max_delay=60, backoff=2)
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
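Note: Dropping rpy2 removes RRuntimeError, so the decorator now retries on any Exception. With these parameters the retry package makes up to five attempts, sleeping 15 s, 30 s, 60 s, 60 s between them (the delay doubles per backoff, capped at max_delay); illustrated:

    from retry import retry

    @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
    def fetch():
        # Any exception raised here triggers a retry; after the fifth
        # failed attempt the exception propagates to the caller.
        ...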
@@ -17,8 +16,19 @@ def get(
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     datum: str = "",
-    **kwargs: str | list
+    **kwargs: str | list | dict
 ) -> None:
+    """Fetches daily water level data from DBHYDRO and saves it as a CSV file.
+
+    Args:
+        workspace (str): The directory where the CSV file will be saved.
+        name (str): The name of the output CSV file (without extension).
+        dbkeys (list): List of DBHYDRO dbkeys to fetch data for. Defaults to DEFAULT_DBKEYS.
+        date_min (str): The start date for data retrieval in 'YYYY-MM-DD' format. Defaults to '1950-01-01'.
+        date_max (str): The end date for data retrieval in 'YYYY-MM-DD' format. Defaults to the current date.
+        datum (str): The datum to use for the water level data, one of 'NGVD29' or 'NAVD88'. Defaults to an empty string.
+        **kwargs: Additional keyword arguments. Can include 'override_site_codes' (dict) to rename site codes in the output.
+    """
     # Get the type and units for the station
     data_type = "STG"
     units = "ft NGVD29"
@@ -27,77 +37,48 @@
         data_type = "GAGHT"
         units = "feet"

-    dbkeys_str = "\"" + "\", \"".join(dbkeys) + "\""
-    r(
-        f"""
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        library(dplyr)
-
-        # Stage Data
-        if ("{datum}" == "")
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-        }}
-
-        if (nchar("{datum}") > 0)
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE, datum = "{datum}")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames({name}) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow({name}) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkeys", "{dbkeys}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkeys}"))
-        }}
-
-        # Get the station
-        station <- {name}$station[1]
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        {name}$type <- "{data_type}"
-        {name}$units <- "{units}"
-
-        # Clean the data.frame
-        {name} <- clean_hydro({name})
-
-        # Drop the " _STG_ft NGVD29" column
-        {name} <- {name} %>% select(-` _{data_type}_{units}`)
-
-        # Write the data to a csv file
-        write.csv({name},file ='{workspace}/{name}.csv')
-        """
-    )
+    # Retrieve the data
+    api = get_dbhydro_api()
+    response = api.get_daily_data(dbkeys, 'id', date_min, date_max, datum, False)

-    _reformat_water_level_file(workspace, name)
-
-def _reformat_water_level_file(workspace: str, name: str):
-    # Read in the data
-    df = pd.read_csv(f"{workspace}/{name}.csv")
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)

-    # Drop the "Unnamed: 0" column
-    df.drop(columns=['Unnamed: 0'], inplace=True)
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)

-    # Convert date column to datetime
-    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+    # Make sure datetime exists as a column
+    if 'datetime' not in df.columns:
+        df.reset_index(inplace=True)
+
+    # Pivot the data so that each site_code is a column
+    df = df.pivot(index='datetime', columns='site_code', values='value')

-    # Sort the data by date
-    df.sort_values('date', inplace=True)
+    # Get the current column names in df and the names to rename them to
+    column_names = {'datetime': 'date'}
+    override_site_codes = kwargs.get("override_site_codes", None)
+    for column in df.columns:
+        if override_site_codes and column in override_site_codes:
+            column_names[column] = f"{override_site_codes[column]}_{data_type}_{units}"
+        else:
+            column_names[column] = f"{column}_{data_type}_{units}"

-    # Renumber the index
-    df.reset_index(drop=True, inplace=True)
+    # Reset the index to turn the datetime index into a column
+    df.reset_index(inplace=True)

-    # Drop rows that are missing all their values
-    df.dropna(how='all', inplace=True)
+    # Rename the columns
+    df.rename(columns=column_names, inplace=True)

-    # Write the updated data back to the file
-    df.to_csv(f"{workspace}/{name}.csv")
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'])
+
+    # Drop the "Unnamed: 0" column if it exists
+    if 'Unnamed: 0' in df.columns:
+        df.drop(columns=['Unnamed: 0'], inplace=True)
+
+    # Write the data to a csv file
+    df.to_csv(f"{workspace}/{name}.csv", index=True)
+

 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
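Note: The R/dbhydroR pipeline is replaced by a pure-Python path: fetch, replace sentinel values with NaN, pivot the long records to one column per site code, then rename to the legacy "<site>_<type>_<units>" headers. A toy illustration of the pivot step (rows invented):

    import pandas as pd

    long_df = pd.DataFrame({
        "datetime": pd.to_datetime(["2024-05-01", "2024-05-01",
                                    "2024-05-02", "2024-05-02"]),
        "site_code": ["S65E", "S71", "S65E", "S71"],
        "value": [13.9, 14.2, 13.8, 14.1],
    })

    wide = long_df.pivot(index="datetime", columns="site_code", values="value")
    print(wide.columns.tolist())  # ['S65E', 'S71'], later suffixed with '_STG_ft NGVD29'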
@@ -1,27 +1,50 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest


 D = {
-    "PHOSPHATE, TOTAL AS P": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                              'CULV10A', 'S133', 'S127', 'S135']},
-    "AMMONIA-N": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                  'CULV10A', 'S133', 'S127', 'S135']},
-    "NITRATE+NITRITE-N": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                          'CULV10A', 'S133', 'S127', 'S135']},
-    "TOTAL NITROGEN": {"station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                       'CULV10A', 'S133', 'S127', 'S135']},
-    "CHLOROPHYLL-A": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133',
-                                      'S127', 'S135', 'S191']},
-    "CHLOROPHYLL-A(LC)": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A',
-                                          'S133', 'S127', 'S135', 'S191']},
-    "CHLOROPHYLL-A, CORRECTED": {"station_ids": ['S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C',
-                                                 'CULV10A', 'S133', 'S127', 'S135', 'S191']}
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ['S65E', 'S154', 'S4', 'S308C', 'CULV10A', 'S133', 'S127', 'S191'],
+        "station_types": {'S65E': 'SITE', 'S154': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S191': 'SITE'}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
 }

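Note: Each parameter now carries its DBHYDRO test_number plus a per-station type ('SITE' or 'STATION'), both consumed by main() below. Since params["station_types"][station_id] raises KeyError on any station missing from the map, a quick consistency check is cheap; a sketch, not part of the package:

    for name, params in D.items():
        missing = set(params["station_ids"]) - set(params["station_types"])
        assert not missing, f"{name}: no station_type for {sorted(missing)}"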
@@ -34,6 +57,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")

+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
         # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -41,16 +67,19 @@ def main(workspace: str, d: dict = D) -> dict:

         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if dbhydro_data_is_latest(date_latest):
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {station} skipped. Data is already up to date.')
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue

             # Temporarily rename current data file so it isn't over written
@@ -61,8 +90,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)

                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
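Note: The date format gains a time component, which implies find_last_date_in_csv now yields full timestamps for water quality files; the day-increment must therefore parse and emit "%Y-%m-%d %H:%M:%S". Worked through:

    from datetime import datetime, timedelta

    date_latest = "2024-05-01 00:00:00"
    date_min = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S")
                + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
    print(date_min)  # 2024-05-02 00:00:00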
@@ -71,25 +100,38 @@ def main(workspace: str, d: dict = D) -> dict:

                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)

-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                df_new.reset_index(inplace=True)
+
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)

                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=True)
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)

                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)

                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()

                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
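Note: Casting the merged index to str before writing with csv.QUOTE_NONNUMERIC forces quotes around the row numbers, matching the 1.3.0 layout; since QUOTE_NONNUMERIC quotes every non-numeric field, the dates come out quoted too, which is what rewrite_water_quality_file_without_date_quotes then undoes. Demonstrated with invented values:

    import csv
    import pandas as pd

    df = pd.DataFrame({"date": ["2024-05-01"], "value": [0.12]})
    df.index = (df.index + 1).astype(str)  # "1", "2", ... as in the merged file
    print(df.to_csv(quoting=csv.QUOTE_NONNUMERIC))
    # "","date","value"
    # "1","2024-05-01",0.12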