loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- loone_data_prep/dbhydro_insights.py +195 -0
- loone_data_prep/flow_data/S65E_total.py +57 -57
- loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
- loone_data_prep/flow_data/get_forecast_flows.py +19 -105
- loone_data_prep/flow_data/get_inflows.py +18 -8
- loone_data_prep/flow_data/get_outflows.py +16 -7
- loone_data_prep/flow_data/hydro.py +62 -91
- loone_data_prep/utils.py +243 -30
- loone_data_prep/water_level_data/get_all.py +52 -44
- loone_data_prep/water_level_data/hydro.py +49 -68
- loone_data_prep/water_quality_data/get_inflows.py +69 -27
- loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
- loone_data_prep/water_quality_data/wq.py +114 -88
- loone_data_prep/weather_data/get_all.py +5 -3
- loone_data_prep/weather_data/weather.py +117 -180
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/RECORD +20 -19
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
```diff
@@ -4,8 +4,7 @@ import requests
 import uuid
 from datetime import datetime
 from loone_data_prep.water_level_data import hydro
-from loone_data_prep.
-from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest, get_stations_latitude_longitude
 import pandas as pd
 
 DATE_NOW = datetime.now().date().strftime("%Y-%m-%d")
```
```diff
@@ -13,11 +12,11 @@ DATE_NOW = datetime.now().date().strftime("%Y-%m-%d")
 D = {
     "LO_Stage": {"dbkeys": ["16022", "12509", "12519", "16265", "15611"], "datum": "NGVD29"},
     "LO_Stage_2": {"dbkeys": ["94832"], "date_min": "2024-04-30", "datum": "NAVD88"},
-    "Stg_3ANW": {"dbkeys": ["LA369"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_2A17": {"dbkeys": ["16531"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A3": {"dbkeys": ["16532"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A4": {"dbkeys": ["16537"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"},
-    "Stg_3A28": {"dbkeys": ["16538"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29"}
+    "Stg_3ANW": {"dbkeys": ["LA369"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"G3ANW": "3A-NW"}},
+    "Stg_2A17": {"dbkeys": ["16531"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"2A-17": "2-17"}},
+    "Stg_3A3": {"dbkeys": ["16532"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-3": "3-63"}},
+    "Stg_3A4": {"dbkeys": ["16537"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-4": "3-64"}},
+    "Stg_3A28": {"dbkeys": ["16538"], "date_min": "1972-01-01", "date_max": "2023-04-30", "datum": "NGVD29", "override_site_codes": {"3A-28": "3-65"}},
 }
 
 
```
```diff
@@ -25,9 +24,6 @@ def main(workspace: str, d: dict = D) -> dict:
     missing_files = []
     failed_downloads = []  # List of file names that the script failed to get the latest data for (but the files still exist)
 
-    # Get the date of the latest data in LO_Stage_2.csv
-    date_latest_lo_stage_2 = find_last_date_in_csv(workspace, "LO_Stage_2.csv")
-
     for name, params in d.items():
         # Get the date of the latest data in the csv file
         date_latest = find_last_date_in_csv(workspace, f"{name}.csv")
```
```diff
@@ -35,10 +31,18 @@ def main(workspace: str, d: dict = D) -> dict:
         # File with data for this dbkey does NOT already exist (or possibly some other error occurred)
         if date_latest is None:
             print(f"Getting all water level data for {name}.")
+            params['date_max'] = DATE_NOW
             hydro.get(workspace, name, **params)
         else:
             # Check whether the latest data is already up to date.
-
+            requires_data_download = False
+            for dbkey in params['dbkeys']:
+                if not dbhydro_data_is_latest(date_latest, dbkey):
+                    requires_data_download = True
+                    break
+
+            # Data is already up to date
+            if not requires_data_download:
                 # Notify that the data is already up to date
                 print(f'Downloading of new water level data skipped for {name}. Data is already up to date.')
                 continue
```
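The new up-to-date check treats a dataset as stale as soon as any one of its dbkeys has newer data. A minimal standalone sketch of that logic (the `needs_download` helper and the stub are illustrative, not part of the package; the real check is `loone_data_prep.utils.dbhydro_data_is_latest`):

```python
from typing import Callable

def needs_download(date_latest: str, dbkeys: list[str],
                   is_latest: Callable[[str, str], bool]) -> bool:
    # Equivalent to the flag-and-break loop above: stale as soon as any
    # single dbkey reports data newer than date_latest.
    return any(not is_latest(date_latest, dbkey) for dbkey in dbkeys)

# Stub standing in for dbhydro_data_is_latest (the real one queries DBHYDRO).
stub = lambda date_latest, dbkey: dbkey != "12509"
print(needs_download("2024-01-01", ["16022", "12509"], stub))  # True
```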
```diff
@@ -50,21 +54,23 @@ def main(workspace: str, d: dict = D) -> dict:
 
         try:
             # Download only the new data
-
-
+            date_next = (datetime.strptime(date_latest, "%Y-%m-%d") + pd.DateOffset(days=1)).date().strftime("%Y-%m-%d")
+            print(f'Downloading new water level data for {name} starting from date {date_next}')
+            kwargs = {}
+            if 'override_site_codes' in params:
+                kwargs['override_site_codes'] = params['override_site_codes']
+            hydro.get(workspace, name, dbkeys=params['dbkeys'], date_min=date_next, date_max=DATE_NOW, datum=params['datum'], **kwargs)
 
             # Read in the original data and the newly downloaded data
-            df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=
-            df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=
-
-            # For get_hydro() calls with multiple dbkeys, remove the row corresponding to the latest date from the downloaded data.
-            # When get_hydro() is given multiple keys its returned data starts from the date given instead of the day after like it
-            # does when given a single key.
-            if len(params['dbkeys']) > 1:
-                df_new = df_new[df_new['date'] != date_latest]
+            df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col='date')
+            df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col='date')
 
             # Merge the new data with the original data
-            df_merged = pd.concat([df_original, df_new], ignore_index=
+            df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+            # Ensure an integer index (for backwards compatibility)
+            df_merged.reset_index(inplace=True)
+            df_merged.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')
 
             # Write out the merged data
             df_merged.to_csv(os.path.join(workspace, original_file_name))
```
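The incremental download now keys both CSVs on their `date` column and concatenates with `ignore_index=False`, so the date index survives the merge before being reset for the on-disk format. A toy sketch of that merge (made-up frames, not package data):

```python
import pandas as pd

# Toy stand-ins for the original CSV and the newly downloaded one,
# both indexed by their 'date' column as in the hunk above.
df_original = pd.DataFrame(
    {"stage": [14.1, 14.2]},
    index=pd.Index(["2024-01-01", "2024-01-02"], name="date"),
)
df_new = pd.DataFrame(
    {"stage": [14.3]},
    index=pd.Index(["2024-01-03"], name="date"),
)

# ignore_index=False keeps the date index through the merge;
# reset_index() then restores the plain integer index written to disk.
df_merged = pd.concat([df_original, df_new], ignore_index=False)
df_merged.reset_index(inplace=True)
print(df_merged)
```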
```diff
@@ -102,6 +108,10 @@ def main(workspace: str, d: dict = D) -> dict:
     lat_long_map = get_stations_latitude_longitude(["L OKEE"])
     latitude, longitude = lat_long_map["L OKEE"]
 
+    # Load the LO_Stage.csv file
+    df_lo_stage = pd.read_csv(os.path.join(workspace, "LO_Stage.csv"), index_col="date")
+    df_lo_stage.index = pd.to_datetime(df_lo_stage.index)
+
     # Load the LO_Stage_2.csv file
     df_lo_stage_2 = pd.read_csv(os.path.join(workspace, "LO_Stage_2.csv"), index_col="date")
     df_lo_stage_2.index = pd.to_datetime(df_lo_stage_2.index)
```
```diff
@@ -109,21 +119,24 @@ def main(workspace: str, d: dict = D) -> dict:
     # Output Progress
     print("Converting NAVD88 to NGVD29 for 'L OKEE's new dbkey...\n")
 
-    # Use only the data that is not already in the LO_Stage.csv file
-
-    date_start = datetime.strptime(date_latest_lo_stage_2, "%Y-%m-%d") + pd.DateOffset(days=1)
-    df_lo_stage_2 = df_lo_stage_2.loc[date_start:]
+    # Use only the data that is not already in the LO_Stage.csv file and exists in the LO_Stage_2.csv file
+    common_dates = df_lo_stage.index.intersection(df_lo_stage_2.index)
 
-
-
-
-
+    missing_mask = (
+        df_lo_stage.loc[common_dates, "L OKEE_STG_ft NGVD29"].isna() &
+        df_lo_stage_2.loc[common_dates, "L OKEE_STG_ft NGVD29"].notna()
+    )
 
-
-
-
+    missing_dates: pd.DatetimeIndex = common_dates[missing_mask]
+    missing_dates = missing_dates.to_list()
+
+    # Convert the stage values from NAVD88 to NGVD29 for the missing dates
+    converted_values = {}
+    for date in missing_dates:
         try:
-
+            navd88_value = df_lo_stage_2.at[date, "L OKEE_STG_ft NGVD29"]
+            ngvd29_value = _convert_navd88_to_ngvd29(latitude, longitude, navd88_value, date.year)
+            converted_values[date] = ngvd29_value
         except Exception as e:
             convert_failure = True
             print(str(e))
```
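Instead of slicing LO_Stage_2 from a remembered last date, 1.3.1 backfills exactly the dates where LO_Stage.csv is missing a value but LO_Stage_2.csv has one. A self-contained sketch of that selection on toy frames:

```python
import numpy as np
import pandas as pd

col = "L OKEE_STG_ft NGVD29"
idx = pd.to_datetime(["2024-05-01", "2024-05-02", "2024-05-03"])
df_lo_stage = pd.DataFrame({col: [14.1, np.nan, np.nan]}, index=idx)
df_lo_stage_2 = pd.DataFrame({col: [14.0, 14.2, np.nan]}, index=idx)

# Same selection as the hunk: dates present in both files where the
# primary file has no value but the secondary file does.
common_dates = df_lo_stage.index.intersection(df_lo_stage_2.index)
missing_mask = (
    df_lo_stage.loc[common_dates, col].isna()
    & df_lo_stage_2.loc[common_dates, col].notna()
)
print(common_dates[missing_mask])  # only 2024-05-02 qualifies
```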
```diff
@@ -132,20 +145,15 @@ def main(workspace: str, d: dict = D) -> dict:
     # Check for conversion failure
     if not convert_failure:
         # Update the LO_Stage.csv file with the converted values
-
-        df_lo_stage.index = pd.to_datetime(df_lo_stage.index)
-
-        for i in range(0, len(lo_stage_2_values_ngvd29)):
-            # Get the current date and value
-            date = lo_stage_2_dates[i]
-            value = lo_stage_2_values_ngvd29[i]
-
-            # Update the value in the LO_Stage dataframe
+        for date, value in converted_values.items():
             df_lo_stage.at[date, "L OKEE_STG_ft NGVD29"] = value
 
         # Reset the index
         df_lo_stage.reset_index(inplace=True)
-
+
+        # Drop Unnamed: 0 column that might have been added
+        if "Unnamed: 0" in df_lo_stage.columns:
+            df_lo_stage.drop(columns=["Unnamed: 0"], inplace=True)
 
         # Save the updated LO_Stage.csv file
         df_lo_stage.to_csv(os.path.join(workspace, "LO_Stage.csv"))
```
```diff
@@ -1,15 +1,14 @@
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
 import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
 
 DEFAULT_DBKEYS = ["16022", "12509", "12519", "16265", "15611"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")
 
 
-@retry(
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
```
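The previously bare `@retry(` is now pinned to explicit parameters; with the `retry` package these mean up to 5 attempts, with waits that start at 15 s and double each time (backoff=2) up to a 60 s cap. A quick sketch with scaled-down delays so it runs in under a second (`flaky_fetch` is a throwaway demo function, not part of the package):

```python
from retry import retry

attempts = {"n": 0}

# Same shape as the new decorator; delays scaled down for the demo
# (the real code waits 15s, 30s, 60s, 60s between its 5 tries).
@retry(Exception, tries=5, delay=0.1, max_delay=0.4, backoff=2)
def flaky_fetch() -> str:
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise ConnectionError("transient DBHYDRO hiccup")
    return "ok"

print(flaky_fetch(), "after", attempts["n"], "attempts")  # ok after 3 attempts
```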
```diff
@@ -17,8 +16,19 @@ def get(
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     datum: str = "",
-    **kwargs: str | list
+    **kwargs: str | list | dict
 ) -> None:
+    """Fetches daily water level data from DBHYDRO and saves it as a CSV file.
+
+    Args:
+        workspace (str): The directory where the CSV file will be saved.
+        name (str): The name of the output CSV file (without extension).
+        dbkeys (list): List of DBHYDRO dbkeys to fetch data for. Defaults to DEFAULT_DBKEYS.
+        date_min (str): The start date for data retrieval in 'YYYY-MM-DD' format. Defaults to '1950-01-01'.
+        date_max (str): The end date for data retrieval in 'YYYY-MM-DD' format. Defaults to current date.
+        datum (str): The datum to use for the water level data. Defaults to an empty string. One of 'NGVD29', or 'NAVD88'.
+        **kwargs: Additional keyword arguments. Can include 'override_site_codes' (dict) to rename site codes in the output.
+    """
     # Get the type and units for the station
     data_type = "STG"
     units = "ft NGVD29"
```
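Putting the new signature together, a hedged usage example mirroring one of the entries in the `D` dict above (requires network access to DBHYDRO; the workspace path is hypothetical):

```python
from loone_data_prep.water_level_data import hydro

# Fetch stage data for one dbkey, renaming site code "2A-17" to "2-17"
# in the output columns via the new override_site_codes kwarg.
hydro.get(
    "/tmp/workspace",       # hypothetical workspace directory
    "Stg_2A17",             # output file: Stg_2A17.csv
    dbkeys=["16531"],
    date_min="1972-01-01",
    date_max="2023-04-30",
    datum="NGVD29",
    override_site_codes={"2A-17": "2-17"},
)
```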
```diff
@@ -27,77 +37,48 @@ def get(
         data_type = "GAGHT"
         units = "feet"
 
-
-
-
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        library(dplyr)
-
-        # Stage Data
-        if ("{datum}" == "")
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-        }}
-
-        if (nchar("{datum}") > 0)
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE, datum = "{datum}")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames({name}) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow({name}) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkeys", "{dbkeys}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkeys}"))
-        }}
-
-        # Get the station
-        station <- {name}$station[1]
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        {name}$type <- "{data_type}"
-        {name}$units <- "{units}"
-
-        # Clean the data.frame
-        {name} <- clean_hydro({name})
-
-        # Drop the " _STG_ft NGVD29" column
-        {name} <- {name} %>% select(-` _{data_type}_{units}`)
-
-        # Write the data to a csv file
-        write.csv({name},file ='{workspace}/{name}.csv')
-        """
-    )
+    # Retrieve the data
+    api = get_dbhydro_api()
+    response = api.get_daily_data(dbkeys, 'id', date_min, date_max, datum, False)
 
-
-
-def _reformat_water_level_file(workspace: str, name: str):
-    # Read in the data
-    df = pd.read_csv(f"{workspace}/{name}.csv")
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)
 
-    #
-    df
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)
 
-    #
-
+    # Make sure datetime exists as a column
+    if 'datetime' not in df.columns:
+        df.reset_index(inplace=True)
+
+    # Pivot the data so that each site_code is a column
+    df = df.pivot(index='datetime', columns='site_code', values='value')
 
-    #
-
+    # Get the current column names in df and the names to rename them to
+    column_names = {'datetime': 'date'}
+    override_site_codes = kwargs.get("override_site_codes", None)
+    for column in df.columns:
+        if override_site_codes and column in override_site_codes:
+            column_names[column] = f"{override_site_codes[column]}_{data_type}_{units}"
+        else:
+            column_names[column] = f"{column}_{data_type}_{units}"
 
-    #
-    df.reset_index(
+    # Reset the index to turn the datetime index into a column
+    df.reset_index(inplace=True)
 
-    #
-    df.
+    # Rename the columns
+    df.rename(columns=column_names, inplace=True)
 
-    #
-    df.
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'])
+
+    # Drop the "Unnamed: 0" column if it exists
+    if 'Unnamed: 0' in df.columns:
+        df.drop(columns=['Unnamed: 0'], inplace=True)
+
+    # Write the data to a csv file
+    df.to_csv(f"{workspace}/{name}.csv", index=True)
+
 
 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
```
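The R/rpy2 pipeline (dbhydroR's `get_hydro` plus `clean_hydro`) is replaced by a pure-pandas reshape: pivot the long API frame so each `site_code` becomes a column, then rename columns to the `<site>_<type>_<units>` convention, honoring `override_site_codes`. A self-contained sketch on toy data (the `.get(column, column)` lookup condenses the hunk's if/else):

```python
import pandas as pd

# Toy long-format frame like the one the DBHYDRO API helper returns.
df = pd.DataFrame({
    "datetime": ["2023-01-01", "2023-01-01", "2023-01-02", "2023-01-02"],
    "site_code": ["2A-17", "3A-3", "2A-17", "3A-3"],
    "value": [10.1, 9.8, 10.2, 9.9],
})

# Wide format: one column per site_code, as in the new get() body.
df = df.pivot(index="datetime", columns="site_code", values="value")

# Rename to the "<site>_<type>_<units>" convention, applying overrides.
override_site_codes = {"2A-17": "2-17"}
data_type, units = "STG", "ft NGVD29"
column_names = {"datetime": "date"}
for column in df.columns:
    site = override_site_codes.get(column, column)
    column_names[column] = f"{site}_{data_type}_{units}"

df.reset_index(inplace=True)
df.rename(columns=column_names, inplace=True)
print(df.columns.tolist())
# ['date', '2-17_STG_ft NGVD29', '3A-3_STG_ft NGVD29']
```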
```diff
@@ -1,27 +1,50 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv,
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {
-
-
-
-
-
-
-
-
-
-    "
-
-
-
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ['S65E', 'S154', 'S4', 'S308C', 'CULV10A', 'S133', 'S127', 'S191'],
+        "station_types": {'S65E': 'SITE', 'S154': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S191': 'SITE'}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
 }
 
 
```
```diff
@@ -34,6 +57,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
        # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
```
```diff
@@ -41,16 +67,19 @@ def main(workspace: str, d: dict = D) -> dict:
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
             # Temporarily rename current data file so it isn't over written
```
```diff
@@ -61,8 +90,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
```
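Note the date-format change: water quality "latest date" values now carry a time component, so the next-day arithmetic parses and re-emits the full `%Y-%m-%d %H:%M:%S` format. A one-liner check (toy timestamp):

```python
from datetime import datetime, timedelta

date_latest = "2024-06-30 10:15:00"
date_next = (
    datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)
).strftime("%Y-%m-%d %H:%M:%S")
print(date_next)  # 2024-07-01 10:15:00
```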
```diff
@@ -71,25 +100,38 @@ def main(workspace: str, d: dict = D) -> dict:
                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-
+
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)
 
                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()
 
                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
```