loone-data-prep 1.2.4-py3-none-any.whl → 1.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
- loone_data_prep/LOONE_DATA_PREP.py +0 -1
- loone_data_prep/dbhydro_insights.py +195 -0
- loone_data_prep/flow_data/S65E_total.py +57 -57
- loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
- loone_data_prep/flow_data/get_forecast_flows.py +19 -105
- loone_data_prep/flow_data/get_inflows.py +18 -8
- loone_data_prep/flow_data/get_outflows.py +16 -7
- loone_data_prep/flow_data/hydro.py +62 -91
- loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
- loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
- loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
- loone_data_prep/utils.py +262 -32
- loone_data_prep/water_level_data/get_all.py +52 -44
- loone_data_prep/water_level_data/hydro.py +49 -68
- loone_data_prep/water_quality_data/get_inflows.py +69 -27
- loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
- loone_data_prep/water_quality_data/wq.py +114 -88
- loone_data_prep/weather_data/get_all.py +5 -3
- loone_data_prep/weather_data/weather.py +117 -180
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
- loone_data_prep-1.3.1.dist-info/RECORD +38 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
- loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
- loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
- loone_data_prep-1.2.4.dist-info/RECORD +0 -38
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {loone_data_prep-1.2.4.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
loone_data_prep/water_level_data/hydro.py

```diff
@@ -1,15 +1,14 @@
 import sys
 from datetime import datetime
 from retry import retry
-from rpy2.robjects import r
-from rpy2.rinterface_lib.embedded import RRuntimeError
 import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
 
 DEFAULT_DBKEYS = ["16022", "12509", "12519", "16265", "15611"]
 DATE_NOW = datetime.now().strftime("%Y-%m-%d")
 
 
-@retry(
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
 def get(
     workspace: str,
     name: str,
@@ -17,8 +16,19 @@ def get(
     date_min: str = "1950-01-01",
     date_max: str = DATE_NOW,
     datum: str = "",
-    **kwargs: str | list
+    **kwargs: str | list | dict
 ) -> None:
+    """Fetches daily water level data from DBHYDRO and saves it as a CSV file.
+
+    Args:
+        workspace (str): The directory where the CSV file will be saved.
+        name (str): The name of the output CSV file (without extension).
+        dbkeys (list): List of DBHYDRO dbkeys to fetch data for. Defaults to DEFAULT_DBKEYS.
+        date_min (str): The start date for data retrieval in 'YYYY-MM-DD' format. Defaults to '1950-01-01'.
+        date_max (str): The end date for data retrieval in 'YYYY-MM-DD' format. Defaults to current date.
+        datum (str): The datum to use for the water level data. Defaults to an empty string. One of 'NGVD29', or 'NAVD88'.
+        **kwargs: Additional keyword arguments. Can include 'override_site_codes' (dict) to rename site codes in the output.
+    """
     # Get the type and units for the station
     data_type = "STG"
     units = "ft NGVD29"
@@ -27,77 +37,48 @@ def get(
         data_type = "GAGHT"
         units = "feet"
 
-
-
-
-        # Load the required libraries
-        library(rio)
-        library(dbhydroR)
-        library(dplyr)
-
-        # Stage Data
-        if ("{datum}" == "")
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE)
-        }}
-
-        if (nchar("{datum}") > 0)
-        {{
-            {name} <- get_hydro(dbkey = c({dbkeys_str}), date_min = "{date_min}", date_max = "{date_max}", raw = TRUE, datum = "{datum}")
-        }}
-
-        # Give data.frame correct column names so it can be cleaned using the clean_hydro function
-        colnames({name}) <- c("station", "dbkey", "date", "data.value", "qualifer", "revision.date")
-
-        # Check if the data.frame has any rows
-        if (nrow({name}) == 0)
-        {{
-            # No data given back, It's possible that the dbkey has reached its end date.
-            print(paste("Empty data.frame returned for dbkeys", "{dbkeys}", "It's possible that the dbkey has reached its end date. Skipping to the next dbkey."))
-            return(list(success = FALSE, dbkey = "{dbkeys}"))
-        }}
-
-        # Get the station
-        station <- {name}$station[1]
-
-        # Add a type and units column to data so it can be cleaned using the clean_hydro function
-        {name}$type <- "{data_type}"
-        {name}$units <- "{units}"
-
-        # Clean the data.frame
-        {name} <- clean_hydro({name})
-
-        # Drop the " _STG_ft NGVD29" column
-        {name} <- {name} %>% select(-` _{data_type}_{units}`)
-
-        # Write the data to a csv file
-        write.csv({name},file ='{workspace}/{name}.csv')
-        """
-    )
+    # Retrieve the data
+    api = get_dbhydro_api()
+    response = api.get_daily_data(dbkeys, 'id', date_min, date_max, datum, False)
 
-
-
-def _reformat_water_level_file(workspace: str, name: str):
-    # Read in the data
-    df = pd.read_csv(f"{workspace}/{name}.csv")
+    # Get the data as a dataframe
+    df = response.to_dataframe(True)
 
-    #
-    df
+    # Replace flagged 0 values and -99999.0 with NaN
+    df = df_replace_missing_with_nan(df)
 
-    #
-
+    # Make sure datetime exists as a column
+    if 'datetime' not in df.columns:
+        df.reset_index(inplace=True)
+
+    # Pivot the data so that each site_code is a column
+    df = df.pivot(index='datetime', columns='site_code', values='value')
 
-    #
-
+    # Get the current column names in df and the names to rename them to
+    column_names = {'datetime': 'date'}
+    override_site_codes = kwargs.get("override_site_codes", None)
+    for column in df.columns:
+        if override_site_codes and column in override_site_codes:
+            column_names[column] = f"{override_site_codes[column]}_{data_type}_{units}"
+        else:
+            column_names[column] = f"{column}_{data_type}_{units}"
 
-    #
-    df.reset_index(
+    # Reset the index to turn the datetime index into a column
+    df.reset_index(inplace=True)
 
-    #
-    df.
+    # Rename the columns
+    df.rename(columns=column_names, inplace=True)
 
-    #
-    df.
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'])
+
+    # Drop the "Unnamed: 0" column if it exists
+    if 'Unnamed: 0' in df.columns:
+        df.drop(columns=['Unnamed: 0'], inplace=True)
+
+    # Write the data to a csv file
+    df.to_csv(f"{workspace}/{name}.csv", index=True)
+
 
 if __name__ == "__main__":
     args = [sys.argv[1].rstrip("/"), sys.argv[2]]
```
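The rewrite above swaps the embedded R/dbhydroR script for the package's own DBHYDRO API client and a pure-pandas reshape, and the bare `@retry(` decorator becomes `@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)`, i.e. up to five attempts with a 15-second delay that doubles between tries, capped at 60 seconds. A minimal sketch of the new reshape on synthetic data — the `datetime`/`site_code`/`value` layout is what the diff implies `response.to_dataframe(True)` returns, and the `override_site_codes` mapping here is a made-up illustration of the new kwarg:

```python
import pandas as pd

# Synthetic stand-in for the long-format frame; in the package it comes from
# get_dbhydro_api().get_daily_data(...) followed by response.to_dataframe(True).
df = pd.DataFrame({
    "datetime": pd.to_datetime(["2024-01-01", "2024-01-01", "2024-01-02", "2024-01-02"]),
    "site_code": ["L001", "LZ40", "L001", "LZ40"],
    "value": [12.1, 11.8, 12.0, 11.9],
})

data_type = "STG"
units = "ft NGVD29"
override_site_codes = {"LZ40": "LZ40X"}  # hypothetical rename, as kwargs would carry

# Pivot so each site_code becomes its own column, as in the new get()
df = df.pivot(index="datetime", columns="site_code", values="value")

# Build the rename map, honoring any site-code overrides
column_names = {"datetime": "date"}
for column in df.columns:
    label = override_site_codes.get(column, column)
    column_names[column] = f"{label}_{data_type}_{units}"

# Turn the datetime index back into a column, then rename
df.reset_index(inplace=True)
df.rename(columns=column_names, inplace=True)

print(df.columns.tolist())
# ['date', 'L001_STG_ft NGVD29', 'LZ40X_STG_ft NGVD29']
```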
loone_data_prep/water_quality_data/get_inflows.py

```diff
@@ -1,27 +1,50 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv,
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {
-
-
-
-
-
-
-
-
-
-    "
-
-
-
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ['S191', 'S65E', 'S84', 'S154', 'S71', 'S72', 'S4', 'FECSR78', 'S308C', 'CULV10A', 'S133', 'S127', 'S135'],
+        "station_types": {'S191': 'SITE', 'S65E': 'SITE', 'S84': 'SITE', 'S154': 'SITE', 'S71': 'SITE', 'S72': 'SITE', 'S4': 'SITE', 'FECSR78': 'STATION', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ['S65E', 'S154', 'S4', 'S308C', 'CULV10A', 'S133', 'S127', 'S191'],
+        "station_types": {'S65E': 'SITE', 'S154': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S133': 'SITE', 'S127': 'SITE', 'S191': 'SITE'}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ['S65E', 'S84', 'S4', 'S308C', 'CULV10A', 'S135'],
+        "station_types": {'S65E': 'SITE', 'S84': 'SITE', 'S4': 'SITE', 'S308C': 'STATION', 'CULV10A': 'STATION', 'S135': 'SITE'}
+    },
 }
 
 
@@ -34,6 +57,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
        # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -41,16 +67,19 @@ def main(workspace: str, d: dict = D) -> dict:
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
         # Temporarily rename current data file so it isn't over written
@@ -61,8 +90,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -71,25 +100,38 @@ def main(workspace: str, d: dict = D) -> dict:
                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-
+
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
                 # Merge the new data with the original data
-                df_merged = pd.concat([df_original, df_new], ignore_index=
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)
 
                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()
 
                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
```
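The merge block above deliberately reproduces the legacy file layout: rows are renumbered from 1 and the index is cast to `str` so that `csv.QUOTE_NONNUMERIC` quotes row labels and dates while leaving measurements bare; `wq.rewrite_water_quality_file_without_date_quotes` then strips the quotes from the dates. A self-contained sketch of the quoting behavior, using synthetic frames in place of the downloaded data:

```python
import csv
import io

import pandas as pd

# Synthetic stand-ins for the original file and the newly downloaded rows
df_original = pd.DataFrame({"date": ["2024-01-01 00:00:00"], "value": [0.12]})
df_new = pd.DataFrame({"date": ["2024-01-02 00:00:00"], "value": [0.15]})

# Concatenate without ignoring the index, then renumber, as the diff does
df_merged = pd.concat([df_original, df_new], ignore_index=False)
df_merged.reset_index(inplace=True)
df_merged.drop(['index'], axis=1, inplace=True)

# Start the index at 1 and cast it to str so QUOTE_NONNUMERIC will quote it
df_merged.index = df_merged.index + 1
df_merged.index = df_merged.index.astype(str)

buf = io.StringIO()
df_merged.to_csv(buf, index=True, quoting=csv.QUOTE_NONNUMERIC)
print(buf.getvalue())
# "","date","value"
# "1","2024-01-01 00:00:00",0.12
# "2","2024-01-02 00:00:00",0.15
```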
loone_data_prep/water_quality_data/get_lake_wq.py

```diff
@@ -1,32 +1,110 @@
+import csv
+import traceback
 import sys
 import os
 import uuid
 from datetime import datetime, timedelta
 import pandas as pd
 from loone_data_prep.water_quality_data import wq
-from loone_data_prep.utils import find_last_date_in_csv,
+from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
 
 
 D = {
-    "PHOSPHATE, TOTAL AS P": {
-
-
-
-
-    "
-
-
-
-
-    "
-
-
-
-
-    "
-
-
-
+    "PHOSPHATE, TOTAL AS P": {
+        "test_number": 25,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "PHOSPHATE, ORTHO AS P": {
+        "test_number": 23,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "AMMONIA-N": {
+        "test_number": 20,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "NITRATE+NITRITE-N": {
+        "test_number": 18,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "TOTAL NITROGEN": {
+        "test_number": 80,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HILR": {
+        "test_number": 1023,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN HTYR": {
+        "test_number": 1022,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LA": {
+        "test_number": 1005,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LF": {
+        "test_number": 1006,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LR": {
+        "test_number": 1007,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LW": {
+        "test_number": 1008,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN LY": {
+        "test_number": 1009,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN RR": {
+        "test_number": 1010,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN WR": {
+        "test_number": 1011,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "MICROCYSTIN YR": {
+        "test_number": 1012,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A": {
+        "test_number": 61,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A(LC)": {
+        "test_number": 179,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "CHLOROPHYLL-A, CORRECTED": {
+        "test_number": 112,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    },
+    "DISSOLVED OXYGEN": {
+        "test_number": 8,
+        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_types": {"L001": "SITE", "L004": "SITE", "L005": "SITE", "L006": "SITE", "L007": "SITE", "L008": "SITE", "LZ40": "SITE"}
+    }
 }
 
 
@@ -36,6 +114,9 @@ def main(workspace: str, d: dict = D) -> dict:
     for name, params in d.items():
         print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
 
+        # Get the test_number for this parameter name
+        test_number = params['test_number']
+
         # Get the date of the latest data in the csv file for each station id
         station_date_latest = {}
         for station_id in params["station_ids"]:
@@ -43,16 +124,19 @@ def main(workspace: str, d: dict = D) -> dict:
 
         # Get the water quality data
         for station_id, date_latest in station_date_latest.items():
+            # Get the station type for this station ID
+            station_type = params["station_types"][station_id]
+
             # File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
             if date_latest is None:
                 # Get all the water quality data for the name/station combination
                 print(f"Getting all {name} data for station ID: {station_id}.")
-                wq.get(workspace, name, [station_id])
+                wq.get(workspace, name, test_number, [station_id])
             else:
                 # Check whether we already have the latest data
-                if
+                if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
                     # Notify that the data is already up to date
-                    print(f'Downloading of new water quality data for test name: {name} station: {
+                    print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
                     continue
 
         # Temporarily rename current data file so it isn't over written
@@ -63,8 +147,8 @@ def main(workspace: str, d: dict = D) -> dict:
             try:
                 # Get only the water quality data that is newer than the latest data in the csv file
                 print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
-                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
-                wq.get(workspace, name, [station_id], date_min=date_latest)
+                date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
+                wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
 
                 # Data failed to download - It's possible the data's end date has been reached
                 if not os.path.exists(os.path.join(workspace, original_file_name)):
@@ -73,25 +157,38 @@ def main(workspace: str, d: dict = D) -> dict:
                 # Read in the original data
                 df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
 
-                # Calculate the days column for the newly downloaded data
-                df_original_date_min = df_original['date'].min()
-                wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
-
                 # Read in the newly downloaded data
                 df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
-                df_new.reset_index(inplace=True)
 
-                #
-
+                # Calculate the days column for the newly downloaded data
+                df_original_date_min = df_original['date'].min()
+                df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
 
+                # Merge the new data with the original data
+                df_merged = pd.concat([df_original, df_new], ignore_index=False)
+
+                # Re-number the index
+                df_merged.reset_index(inplace=True)
+                df_merged.drop(['index'], axis=1, inplace=True)
+
+                # Start index at 1 instead of 0 (for backwards compatibility)
+                df_merged.index = df_merged.index + 1
+
+                # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
+                df_merged.index = df_merged.index.astype(str)
+
                 # Write out the merged data
-                df_merged.to_csv(os.path.join(workspace, original_file_name))
+                df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
+
+                # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
+                wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
 
                 # Remove the original renamed data file
                 os.remove(os.path.join(workspace, original_file_name_temp))
             except Exception as e:
                 # Notify of the error
                 print(f"Error occurred while downloading new water quality data: {e}")
+                traceback.print_exc()
 
                 # Remove the newly downloaded data file if it exists
                 if os.path.exists(os.path.join(workspace, original_file_name)):
```
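Both water-quality scripts now share the same control flow: each entry in `D` carries an explicit DBHYDRO `test_number` and a per-station `station_type`, downloads are skipped when the local CSV is already current, and incremental fetches resume one day after the last stored timestamp (note the format change from `%Y-%m-%d` to `%Y-%m-%d %H:%M:%S`). A condensed sketch of that loop, with injected callables standing in for the package's `find_last_date_in_csv`, `dbhydro_water_quality_data_is_latest`, and `wq.get` — the stubs at the bottom are illustrative only:

```python
from datetime import datetime, timedelta
from typing import Callable, Optional

D = {
    "DISSOLVED OXYGEN": {
        "test_number": 8,
        "station_ids": ["L001", "LZ40"],
        "station_types": {"L001": "SITE", "LZ40": "SITE"},
    },
}


def update(
    d: dict,
    last_date: Callable[[str, str], Optional[str]],   # stands in for find_last_date_in_csv
    is_latest: Callable[[str, str, str, int], bool],  # stands in for dbhydro_water_quality_data_is_latest
    fetch: Callable[..., None],                       # stands in for wq.get
) -> None:
    for name, params in d.items():
        test_number = params["test_number"]
        for station_id in params["station_ids"]:
            station_type = params["station_types"][station_id]
            date_latest = last_date(name, station_id)
            if date_latest is None:
                # No local file yet: fetch the full history
                fetch(name, test_number, [station_id])
            elif is_latest(date_latest, station_id, station_type, test_number):
                # Local CSV is already current: skip the download
                continue
            else:
                # Resume one day after the last stored timestamp
                start = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S")
                         + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
                fetch(name, test_number, [station_id], date_min=start)


# Example wiring with stubs:
update(
    D,
    last_date=lambda name, sid: "2024-01-02 00:00:00",
    is_latest=lambda date, sid, stype, tn: False,
    fetch=lambda *args, **kwargs: print("fetch:", args, kwargs),
)
```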