loone-data-prep 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loone_data_prep/dbhydro_insights.py +195 -0
- loone_data_prep/flow_data/S65E_total.py +57 -57
- loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
- loone_data_prep/flow_data/get_forecast_flows.py +19 -105
- loone_data_prep/flow_data/get_inflows.py +18 -8
- loone_data_prep/flow_data/get_outflows.py +16 -7
- loone_data_prep/flow_data/hydro.py +62 -91
- loone_data_prep/utils.py +243 -30
- loone_data_prep/water_level_data/get_all.py +52 -44
- loone_data_prep/water_level_data/hydro.py +49 -68
- loone_data_prep/water_quality_data/get_inflows.py +69 -27
- loone_data_prep/water_quality_data/get_lake_wq.py +130 -33
- loone_data_prep/water_quality_data/wq.py +114 -88
- loone_data_prep/weather_data/get_all.py +5 -3
- loone_data_prep/weather_data/weather.py +117 -180
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/METADATA +2 -8
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/RECORD +20 -19
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/WHEEL +1 -1
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {loone_data_prep-1.3.0.dist-info → loone_data_prep-1.3.1.dist-info}/top_level.txt +0 -0
|
@@ -1,32 +1,110 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import traceback
|
|
1
3
|
import sys
|
|
2
4
|
import os
|
|
3
5
|
import uuid
|
|
4
6
|
from datetime import datetime, timedelta
|
|
5
7
|
import pandas as pd
|
|
6
8
|
from loone_data_prep.water_quality_data import wq
|
|
7
|
-
from loone_data_prep.utils import find_last_date_in_csv,
|
|
9
|
+
from loone_data_prep.utils import find_last_date_in_csv, dbhydro_water_quality_data_is_latest
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
# Station IDs requested for every water quality parameter in D.
_LAKE_STATION_IDS = ("L001", "L004", "L005", "L006", "L007", "L008", "LZ40")

# Maps each water quality parameter name to the test number passed to wq.get().
# Insertion order is the order in which main() iterates the parameters.
_TEST_NUMBERS = {
    "PHOSPHATE, TOTAL AS P": 25,
    "PHOSPHATE, ORTHO AS P": 23,
    "AMMONIA-N": 20,
    "NITRATE+NITRITE-N": 18,
    "TOTAL NITROGEN": 80,
    "MICROCYSTIN HILR": 1023,
    "MICROCYSTIN HTYR": 1022,
    "MICROCYSTIN LA": 1005,
    "MICROCYSTIN LF": 1006,
    "MICROCYSTIN LR": 1007,
    "MICROCYSTIN LW": 1008,
    "MICROCYSTIN LY": 1009,
    "MICROCYSTIN RR": 1010,
    "MICROCYSTIN WR": 1011,
    "MICROCYSTIN YR": 1012,
    "CHLOROPHYLL-A": 61,
    "CHLOROPHYLL-A(LC)": 179,
    "CHLOROPHYLL-A, CORRECTED": 112,
    "DISSOLVED OXYGEN": 8,
}

# Default download configuration: one entry per water quality parameter name.
# Every entry gets its own list/dict objects so that mutating one entry
# cannot leak into the others.
D = {
    name: {
        "test_number": test_number,
        "station_ids": list(_LAKE_STATION_IDS),
        "station_types": {station_id: "SITE" for station_id in _LAKE_STATION_IDS},
    }
    for name, test_number in _TEST_NUMBERS.items()
}
|
|
31
109
|
|
|
32
110
|
|
|
@@ -36,6 +114,9 @@ def main(workspace: str, d: dict = D) -> dict:
|
|
|
36
114
|
for name, params in d.items():
|
|
37
115
|
print(f"Getting {name} for the following station IDs: {params['station_ids']}.")
|
|
38
116
|
|
|
117
|
+
# Get the test_number for this parameter name
|
|
118
|
+
test_number = params['test_number']
|
|
119
|
+
|
|
39
120
|
# Get the date of the latest data in the csv file for each station id
|
|
40
121
|
station_date_latest = {}
|
|
41
122
|
for station_id in params["station_ids"]:
|
|
@@ -43,16 +124,19 @@ def main(workspace: str, d: dict = D) -> dict:
|
|
|
43
124
|
|
|
44
125
|
# Get the water quality data
|
|
45
126
|
for station_id, date_latest in station_date_latest.items():
|
|
127
|
+
# Get the station type for this station ID
|
|
128
|
+
station_type = params["station_types"][station_id]
|
|
129
|
+
|
|
46
130
|
# File with data for this station/name combination does NOT already exist (or possibly some other error occurred)
|
|
47
131
|
if date_latest is None:
|
|
48
132
|
# Get all the water quality data for the name/station combination
|
|
49
133
|
print(f"Getting all {name} data for station ID: {station_id}.")
|
|
50
|
-
wq.get(workspace, name, [station_id])
|
|
134
|
+
wq.get(workspace, name, test_number, [station_id])
|
|
51
135
|
else:
|
|
52
136
|
# Check whether we already have the latest data
|
|
53
|
-
if
|
|
137
|
+
if dbhydro_water_quality_data_is_latest(date_latest, station_id, station_type, test_number):
|
|
54
138
|
# Notify that the data is already up to date
|
|
55
|
-
print(f'Downloading of new water quality data for test name: {name} station: {
|
|
139
|
+
print(f'Downloading of new water quality data for test name: {name} station: {station_id} skipped. Data is already up to date.')
|
|
56
140
|
continue
|
|
57
141
|
|
|
58
142
|
# Temporarily rename current data file so it isn't over written
|
|
@@ -63,8 +147,8 @@ def main(workspace: str, d: dict = D) -> dict:
|
|
|
63
147
|
try:
|
|
64
148
|
# Get only the water quality data that is newer than the latest data in the csv file
|
|
65
149
|
print(f"Downloading new water quality data for test name: {name} station ID: {station_id} starting from date: {date_latest}.")
|
|
66
|
-
date_latest = (datetime.strptime(date_latest, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
|
|
67
|
-
wq.get(workspace, name, [station_id], date_min=date_latest)
|
|
150
|
+
date_latest = (datetime.strptime(date_latest, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
|
|
151
|
+
wq.get(workspace, name, test_number, [station_id], date_min=date_latest)
|
|
68
152
|
|
|
69
153
|
# Data failed to download - It's possible the data's end date has been reached
|
|
70
154
|
if not os.path.exists(os.path.join(workspace, original_file_name)):
|
|
@@ -73,25 +157,38 @@ def main(workspace: str, d: dict = D) -> dict:
|
|
|
73
157
|
# Read in the original data
|
|
74
158
|
df_original = pd.read_csv(os.path.join(workspace, original_file_name_temp), index_col=0)
|
|
75
159
|
|
|
76
|
-
# Calculate the days column for the newly downloaded data
|
|
77
|
-
df_original_date_min = df_original['date'].min()
|
|
78
|
-
wq._calculate_days_column(workspace, original_file_name, df_original_date_min)
|
|
79
|
-
|
|
80
160
|
# Read in the newly downloaded data
|
|
81
161
|
df_new = pd.read_csv(os.path.join(workspace, original_file_name), index_col=0)
|
|
82
|
-
df_new.reset_index(inplace=True)
|
|
83
162
|
|
|
84
|
-
#
|
|
85
|
-
|
|
163
|
+
# Calculate the days column for the newly downloaded data
|
|
164
|
+
df_original_date_min = df_original['date'].min()
|
|
165
|
+
df_new = wq._calculate_days_column(workspace, df_new, df_original_date_min)
|
|
86
166
|
|
|
167
|
+
# Merge the new data with the original data
|
|
168
|
+
df_merged = pd.concat([df_original, df_new], ignore_index=False)
|
|
169
|
+
|
|
170
|
+
# Re-number the index
|
|
171
|
+
df_merged.reset_index(inplace=True)
|
|
172
|
+
df_merged.drop(['index'], axis=1, inplace=True)
|
|
173
|
+
|
|
174
|
+
# Start index at 1 instead of 0 (for backwards compatibility)
|
|
175
|
+
df_merged.index = df_merged.index + 1
|
|
176
|
+
|
|
177
|
+
# Make sure the integer index values are quoted in the csv file (for backwards compatibility)
|
|
178
|
+
df_merged.index = df_merged.index.astype(str)
|
|
179
|
+
|
|
87
180
|
# Write out the merged data
|
|
88
|
-
df_merged.to_csv(os.path.join(workspace, original_file_name))
|
|
181
|
+
df_merged.to_csv(os.path.join(workspace, original_file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)
|
|
182
|
+
|
|
183
|
+
# Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
|
|
184
|
+
wq.rewrite_water_quality_file_without_date_quotes(workspace, original_file_name)
|
|
89
185
|
|
|
90
186
|
# Remove the original renamed data file
|
|
91
187
|
os.remove(os.path.join(workspace, original_file_name_temp))
|
|
92
188
|
except Exception as e:
|
|
93
189
|
# Notify of the error
|
|
94
190
|
print(f"Error occurred while downloading new water quality data: {e}")
|
|
191
|
+
traceback.print_exc()
|
|
95
192
|
|
|
96
193
|
# Remove the newly downloaded data file if it exists
|
|
97
194
|
if os.path.exists(os.path.join(workspace, original_file_name)):
|
|
@@ -1,117 +1,143 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import os
|
|
1
3
|
import sys
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from retry import retry
|
|
4
|
-
|
|
5
|
-
from
|
|
6
|
-
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from loone_data_prep.utils import get_dbhydro_api
|
|
7
8
|
|
|
8
9
|
DEFAULT_STATION_IDS = ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]
# NOTE: evaluated once, when this module is imported.
DATE_NOW = datetime.now().strftime("%Y-%m-%d")


@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
def get(
    workspace: str,
    name: str,
    test_number: int,
    station_ids: list = DEFAULT_STATION_IDS,
    date_min: str = "1950-01-01",
    date_max: str = DATE_NOW,
    **kwargs: str | list
) -> None:
    """Fetch water quality data from DBHydro API and save it as CSV files in the specified workspace.

    One file named 'water_quality_{station}_{name}.csv' is written per station that has data.
    Each file holds the daily mean of the reported values, a "days" column, and a
    string index that starts at 1.

    Args:
        workspace (str): The directory where the CSV files will be saved.
        name (str): The name of the water quality parameter. Example: 'PHOSPHATE, TOTAL AS P'
        test_number (int): The DBHydro test number for the water quality parameter.
        station_ids (list, optional): List of station IDs to fetch data for. Defaults to DEFAULT_STATION_IDS.
        date_min (str, optional): The start date for fetching data in YYYY-MM-DD format. Defaults to "1950-01-01".
        date_max (str, optional): The end date for fetching data in YYYY-MM-DD format. Defaults to DATE_NOW,
            which is the date on which this module was imported.
        **kwargs: Additional keyword arguments. Accepted but not used by this function.

    Returns:
        None
    """
    # Initialize the DBHydro API
    api = get_dbhydro_api()

    # Fetch water quality data for all requested stations in one request
    response = api.get_water_quality(
        stations=station_ids,
        test_numbers=[test_number],
        date_start=date_min,
        date_end=date_max,
        exclude_flagged_results=False,
    )
    df = response.to_dataframe(include_metadata=True)

    # NOTE(review): assumes an empty response may come back without a 'station'
    # column - confirm against the DBHydro client. Without this guard the lookup
    # below raises KeyError, which the @retry decorator would then retry.
    if 'station' not in df.columns:
        for station in station_ids:
            print(f'No data found for station ID {station} and test number {test_number}.')
        return

    # Process and save data for each station
    for station in station_ids:
        # Get a copy of the data frame for this station
        df_station = df[df['station'] == station].copy()

        # Check if the data frame is empty
        if df_station.empty:
            print(f'No data found for station ID {station} and test number {test_number}.')
            continue

        # Get the units of the data (used in the value column name)
        units = df_station['units'].iloc[0] if 'units' in df_station.columns else ''

        # Drop unwanted columns
        df_station = df_station[['date_collected_str', 'sig_fig_value']].copy()

        # Convert string sig_fig_value to numeric (unparseable values become NaN)
        df_station['sig_fig_value'] = pd.to_numeric(df_station['sig_fig_value'], errors='coerce')

        # Calculate daily average values
        df_station['date_collected_str'] = pd.to_datetime(df_station['date_collected_str'])
        df_station['date_only'] = df_station['date_collected_str'].dt.date
        df_station = df_station.groupby('date_only')['sig_fig_value'].mean().reset_index()

        # Format dataframe to expected layout
        df_station['date_only'] = pd.to_datetime(df_station['date_only'])  # Back to datetimes for the date arithmetic below
        df_station.sort_values('date_only', inplace=True)
        df_station.rename(columns={'date_only': 'date', 'sig_fig_value': f'{station}_{name}_{units}'}, inplace=True)

        # Calculate the days column: days since the first date, offset by that date's day of the month
        first_date = df_station['date'].min()
        df_station['days'] = (df_station['date'] - first_date).dt.days + first_date.day

        # Make sure the integer index is written out (for backwards compatibility)
        df_station.reset_index(inplace=True, drop=True)

        # Start index at 1 instead of 0 (for backwards compatibility)
        df_station.index = df_station.index + 1

        # Make sure the integer index values are quoted in the csv file (for backwards compatibility)
        df_station.index = df_station.index.astype(str)

        # Make sure the date column includes time information at midnight (for backwards compatibility)
        df_station['date'] = df_station['date'].dt.strftime('%Y-%m-%d 00:00:00')

        # Write out the data frame to a CSV file
        file_name = f'water_quality_{station}_{name}.csv'
        df_station.to_csv(os.path.join(workspace, file_name), index=True, quoting=csv.QUOTE_NONNUMERIC)

        # Rewrite the file so dates don't have double quotes around them (for backwards compatibility)
        rewrite_water_quality_file_without_date_quotes(workspace, file_name)
|
|
71
95
|
|
|
72
96
|
|
|
73
|
-
def _calculate_days_column(workspace: str,
|
|
97
|
+
def _calculate_days_column(workspace: str, df: pd.DataFrame, date_min: str) -> pd.DataFrame:
    """
    Calculates the values that should be in the "days" column of the water quality data
    based on the given date_min and returns the updated data frame.

    The data frame that is passed in is modified in place: its "date" column is converted
    to pandas datetimes and its "days" column is added or overwritten.

    Args:
        workspace (str): The path to the workspace directory. Not used by this function.
        df (pd.DataFrame): The water quality data dataframe. Must have a "date" column.
        date_min (str): The minimum date that the "days" column values should be calculated from.
            Parsed with pd.to_datetime, e.g. "YYYY-MM-DD".

    Returns:
        pd.DataFrame: The same data frame object that was passed in, with the "days" column set.
    """
    date_min_object = pd.to_datetime(date_min)

    # Ensure df['date'] is a pandas datetime Series
    df['date'] = pd.to_datetime(df['date'])

    # Whole days elapsed since date_min, offset by the day of the month of date_min
    df['days'] = (df['date'] - date_min_object).dt.days + date_min_object.day

    return df
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def rewrite_water_quality_file_without_date_quotes(workspace: str, file_name: str) -> None:
    """
    Rewrites the given water quality CSV file so that the dates don't have double quotes around them (for backwards compatibility).

    The first line (the header) is written back unchanged. On every other line the
    double quotes are removed from the second comma-separated field. Lines that have
    no second field (for example blank lines) are written back unchanged.

    Args:
        workspace (str): The path to the workspace directory.
        file_name (str): The name of the water quality CSV file.
    """
    file_path = os.path.join(workspace, file_name)

    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Build the complete output before reopening the file for writing, so that
    # a failure while processing cannot leave a truncated file behind.
    rewritten_lines = []
    for line_number, line in enumerate(lines):
        if line_number != 0:
            # Only the first two fields matter; leave the rest of the line untouched
            fields = line.split(',', 2)
            if len(fields) > 1:
                fields[1] = fields[1].replace('"', '')  # Remove quotes around dates (2nd column)
                line = ','.join(fields)
        rewritten_lines.append(line)

    with open(file_path, 'w', newline='') as file:
        file.writelines(rewritten_lines)
|
|
115
141
|
|
|
116
142
|
|
|
117
143
|
if __name__ == "__main__":
|
|
@@ -88,7 +88,7 @@ def main(workspace: str, d: dict = D, dbkey_stations: dict = DBKEY_STATIONS) ->
|
|
|
88
88
|
continue
|
|
89
89
|
|
|
90
90
|
# Check whether the latest data is already up to date.
|
|
91
|
-
if dbhydro_data_is_latest(date_latest):
|
|
91
|
+
if dbhydro_data_is_latest(date_latest, dbkey):
|
|
92
92
|
# Notify that the data is already up to date
|
|
93
93
|
print(f'Downloading of new {name} data skipped for dbkey {dbkey}. Data is already up to date.')
|
|
94
94
|
continue
|
|
@@ -99,8 +99,10 @@ def main(workspace: str, d: dict = D, dbkey_stations: dict = DBKEY_STATIONS) ->
|
|
|
99
99
|
|
|
100
100
|
try:
|
|
101
101
|
# Download only the new data
|
|
102
|
-
|
|
103
|
-
|
|
102
|
+
date_start = pd.to_datetime(date_latest) + pd.Timedelta(days=1)
|
|
103
|
+
date_start = date_start.strftime('%Y-%m-%d')
|
|
104
|
+
print(f'Downloading new {name} data for dbkey {dbkey} starting from date {date_start}')
|
|
105
|
+
weather.get(workspace, name, dbkeys=[dbkey], date_min=date_start)
|
|
104
106
|
|
|
105
107
|
# Data failed to download - It's possible the data's end date has been reached
|
|
106
108
|
if not os.path.exists(os.path.join(workspace, original_file_name)):
|