mpcaHydro 2.0.4__py3-none-any.whl → 2.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mpcaHydro/pywisk.py ADDED
@@ -0,0 +1,381 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Jul 10 16:18:03 2023
+
+ @author: mfratki
+ """
+ import requests
+ from requests.exceptions import ConnectionError, Timeout, HTTPError, RequestException
+ import pandas as pd
+ import time
+
+ #TODO: Use this url to make sure web service is working https://wiskiweb01.pca.state.mn.us/
+ class Service():
+     base_url = 'http://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
+     base_dict = {
+         'datasource': '0',
+         'service': 'kisters',
+         'type': 'queryServices',
+         'format': 'json'}
+
+     def __init__(self):
+         self._url = None
+         self._args = None
+         self.response = None
+
+     def test_connection(self):
+         timeout = 5
+         try:
+             # Using requests.head() to fetch headers is faster than requests.get()
+             # as it doesn't download the full content
+             response = requests.head('http://wiskiweb01.pca.state.mn.us', timeout=timeout)
+
+             # raise_for_status() raises an HTTPError for 4xx or 5xx status codes
+             response.raise_for_status()
+
+             # If no exception was raised, the website is considered "up"
+             return True, f"Website is UP (Status Code: {response.status_code})"
+
+         except ConnectionError as e:
+             # Handles DNS failures, refused connections, etc.
+             return False, f"Website is DOWN (Connection Error): {e}"
+         except Timeout as e:
+             # Handles cases where the server takes too long to respond
+             return False, f"Website is DOWN (Timeout Error): {e}"
+         except HTTPError as e:
+             # Handles HTTP errors like 404 Not Found, 500 Internal Server Error, etc.
+             return False, f"Website is experiencing issues (HTTP Error): {e}"
+         except RequestException as e:
+             # Handles any other exceptions that might occur during the request
+             return False, f"An unexpected error occurred: {e}"
+
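+     # Illustrative use of the check above:
+     #   ok, message = Service().test_connection()
+     #   if not ok:
+     #       raise RuntimeError(message)
+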
+     def _requestTypes(self):
+         url = self.url({'request': 'getrequestinfo'})
+         return requests.get(url).json()[0]
+
+     def getRequests(self):
+         return list(self._requestTypes()['Requests'].keys())
+
+     def queryfields(self, request_type):
+         return list(self._requestTypes()['Requests'][request_type]['QueryFields']['Content'].keys())
+
+     def returnfields(self, request_type):
+         return list(self._requestTypes()['Requests'][request_type]['Returnfields']['Content'].keys())
+
+     def optionalfields(self, request_type):
+         return list(self._requestTypes()['Requests'][request_type]['Optionalfields']['Content'].keys())
+
+     def formats(self, request_type):
+         return list(self._requestTypes()['Requests'][request_type]['Formats']['Content'].keys())
+
+     def info(self, request_type):
+         # Return the full metadata block for a single request type
+         return self._requestTypes()['Requests'][request_type]
+
+     def url(self, args_dict):
+         args_dict = self.base_dict | args_dict
+         args = []
+         for k, v in args_dict.items():
+             if v is None:
+                 continue
+             elif isinstance(v, list):
+                 v = ','.join(str(vv) for vv in v)
+             args.append(f'{k}={v}')
+         args = '&'.join(args)
+
+         url = self.base_url + args
+         self._url = url
+         return url
+
+     def get_json(self, args_dict):
+         # Download request
+         self.response = requests.get(self.url(args_dict))
+         if self.response.status_code != 200:
+             print('Error: ' + self.response.json()['message'])
+             self.response.raise_for_status()  # raises an exception for non-2xx responses
+
+         return self.response.json()
+
+     def df(self, args_dict):
+         get_requests = self.get_json(args_dict)
+         # Convert to dataframe
+         if args_dict['request'] in ['getTimeseriesValues']:
+             dfs = []
+             for get_request in get_requests:
+                 df = pd.DataFrame(get_request['data'], columns=get_request['columns'].split(','))
+                 del get_request['data']
+                 del get_request['rows']
+                 del get_request['columns']
+                 for k, v in get_request.items():
+                     df[k] = v
+                 dfs.append(df)
+             df = pd.concat(dfs)
+         else:
+             df = pd.DataFrame(get_requests[1:], columns=get_requests[0])
+
+         return df
+
+     def get(self, args):
+         request_type = args['request']
+         #assert(request_type in self.getRequests())
+         _args = {queryfield: None for queryfield in self.queryfields(request_type)} | {optionalfield: None for optionalfield in self.optionalfields(request_type)}
+         args = {**_args, **args}
+         self._args = args
+         return self.df(args)
+
+     def _filter(self, args):
+         '''
+         Guard for ensuring not too many values are requested, and for determining
+         the proper division given the number of timeseries, the timeseries length,
+         and the timeseries sampling interval (minute, hour, or daily data).
+         '''
+         MAX_OUTPUT = 240000  # True max output is 250,000 but giving myself a bit of a buffer
+
+         n_timeseries = 1
+         n_years = 1
+         # 1 minute-resolution timeseries for 1 year
+         n_values = 60*24*365*n_timeseries*n_years
+
+         if n_values < MAX_OUTPUT:
+             return 0  # small enough to fetch in a single request
+         # Otherwise, split the request into ceil(n_values / MAX_OUTPUT) pieces
+         return -(-n_values // MAX_OUTPUT)
+
+ '''
+ Potential use cases:
+
+ 1. Timeseries for a given ts_id
+ 2. All timeseries for a given station
+ 3. All timeseries for a given parameter
+ 4. All timeseries for a given huc_id
+ 5. All timeseries of a given resolution
+ '''
+
+ '''
+ Aggregate (aggregate) - Builds representative periodic values.
+ Attributes: Interval (MANDATORY): HHMMSS, decadal, yearly, year, monthly, month, daily, day, hourly, hour
+             Aggregation Types (MANDATORY LIST): min, max, mean, average, total, counts, perc-#
+ Returnfields: Count, Interpolation Type, Average, Quality Code, Mean, Total,
+               Maximum, Minimum, Timestamp, P#
+ Examples: aggregate(daily~total)
+           aggregate(yearly~mean~min~max)
+           aggregate(hourly~perc-25~perc-75)
+ '''
+
+ VALID_AGGREGATION_TYPES = ['min', 'max', 'mean', 'average', 'total', 'counts']
+ VALID_INTERVALS = ['decadal', 'yearly', 'year', 'monthly', 'month', 'daily', 'day', 'hourly', 'hour']
+ SERVICE = Service()
+
+ def construct_aggregation(interval, aggregation_type):
+     validate_interval(interval)
+     validate_aggregation_type(aggregation_type)
+     return f'aggregate({interval}~{aggregation_type})'
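+
+ # Illustrative results, per the aggregate() syntax documented above:
+ #   construct_aggregation('daily', 'total')    -> 'aggregate(daily~total)'
+ #   construct_aggregation('013000', 'perc-25') -> 'aggregate(013000~perc-25)'  # custom 1h30m interval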
+
+ def validate_aggregation_type(aggregation_type):
+     assert(aggregation_type in VALID_AGGREGATION_TYPES or validate_percentile(aggregation_type))
+     return True
+
+ def validate_percentile(aggregation_type):
+     assert(aggregation_type.startswith('perc-'))
+     perc_value = aggregation_type.split('-')[1]
+     assert(perc_value.isdigit())
+     perc_value = int(perc_value)
+     assert(0 < perc_value < 100)
+     return True
+
+ def validate_interval(interval):
+     assert(interval in VALID_INTERVALS or validate_custom_interval(interval))
+     return True
+
+ def validate_custom_interval(interval: str):
+     # Custom interval in HHMMSS format
+     assert(len(interval) == 6)
+     assert(all(char.isdigit() for char in interval))
+     assert(0 <= int(interval[0:2]) < 24)  # hours
+     assert(0 <= int(interval[2:4]) < 60)  # minutes
+     assert(0 <= int(interval[4:6]) < 60)  # seconds
+     return True
+
+ def test_connection():
+     return SERVICE.test_connection()
+
+ def get(args_dict):
+     return SERVICE.get(args_dict)
+
+ def get_ts(
+         ts_id,
+         aggregation_interval = None,
+         aggregation_type = None,
+         start_date = '1996-01-01',
+         end_date = '2050-12-31',
+         stationgroup_id = None,  # NOTE: accepted but not currently passed to the request
+         timezone = 'GMT-6',
+         as_json = False):
+
+     if (aggregation_interval is not None) and (aggregation_type is not None):
+         transformation = construct_aggregation(aggregation_interval, aggregation_type)
+         ts_id = f'{ts_id};{transformation}'
+
+     args = {'request': 'getTimeseriesValues',
+             'ts_id': ts_id,
+             'from': start_date,
+             'to': end_date,
+             'returnfields': ['Timestamp', 'Value', 'Quality Code', 'Quality Code Name'],
+             'metadata': 'true',
+             'md_returnfields': ['ts_unitsymbol',
+                                 'ts_name',
+                                 'ts_id',
+                                 'station_no',
+                                 'station_name',
+                                 'station_latitude',
+                                 'station_longitude',
+                                 'parametertype_id',
+                                 'parametertype_name',
+                                 'stationparameter_no',
+                                 'stationparameter_name'],
+             'timezone': timezone,
+             'ca_sta_returnfields': ['stn_HUC12', 'stn_EQuIS_ID']}
+
+     if as_json:
+         output = SERVICE.get_json(args)
+     else:
+         output = SERVICE.get(args)
+     return output
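+
+ # Illustrative call (ts_id borrowed from the examples at the bottom of this file):
+ #   df = get_ts(424663010, aggregation_interval='daily', aggregation_type='mean')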
+
+ def get_stations(
+         huc_id = None,
+         parametertype_id = None,
+         stationgroup_id = None,
+         stationparameter_no = None,
+         station_no = None,
+         returnfields = None):
+
+     if returnfields is None:
+         returnfields = []
+     returnfields = list(set(['ca_sta', 'station_no', 'station_name'] + returnfields))
+
+     args = {'request': 'getStationList',
+             'stationparameter_no': stationparameter_no,
+             'stationgroup_id': stationgroup_id,
+             'parametertype_id': parametertype_id,
+             'station_no': station_no,
+             'returnfields': returnfields,
+             'ca_sta_returnfields': ['stn_HUC12', 'stn_EQuIS_ID', 'stn_AUID', 'hydrounit_title', 'hydrounit_no', 'NearestTown']
+             }
+
+     df = SERVICE.get(args)
+     # huc_id is matched as a prefix of the 12-digit HUC, so shorter HUC levels work too
+     if huc_id is not None:
+         df = df.loc[df['stn_HUC12'].str.startswith(huc_id)]
+     return df
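+
+ # Illustrative call (HUC borrowed from the examples at the bottom of this file):
+ #   stations = get_stations(huc_id='07010205')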
+
+ def get_ts_ids(
+         station_nos,
+         parametertype_id = None,
+         stationparameter_no = None,
+         stationgroup_id = None,  # NOTE: accepted but not currently passed to the request
+         ts_name = None,
+         returnfields = None):
+
+     if returnfields is None:
+         returnfields = ['ts_id', 'ts_name', 'ca_sta', 'station_no',
+                         'ts_unitsymbol',
+                         'parametertype_id', 'parametertype_name',
+                         'station_latitude', 'station_longitude',
+                         'stationparameter_no', 'stationparameter_name',
+                         'station_name',
+                         'coverage', 'ts_density']
+
+     args = {'request': 'getTimeseriesList',
+             'station_no': station_nos,
+             'parametertype_id': parametertype_id,
+             'stationparameter_no': stationparameter_no,
+             'ts_name': ts_name,
+             'returnfields': returnfields,
+             'ca_sta_returnfields': ['stn_HUC12', 'stn_EQuIS_ID', 'stn_AUID']}
+
+     df = SERVICE.get(args)
+     return df
+
+ def get_wplmn(station_nos):
+
+     PARAMETERS_MAP = {'5004': 'TP Load',
+                       '5005': 'TP Conc',
+                       '5014': 'TSS Load',
+                       '5015': 'TSS Conc',
+                       '5024': 'N Load',
+                       '5025': 'N Conc',
+                       '5034': 'OP Load',
+                       '5035': 'OP Conc',
+                       '5044': 'TKN Load',
+                       '5045': 'TKN Conc',
+                       '262' : 'Flow'}
+
+     # get_ts_ids and get_ts are module-level functions here, not methods
+     ts_ids = get_ts_ids(station_nos = station_nos,
+                         stationgroup_id = '1319204',
+                         stationparameter_no = list(PARAMETERS_MAP.keys()),
+                         ts_name = ['20.Day.Mean'])
+
+     if len(ts_ids) == 0:
+         print('No WPLMN Sites Available')
+         return pd.DataFrame()
+
+     dfs = []
+     for ts_id in ts_ids['ts_id']:
+         dfs.append(get_ts(ts_id))
+         time.sleep(1)  # pause between requests to avoid hammering the service
+
+     return pd.concat(dfs)
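+
+ # Illustrative call (station number borrowed from the examples below):
+ #   wplmn = get_wplmn('W25060001')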
+
+ # nutrient
+ #  -N03N02
+ #  -OP
+ #  -NH3
+ #  -TP
+ #  -DO
+ #  -CHla
+ # temperature
+ # flow
+
+ # Example calls with the current module-level API:
+ # df = get_ts(ts_id = 424663010)
+ # df = get_ts_ids(station_nos = 'W25060001')
+ # df = get_stations(huc_id = '07020005')
+ # df = get_stations(huc_id = '07010205', stationgroup_id = '1319204', parametertype_id = 11500)
mpcaHydro/reports.py ADDED
@@ -0,0 +1,80 @@
+ from pathlib import Path
+ import duckdb
+
+ #TODO: ensure all reports are actually in the reports schema
+
+ class reportManager():
+     '''Thin wrapper that opens a read-only DuckDB connection per report.'''
+     def __init__(self, db_path: Path):
+         self.db_path = db_path
+
+     def wiski_qc_counts(self):
+         with duckdb.connect(self.db_path, read_only=True) as con:
+             return wiski_qc_counts(con)
+
+     def constituent_summary(self, constituent: str = None):
+         with duckdb.connect(self.db_path, read_only=True) as con:
+             return constituent_summary(con, constituent)
+
+     def station_reach_pairs(self):
+         with duckdb.connect(self.db_path, read_only=True) as con:
+             return station_reach_pairs(con)
+
+     def outlet_summary(self):
+         with duckdb.connect(self.db_path, read_only=True) as con:
+             return outlet_summary(con)
+
+ def outlet_summary(con: duckdb.DuckDBPyConnection):
+     query = '''
+         SELECT *
+         FROM
+             reports.outlet_constituent_summary
+         ORDER BY
+             outlet_id,
+             constituent
+     '''
+     df = con.execute(query).fetch_df()
+     return df
+
+ def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
+     query = '''
+         SELECT *
+         FROM
+             staging.wiski_qc_count
+         ORDER BY
+             station_no,
+             parametertype_name
+     '''
+     df = con.execute(query).fetch_df()
+     return df
+
+ def constituent_summary(con: duckdb.DuckDBPyConnection, constituent: str = None):
+     query = '''
+         SELECT *
+         FROM
+             reports.constituent_summary
+         ORDER BY
+             station_id,
+             station_origin,
+             constituent
+     '''
+     df = con.execute(query).fetch_df()
+     if constituent is not None:
+         df = df[df['constituent'] == constituent]
+     return df
+
+ def station_reach_pairs(con: duckdb.DuckDBPyConnection):
+     query = '''
+         SELECT *
+         FROM
+             reports.station_reach_pairs
+         ORDER BY
+             outlet_id,
+             station_id
+     '''
+     df = con.execute(query).fetch_df()
+     return df
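+
+ # Minimal usage sketch (the database path here is hypothetical):
+ #   manager = reportManager(Path('hydro_reports.duckdb'))
+ #   qc = manager.wiski_qc_counts()
+ #   tp_summary = manager.constituent_summary('TP')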