mpcaHydro 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mpcaHydro/data/WISKI_QUALITY_CODES.csv +71 -0
- mpcaHydro/data/outlets.duckdb +0 -0
- mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcaHydro/data_manager.py +142 -314
- mpcaHydro/equis.py +488 -0
- mpcaHydro/etlSWD.py +4 -5
- mpcaHydro/etlWISKI.py +39 -23
- mpcaHydro/etlWPLMN.py +2 -2
- mpcaHydro/outlets.py +371 -0
- mpcaHydro/pywisk.py +381 -0
- mpcaHydro/reports.py +80 -0
- mpcaHydro/warehouse.py +581 -0
- mpcaHydro/warehouseManager.py +47 -0
- mpcaHydro/wiski.py +308 -0
- mpcaHydro/xref.py +74 -0
- {mpcahydro-2.0.4.dist-info → mpcahydro-2.0.5.dist-info}/METADATA +2 -1
- mpcahydro-2.0.5.dist-info/RECORD +23 -0
- {mpcahydro-2.0.4.dist-info → mpcahydro-2.0.5.dist-info}/WHEEL +1 -1
- mpcaHydro/WISKI.py +0 -352
- mpcaHydro/helpers.py +0 -0
- mpcahydro-2.0.4.dist-info/RECORD +0 -13
mpcaHydro/WISKI.py
DELETED
|
@@ -1,352 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
Created on Mon Jul 10 16:18:03 2023
|
|
4
|
-
|
|
5
|
-
@author: mfratki
|
|
6
|
-
"""
|
|
7
|
-
import requests
|
|
8
|
-
import pandas as pd
|
|
9
|
-
import time
|
|
10
|
-
|
|
11
|
-
#TODO: Use this url to make sure web service is working https://wiskiweb01.pca.state.mn.us/
|
|
12
|
-
class Service():
    """Minimal client for the KiWIS query web service at ``base_url``.

    Requests are described by plain dictionaries of query arguments.  The
    arguments in ``base_dict`` are merged into every request, the URL is
    built by :meth:`url`, and responses are returned either as decoded JSON
    (:meth:`get_json`) or as a pandas DataFrame (:meth:`df`, :meth:`get`).
    """

    base_url = 'http://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
    # Query arguments sent with every request; per-call arguments override them.
    base_dict = {
        'datasource': '0',
        'service': 'kisters',
        'type': 'queryServices',
        'format': 'json'}

    def __init__(self):
        self._url = None           # last URL built by url()
        self._args = None          # last complete argument dict used by get()
        self._request_info = None  # cached 'getrequestinfo' payload

    def _requestTypes(self):
        """Return the service's self-description (first ``getrequestinfo`` item).

        The payload is downloaded once per instance and cached, so the
        metadata helpers below (and therefore :meth:`get`) do not re-download
        it on every call.
        """
        # getattr() keeps this working for instances created without __init__.
        info = getattr(self, '_request_info', None)
        if info is None:
            response = requests.get(self.url({'request': 'getrequestinfo'}))
            response.raise_for_status()
            info = response.json()[0]
            self._request_info = info
        return info

    def _fields(self, request_type, section):
        """Return the key names listed under ``section`` for ``request_type``."""
        return list(self._requestTypes()['Requests'][request_type][section]['Content'].keys())

    def getRequests(self):
        """Return the names of all request types the service advertises."""
        return list(self._requestTypes()['Requests'].keys())

    def queryfields(self, request_type):
        """Return the query field names accepted by ``request_type``."""
        return self._fields(request_type, 'QueryFields')

    def returnfields(self, request_type):
        """Return the return field names available for ``request_type``."""
        return self._fields(request_type, 'Returnfields')

    def optionalfields(self, request_type):
        """Return the optional field names accepted by ``request_type``."""
        return self._fields(request_type, 'Optionalfields')

    def formats(self, request_type):
        """Return the output format names supported by ``request_type``."""
        return self._fields(request_type, 'Formats')

    def info(self, request_type):
        """Return a keys view of all advertised request types.

        NOTE(review): ``request_type`` is accepted but not used; the result
        is the same for every value.  Kept as-is for backward compatibility.
        """
        return self._requestTypes()['Requests'].keys()

    def url(self, args_dict):
        """Build the request URL for ``args_dict`` and remember it in ``_url``.

        ``args_dict`` is merged over ``base_dict``.  Arguments whose value is
        ``None`` are dropped and list values are joined with commas.  Values
        are inserted as-is (no percent-encoding is applied here).
        """
        merged = self.base_dict | args_dict
        parts = []
        for key, value in merged.items():
            if value is None:
                continue
            if isinstance(value, list):
                value = ','.join(str(item) for item in value)
            parts.append(f'{key}={value}')

        url = self.base_url + '&'.join(parts)
        self._url = url
        return url

    def get_json(self, args_dict):
        """Send the request described by ``args_dict`` and return decoded JSON.

        On a non-200 response the message from the response body (or the HTTP
        reason phrase when the body has none) is printed first.  Error
        statuses then raise ``requests.HTTPError`` via ``raise_for_status``;
        any other non-200 status returns ``1``, as before.
        """
        response = requests.get(self.url(args_dict))
        if response.status_code != 200:
            # Report the message *before* raising: previously
            # raise_for_status() ran first, so it was never shown for errors.
            try:
                message = response.json()['message']
            except (ValueError, KeyError, TypeError):
                message = response.reason
            print('Error: ' + str(message))
            response.raise_for_status()  # raises for 4xx/5xx responses
            return 1
        return response.json()

    def df(self, args_dict):
        """Run the request and return the result as a pandas DataFrame.

        ``getTimeseriesValues`` responses are a list of per-timeseries
        dictionaries: each becomes one frame built from its ``data`` rows and
        comma-separated ``columns``, every remaining key is broadcast as a
        constant column, and the frames are concatenated.  All other requests
        are treated as a list of rows whose first row is the header.  An
        empty response yields an empty DataFrame.
        """
        payload = self.get_json(args_dict)

        if args_dict['request'] in ['getTimeseriesValues']:
            frames = []
            for series in payload:
                frame = pd.DataFrame(series['data'],
                                     columns=series['columns'].split(','))
                # Everything except the table itself is per-series metadata.
                for key, value in series.items():
                    if key not in ('data', 'rows', 'columns'):
                        frame[key] = value
                frames.append(frame)
            if not frames:
                # pd.concat([]) raises ValueError; no data is not an error.
                return pd.DataFrame()
            return pd.concat(frames)

        if not payload:
            return pd.DataFrame()
        return pd.DataFrame(payload[1:], columns=payload[0])

    def get(self, args):
        """Validate ``args['request']`` and return the response as a DataFrame.

        Every query and optional field the service lists for the request type
        is pre-filled with ``None`` (so it is dropped from the URL) and then
        overridden by ``args``.  The merged dict is kept in ``_args``.

        Raises:
            AssertionError: if the request type is not advertised by the
                service.  Raised explicitly so the check survives ``-O``.
        """
        request_type = args['request']
        if request_type not in self.getRequests():
            raise AssertionError(f'Unknown request type: {request_type!r}')

        defaults = {field: None for field in self.queryfields(request_type)}
        defaults |= {field: None for field in self.optionalfields(request_type)}
        args = {**defaults, **args}
        self._args = args
        return self.df(args)

    def _filter(self, args):
        '''
        Filter for ensuring not too many values are requested and determining the proper division
        given the number of timeseries, timeseries length, and timeseries sampling interval

        NOTE: unfinished placeholder. ``args`` is not used yet and the sizes
        below are hard-coded; it returns 0 when the estimate fits in one
        request and None otherwise.
        '''
        # Sampling intervals to be considered: 'minute', 'hour', 'daily'

        # True max output is 250,000 values; keep a bit of a buffer.
        MAX_OUTPUT = 240000

        n_timeseries = 1
        n_years = 1
        # Minute values for n_timeseries timeseries over n_years years.
        n_values = 60*24*365*n_timeseries*n_years

        if n_values < MAX_OUTPUT:
            return 0
        # TODO: split the request into about n_values / MAX_OUTPUT pieces.
        return None
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
'''
|
|
136
|
-
Potential use cases:
|
|
137
|
-
|
|
138
|
-
1. timeseries for a given ts_id
|
|
139
|
-
2. All timeseries for a given station
|
|
140
|
-
3. All timeseries for a given parameter
|
|
141
|
-
4. All timeseries for a given huc_id
|
|
142
|
-
5. All timeseries of a given resolution
|
|
143
|
-
|
|
144
|
-
'''
|
|
145
|
-
|
|
146
|
-
class pyWISK():
|
|
147
|
-
|
|
148
|
-
def __init__(self):
|
|
149
|
-
self.service = Service()
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def get(self,args_dict):
|
|
153
|
-
return self.service.get(args_dict)
|
|
154
|
-
|
|
155
|
-
def get_ts(self,
|
|
156
|
-
ts_ids = None,
|
|
157
|
-
huc_id = None,
|
|
158
|
-
station_nos = None,
|
|
159
|
-
parametertype_id = None,
|
|
160
|
-
parameter_no = None,
|
|
161
|
-
start_date = '1996-01-01',
|
|
162
|
-
end_date = '2050-12-31',
|
|
163
|
-
stationgroup_id = None,
|
|
164
|
-
timezone = 'GMT-6',
|
|
165
|
-
as_json = False):
|
|
166
|
-
|
|
167
|
-
if ts_ids is None:
|
|
168
|
-
print('Determing Timeseries IDs')
|
|
169
|
-
ts_ids = self.get_ts_ids(station_nos,huc_id,parametertype_id)
|
|
170
|
-
print('Done!')
|
|
171
|
-
|
|
172
|
-
#print('Downloading Timeseries Data')
|
|
173
|
-
args = {'request':'getTimeseriesValues',
|
|
174
|
-
'ts_id' : ts_ids,
|
|
175
|
-
'from': start_date,
|
|
176
|
-
'to': end_date,
|
|
177
|
-
'returnfields': ['Timestamp', 'Value', 'Quality Code','Quality Code Name'],
|
|
178
|
-
'metadata': 'true',
|
|
179
|
-
'md_returnfields': ['ts_unitsymbol',
|
|
180
|
-
'ts_name',
|
|
181
|
-
'ts_id',
|
|
182
|
-
'station_no',
|
|
183
|
-
'station_name',
|
|
184
|
-
'station_latitude',
|
|
185
|
-
'station_longitude',
|
|
186
|
-
'parametertype_id',
|
|
187
|
-
'parametertype_name',
|
|
188
|
-
'stationparameter_no',
|
|
189
|
-
'stationparameter_name'],
|
|
190
|
-
'timezone':timezone,
|
|
191
|
-
'ca_sta_returnfields': ['stn_HUC12','stn_EQuIS_ID']}
|
|
192
|
-
|
|
193
|
-
if as_json:
|
|
194
|
-
output = self.service.get_json(args)
|
|
195
|
-
else:
|
|
196
|
-
output = self.service.get(args)
|
|
197
|
-
#print('Done!')
|
|
198
|
-
return output
|
|
199
|
-
|
|
200
|
-
def get_stations(self,
|
|
201
|
-
huc_id = None,
|
|
202
|
-
parametertype_id = None,
|
|
203
|
-
stationgroup_id = None,
|
|
204
|
-
stationparameter_no = None,
|
|
205
|
-
station_no = None,
|
|
206
|
-
returnfields = []):
|
|
207
|
-
|
|
208
|
-
args = {'request':'getStationList'}
|
|
209
|
-
|
|
210
|
-
returnfields = list(set(['ca_sta','station_no','station_name'] + returnfields))
|
|
211
|
-
|
|
212
|
-
args ={'request': 'getStationList',
|
|
213
|
-
'stationparameter_no': stationparameter_no,
|
|
214
|
-
'stationgroup_id': stationgroup_id,
|
|
215
|
-
'parametertype_id': parametertype_id,
|
|
216
|
-
'station_no': station_no,
|
|
217
|
-
#'object_type': object_type,
|
|
218
|
-
'returnfields': returnfields,
|
|
219
|
-
# 'parametertype_id','parametertype_name',
|
|
220
|
-
# 'station_latitude','station_longitude',
|
|
221
|
-
# 'stationparameter_no','stationparameter_name'],
|
|
222
|
-
'ca_sta_returnfields': ['stn_HUC12','stn_EQuIS_ID','stn_AUID','hydrounit_title','hydrounit_no','NearestTown']
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
df = self.service.get(args)
|
|
227
|
-
if huc_id is not None: df = df.loc[df['stn_HUC12'].str.startswith(huc_id)]
|
|
228
|
-
return df
|
|
229
|
-
|
|
230
|
-
def get_ts_ids(self,
|
|
231
|
-
station_nos=None,
|
|
232
|
-
huc_id = None,
|
|
233
|
-
parametertype_id = None,
|
|
234
|
-
stationparameter_no = None,
|
|
235
|
-
stationgroup_id = None,
|
|
236
|
-
ts_name = None,
|
|
237
|
-
returnfields = None):
|
|
238
|
-
|
|
239
|
-
if station_nos is None:
|
|
240
|
-
station_nos = self.get_stations(huc_id,parametertype_id,stationgroup_id,stationparameter_no)['station_no'].to_list()
|
|
241
|
-
|
|
242
|
-
if returnfields is None:
|
|
243
|
-
returnfields = ['ts_id','ts_name','ca_sta','station_no',
|
|
244
|
-
'ts_unitsymbol',
|
|
245
|
-
'parametertype_id','parametertype_name',
|
|
246
|
-
'station_latitude','station_longitude',
|
|
247
|
-
'stationparameter_no','stationparameter_name',
|
|
248
|
-
'station_no','station_name',
|
|
249
|
-
'coverage','ts_density']
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
args ={'request': 'getTimeseriesList',
|
|
253
|
-
'station_no': station_nos,
|
|
254
|
-
'parametertype_id': parametertype_id,
|
|
255
|
-
'stationparameter_no': stationparameter_no,
|
|
256
|
-
'ts_name' : ts_name,
|
|
257
|
-
'returnfields': returnfields,
|
|
258
|
-
'ca_sta_returnfields': ['stn_HUC12','stn_EQuIS_ID','stn_AUID']}
|
|
259
|
-
|
|
260
|
-
df = self.service.get(args)
|
|
261
|
-
return df
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
def get_wplmn(self,station_nos):
|
|
266
|
-
|
|
267
|
-
PARAMETERS_MAP={'5004':'TP Load',
|
|
268
|
-
'5005':'TP Conc',
|
|
269
|
-
'5014':'TSS Load',
|
|
270
|
-
'5015':'TSS Conc',
|
|
271
|
-
'5024':'N Load',
|
|
272
|
-
'5025':'N Conc',
|
|
273
|
-
'5034':'OP Load',
|
|
274
|
-
'5035':'OP Conc',
|
|
275
|
-
'5044':'TKN Load',
|
|
276
|
-
'5045':'TKN Conc',
|
|
277
|
-
'262' :'Flow'}
|
|
278
|
-
|
|
279
|
-
ts_ids = self.get_ts_ids(station_nos = station_nos,
|
|
280
|
-
stationgroup_id = '1319204',
|
|
281
|
-
stationparameter_no = list(PARAMETERS_MAP.keys()),
|
|
282
|
-
ts_name = ['20.Day.Mean'])
|
|
283
|
-
|
|
284
|
-
if len(ts_ids) == 0:
|
|
285
|
-
print('No WPLMN Sites Available')
|
|
286
|
-
return pd.DataFrame()
|
|
287
|
-
|
|
288
|
-
dfs = []
|
|
289
|
-
for ts_id in ts_ids['ts_id']:
|
|
290
|
-
dfs.append(self.get_ts(ts_id))
|
|
291
|
-
time.sleep(1)
|
|
292
|
-
|
|
293
|
-
return pd.concat(dfs)
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
# CONSTITUENT_NAME_NO = {'Q' :['262'],#,'263'],
|
|
297
|
-
# 'WT' :['450'],# , '451' , '450.42','451.42'],
|
|
298
|
-
# 'OP' :['863' ,'5034' ,'5035'],
|
|
299
|
-
# 'DO' :['865' ,'866' , '867'],
|
|
300
|
-
# 'TP' :['5005' ,'5004'],
|
|
301
|
-
# 'TSS':['5014' ,'5015'],
|
|
302
|
-
# 'N' :['5024' ,'5025'],
|
|
303
|
-
# 'TKN':['5044' ,'5045']}
|
|
304
|
-
|
|
305
|
-
# TS_NAME_SELECTOR = {'Q':{'daily':['20.Day.Mean.Archive','20.Day.Mean'],
|
|
306
|
-
# 'unit': ['15.Rated','08.Provisional.Edited']},
|
|
307
|
-
# 'WT':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
308
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']},
|
|
309
|
-
# 'TSS':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
310
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']},
|
|
311
|
-
# 'N':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
312
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']},
|
|
313
|
-
# 'TKN':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
314
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']},
|
|
315
|
-
# 'TP':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
316
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']},
|
|
317
|
-
# 'OP':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
318
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']},
|
|
319
|
-
# 'DO':{'daily':['20.Day.Mean','20.Day.Mean'],
|
|
320
|
-
# 'unit': ['09.Archive','08.Provisional.Edited']}}
|
|
321
|
-
|
|
322
|
-
# def extract(self,station_nos,constituent,resolution):
|
|
323
|
-
# ts_names = self.TS_NAME_SELECTOR[constituent][resolution]
|
|
324
|
-
# data = self.get_ts_ids(station_no = station_nos,stationparameter_no = self.CONSTITUENT_NAME_NO[constituent],ts_name =ts_names)
|
|
325
|
-
# # Filter by MPCA distinction between internal and external sites and how time series are named
|
|
326
|
-
# ts_ids = pd.concat([data.loc[(data['station_no'].str.startswith('E')) & (data['ts_name'] == ts_names[1])],
|
|
327
|
-
# data.loc[(~data['station_no'].str.startswith('E')) & (data['ts_name'] == ts_names[0])]])
|
|
328
|
-
# dfs = [self.get_ts(ts_ids = ts_id) for ts_id in ts_ids['ts_id']]
|
|
329
|
-
# data = pd.concat(dfs)
|
|
330
|
-
# return data
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
# nutrient
|
|
335
|
-
# -N03N02
|
|
336
|
-
# -OP
|
|
337
|
-
# -NH3
|
|
338
|
-
# -TP
|
|
339
|
-
# -DO
|
|
340
|
-
# -CHla
|
|
341
|
-
# temperature
|
|
342
|
-
# flow
|
|
343
|
-
|
|
344
|
-
# test = pyWISK()
|
|
345
|
-
|
|
346
|
-
# df = test.get_ts(ts_ids = 424663010)
|
|
347
|
-
|
|
348
|
-
# df = test.get_ts(station_nos = 'W25060001')
|
|
349
|
-
|
|
350
|
-
# df = test.get_wplmn(huc8_id = '07020005')
|
|
351
|
-
|
|
352
|
-
# df = test.get_ts(huc_id = '07010205',stationgroup_id = '1319204',parametertype_id = 11500)
|
mpcaHydro/helpers.py
DELETED
|
File without changes
|
mpcahydro-2.0.4.dist-info/RECORD
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
mpcaHydro/WISKI.py,sha256=pDNPJ2ypSfx9y4xddXkZK3fkjfWn9uAxRrVLT06q9ZI,13434
|
|
2
|
-
mpcaHydro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
mpcaHydro/data_manager.py,sha256=IxSVBScoLNNGF2S0LK_ezFkfF-4qGWyeQAZroCrQ-W8,14980
|
|
4
|
-
mpcaHydro/etlCSG.py,sha256=5QT6V2dHvNKC9r5-dspt-NpOmECP2LFw1Lyq1zdkqps,2630
|
|
5
|
-
mpcaHydro/etlSWD.py,sha256=F-my6pG34xqz_2y4tuTwEFMYp2JrHp_3QZENYNGoFFE,6341
|
|
6
|
-
mpcaHydro/etlWISKI.py,sha256=NwXPlid9qCkCNwJwDWIUkQuCYf3E9VIGJgWu4Tj7Yzw,21540
|
|
7
|
-
mpcaHydro/etlWPLMN.py,sha256=b44xvx4s7lwXhpRtfR6rj7RnBpbVKXaYqZCr26BexUI,4160
|
|
8
|
-
mpcaHydro/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
mpcaHydro/data/EQUIS_PARAMETER_XREF.csv,sha256=XZPrcZan9irSqFd4UasnPo_NQBcjyFodi0q3FGQphjI,5667
|
|
10
|
-
mpcaHydro/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
|
|
11
|
-
mpcahydro-2.0.4.dist-info/METADATA,sha256=zQW7OYOdMDDj5TwEw2P8FWB4MkYcXsnUMIriR3_IL74,543
|
|
12
|
-
mpcahydro-2.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
13
|
-
mpcahydro-2.0.4.dist-info/RECORD,,
|