mpcaHydro 2.0.4-py3-none-any.whl → 2.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mpcaHydro/wiski.py ADDED
@@ -0,0 +1,308 @@
+ import pandas as pd
+ from mpcaHydro import pywisk
+ import baseflow as bf
+ import time
+
+
+ #%% Define Selectors and Maps
+ PARAMETERTYPE_MAP = {'11522': 'TP',
+                      '11531': 'TP',
+                      '11532': 'TSS',
+                      '11523': 'TSS',
+                      '11526': 'N',
+                      '11519': 'N',
+                      '11520': 'OP',
+                      '11528': 'OP',
+                      '11530': 'TKN',
+                      '11521': 'TKN',
+                      '11500': 'Q',
+                      '11504': 'WT',
+                      '11533': 'DO',
+                      '11507': 'WL'}
+ #STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
+ STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']
+
+ DATA_CODES = [1,3,10,12,15,20,29,30,31,32,34,45,46,47,48,49]
+
+
+ TS_NAME_SELECTOR = {'Q': {'Internal': {'daily': '20.Day.Mean.Archive',
+                                        'unit': '15.Rated'},
+                           'External': {'daily': '20.Day.Mean',
+                                        'unit': '08.Provisional.Edited'}},
+                     'WT': {'Internal': {'daily': '20.Day.Mean',
+                                         'unit': '09.Archive'},
+                            'External': {'daily': '20.Day.Mean',
+                                         'unit': '08.Provisional.Edited'}},
+                     'TSS': {'Internal': {'daily': '20.Day.Mean',
+                                          'unit': '09.Archive'},
+                             'External': {'daily': '20.Day.Mean',
+                                          'unit': '08.Provisional.Edited'}},
+                     'N': {'Internal': {'daily': '20.Day.Mean',
+                                        'unit': '09.Archive'},
+                           'External': {'daily': '20.Day.Mean',
+                                        'unit': '08.Provisional.Edited'}},
+                     'TKN': {'Internal': {'daily': '20.Day.Mean',
+                                          'unit': '09.Archive'},
+                             'External': {'daily': '20.Day.Mean',
+                                          'unit': '08.Provisional.Edited'}},
+                     'TP': {'Internal': {'daily': '20.Day.Mean',
+                                         'unit': '09.Archive'},
+                            'External': {'daily': '20.Day.Mean',
+                                         'unit': '08.Provisional.Edited'}},
+                     'OP': {'Internal': {'daily': '20.Day.Mean',
+                                         'unit': '09.Archive'},
+                            'External': {'daily': '20.Day.Mean',
+                                         'unit': '08.Provisional.Edited'}},
+                     'DO': {'Internal': {'daily': '20.Day.Mean',
+                                         'unit': '09.Archive'},
+                            'External': {'daily': '20.Day.Mean',
+                                         'unit': '08.Provisional.Edited'}},
+                     'TRB': {'Internal': {'daily': '20.Day.Mean',
+                                          'unit': '09.Archive'},
+                             'External': {'daily': '20.Day.Mean',
+                                          'unit': '08.Provisional.Edited'}}}
+
+
+
+ CONSTITUENT_NAME_NO = {'Q'  : ['262*'],  # ,'263'
+                        'WT' : ['450*', '451*'],  # '450.42','451.42'
+                        'OP' : ['863*'],
+                        'DO' : ['866*'],
+                        'TRB': ['811*'],
+                        'TP' : None,
+                        'TSS': None,
+                        'N'  : None,
+                        'TKN': None}
+
+ CONSTITUENT_NAME_NO_WPLMN = {'Q'  : ['262*'],  # ,'263'
+                              'WT' : ['450*', '451*'],  # '450.42','451.42'
+                              'OP' : ['863*','5034','5035'],
+                              'DO' : ['866*'],
+                              'TP' : ['5005','5004'],
+                              'TSS': ['5014','5015'],
+                              'N'  : ['5024','5025'],
+                              'TKN': ['5044','5045']}
+
+ VALID_CONSTITUENTS = ['Q','WT','OP','DO','TP','TSS','N','TKN','TRB']
+
+ def test_connection():
+     '''
+     Test the connection to the WISKI database.
+     '''
+     return pywisk.test_connection()
+
+ def download(station_ids: list, start_year: int = 1996, end_year: int = 2030, wplmn: bool = False):
+     '''
+     Fetch data for the given station IDs from the WISKI database using the KISTERS API.
+     '''
+     dfs = [pd.DataFrame()]
+     for station_id in station_ids:
+         if not isinstance(station_id, str):
+             raise ValueError(f'Station ID {station_id} is not a string')
+         print('Downloading Timeseries Data')
+         df = pd.concat([_download(constituent, station_id, start_year, end_year, wplmn) for constituent in VALID_CONSTITUENTS])
+
+         if not df.empty:
+             dfs.append(df)
+     df = pd.concat(dfs)
+
+     # Flag stations that belong to the WPLMN station group
+     station_metadata = pywisk.get_stations(station_no = station_ids, returnfields = ['stationgroup_id'])
+     if any(station_metadata['stationgroup_id'].isin(['1319204'])):
+         df['wplmn_flag'] = 1
+     else:
+         df['wplmn_flag'] = 0
+     print('Done!')
+
+     return df
+
+ def _download(constituent, station_nos, start_year = 1996, end_year = 2030, wplmn = False):
+
+     # Station numbers starting with 'E' use the external time series names
+     if station_nos[0] == 'E':
+         ts_names = TS_NAME_SELECTOR[constituent]['External']
+     else:
+         ts_names = TS_NAME_SELECTOR[constituent]['Internal']
+
+     if wplmn:
+         constituent_nos = CONSTITUENT_NAME_NO_WPLMN[constituent]
+     else:
+         constituent_nos = CONSTITUENT_NAME_NO[constituent]
+
+     if constituent_nos is not None:
+         # Prefer the unit-value time series; fall back to the daily series
+         ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
+                                    stationparameter_no = constituent_nos,
+                                    ts_name = ts_names['unit'])
+
+         if ts_ids.empty:
+             ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
+                                        stationparameter_no = constituent_nos,
+                                        ts_name = ts_names['daily'])
+         if ts_ids.empty:
+             return pd.DataFrame()
+
+         df = convert_to_df(ts_ids['ts_id'], start_year, end_year)
+
+         if df.empty:
+             return pd.DataFrame()
+     else:
+         df = pd.DataFrame()
+     return df
+
+
+ def download_chunk(ts_id, start_year = 1996, end_year = 2030, interval = 4, as_json = False):
+     frames = [pd.DataFrame()]
+
+     # Request the time series in chunks of `interval` years to keep each request small
+     for start in range(start_year, end_year, interval):
+         end = int(start + interval - 1)
+         if end > end_year:
+             end = end_year
+         df = pywisk.get_ts(ts_id, start_date = f'{start}-01-01', end_date = f'{end}-12-31', as_json = as_json)
+         if not df.empty:
+             df['Timestamp'] = pd.to_datetime(df['Timestamp']).dt.tz_localize(None)
+             frames.append(df)
+         time.sleep(.1)
+     return pd.concat(frames)
+
+ def convert_to_df(ts_ids, start_year = 1996, end_year = 2030):
+     dfs = []
+     for ts_id in ts_ids:
+         dfs.append(download_chunk(ts_id, start_year, end_year))
+         time.sleep(.1)
+     df = pd.concat(dfs)
+     return df
+
+
+ def discharge(station_nos, start_year = 1996, end_year = 2030):
+     return _download('Q', station_nos, start_year, end_year)
+
+
+ def temperature(station_nos, start_year = 1996, end_year = 2030):
+     return _download('WT', station_nos, start_year, end_year)
+
+
+ def orthophosphate(station_nos, start_year = 1996, end_year = 2030):
+     return _download('OP', station_nos, start_year, end_year)
+
+ def dissolved_oxygen(station_nos, start_year = 1996, end_year = 2030):
+     return _download('DO', station_nos, start_year, end_year)
+
+ def nitrogen(station_nos, start_year = 1996, end_year = 2030):
+     return _download('N', station_nos, start_year, end_year)
+
+ def total_suspended_solids(station_nos, start_year = 1996, end_year = 2030):
+     return _download('TSS', station_nos, start_year, end_year)
+
+ def total_phosphorous(station_nos, start_year = 1996, end_year = 2030):
+     return _download('TP', station_nos, start_year, end_year)
+
+ def tkn(station_nos, start_year = 1996, end_year = 2030):
+     return _download('TKN', station_nos, start_year, end_year)
+
+
+
+
+
+ def convert_units(df):
+     '''
+     Convert units to standard (US customary) units.
+     '''
+     # Water temperature: convert °C values to °F, then relabel the unit
+     df.loc[:,'ts_unitsymbol'] = df['ts_unitsymbol'].str.lower()
+     df.loc[df['ts_unitsymbol'] == '°c','Value'] = df.loc[df['ts_unitsymbol'] == '°c','Value'].apply(lambda x: (x*9/5)+32)
+     df.replace({'ts_unitsymbol':'°c'},'degf',inplace = True)
+
+     # Convert kg to lb
+     df.loc[df['ts_unitsymbol'] == 'kg','Value'] = df.loc[df['ts_unitsymbol'] == 'kg','Value'].apply(lambda x: x*2.20462)
+     df.replace({'ts_unitsymbol':'kg'},'lb',inplace=True)
+
+     # Rename ft³/s to cfs
+     df.replace({'ts_unitsymbol':'ft³/s'},'cfs',inplace=True)
+     return df
+
+
+ def normalize_columns(df):
+     '''
+     Normalize column names and units
+     '''
+     # Map parameter numbers to constituent names
+     df['constituent'] = df['parametertype_id'].map(PARAMETERTYPE_MAP)
+
+     df.rename(columns={
+         'station_no':'station_id',
+         'Timestamp':'datetime',
+         'Value':'value',
+         'ts_unitsymbol':'unit',
+         'Quality Code':'quality_code',
+         'Quality Code Name':'quality_code_name'}, inplace=True)
+     return df
+
+
+
+ def filter_quality_codes(df, data_codes):
+     '''
+     Filter dataframe by valid quality codes
+     '''
+     return df.loc[df['quality_code'].isin(data_codes)]
+
+ def average_results(df):
+     #df['datetime'] = pd.to_datetime(df.loc[:,'datetime'])
+     df.loc[:,'datetime'] = df.loc[:,'datetime'].dt.round('h')
+     return df.groupby(['station_id', 'datetime', 'constituent', 'unit']).agg(value=('value', 'mean')).reset_index()
+ # Convert units
+
+
+ def calculate_baseflow(df, method = 'Boughton'):
+     dfs = [df]
+     for station_id in df['station_id'].unique():
+         df_station = df.query(f'constituent == "Q" & station_id == "{station_id}"')[['datetime', 'value']].copy().set_index('datetime')
+         if df_station.empty:
+             continue
+         else:
+             df_baseflow = bf.single(df_station['value'], area = None, method = method, return_kge = False)[0][method]
+
+             df_baseflow = pd.DataFrame(
+                 {
+                     "station_id": station_id,
+                     "station_origin": 'wiski',
+                     "datetime": df_baseflow.index,
+                     "value": df_baseflow.values,
+                     "constituent": 'QB',
+                     "unit": 'cfs',
+                 }
+             )
+             dfs.append(df_baseflow)
+
+     return pd.concat(dfs)
+
+
+ def normalize(df):
+     '''
+     Standardize raw WISKI data (column names and units) without any other transformations.
+     ---
+     Parameters:
+         df (pandas.DataFrame): Raw WISKI data
+     Returns:
+         pandas.DataFrame: Normalized WISKI data
+     '''
+
+     df = convert_units(df)
+     df = normalize_columns(df)
+     return df
+
+ def transform(df, filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
+     '''
+     Normalize raw WISKI data, then filter quality codes, average results to the hour,
+     and append a calculated baseflow series.
+     '''
+     df = normalize(df)
+     if filter_qc_codes:
+         if data_codes is None:
+             data_codes = DATA_CODES
+         df = filter_quality_codes(df, data_codes)
+     df = average_results(df)
+     df = calculate_baseflow(df, method = baseflow_method)
+     df['station_origin'] = 'wiski'
+     #df.set_index('datetime',inplace=True)
+     return df
+
+
+
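Taken together, wiski.py wires the pywisk client into a small download-and-clean pipeline: download() pulls raw time series for each constituent at the requested stations, and transform() normalizes column names and units, filters quality codes, averages results to the hour, and appends a Boughton baseflow series labeled 'QB'. A minimal usage sketch (the station number below is a hypothetical placeholder, not a real MPCA station):

    from mpcaHydro import wiski

    # 'E12345678' is a made-up station number used only for illustration
    raw = wiski.download(['E12345678'], start_year=2015, end_year=2020)
    tidy = wiski.transform(raw)   # normalize, QC-filter, hourly-average, add 'QB' baseflow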
mpcaHydro/xref.py ADDED
@@ -0,0 +1,74 @@
+ import pandas as pd
+ from pathlib import Path
+
+ WISKI_EQUIS_XREF = pd.read_csv(Path(__file__).parent/'data/WISKI_EQUIS_XREF.csv')
+ #WISKI_EQUIS_XREF = pd.read_csv('C:/Users/mfratki/Documents/GitHub/hspf_tools/WISKI_EQUIS_XREF.csv')
+
+
+ def are_lists_identical(nested_list):
+     # Sort each sublist
+     sorted_sublists = [sorted(sublist) for sublist in nested_list]
+     # Compare all sublists to the first one
+     return all(sublist == sorted_sublists[0] for sublist in sorted_sublists)
+
+ def get_wiski_stations():
+     return list(WISKI_EQUIS_XREF['WISKI_STATION_NO'].unique())
+
+ def get_equis_stations():
+     return list(WISKI_EQUIS_XREF['EQUIS_STATION_ID'].unique())
+
+ def wiski_equis_alias(wiski_station_id):
+     equis_ids = list(set(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WISKI_STATION_NO'] == wiski_station_id,'WISKI_EQUIS_ID'].to_list()))
+     equis_ids = [equis_id for equis_id in equis_ids if not pd.isna(equis_id)]
+     if len(equis_ids) == 0:
+         return []
+     elif len(equis_ids) > 1:
+         print(f'Too Many Equis Stations for {wiski_station_id}')
+         raise ValueError(f'Too Many Equis Stations for {wiski_station_id}')
+     else:
+         return equis_ids[0]
+
+ def wiski_equis_associations(wiski_station_id):
+     equis_ids = list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WISKI_STATION_NO'] == wiski_station_id,'EQUIS_STATION_ID'].unique())
+     equis_ids = [equis_id for equis_id in equis_ids if not pd.isna(equis_id)]
+     if len(equis_ids) == 0:
+         return []
+     else:
+         return equis_ids
+
+ def equis_wiski_associations(equis_station_id):
+     wiski_ids = list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['EQUIS_STATION_ID'] == equis_station_id,'WISKI_STATION_NO'].unique())
+     wiski_ids = [wiski_id for wiski_id in wiski_ids if not pd.isna(wiski_id)]
+     if len(wiski_ids) == 0:
+         return []
+     else:
+         return wiski_ids
+
+ def equis_wiski_alias(equis_station_id):
+     wiski_ids = list(set(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WISKI_EQUIS_ID'] == equis_station_id,'WISKI_STATION_NO'].to_list()))
+     wiski_ids = [wiski_id for wiski_id in wiski_ids if not pd.isna(wiski_id)]
+     if len(wiski_ids) == 0:
+         return []
+     elif len(wiski_ids) > 1:
+         print(f'Too Many WISKI Stations for {equis_station_id}')
+         raise ValueError(f'Too Many WISKI Stations for {equis_station_id}')
+     else:
+         return wiski_ids[0]
+
+ def _equis_wiski_associations(equis_station_ids):
+     wiski_stations = [equis_wiski_associations(equis_station_id) for equis_station_id in equis_station_ids]
+     if are_lists_identical(wiski_stations):
+         return wiski_stations[0]
+     else:
+         return []
+
+ def _stations_by_wid(wid_no, station_origin):
+     if station_origin in ['wiski','wplmn']:
+         station_col = 'WISKI_STATION_NO'
+     elif station_origin in ['equis','swd']:
+         station_col = 'EQUIS_STATION_ID'
+     else:
+         raise ValueError(f'Unknown station_origin: {station_origin}')
+
+     return list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WID'] == wid_no,station_col].unique())
+
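xref.py exposes the station crosswalk shipped as data/WISKI_EQUIS_XREF.csv, mapping WISKI station numbers to EQuIS station IDs and back. A short illustrative sketch (both station identifiers below are hypothetical placeholders):

    from mpcaHydro import xref

    # Placeholder identifiers for illustration only
    xref.wiski_equis_associations('E12345678')   # EQuIS stations linked to a WISKI station
    xref.equis_wiski_alias('S000-123')           # single WISKI alias; ValueError if more than one match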
mpcahydro-2.0.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mpcaHydro
- Version: 2.0.4
+ Version: 2.0.5
  Summary: Python package for downloading MPCA hydrology data
  Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -11,6 +11,7 @@ Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python
  Requires-Python: >=3.8
  Requires-Dist: duckdb
+ Requires-Dist: oracledb
  Requires-Dist: pandas
  Requires-Dist: pathlib
  Requires-Dist: requests
mpcahydro-2.0.5.dist-info/RECORD ADDED
@@ -0,0 +1,23 @@
+ mpcaHydro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mpcaHydro/data_manager.py,sha256=nTRDWuR3fyJU2v9HDLKiY7TT5Sp4QVn7uHSdGRac_uQ,9280
+ mpcaHydro/equis.py,sha256=G4pCjfdDggLTrL0lQeGKAqmPsyO4p-sb1DS4KycUInc,18012
+ mpcaHydro/etlCSG.py,sha256=5QT6V2dHvNKC9r5-dspt-NpOmECP2LFw1Lyq1zdkqps,2630
+ mpcaHydro/etlSWD.py,sha256=FvFP5lIOxtzF3eEgUDGw-C2BJTRfxXxkbt3hYl8PCZQ,6367
+ mpcaHydro/etlWISKI.py,sha256=S1dNUe08Y0riJNBaEJDfgmewR8CwPtIaB_3Vw8JujkM,22201
+ mpcaHydro/etlWPLMN.py,sha256=avLJvWRRxsG35w--OVq0qSCrFjO6G2x0aQ31d9kcYHg,4179
+ mpcaHydro/outlets.py,sha256=VuV4m12RzD4_BAznzj9hRWr06gt_IOsWyx3i1wFMIac,13924
+ mpcaHydro/pywisk.py,sha256=kaxJCPCZHy9oEo9VnoSmFC58qm1sX9fVbtp6nXs7934,13290
+ mpcaHydro/reports.py,sha256=luM7Q5wAJheSImlhaWen9IqUFMWZX9U7DI2rsTEtzWY,2047
+ mpcaHydro/warehouse.py,sha256=_L-MQ0DTsgrob4lUY6Kzb-C4oD6y3laoIbZ_q5zFhr4,21731
+ mpcaHydro/warehouseManager.py,sha256=Ades6CfPyrpwGUaALpzAvQ_1rPKVZbuSmTPyBA-lCqA,1169
+ mpcaHydro/wiski.py,sha256=c7M3m8Qd8ddA8LrylmEimilWjaEpZl1kJkFlc63pWi8,11749
+ mpcaHydro/xref.py,sha256=TxMVtUhHKVJ3wZ-445KJaKSoTU3B0VlmSVgT9hX4lLk,3085
+ mpcaHydro/data/EQUIS_PARAMETER_XREF.csv,sha256=XZPrcZan9irSqFd4UasnPo_NQBcjyFodi0q3FGQphjI,5667
+ mpcaHydro/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
+ mpcaHydro/data/WISKI_QUALITY_CODES.csv,sha256=PvKBMAUj6pmbhaMUUkAOk0CnxM6RN5oIaXY7MJ_x4T8,9812
+ mpcaHydro/data/outlets.duckdb,sha256=QIyUJu0K60QeFVfFoWxU8ramsppzYl0VIS_mJ7WzQJ0,2109440
+ mpcaHydro/data/stations_EQUIS.gpkg,sha256=SLjjhWWau5Wx17PXogX_kj4cCwIaGgsJwaHqxuNFguo,2031616
+ mpcaHydro/data/stations_wiski.gpkg,sha256=No-iVHnngs-SejjrPxFNV-kh55FcmvpIx_QwdJAGnKI,909312
+ mpcahydro-2.0.5.dist-info/METADATA,sha256=1ab2dsMygAkKCFwOvy2E019-74edHBSQKOCDOEY611g,567
+ mpcahydro-2.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ mpcahydro-2.0.5.dist-info/RECORD,,
mpcahydro-2.0.5.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.27.0
+ Generator: hatchling 1.28.0
  Root-Is-Purelib: true
  Tag: py3-none-any