mpcaHydro 2.1.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,12 +19,9 @@ PARAMETERTYPE_MAP ={'11522': 'TP',
19
19
  '11504': 'WT',
20
20
  '11533': 'DO',
21
21
  '11507':'WL'}
22
- #STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
23
- STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']
24
22
 
25
23
  DATA_CODES = [1,3,10,12,15,20,29,30,31,32,34,45,46,47,48,49]
26
24
 
27
-
28
25
  TS_NAME_SELECTOR = {'Q':{'Internal':{'daily':'20.Day.Mean.Archive',
29
26
  'unit': '15.Rated'},
30
27
  'External': {'daily': '20.Day.Mean',
@@ -62,7 +59,8 @@ TS_NAME_SELECTOR = {'Q':{'Internal':{'daily':'20.Day.Mean.Archive',
62
59
  'External': {'daily': '20.Day.Mean',
63
60
  'unit': '08.Provisional.Edited'}}}
64
61
 
65
-
62
+ #STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
63
+ STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']
66
64
 
67
65
  CONSTITUENT_NAME_NO = {'Q' :['262*'],#,'263'],
68
66
  'WT' :['450*', '451*'], # '450.42','451.42'],
@@ -74,6 +72,13 @@ CONSTITUENT_NAME_NO = {'Q' :['262*'],#,'263'],
74
72
  'N' :None,
75
73
  'TKN':None}
76
74
 
75
+ STATIONPARAMETER_NOS_MAP = {'262*':'Q',
76
+ '450*':'WT',
77
+ '451*':'WT',
78
+ '863*':'OP',
79
+ '866*':'DO',
80
+ '811*':'TRB'}
81
+
77
82
  CONSTITUENT_NAME_NO_WPLMN = {'Q' :['262*'],#,'263'],
78
83
  'WT' :['450*', '451*'], # '450.42','451.42'],
79
84
  'OP' :['863*','5034' ,'5035'],
@@ -91,6 +96,38 @@ def test_connection():
91
96
  '''
92
97
  return pywisk.test_connection()
93
98
 
99
def info(station_ids: list, constituent=None):
    '''
    Look up time-series metadata for WISKI stations through the KISTERS API.

    Parameters:
        station_ids (list): Station numbers forwarded to pywisk.get_ts_ids.
        constituent: Optional key of CONSTITUENT_NAME_NO. When omitted, the
            default STATIONPARAMETER_NOS list is queried instead.
    Returns:
        The pywisk.get_ts_ids result after normalize_columns has been applied.
    '''
    parameter_nos = (STATIONPARAMETER_NOS if constituent is None
                     else CONSTITUENT_NAME_NO[constituent])
    ts_names = ['15.Rated', '09.Archive', '08.Provisional.Edited']

    metadata = pywisk.get_ts_ids(station_nos=station_ids,
                                 stationparameter_no=parameter_nos,
                                 ts_name=ts_names)
    return normalize_columns(metadata)
126
+
127
+
128
+
129
+
130
+
94
131
  def download(station_ids: list, start_year: int = 1996, end_year: int = 2030,wplmn: bool = False):
95
132
  '''
96
133
  Fetch data for given station IDs from WISKI database using the KISTERS API.
@@ -157,7 +194,7 @@ def download_chunk(ts_id,start_year = 1996,end_year = 2030, interval = 4, as_jso
157
194
  end = end_year
158
195
  df = pywisk.get_ts(ts_id,start_date = f'{start}-01-01',end_date = f'{end}-12-31',as_json = as_json)
159
196
  if not df.empty: frames.append(df)
160
- df.index = pd.to_datetime(df['Timestamp'])
197
+ df['Timestamp'] = pd.to_datetime(df['Timestamp']).dt.tz_localize(None)
161
198
  time.sleep(.1)
162
199
  return pd.concat(frames)
163
200
 
@@ -197,11 +234,8 @@ def tkn(station_nos,start_year = 1996,end_year = 2030):
197
234
  return _download('TKN',station_nos,start_year,end_year)
198
235
 
199
236
 
200
- def filter_quality_codes(df):
201
- '''
202
- Filter dataframe by valid quality codes
203
- '''
204
- return df.loc[df['Quality Code'].isin(DATA_CODES)]
237
+
238
+
205
239
 
206
240
  def convert_units(df):
207
241
  '''
@@ -222,23 +256,48 @@ def convert_units(df):
222
256
  return df
223
257
 
224
258
 
259
def map_constituents(df, mapping=None):
    '''
    Add a 'constituent' column derived from 'stationparameter_no'.

    Each mapping key is a station-parameter prefix with an optional trailing
    '*' wildcard. The first key (in mapping order) whose prefix starts the
    value wins. Values that match no key, and values that are not strings
    (e.g. missing values), are carried over unchanged.

    Parameters:
        df (pandas.DataFrame): Must contain a 'stationparameter_no' column.
            The frame is modified in place.
        mapping (dict, optional): Prefix pattern -> constituent name.
            Defaults to the module-level STATIONPARAMETER_NOS_MAP.
    Returns:
        pandas.DataFrame: The same frame, with 'constituent' added.
    '''
    if mapping is None:
        mapping = STATIONPARAMETER_NOS_MAP

    # Strip the wildcard once up front instead of once per row and key.
    prefixes = [(pattern.rstrip('*'), name) for pattern, name in mapping.items()]

    def map_value(value):
        # Missing values (NaN/None) have no .startswith; pass them through.
        if not isinstance(value, str):
            return value
        for prefix, name in prefixes:
            if value.startswith(prefix):
                return name
        return value  # no match: keep the original value

    df['constituent'] = df['stationparameter_no'].apply(map_value)
    return df
271
+
225
272
def normalize_columns(df):
    '''
    Add the constituent column and rename raw WISKI columns.

    The constituent is derived by map_constituents; afterwards the raw
    column names are replaced by their snake_case equivalents in place.
    '''
    column_aliases = {
        'station_no': 'station_id',
        'Timestamp': 'datetime',
        'Value': 'value',
        'ts_unitsymbol': 'unit',
        'Quality Code': 'quality_code',
        'Quality Code Name': 'quality_code_name',
    }

    df = map_constituents(df)
    df.rename(columns=column_aliases, inplace=True)
    return df
238
289
 
290
+
291
+
292
def filter_quality_codes(df, data_codes):
    '''
    Keep only the rows whose 'quality_code' value is listed in data_codes.
    '''
    is_valid = df['quality_code'].isin(data_codes)
    return df.loc[is_valid]
297
+
239
298
def average_results(df):
    '''
    Average values per station, constituent, unit and hour.

    Timestamps in 'datetime' are rounded to the nearest hour, then 'value'
    is averaged within each (station_id, datetime, constituent, unit) group.

    Parameters:
        df (pandas.DataFrame): Needs the columns station_id, datetime,
            constituent, unit and value. It is not modified.
    Returns:
        pandas.DataFrame: One row per group with the mean in 'value'.
    '''
    # Coerce first so string timestamps work; this is a no-op for datetime dtype.
    hourly = pd.to_datetime(df['datetime']).dt.round('h')
    # assign() works on a copy, so a filtered slice passed in by the caller
    # is neither mutated nor subject to chained-assignment warnings.
    rounded = df.assign(datetime=hourly)
    return (rounded
            .groupby(['station_id', 'datetime', 'constituent', 'unit'])
            .agg(value=('value', 'mean'))
            .reset_index())
243
302
  # Convert units
244
303
 
@@ -267,14 +326,35 @@ def calculate_baseflow(df, method = 'Boughton'):
267
326
  return pd.concat(dfs)
268
327
 
269
328
 
270
- def transform(df, baseflow_method = 'Boughton'):
329
def normalize(df):
    '''
    Bring raw WISKI data into the standardized layout without transforming it.

    Delegates to convert_units and then normalize_columns; nothing else is
    applied here.
    ---
    Parameters:
        df (pandas.DataFrame): Raw WISKI data
    Returns:
        pandas.DataFrame: Normalized WISKI data
    '''
    return normalize_columns(convert_units(df))
343
+
344
def transform(df, filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
    '''
    Run the full WISKI pipeline on raw data.

    Steps, in order: normalize, optional quality-code filtering,
    average_results, calculate_baseflow, then tagging every row with
    station_origin = 'wiski'.

    Parameters:
        df (pandas.DataFrame): Raw WISKI data
        filter_qc_codes (bool): Skip the quality-code filter when False.
        data_codes: Quality codes to keep; DATA_CODES is used when None.
        baseflow_method (str): Forwarded to calculate_baseflow as `method`.
    Returns:
        pandas.DataFrame: The transformed data.
    '''
    df = normalize(df)

    if filter_qc_codes:
        codes = DATA_CODES if data_codes is None else data_codes
        df = filter_quality_codes(df, codes)

    df = calculate_baseflow(average_results(df), method=baseflow_method)
    df['station_origin'] = 'wiski'
    return df
358
+
359
+
360
+
mpcaHydro/xref.py ADDED
@@ -0,0 +1,74 @@
1
+ import pandas as pd
2
+ from pathlib import Path
3
+
4
+ WISKI_EQUIS_XREF = pd.read_csv(Path(__file__).parent/'data/WISKI_EQUIS_XREF.csv')
5
+ #WISKI_EQUIS_XREF = pd.read_csv('C:/Users/mfratki/Documents/GitHub/hspf_tools/WISKI_EQUIS_XREF.csv')
6
+
7
+
8
def are_lists_identical(nested_list):
    '''
    Tell whether every sublist holds the same elements, ignoring order.

    Each sublist is sorted and compared with the first sorted sublist.
    An empty outer list counts as identical.
    '''
    canonical = list(map(sorted, nested_list))
    for candidate in canonical:
        if candidate != canonical[0]:
            return False
    return True
13
+
14
def get_wiski_stations():
    '''Return the distinct WISKI_STATION_NO values of the cross-reference table as a list.'''
    station_nos = WISKI_EQUIS_XREF['WISKI_STATION_NO']
    return list(station_nos.unique())
16
+
17
def get_equis_stations():
    '''Return the distinct EQUIS_STATION_ID values of the cross-reference table as a list.'''
    station_ids = WISKI_EQUIS_XREF['EQUIS_STATION_ID']
    return list(station_ids.unique())
19
+
20
def wiski_equis_alias(wiski_station_id):
    '''
    Return the WISKI_EQUIS_ID recorded for a WISKI station.

    Parameters:
        wiski_station_id: Value matched against the WISKI_STATION_NO column.
    Returns:
        An empty list when no non-null id exists, otherwise the single id.
    Raises:
        ValueError: If more than one distinct non-null id is found.
    '''
    is_station = WISKI_EQUIS_XREF['WISKI_STATION_NO'] == wiski_station_id
    candidates = set(WISKI_EQUIS_XREF.loc[is_station, 'WISKI_EQUIS_ID'].to_list())
    equis_ids = [equis_id for equis_id in candidates if not pd.isna(equis_id)]

    if len(equis_ids) == 0:
        return []
    if len(equis_ids) > 1:
        print(f'Too Many Equis Stations for {wiski_station_id}')
        # A bare `raise` here has no active exception to re-raise and would
        # surface as an unrelated RuntimeError; raise the real error instead.
        raise ValueError(f'Too Many Equis Stations for {wiski_station_id}')
    return equis_ids[0]
30
+
31
def wiski_equis_associations(wiski_station_id):
    '''
    List the distinct non-null EQUIS_STATION_ID values for a WISKI station.

    Returns an empty list when the station has no associated ids.
    '''
    is_station = WISKI_EQUIS_XREF['WISKI_STATION_NO'] == wiski_station_id
    candidates = WISKI_EQUIS_XREF.loc[is_station, 'EQUIS_STATION_ID'].unique()
    return [equis_id for equis_id in candidates if not pd.isna(equis_id)]
38
+
39
def equis_wiski_associations(equis_station_id):
    '''
    List the distinct non-null WISKI_STATION_NO values for an EQUIS station.

    Returns an empty list when the station has no associated station numbers.
    '''
    is_station = WISKI_EQUIS_XREF['EQUIS_STATION_ID'] == equis_station_id
    candidates = WISKI_EQUIS_XREF.loc[is_station, 'WISKI_STATION_NO'].unique()
    return [wiski_id for wiski_id in candidates if not pd.isna(wiski_id)]
46
+
47
def equis_wiski_alias(equis_station_id):
    '''
    Return the WISKI_STATION_NO whose WISKI_EQUIS_ID equals the given id.

    Returns:
        An empty list when no non-null station exists, otherwise the single
        station number.
    Raises:
        ValueError: If more than one distinct non-null station is found.
    '''
    is_alias = WISKI_EQUIS_XREF['WISKI_EQUIS_ID'] == equis_station_id
    candidates = set(WISKI_EQUIS_XREF.loc[is_alias, 'WISKI_STATION_NO'].to_list())
    wiski_ids = [wiski_id for wiski_id in list(candidates) if not pd.isna(wiski_id)]

    if len(wiski_ids) > 1:
        print(f'Too Many WISKI Stations for {equis_station_id}')
        raise ValueError(f'Too Many WISKI Stations for {equis_station_id}')
    if len(wiski_ids) == 0:
        return []
    return wiski_ids[0]
57
+
58
def _equis_wiski_associations(equis_station_ids):
    '''
    Return the WISKI stations shared by all of the given EQUIS stations.

    Parameters:
        equis_station_ids: Iterable of EQUIS station ids.
    Returns:
        The association list of the first station when every station has the
        same associations (order ignored); otherwise an empty list. An empty
        input also yields an empty list.
    '''
    wiski_stations = [equis_wiski_associations(equis_station_id)
                      for equis_station_id in equis_station_ids]
    # are_lists_identical([]) is True, so without this guard an empty input
    # would fall through to wiski_stations[0] and raise IndexError.
    if not wiski_stations:
        return []
    if are_lists_identical(wiski_stations):
        return wiski_stations[0]
    return []
64
+
65
def _stations_by_wid(wid_no, station_origin):
    '''
    List the distinct station ids recorded for a WID.

    Parameters:
        wid_no: Value matched against the WID column.
        station_origin (str): 'wiski' or 'wplmn' selects WISKI_STATION_NO;
            'equis' or 'swd' selects EQUIS_STATION_ID.
    Returns:
        list: Distinct values of the selected station column.
    Raises:
        ValueError: If station_origin is not one of the four known origins.
    '''
    if station_origin in ('wiski', 'wplmn'):
        station_col = 'WISKI_STATION_NO'
    elif station_origin in ('equis', 'swd'):
        station_col = 'EQUIS_STATION_ID'
    else:
        # A bare `raise` has nothing to re-raise here and would surface as
        # "RuntimeError: No active exception to reraise".
        raise ValueError(
            f"Unknown station_origin {station_origin!r}; "
            "expected 'wiski', 'wplmn', 'equis' or 'swd'")

    return list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WID'] == wid_no, station_col].unique())
74
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mpcaHydro
3
- Version: 2.1.0
3
+ Version: 2.2.1
4
4
  Summary: Python package for downloading MPCA hydrology data
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -10,7 +10,9 @@ Keywords: Hydrology,MPCA
10
10
  Classifier: Development Status :: 3 - Alpha
11
11
  Classifier: Programming Language :: Python
12
12
  Requires-Python: >=3.8
13
+ Requires-Dist: baseflow
13
14
  Requires-Dist: duckdb
15
+ Requires-Dist: oracledb
14
16
  Requires-Dist: pandas
15
17
  Requires-Dist: pathlib
16
18
  Requires-Dist: requests
@@ -0,0 +1,23 @@
1
+ mpcaHydro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ mpcaHydro/data_manager.py,sha256=COJF4U5qMbYXcalZKThYcyxgJClvnaLUEZm_wWEsXvs,11504
3
+ mpcaHydro/equis.py,sha256=G4pCjfdDggLTrL0lQeGKAqmPsyO4p-sb1DS4KycUInc,18012
4
+ mpcaHydro/etlCSG.py,sha256=5QT6V2dHvNKC9r5-dspt-NpOmECP2LFw1Lyq1zdkqps,2630
5
+ mpcaHydro/etlSWD.py,sha256=F1KmvIlMplbMjjrRk8uyKFTjLp7tiP3uwbuL1rqsPmU,6516
6
+ mpcaHydro/etlWISKI.py,sha256=S1dNUe08Y0riJNBaEJDfgmewR8CwPtIaB_3Vw8JujkM,22201
7
+ mpcaHydro/etlWPLMN.py,sha256=avLJvWRRxsG35w--OVq0qSCrFjO6G2x0aQ31d9kcYHg,4179
8
+ mpcaHydro/outlets.py,sha256=EvdSoUztV4Lz86SaDiFD6g5jpmrdsXMLpN85x9qChdQ,13761
9
+ mpcaHydro/pywisk.py,sha256=kaxJCPCZHy9oEo9VnoSmFC58qm1sX9fVbtp6nXs7934,13290
10
+ mpcaHydro/reports.py,sha256=acNQ37LeAbRPfsSmMEw0LxpZrgMvqRNAnlz93hlzMn8,2047
11
+ mpcaHydro/warehouse.py,sha256=wp8LYE71faudzUDLnCQ1npGS7hZtFLdeQBX8ux6Ak84,26024
12
+ mpcaHydro/warehouseManager.py,sha256=zrQIdzVm09rS5pLNr2szaGUS1-036AAW3D-Xf7xUeSs,1432
13
+ mpcaHydro/wiski.py,sha256=pPW7HjABdyvaa-XvpPmBmHvhVTicv5MF7rjkSvJEDtI,13901
14
+ mpcaHydro/xref.py,sha256=TxMVtUhHKVJ3wZ-445KJaKSoTU3B0VlmSVgT9hX4lLk,3085
15
+ mpcaHydro/data/EQUIS_PARAMETER_XREF.csv,sha256=XZPrcZan9irSqFd4UasnPo_NQBcjyFodi0q3FGQphjI,5667
16
+ mpcaHydro/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
17
+ mpcaHydro/data/WISKI_QUALITY_CODES.csv,sha256=PvKBMAUj6pmbhaMUUkAOk0CnxM6RN5oIaXY7MJ_x4T8,9812
18
+ mpcaHydro/data/outlet.duckdb,sha256=yAme7aTdbaMFc_wf005Tzc1Dhu0UAkDSVFNiaa01Y68,2109440
19
+ mpcaHydro/data/stations_EQUIS.gpkg,sha256=unAeRksLe33LuA1vkWaBurenZwxs0ON6fkn7OQvETzY,2072576
20
+ mpcaHydro/data/stations_wiski.gpkg,sha256=SKcjP1oi3qbpZVGWX_VrnM5gjG9o-_PbsG77owKzcWg,937984
21
+ mpcahydro-2.2.1.dist-info/METADATA,sha256=wIS1JMa4S-Kfz4Fs-PklRpw1JvTO_ozeLQxpmqjMQwc,591
22
+ mpcahydro-2.2.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
23
+ mpcahydro-2.2.1.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- mpcaHydro/WISKI.py,sha256=VWPwmDGv6rKFHgiQrcn4fAyx-h43Tyf8Vjtewa49yj4,11022
2
- mpcaHydro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- mpcaHydro/data_manager.py,sha256=LofBoNfhfStQlzD8EK1in_15BtNv_8q8057cjK4TmbA,15611
4
- mpcaHydro/equis.py,sha256=R4BEAkj9I6hVhSmd4WjjMLyQXBcOL5B2YIZjwm4EtqE,17943
5
- mpcaHydro/etlCSG.py,sha256=5QT6V2dHvNKC9r5-dspt-NpOmECP2LFw1Lyq1zdkqps,2630
6
- mpcaHydro/etlSWD.py,sha256=FvFP5lIOxtzF3eEgUDGw-C2BJTRfxXxkbt3hYl8PCZQ,6367
7
- mpcaHydro/etlWISKI.py,sha256=S1dNUe08Y0riJNBaEJDfgmewR8CwPtIaB_3Vw8JujkM,22201
8
- mpcaHydro/etlWPLMN.py,sha256=avLJvWRRxsG35w--OVq0qSCrFjO6G2x0aQ31d9kcYHg,4179
9
- mpcaHydro/pywisk.py,sha256=kaxJCPCZHy9oEo9VnoSmFC58qm1sX9fVbtp6nXs7934,13290
10
- mpcaHydro/warehouse.py,sha256=Rn8onCs9R-EnU9XNA1Gy53B-PAIhW_YaCLBDO29zyMY,7786
11
- mpcaHydro/data/EQUIS_PARAMETER_XREF.csv,sha256=XZPrcZan9irSqFd4UasnPo_NQBcjyFodi0q3FGQphjI,5667
12
- mpcaHydro/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
13
- mpcahydro-2.1.0.dist-info/METADATA,sha256=LyNxLeMkwdtq1ox-2ygVqPjqUPk5rFCseONZfziJEwg,543
14
- mpcahydro-2.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
15
- mpcahydro-2.1.0.dist-info/RECORD,,