mpcaHydro 2.0.4__py3-none-any.whl → 2.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mpcaHydro/data/WISKI_QUALITY_CODES.csv +71 -0
- mpcaHydro/data/outlets.duckdb +0 -0
- mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcaHydro/data_manager.py +142 -314
- mpcaHydro/equis.py +488 -0
- mpcaHydro/etlSWD.py +4 -5
- mpcaHydro/etlWISKI.py +39 -23
- mpcaHydro/etlWPLMN.py +2 -2
- mpcaHydro/outlets.py +371 -0
- mpcaHydro/pywisk.py +381 -0
- mpcaHydro/reports.py +80 -0
- mpcaHydro/warehouse.py +581 -0
- mpcaHydro/warehouseManager.py +47 -0
- mpcaHydro/wiski.py +308 -0
- mpcaHydro/xref.py +74 -0
- {mpcahydro-2.0.4.dist-info → mpcahydro-2.0.6.dist-info}/METADATA +3 -1
- mpcahydro-2.0.6.dist-info/RECORD +23 -0
- {mpcahydro-2.0.4.dist-info → mpcahydro-2.0.6.dist-info}/WHEEL +1 -1
- mpcaHydro/WISKI.py +0 -352
- mpcaHydro/helpers.py +0 -0
- mpcahydro-2.0.4.dist-info/RECORD +0 -13
mpcaHydro/etlWISKI.py
CHANGED
|
@@ -6,10 +6,9 @@ Created on Tue Oct 3 08:04:49 2023
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
|
-
from mpcaHydro
|
|
9
|
+
from mpcaHydro import pywisk
|
|
10
10
|
#from hspf_tools.orm.models import Station
|
|
11
11
|
import time
|
|
12
|
-
wiski = pyWISK()
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
'''
|
|
@@ -45,8 +44,8 @@ PARAMETERTYPE_MAP ={'11522': 'TP',
|
|
|
45
44
|
'11521': 'TKN',
|
|
46
45
|
'11500' : 'Q',
|
|
47
46
|
'11504': 'WT',
|
|
48
|
-
'11533': 'DO'
|
|
49
|
-
'11507':'WL'}
|
|
47
|
+
'11533': 'DO'}
|
|
48
|
+
# '11507':'WL'}
|
|
50
49
|
#STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
|
|
51
50
|
STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']
|
|
52
51
|
|
|
@@ -115,7 +114,12 @@ TS_NAME_SELECTOR = {'Q':{'Internal':{'daily':'20.Day.Mean.Archive',
|
|
|
115
114
|
'DO':{'Internal':{'daily':'20.Day.Mean',
|
|
116
115
|
'unit': '09.Archive'},
|
|
117
116
|
'External': {'daily': '20.Day.Mean',
|
|
118
|
-
'unit': '08.Provisional.Edited'}}
|
|
117
|
+
'unit': '08.Provisional.Edited'}},
|
|
118
|
+
'TRB':{'Internal':{'daily':'20.Day.Mean',
|
|
119
|
+
'unit': '09.Archive'},
|
|
120
|
+
'External': {'daily': '20.Day.Mean',
|
|
121
|
+
'unit': '08.Provisional.Edited'}},
|
|
122
|
+
}
|
|
119
123
|
|
|
120
124
|
|
|
121
125
|
|
|
@@ -123,9 +127,11 @@ CONSTITUENT_NAME_NO = {'Q' :['262*'],#,'263'],
|
|
|
123
127
|
'WT' :['450*', '451*'], # '450.42','451.42'],
|
|
124
128
|
'OP' :['863*'],
|
|
125
129
|
'DO' :['866*'],
|
|
130
|
+
'TRB': ['811*'],
|
|
131
|
+
'TDS': ['2175*'],
|
|
126
132
|
'TP' :None,
|
|
127
133
|
'TSS':None,
|
|
128
|
-
'N' :
|
|
134
|
+
'N' :['341*'],
|
|
129
135
|
'TKN':None}
|
|
130
136
|
|
|
131
137
|
CONSTITUENT_NAME_NO_WPLMN = {'Q' :['262*'],#,'263'],
|
|
@@ -137,7 +143,7 @@ CONSTITUENT_NAME_NO_WPLMN = {'Q' :['262*'],#,'263'],
|
|
|
137
143
|
'N' :['5024' ,'5025'],
|
|
138
144
|
'TKN':['5044' ,'5045']}
|
|
139
145
|
|
|
140
|
-
VALID_CONSTITUENTS = ['Q','WT','OP','DO','TP','TSS','N','TKN']
|
|
146
|
+
VALID_CONSTITUENTS = ['Q','WT','OP','DO','TP','TSS','N','TKN','TRB']
|
|
141
147
|
|
|
142
148
|
# def _info(station_nos):
|
|
143
149
|
# station_info = info(station_nos)
|
|
@@ -174,7 +180,7 @@ def extract(station_nos, constituent, dbpath, start_year = 1996, end_year = 2030
|
|
|
174
180
|
else:
|
|
175
181
|
constituent_nos = CONSTITUENT_NAME_NO[constituent]
|
|
176
182
|
|
|
177
|
-
ts_ids =
|
|
183
|
+
ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
178
184
|
stationparameter_no = constituent_nos,
|
|
179
185
|
ts_name = ts_names['unit'])
|
|
180
186
|
|
|
@@ -182,6 +188,16 @@ def extract(station_nos, constituent, dbpath, start_year = 1996, end_year = 2030
|
|
|
182
188
|
for ts_id in ts_ids:
|
|
183
189
|
jsons.append(download_chunk(ts_id,start_year,end_year,as_json = True))
|
|
184
190
|
time.sleep(.1)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# Connect to DuckDB (in-memory database)
|
|
196
|
+
con = duckdb.connect(database=':memory:')
|
|
197
|
+
|
|
198
|
+
# Register the Python list of dictionaries as a virtual table
|
|
199
|
+
# DuckDB can automatically infer the schema from this list.
|
|
200
|
+
con.register("my_json_table", json_data)
|
|
185
201
|
return jsons
|
|
186
202
|
|
|
187
203
|
|
|
@@ -208,7 +224,7 @@ def extract(station_nos, constituent, dbpath, start_year = 1996, end_year = 2030
|
|
|
208
224
|
|
|
209
225
|
|
|
210
226
|
def info(station_nos):
|
|
211
|
-
ts_ids =
|
|
227
|
+
ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
212
228
|
stationparameter_no = STATIONPARAMETER_NOS)
|
|
213
229
|
ts_ids = ts_ids.drop_duplicates(subset = 'parametertype_name')
|
|
214
230
|
ts_ids['constituent'] = ts_ids['parametertype_id'].map(PARAMETERTYPE_MAP)
|
|
@@ -221,7 +237,7 @@ def download(station_nos,start_year = 1996, end_year = 2030, raw = False,wplmn =
|
|
|
221
237
|
print('Downloading Timeseries Data')
|
|
222
238
|
df = pd.concat([_download(constituent,station_nos,start_year,end_year,raw,wplmn) for constituent in VALID_CONSTITUENTS])
|
|
223
239
|
|
|
224
|
-
station_metadata =
|
|
240
|
+
station_metadata = pywisk.get_stations(station_no = station_nos,returnfields = ['stationgroup_id'])
|
|
225
241
|
if any(station_metadata['stationgroup_id'].isin(['1319204'])):
|
|
226
242
|
df['wplmn_flag'] = 1
|
|
227
243
|
else:
|
|
@@ -254,14 +270,14 @@ def transform(data):
|
|
|
254
270
|
data.loc[:,'unit'] = data['unit'].str.lower()
|
|
255
271
|
data.replace({'unit':'°f'},'degF',inplace = True)
|
|
256
272
|
data['data_type'] = 'continuous'
|
|
257
|
-
data['
|
|
273
|
+
data['station_origin'] = 'wiski'
|
|
258
274
|
data.set_index('datetime',drop=True,inplace=True)
|
|
259
275
|
data.index = data.index.tz_convert('UTC-06:00')
|
|
260
276
|
|
|
261
277
|
|
|
262
278
|
data.index = data.index.round('h').round('h')
|
|
263
279
|
data = data.reset_index()
|
|
264
|
-
data = data.groupby(['datetime','variable','unit','station_id','station_name','constituent','interval_minutes','data_format','data_type','
|
|
280
|
+
data = data.groupby(['datetime','variable','unit','station_id','station_name','constituent','interval_minutes','data_format','data_type','station_origin']).mean()
|
|
265
281
|
data = data.reset_index()
|
|
266
282
|
data = data.set_index('datetime')
|
|
267
283
|
|
|
@@ -287,13 +303,13 @@ def _download(constituent,station_nos,start_year = 1996,end_year = 2030, raw = F
|
|
|
287
303
|
else:
|
|
288
304
|
constituent_nos = CONSTITUENT_NAME_NO[constituent]
|
|
289
305
|
|
|
290
|
-
ts_ids =
|
|
306
|
+
ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
291
307
|
stationparameter_no = constituent_nos,
|
|
292
308
|
ts_name = ts_names['unit'])
|
|
293
309
|
|
|
294
310
|
interval_minutes = 60
|
|
295
311
|
if ts_ids.empty:
|
|
296
|
-
ts_ids =
|
|
312
|
+
ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
297
313
|
stationparameter_no = constituent_nos,
|
|
298
314
|
ts_name = ts_names['daily'])
|
|
299
315
|
interval_minutes = 1440
|
|
@@ -323,7 +339,7 @@ def download_chunk(ts_id,start_year = 1996,end_year = 2030, interval = 5, as_jso
|
|
|
323
339
|
end = int(start + interval-1)
|
|
324
340
|
if end > end_year:
|
|
325
341
|
end = end_year
|
|
326
|
-
df =
|
|
342
|
+
df = pywisk.get_ts(ts_id,start_date = f'{start}-01-01',end_date = f'{end}-12-31',as_json = as_json)
|
|
327
343
|
if not df.empty: frames.append(df)
|
|
328
344
|
df.index = pd.to_datetime(df['Timestamp'])
|
|
329
345
|
time.sleep(.1)
|
|
@@ -380,7 +396,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
380
396
|
# else:
|
|
381
397
|
# ts_names = ['15.Rated']
|
|
382
398
|
|
|
383
|
-
# ts_ids =
|
|
399
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
384
400
|
# stationparameter_no = ['262*'],
|
|
385
401
|
# ts_name = ts_names)
|
|
386
402
|
|
|
@@ -406,7 +422,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
406
422
|
# ts_names = ['09.Archive']
|
|
407
423
|
|
|
408
424
|
|
|
409
|
-
# ts_ids =
|
|
425
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
410
426
|
# stationparameter_no = ['450*','450.42*','451*','451.42*'],
|
|
411
427
|
# ts_name = ts_names)
|
|
412
428
|
|
|
@@ -433,7 +449,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
433
449
|
# else:
|
|
434
450
|
# ts_names = ['09.Archive']
|
|
435
451
|
|
|
436
|
-
# ts_ids =
|
|
452
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
437
453
|
# stationparameter_no = ['866*'],
|
|
438
454
|
# ts_name = ts_names)
|
|
439
455
|
|
|
@@ -450,7 +466,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
450
466
|
|
|
451
467
|
|
|
452
468
|
# def orthophosphate(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
453
|
-
# ts_ids =
|
|
469
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
454
470
|
# stationparameter_no = ['5034','5035'],
|
|
455
471
|
# ts_name = ['20.Day.Mean'])
|
|
456
472
|
|
|
@@ -469,7 +485,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
469
485
|
|
|
470
486
|
# def total_phosphorous(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
471
487
|
|
|
472
|
-
# ts_ids =
|
|
488
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
473
489
|
# stationparameter_no = ['5004','5005'],
|
|
474
490
|
# ts_name = ['20.Day.Mean'])
|
|
475
491
|
# if ts_ids.empty:
|
|
@@ -487,7 +503,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
487
503
|
|
|
488
504
|
# def total_suspended_solids(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
489
505
|
|
|
490
|
-
# ts_ids =
|
|
506
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
491
507
|
# stationparameter_no = ['5014','5015'],
|
|
492
508
|
# ts_name = ['20.Day.Mean'])
|
|
493
509
|
# if ts_ids.empty:
|
|
@@ -505,7 +521,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
505
521
|
|
|
506
522
|
# def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
507
523
|
|
|
508
|
-
# ts_ids =
|
|
524
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
509
525
|
# stationparameter_no = ['5044','5045'],
|
|
510
526
|
# ts_name = ['20.Day.Mean'])
|
|
511
527
|
|
|
@@ -524,7 +540,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
|
524
540
|
|
|
525
541
|
# def nitrogen(station_nos,start_year = 1996,end_year = 2030,raw=False):
|
|
526
542
|
|
|
527
|
-
# ts_ids =
|
|
543
|
+
# ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
|
|
528
544
|
# stationparameter_no = ['5024','5025'],
|
|
529
545
|
# ts_name = ['20.Day.Mean'])
|
|
530
546
|
|
mpcaHydro/etlWPLMN.py
CHANGED
|
@@ -8,7 +8,7 @@ Created on Tue Oct 3 08:04:49 2023
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
import pandas as pd
|
|
11
|
-
from
|
|
11
|
+
from mpcaHydro.pyWISK import pyWISK
|
|
12
12
|
import time
|
|
13
13
|
|
|
14
14
|
|
|
@@ -83,7 +83,7 @@ def transform(data):
|
|
|
83
83
|
data = data.loc[:,['datetime','value','variable','unit','quality_id','station_id','station_name','constituent']]
|
|
84
84
|
|
|
85
85
|
data['unit'].replace('ft³/s','cfs',inplace=True)
|
|
86
|
-
data['
|
|
86
|
+
data['station_origin'] = 'wplmn'
|
|
87
87
|
return data
|
|
88
88
|
|
|
89
89
|
# data['Timestamp'] = pd.to_datetime(data['Timestamp']).dt.date
|
mpcaHydro/outlets.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
"""
Created on Thu May 1 09:51:51 2025

@author: mfratki
"""
#import sqlite3
from pathlib import Path
import geopandas as gpd
import pandas as pd
import duckdb

# Directory bundled with the package holding the station geopackages and the
# outlets DuckDB database. Built with Path joins (the original used literal
# backslashes, which only worked on Windows).
_DATA_DIR = Path(__file__).resolve().parent / 'data'

# WISKI stations: keep only rows mapped to model operations (opnids).
_stations_wiski = gpd.read_file(str(_DATA_DIR / 'stations_wiski.gpkg'))
stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
stations_wiski['source'] = 'wiski'

# EQUIS stations: same columns as the WISKI frame; EQUIS has no WPLMN flag.
_stations_equis = gpd.read_file(str(_DATA_DIR / 'stations_EQUIS.gpkg'))
stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
stations_equis['source'] = 'equis'
stations_equis['wplmn_flag'] = 0


DB_PATH = str(_DATA_DIR / 'outlets.duckdb')

# Combined station/model lookup table used by the accessor functions below.
# Blank opnids strings count as missing; one row per (station_id, source).
MODL_DB = pd.concat([stations_wiski, stations_equis])
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('', pd.NA)
MODL_DB = MODL_DB.dropna(subset='opnids')
MODL_DB = MODL_DB.drop_duplicates(['station_id', 'source']).reset_index(drop=True)
|
|
33
|
+
|
|
34
|
+
def _reload():
    """Re-read the station geopackages from disk and rebuild the module-level
    station frames and MODL_DB lookup table in place.

    Call after editing the bundled .gpkg files so the module globals reflect
    the new contents without re-importing the package.
    """
    global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
    # Use Path joins instead of literal backslashes so the paths also
    # resolve on POSIX systems.
    data_dir = Path(__file__).resolve().parent / 'data'

    _stations_wiski = gpd.read_file(str(data_dir / 'stations_wiski.gpkg'))
    stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
    stations_wiski['source'] = 'wiski'

    _stations_equis = gpd.read_file(str(data_dir / 'stations_EQUIS.gpkg'))
    stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
    stations_equis['source'] = 'equis'
    stations_equis['wplmn_flag'] = 0

    MODL_DB = pd.concat([stations_wiski, stations_equis])
    MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('', pd.NA)
    MODL_DB = MODL_DB.dropna(subset='opnids')
    MODL_DB = MODL_DB.drop_duplicates(['station_id', 'source']).reset_index(drop=True)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_model_db(model_name: str):
    """Return the rows of MODL_DB belonging to the given model repository."""
    mask = MODL_DB['repository_name'] == model_name
    return MODL_DB[mask]
|
|
52
|
+
|
|
53
|
+
def split_opnids(opnids: list):
    """Flatten a list of opnid-string lists into absolute integer ids.

    Each element of *opnids* is itself an iterable of numeric strings
    (e.g. '650', '-12.0'); a minus sign marks exclusion elsewhere, so
    only the magnitude is returned here.
    """
    flattened = []
    for group in opnids:
        for token in group:
            flattened.append(abs(int(float(token))))
    return flattened
|
|
55
|
+
|
|
56
|
+
def valid_models():
    """List the distinct model repository names present in MODL_DB."""
    return list(MODL_DB['repository_name'].unique())
|
|
58
|
+
|
|
59
|
+
def wplmn_station_opnids(model_name):
    """Absolute reach ids for WPLMN-flagged WISKI stations of *model_name*."""
    selected = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')
    return split_opnids(selected['opnids'].str.split(',').to_list())
|
|
62
|
+
|
|
63
|
+
def wiski_station_opnids(model_name):
    """Absolute reach ids for all WISKI-sourced stations of *model_name*."""
    selected = MODL_DB.query('repository_name == @model_name and source == "wiski"')
    return split_opnids(selected['opnids'].str.split(',').to_list())
|
|
66
|
+
|
|
67
|
+
def equis_station_opnids(model_name):
    """Absolute reach ids for all EQUIS-sourced stations of *model_name*."""
    selected = MODL_DB.query('repository_name == @model_name and source == "equis"')
    return split_opnids(selected['opnids'].str.split(',').to_list())
|
|
70
|
+
|
|
71
|
+
def station_opnids(model_name):
    """Absolute reach ids for every station of *model_name*, any source."""
    selected = MODL_DB.query('repository_name == @model_name')
    return split_opnids(selected['opnids'].str.split(',').to_list())
|
|
74
|
+
|
|
75
|
+
def equis_stations(model_name):
    """Station ids sourced from EQUIS for the given model."""
    subset = MODL_DB.query('repository_name == @model_name and source == "equis"')
    return subset['station_id'].tolist()
|
|
77
|
+
|
|
78
|
+
def wiski_stations(model_name):
    """Station ids sourced from WISKI for the given model."""
    subset = MODL_DB.query('repository_name == @model_name and source == "wiski"')
    return subset['station_id'].tolist()
|
|
80
|
+
|
|
81
|
+
def wplmn_stations(model_name):
    """WPLMN-flagged WISKI station ids for the given model."""
    subset = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')
    return subset['station_id'].tolist()
|
|
83
|
+
|
|
84
|
+
def outlets(model_name):
    """Group the model's rows by (opnids, repository_name): one group per outlet."""
    grouped = MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])
    return [frame for _, frame in grouped]
|
|
86
|
+
|
|
87
|
+
def outlet_stations(model_name):
    """Station-id lists, one list per outlet grouping of the model."""
    grouped = MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])
    return [frame['station_id'].to_list() for _, frame in grouped]
|
|
89
|
+
|
|
90
|
+
def _split_opnids(opnids: list):
|
|
91
|
+
return [int(float(j)) for i in opnids for j in i]
|
|
92
|
+
|
|
93
|
+
def connect(db_path, read_only=True):
    """Open a DuckDB connection to *db_path*, creating parent dirs as needed.

    NOTE(review): the default is read-only — writers must pass
    read_only=False explicitly.
    """
    parent = Path(db_path).parent
    parent.mkdir(parents=True, exist_ok=True)
    return duckdb.connect(db_path, read_only=read_only)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def init_db(db_path: str, reset: bool = False):
    """
    Initialize the DuckDB database: create the outlets tables and views.

    Parameters
    ----------
    db_path : str
        Path to the DuckDB file; parent directories are created if missing.
    reset : bool
        When True, delete any existing database file first.
    """
    db_path = Path(db_path)
    if reset and db_path.exists():
        db_path.unlink()

    # Must open writable: connect() defaults to read_only=True, which can
    # neither create a new database file nor execute DDL.
    with connect(db_path.as_posix(), read_only=False) as con:
        con.execute(OUTLETS_SCHEMA)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Accessors:
|
|
112
|
+
def get_outlets_by_model(model_name: str):
    """Fetch every station/reach pair recorded for *model_name*."""
    sql = """
            SELECT r.*
            FROM outlets.station_reach_pairs r
            WHERE r.repository_name = ?
            """
    with connect(DB_PATH) as con:
        return con.execute(sql, [model_name]).fetchdf()
|
|
123
|
+
|
|
124
|
+
def get_outlets_by_reach(reach_id: int, model_name: str):
    """
    Return all outlet rows for outlets that include the given reach_id in the given model_name.
    """
    sql = """
            SELECT r.*
            FROM outlets.station_reach_pairs r
            WHERE r.reach_id = ? AND r.repository_name = ?
            """
    with connect(DB_PATH) as con:
        return con.execute(sql, [reach_id, model_name]).fetchdf()
|
|
137
|
+
|
|
138
|
+
def get_outlets_by_station(station_id: str, station_origin: str):
    """
    Return all station/reach pair rows for outlets containing the given
    station_id from the given origin (e.g. 'wiski' or 'equis').

    (Docstring fixed: the original was copy-pasted from
    get_outlets_by_reach and described a reach_id filter.)
    """
    with connect(DB_PATH) as con:
        df = con.execute(
            """
            SELECT r.*
            FROM outlets.station_reach_pairs r
            WHERE r.station_id = ? AND r.station_origin = ?
            """,
            [station_id, station_origin]).fetchdf()
    return df
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class OutletGateway:
    """Facade over the outlets database for a single model repository.

    Wraps the module-level accessor functions so callers don't have to
    pass model_name repeatedly.
    """

    def __init__(self, model_name: str):
        self.model_name = model_name
        self.db_path = DB_PATH
        self.modl_db = get_model_db(model_name)

    # Legacy methods to access functions
    def wplmn_station_opnids(self):
        return wplmn_station_opnids(self.model_name)

    def wiski_station_opnids(self):
        return wiski_station_opnids(self.model_name)

    def equis_station_opnids(self):
        return equis_station_opnids(self.model_name)

    def station_opnids(self):
        return station_opnids(self.model_name)

    def equis_stations(self):
        return equis_stations(self.model_name)

    def wiski_stations(self):
        return wiski_stations(self.model_name)

    def wplmn_stations(self):
        return wplmn_stations(self.model_name)

    def outlets(self):
        return outlets(self.model_name)

    def outlet_stations(self):
        return outlet_stations(self.model_name)

    # Accessors for outlets
    def get_outlets(self):
        return get_outlets_by_model(self.model_name)

    def get_outlets_by_reach(self, reach_id: int):
        return get_outlets_by_reach(reach_id, self.model_name)

    def get_outlets_by_station(self, station_id: str, station_origin: str):
        # Validate with a real exception: the original used `assert`,
        # which is silently stripped under `python -O`.
        if station_id not in self.wiski_stations() + self.equis_stations():
            raise ValueError(f"Station ID {station_id} not found in model {self.model_name}")
        return get_outlets_by_station(station_id, station_origin)
|
|
199
|
+
|
|
200
|
+
# constructors:
|
|
201
|
+
def build_outlet_db(db_path: str = None):
    """Create (or recreate) the outlets database and populate it from MODL_DB.

    Parameters
    ----------
    db_path : str, optional
        Target DuckDB file; defaults to the packaged DB_PATH.
    """
    if db_path is None:
        db_path = DB_PATH
    init_db(db_path, reset=True)
    # A writable connection is required for the INSERTs in build_outlets;
    # connect() defaults to read-only.
    with connect(db_path, read_only=False) as con:
        build_outlets(con)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def build_outlets(con, model_name: str = None):
    # Populate outlets, outlet_reaches and outlet_stations from MODL_DB
    # (or from a single model's rows when model_name is given).
    # The outlet_id is simply the enumeration index of each
    # (opnids, repository_name) group.
    if model_name is not None:
        modl_db = get_model_db(model_name)
    else:
        modl_db = MODL_DB

    for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
        repo_name = group['repository_name'].iloc[0]
        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)

        # 'opnids' is a comma-separated string; _split_opnids keeps the
        # sign, and a negative id marks a reach to exclude.
        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))

        for opnid in opnids:
            if opnid < 0:
                exclude = 1
            else:
                exclude = 0
            add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)

        # One membership row per unique (station_id, source) in the group.
        for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def create_outlet_schema(con, model_name : str):
    """Populate the outlet tables for a single model.

    Bug fix: the original body referenced an undefined name ``modl_db``
    (a leftover module alias) and raised NameError on every call; it
    duplicated build_outlets line-for-line, so delegate to it instead.
    """
    build_outlets(con, model_name)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def add_outlet(con,
               outlet_id: str,
               repository_name: str,
               outlet_name = None,
               notes = None):
    """
    Insert an outlet. repository_name is required.
    """
    sql = "INSERT INTO outlets.outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)"
    params = [outlet_id, repository_name, outlet_name, notes]
    con.execute(sql, params)
|
|
262
|
+
|
|
263
|
+
def add_station(con,
                outlet_id: str,
                station_id: str,
                station_origin: str,
                true_opnid: str,
                repository_name: str,
                comments = None):
    """
    Insert a station membership for an outlet.
    Constraints:
    - UNIQUE (station_id, station_origin): unique per origin across all outlets.
    - true_opnid and repository_name are required per schema.
    """
    sql = """INSERT INTO outlets.outlet_stations
        (outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
        VALUES (?, ?, ?, ?, ?, ?)"""
    params = [outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
    con.execute(sql, params)
|
|
282
|
+
|
|
283
|
+
def add_reach(con,
              outlet_id: str,
              reach_id: str,
              repository_name: str,
              exclude: int = 0):
    """
    Insert a reach membership for an outlet.
    - repository_name is required and participates in the PK (reach_id, repository_name).
    - exclude = 1 to mark a reach as excluded from association views.
    """
    sql = """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name, exclude)
        VALUES (?, ?, ?, ?)"""
    params = [outlet_id, reach_id, repository_name, int(exclude)]
    con.execute(sql, params)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# DDL for the outlets database. Bug fix: the station_reach_pairs view had a
# trailing comma after the last selected column (r.repository_name,) before
# FROM — accepted by DuckDB but a syntax error in SQLite, contradicting the
# "Compatible with DuckDB and SQLite" note below.
OUTLETS_SCHEMA = """-- schema.sql
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
-- Compatible with DuckDB and SQLite.

-- Table 1: outlets
-- Represents a logical grouping that ties stations and reaches together.
CREATE TABLE IF NOT EXISTS outlets (
    outlet_id TEXT PRIMARY KEY,
    repository_name TEXT NOT NULL,
    outlet_name TEXT,
    notes TEXT -- optional: general notes about the outlet grouping
);

-- Table 2: outlet_stations
-- One-to-many: outlet -> stations
CREATE TABLE IF NOT EXISTS outlet_stations (
    outlet_id TEXT NOT NULL,
    station_id TEXT NOT NULL,
    station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
    repository_name TEXT NOT NULL, -- repository model the station is physically located in
    true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
    comments TEXT, -- Per-station comments, issues, etc.
    CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
    FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Table 3: outlet_reaches
-- One-to-many: outlet -> reaches
-- A reach can appear in multiple outlets, enabling many-to-many overall.
CREATE TABLE IF NOT EXISTS outlet_reaches (
    outlet_id TEXT NOT NULL,
    reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
    repository_name TEXT NOT NULL, -- optional: where the mapping comes from
    exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
    FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Useful views:

-- View: station_reach_pairs
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
CREATE VIEW IF NOT EXISTS station_reach_pairs AS
SELECT
    s.outlet_id,
    s.station_id,
    s.station_origin,
    r.reach_id,
    r.exclude,
    r.repository_name
FROM outlet_stations s
JOIN outlet_reaches r
    ON s.outlet_id = r.outlet_id;

"""
|
|
354
|
+
|
|
355
|
+
#row = modl_db.MODL_DB.iloc[0]
|
|
356
|
+
|
|
357
|
+
#info = etlWISKI.info(row['station_id'])
|
|
358
|
+
|
|
359
|
+
#modl_db.MODL_DB.query('source == "equis"')
|
|
360
|
+
|
|
361
|
+
# outlet_dict = {'stations': {'wiski': ['E66050001'],
|
|
362
|
+
# 'equis': ['S002-118']},
|
|
363
|
+
# 'reaches': {'Clearwater': [650]}
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
# station_ids = ['S002-118']
|
|
369
|
+
# #station_ids = ['E66050001']
|
|
370
|
+
# reach_ids = [650]
|
|
371
|
+
# flow_station_ids = ['E66050001']
|