mpcaHydro 2.0.4__py3-none-any.whl → 2.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mpcaHydro/etlWISKI.py CHANGED
@@ -6,10 +6,9 @@ Created on Tue Oct 3 08:04:49 2023
6
6
  """
7
7
 
8
8
  import pandas as pd
9
- from mpcaHydro.WISKI import pyWISK
9
+ from mpcaHydro import pywisk
10
10
  #from hspf_tools.orm.models import Station
11
11
  import time
12
- wiski = pyWISK()
13
12
 
14
13
 
15
14
  '''
@@ -45,8 +44,8 @@ PARAMETERTYPE_MAP ={'11522': 'TP',
45
44
  '11521': 'TKN',
46
45
  '11500' : 'Q',
47
46
  '11504': 'WT',
48
- '11533': 'DO',
49
- '11507':'WL'}
47
+ '11533': 'DO'}
48
+ # '11507':'WL'}
50
49
  #STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
51
50
  STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']
52
51
 
@@ -115,7 +114,12 @@ TS_NAME_SELECTOR = {'Q':{'Internal':{'daily':'20.Day.Mean.Archive',
115
114
  'DO':{'Internal':{'daily':'20.Day.Mean',
116
115
  'unit': '09.Archive'},
117
116
  'External': {'daily': '20.Day.Mean',
118
- 'unit': '08.Provisional.Edited'}}}
117
+ 'unit': '08.Provisional.Edited'}},
118
+ 'TRB':{'Internal':{'daily':'20.Day.Mean',
119
+ 'unit': '09.Archive'},
120
+ 'External': {'daily': '20.Day.Mean',
121
+ 'unit': '08.Provisional.Edited'}},
122
+ }
119
123
 
120
124
 
121
125
 
@@ -123,9 +127,11 @@ CONSTITUENT_NAME_NO = {'Q' :['262*'],#,'263'],
123
127
  'WT' :['450*', '451*'], # '450.42','451.42'],
124
128
  'OP' :['863*'],
125
129
  'DO' :['866*'],
130
+ 'TRB': ['811*'],
131
+ 'TDS': ['2175*'],
126
132
  'TP' :None,
127
133
  'TSS':None,
128
- 'N' :None,
134
+ 'N' :['341*'],
129
135
  'TKN':None}
130
136
 
131
137
  CONSTITUENT_NAME_NO_WPLMN = {'Q' :['262*'],#,'263'],
@@ -137,7 +143,7 @@ CONSTITUENT_NAME_NO_WPLMN = {'Q' :['262*'],#,'263'],
137
143
  'N' :['5024' ,'5025'],
138
144
  'TKN':['5044' ,'5045']}
139
145
 
140
- VALID_CONSTITUENTS = ['Q','WT','OP','DO','TP','TSS','N','TKN']
146
+ VALID_CONSTITUENTS = ['Q','WT','OP','DO','TP','TSS','N','TKN','TRB']
141
147
 
142
148
  # def _info(station_nos):
143
149
  # station_info = info(station_nos)
@@ -174,7 +180,7 @@ def extract(station_nos, constituent, dbpath, start_year = 1996, end_year = 2030
174
180
  else:
175
181
  constituent_nos = CONSTITUENT_NAME_NO[constituent]
176
182
 
177
- ts_ids = wiski.get_ts_ids(station_nos = station_nos,
183
+ ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
178
184
  stationparameter_no = constituent_nos,
179
185
  ts_name = ts_names['unit'])
180
186
 
@@ -182,6 +188,16 @@ def extract(station_nos, constituent, dbpath, start_year = 1996, end_year = 2030
182
188
  for ts_id in ts_ids:
183
189
  jsons.append(download_chunk(ts_id,start_year,end_year,as_json = True))
184
190
  time.sleep(.1)
191
+
192
+
193
+
194
+
195
+ # Connect to DuckDB (in-memory database)
196
+ con = duckdb.connect(database=':memory:')
197
+
198
+ # Register the Python list of dictionaries as a virtual table
199
+ # DuckDB can automatically infer the schema from this list.
200
+ con.register("my_json_table", json_data)
185
201
  return jsons
186
202
 
187
203
 
@@ -208,7 +224,7 @@ def extract(station_nos, constituent, dbpath, start_year = 1996, end_year = 2030
208
224
 
209
225
 
210
226
  def info(station_nos):
211
- ts_ids = wiski.get_ts_ids(station_nos = station_nos,
227
+ ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
212
228
  stationparameter_no = STATIONPARAMETER_NOS)
213
229
  ts_ids = ts_ids.drop_duplicates(subset = 'parametertype_name')
214
230
  ts_ids['constituent'] = ts_ids['parametertype_id'].map(PARAMETERTYPE_MAP)
@@ -221,7 +237,7 @@ def download(station_nos,start_year = 1996, end_year = 2030, raw = False,wplmn =
221
237
  print('Downloading Timeseries Data')
222
238
  df = pd.concat([_download(constituent,station_nos,start_year,end_year,raw,wplmn) for constituent in VALID_CONSTITUENTS])
223
239
 
224
- station_metadata = wiski.get_stations(station_no = station_nos,returnfields = ['stationgroup_id'])
240
+ station_metadata = pywisk.get_stations(station_no = station_nos,returnfields = ['stationgroup_id'])
225
241
  if any(station_metadata['stationgroup_id'].isin(['1319204'])):
226
242
  df['wplmn_flag'] = 1
227
243
  else:
@@ -254,14 +270,14 @@ def transform(data):
254
270
  data.loc[:,'unit'] = data['unit'].str.lower()
255
271
  data.replace({'unit':'°f'},'degF',inplace = True)
256
272
  data['data_type'] = 'continuous'
257
- data['source'] = 'wiski'
273
+ data['station_origin'] = 'wiski'
258
274
  data.set_index('datetime',drop=True,inplace=True)
259
275
  data.index = data.index.tz_convert('UTC-06:00')
260
276
 
261
277
 
262
278
  data.index = data.index.round('h').round('h')
263
279
  data = data.reset_index()
264
- data = data.groupby(['datetime','variable','unit','station_id','station_name','constituent','interval_minutes','data_format','data_type','source']).mean()
280
+ data = data.groupby(['datetime','variable','unit','station_id','station_name','constituent','interval_minutes','data_format','data_type','station_origin']).mean()
265
281
  data = data.reset_index()
266
282
  data = data.set_index('datetime')
267
283
 
@@ -287,13 +303,13 @@ def _download(constituent,station_nos,start_year = 1996,end_year = 2030, raw = F
287
303
  else:
288
304
  constituent_nos = CONSTITUENT_NAME_NO[constituent]
289
305
 
290
- ts_ids = wiski.get_ts_ids(station_nos = station_nos,
306
+ ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
291
307
  stationparameter_no = constituent_nos,
292
308
  ts_name = ts_names['unit'])
293
309
 
294
310
  interval_minutes = 60
295
311
  if ts_ids.empty:
296
- ts_ids = wiski.get_ts_ids(station_nos = station_nos,
312
+ ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
297
313
  stationparameter_no = constituent_nos,
298
314
  ts_name = ts_names['daily'])
299
315
  interval_minutes = 1440
@@ -323,7 +339,7 @@ def download_chunk(ts_id,start_year = 1996,end_year = 2030, interval = 5, as_jso
323
339
  end = int(start + interval-1)
324
340
  if end > end_year:
325
341
  end = end_year
326
- df = wiski.get_ts(ts_id,start_date = f'{start}-01-01',end_date = f'{end}-12-31',as_json = as_json)
342
+ df = pywisk.get_ts(ts_id,start_date = f'{start}-01-01',end_date = f'{end}-12-31',as_json = as_json)
327
343
  if not df.empty: frames.append(df)
328
344
  df.index = pd.to_datetime(df['Timestamp'])
329
345
  time.sleep(.1)
@@ -380,7 +396,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
380
396
  # else:
381
397
  # ts_names = ['15.Rated']
382
398
 
383
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
399
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
384
400
  # stationparameter_no = ['262*'],
385
401
  # ts_name = ts_names)
386
402
 
@@ -406,7 +422,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
406
422
  # ts_names = ['09.Archive']
407
423
 
408
424
 
409
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
425
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
410
426
  # stationparameter_no = ['450*','450.42*','451*','451.42*'],
411
427
  # ts_name = ts_names)
412
428
 
@@ -433,7 +449,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
433
449
  # else:
434
450
  # ts_names = ['09.Archive']
435
451
 
436
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
452
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
437
453
  # stationparameter_no = ['866*'],
438
454
  # ts_name = ts_names)
439
455
 
@@ -450,7 +466,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
450
466
 
451
467
 
452
468
  # def orthophosphate(station_nos,start_year = 1996,end_year = 2030,raw=False):
453
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
469
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
454
470
  # stationparameter_no = ['5034','5035'],
455
471
  # ts_name = ['20.Day.Mean'])
456
472
 
@@ -469,7 +485,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
469
485
 
470
486
  # def total_phosphorous(station_nos,start_year = 1996,end_year = 2030,raw=False):
471
487
 
472
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
488
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
473
489
  # stationparameter_no = ['5004','5005'],
474
490
  # ts_name = ['20.Day.Mean'])
475
491
  # if ts_ids.empty:
@@ -487,7 +503,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
487
503
 
488
504
  # def total_suspended_solids(station_nos,start_year = 1996,end_year = 2030,raw=False):
489
505
 
490
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
506
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
491
507
  # stationparameter_no = ['5014','5015'],
492
508
  # ts_name = ['20.Day.Mean'])
493
509
  # if ts_ids.empty:
@@ -505,7 +521,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
505
521
 
506
522
  # def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
507
523
 
508
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
524
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
509
525
  # stationparameter_no = ['5044','5045'],
510
526
  # ts_name = ['20.Day.Mean'])
511
527
 
@@ -524,7 +540,7 @@ def tkn(station_nos,start_year = 1996,end_year = 2030,raw=False):
524
540
 
525
541
  # def nitrogen(station_nos,start_year = 1996,end_year = 2030,raw=False):
526
542
 
527
- # ts_ids = wiski.get_ts_ids(station_nos = station_nos,
543
+ # ts_ids = pywisk.get_ts_ids(station_nos = station_nos,
528
544
  # stationparameter_no = ['5024','5025'],
529
545
  # ts_name = ['20.Day.Mean'])
530
546
 
mpcaHydro/etlWPLMN.py CHANGED
@@ -8,7 +8,7 @@ Created on Tue Oct 3 08:04:49 2023
8
8
 
9
9
 
10
10
  import pandas as pd
11
- from WISKI import pyWISK
11
+ from mpcaHydro.pyWISK import pyWISK
12
12
  import time
13
13
 
14
14
 
@@ -83,7 +83,7 @@ def transform(data):
83
83
  data = data.loc[:,['datetime','value','variable','unit','quality_id','station_id','station_name','constituent']]
84
84
 
85
85
  data['unit'].replace('ft³/s','cfs',inplace=True)
86
- data['source'] = 'wplmn'
86
+ data['station_origin'] = 'wplmn'
87
87
  return data
88
88
 
89
89
  # data['Timestamp'] = pd.to_datetime(data['Timestamp']).dt.date
mpcaHydro/outlets.py ADDED
@@ -0,0 +1,371 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Thu May 1 09:51:51 2025
4
+
5
+ @author: mfratki
6
+ """
7
+ #import sqlite3
8
+ from pathlib import Path
9
+ import geopandas as gpd
10
+ import pandas as pd
11
+ import duckdb
12
+ #from hspf_tools.calibrator import etlWISKI, etlSWD
13
+
14
+
15
#stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')

# BUG FIX: paths previously embedded Windows-style 'data\\...' separators,
# which POSIX treats as literal filename characters. Build them with pathlib
# so the module imports on any OS.
_DATA_DIR = Path(__file__).resolve().parent / 'data'

# Station layers: keep only stations mapped to model operation ids ('opnids').
_stations_wiski = gpd.read_file(str(_DATA_DIR / 'stations_wiski.gpkg'))
stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
stations_wiski['source'] = 'wiski'
_stations_equis = gpd.read_file(str(_DATA_DIR / 'stations_EQUIS.gpkg'))
stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
stations_equis['source'] = 'equis'
stations_equis['wplmn_flag'] = 0  # EQuIS stations are never WPLMN sites


DB_PATH = str(_DATA_DIR / 'outlets.duckdb')

# Combined station/model lookup table used by all query helpers below:
# one row per (station_id, source), each carrying a comma-separated opnids string.
MODL_DB = pd.concat([stations_wiski, stations_equis])
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('', pd.NA)
MODL_DB = MODL_DB.dropna(subset='opnids')
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
33
+
34
def _reload():
    """Re-read the station geopackages and rebuild the module-level tables.

    Mirrors the module-level initialization; call after editing the
    data/*.gpkg files to refresh the cached lookup tables in place.
    """
    global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
    # BUG FIX: was 'data\\stations_wiski.gpkg' (Windows-only separator).
    data_dir = Path(__file__).resolve().parent / 'data'
    _stations_wiski = gpd.read_file(str(data_dir / 'stations_wiski.gpkg'))
    stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
    stations_wiski['source'] = 'wiski'
    _stations_equis = gpd.read_file(str(data_dir / 'stations_EQUIS.gpkg'))
    stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
    stations_equis['source'] = 'equis'
    stations_equis['wplmn_flag'] = 0

    MODL_DB = pd.concat([stations_wiski, stations_equis])
    MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('', pd.NA)
    MODL_DB = MODL_DB.dropna(subset='opnids')
    MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
48
+
49
+
50
def get_model_db(model_name: str):
    """Return the MODL_DB rows whose repository_name equals *model_name*."""
    return MODL_DB.query('repository_name == @model_name')
52
+
53
def split_opnids(opnids: list):
    """Flatten a list of opnid-token lists into absolute integer reach ids."""
    flat = []
    for tokens in opnids:
        for token in tokens:
            flat.append(abs(int(float(token))))
    return flat
55
+
56
def valid_models():
    """List every distinct repository (model) name present in MODL_DB."""
    return list(MODL_DB['repository_name'].unique())
58
+
59
def wplmn_station_opnids(model_name):
    """Absolute opnids of WPLMN-flagged WISKI stations in *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')
    return split_opnids(rows['opnids'].str.split(',').to_list())
62
+
63
def wiski_station_opnids(model_name):
    """Absolute opnids of all WISKI stations in *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name and source == "wiski"')
    return split_opnids(rows['opnids'].str.split(',').to_list())
66
+
67
def equis_station_opnids(model_name):
    """Absolute opnids of all EQuIS stations in *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name and source == "equis"')
    return split_opnids(rows['opnids'].str.split(',').to_list())
70
+
71
def station_opnids(model_name):
    """Absolute opnids of every station (any source) in *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name')
    return split_opnids(rows['opnids'].str.split(',').to_list())
74
+
75
def equis_stations(model_name):
    """Station ids sourced from EQuIS for *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name and source == "equis"')
    return list(rows['station_id'])
77
+
78
def wiski_stations(model_name):
    """Station ids sourced from WISKI for *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name and source == "wiski"')
    return list(rows['station_id'])
80
+
81
def wplmn_stations(model_name):
    """WISKI station ids flagged as WPLMN sites for *model_name*."""
    rows = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')
    return list(rows['station_id'])
83
+
84
def outlets(model_name):
    """One DataFrame per outlet: model rows grouped by identical opnids string."""
    grouped = MODL_DB.query('repository_name == @model_name').groupby(by=['opnids', 'repository_name'])
    return [grp for _, grp in grouped]
86
+
87
def outlet_stations(model_name):
    """Per-outlet lists of station ids (stations sharing an opnids string)."""
    grouped = MODL_DB.query('repository_name == @model_name').groupby(by=['opnids', 'repository_name'])
    return [grp['station_id'].to_list() for _, grp in grouped]
89
+
90
+ def _split_opnids(opnids: list):
91
+ return [int(float(j)) for i in opnids for j in i]
92
+
93
def connect(db_path, read_only=True):
    """Open a DuckDB connection, creating the parent directory if needed.

    NOTE(review): defaults to read-only — callers that insert or run DDL
    must pass read_only=False explicitly.
    """
    parent = Path(db_path).parent
    parent.mkdir(parents=True, exist_ok=True)
    return duckdb.connect(db_path, read_only=read_only)
96
+
97
+
98
def init_db(db_path: str, reset: bool = False):
    """
    Initialize the DuckDB database: create staging and analytics schemas.

    If *reset* is True, any existing database file is deleted first, and
    OUTLETS_SCHEMA is applied to a fresh database.
    """
    db_path = Path(db_path)
    if reset and db_path.exists():
        db_path.unlink()

    # BUG FIX: connect() defaults to read_only=True, so the DDL below (and
    # opening a brand-new database file) would fail; open writable instead.
    with connect(db_path.as_posix(), read_only=False) as con:
        con.execute(OUTLETS_SCHEMA)
108
+
109
+
110
+
111
+ # Accessors:
112
def get_outlets_by_model(model_name: str):
    """Return all station/reach pair rows for *model_name* as a DataFrame."""
    # NOTE(review): queries the schema-qualified view outlets.station_reach_pairs,
    # while OUTLETS_SCHEMA creates unqualified objects — confirm the deployed
    # database actually provides an 'outlets' schema.
    with connect(DB_PATH) as con:
        df = con.execute(
            """
            SELECT r.*
            FROM outlets.station_reach_pairs r
            WHERE r.repository_name = ?
            """,
            [model_name]
        ).fetchdf()
    return df
123
+
124
def get_outlets_by_reach(reach_id: int, model_name: str):
    """
    Return all outlet rows for outlets that include the given reach_id in the given model_name.
    """
    # Read-only connection (connect() default) is sufficient for SELECTs.
    with connect(DB_PATH) as con:
        df = con.execute(
            """
            SELECT r.*
            FROM outlets.station_reach_pairs r
            WHERE r.reach_id = ? AND r.repository_name = ?
            """,
            [reach_id, model_name]).fetchdf()
    return df
137
+
138
def get_outlets_by_station(station_id: str, station_origin: str):
    """
    Return all station/reach pair rows for the given station.

    Parameters
    ----------
    station_id : str
        Station identifier, e.g. 'E66050001'.
    station_origin : str
        Source system of the station id, e.g. 'wiski' or 'equis'.
    """
    # BUG FIX: docstring was copy-pasted from get_outlets_by_reach and
    # described parameters this function does not take.
    with connect(DB_PATH) as con:

        df = con.execute(
            """
            SELECT r.*
            FROM outlets.station_reach_pairs r
            WHERE r.station_id = ? AND r.station_origin = ?
            """,
            [station_id, station_origin]).fetchdf()
    return df
152
+
153
+
154
+
155
class OutletGateway:
    """Per-model facade over the module-level outlet query functions.

    Binds a model name once so callers don't repeat it; every method is a
    thin delegation to the identically-named module function.
    """
    def __init__(self, model_name: str):
        # model_name selects the repository; modl_db caches its MODL_DB slice.
        self.model_name = model_name
        self.db_path = DB_PATH
        self.modl_db = get_model_db(model_name)

    # Legacy methods to access functions
    def wplmn_station_opnids(self):
        return wplmn_station_opnids(self.model_name)

    def wiski_station_opnids(self):
        return wiski_station_opnids(self.model_name)

    def equis_station_opnids(self):
        return equis_station_opnids(self.model_name)

    def station_opnids(self):
        return station_opnids(self.model_name)

    def equis_stations(self):
        return equis_stations(self.model_name)

    def wiski_stations(self):
        return wiski_stations(self.model_name)

    def wplmn_stations(self):
        return wplmn_stations(self.model_name)

    def outlets(self):
        return outlets(self.model_name)

    def outlet_stations(self):
        return outlet_stations(self.model_name)

    # Accessors for outlets
    def get_outlets(self):
        return get_outlets_by_model(self.model_name)

    def get_outlets_by_reach(self, reach_id: int):
        return get_outlets_by_reach(reach_id, self.model_name)

    def get_outlets_by_station(self, station_id: str, station_origin: str):
        # NOTE(review): assert is stripped under `python -O`; raise ValueError
        # instead if this membership check must always run.
        assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
        return get_outlets_by_station(station_id, station_origin)
199
+
200
+ # constructors:
201
def build_outlet_db(db_path: str = None):
    """Create (or recreate) the outlets database and populate it from MODL_DB.

    Defaults to the packaged DB_PATH; any existing file is wiped first.
    """
    if db_path is None:
        db_path = DB_PATH
    init_db(db_path, reset=True)
    # BUG FIX: connect() defaults to read_only=True; the add_* INSERTs inside
    # build_outlets need a writable connection.
    with connect(db_path, read_only=False) as con:
        build_outlets(con)
207
+
208
+
209
def build_outlets(con, model_name: str = None):
    """Populate outlets, outlet_reaches and outlet_stations from MODL_DB.

    Groups stations by their (opnids, repository_name) pair — stations sharing
    the exact same opnids string form one outlet — and inserts, per group: one
    outlet row, one reach row per distinct opnid, and one station row per
    (station_id, source). When *model_name* is None the entire MODL_DB is used.
    Outlet ids are the sequential group index (integer).
    """
    if model_name is not None:
        modl_db = get_model_db(model_name)
    else:
        modl_db = MODL_DB

    for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
        repo_name = group['repository_name'].iloc[0]
        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)

        # Signed opnids: negative values mark reaches excluded from views.
        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))

        for opnid in opnids:
            if opnid < 0:
                exclude = 1
            else:
                exclude = 0
            add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)

        for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
230
+
231
+
232
def create_outlet_schema(con, model_name : str):
    """Insert outlets, reaches and station memberships for one model.

    BUG FIX: the previous body iterated ``modl_db.outlets(model_name)`` —
    ``modl_db`` is undefined in this scope (NameError), and the module-level
    ``outlets()`` returns a list of plain DataFrames, not (key, group) tuples,
    so the old tuple-unpacking would also have failed.
    """
    for index, group in enumerate(outlets(model_name)):
        repo_name = group['repository_name'].iloc[0]
        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)

        # Signed opnids: negative values mark reaches excluded from views.
        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))

        for opnid in opnids:
            if opnid < 0:
                exclude = 1
            else:
                exclude = 0
            add_reach(con, outlet_id = index, reach_id = abs(opnid), exclude = exclude, repository_name = repo_name)

        for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name = repo_name, comments = row['comments'])
248
+
249
+
250
def add_outlet(con,
               outlet_id: str,
               repository_name: str,
               outlet_name = None,
               notes = None):
    """Insert one row into outlets.outlets; repository_name is mandatory."""
    sql = "INSERT INTO outlets.outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)"
    params = [outlet_id, repository_name, outlet_name, notes]
    con.execute(sql, params)
262
+
263
def add_station(con,
                outlet_id: str,
                station_id: str,
                station_origin: str,
                true_opnid: str,
                repository_name: str,
                comments = None):
    """Insert a station membership row for an outlet.

    Schema constraints: (station_id, station_origin) is unique across all
    outlets; true_opnid and repository_name are required columns.
    """
    sql = """INSERT INTO outlets.outlet_stations
             (outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
             VALUES (?, ?, ?, ?, ?, ?)"""
    params = [outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
    con.execute(sql, params)
282
+
283
+ def add_reach(con,
284
+ outlet_id: str,
285
+ reach_id: str,
286
+ repository_name: str,
287
+ exclude: int = 0):
288
+ """
289
+ Insert a reach membership for an outlet.
290
+ - repository_name is required and participates in the PK (reach_id, repository_name).
291
+ - exclude = 1 to mark a reach as excluded from association views.
292
+ """
293
+ con.execute(
294
+ """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name, exclude)
295
+ VALUES (?, ?, ?, ?)""",
296
+ [outlet_id, reach_id, repository_name, int(exclude)]
297
+ )
298
+
299
+
300
OUTLETS_SCHEMA = """-- schema.sql
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
-- Compatible with DuckDB and SQLite.
-- NOTE(review): objects are created unqualified here, but the accessor
-- functions query them as outlets.<name> -- confirm the target database
-- provides an 'outlets' schema (or an attached database of that name).

-- Table 1: outlets
-- Represents a logical grouping that ties stations and reaches together.
CREATE TABLE IF NOT EXISTS outlets (
    outlet_id TEXT PRIMARY KEY,
    repository_name TEXT NOT NULL,
    outlet_name TEXT,
    notes TEXT -- optional: general notes about the outlet grouping
);

-- Table 2: outlet_stations
-- One-to-many: outlet -> stations
CREATE TABLE IF NOT EXISTS outlet_stations (
    outlet_id TEXT NOT NULL,
    station_id TEXT NOT NULL,
    station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
    repository_name TEXT NOT NULL, -- repository model the station is physically located in
    true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
    comments TEXT, -- Per-station comments, issues, etc.
    CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
    FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Table 3: outlet_reaches
-- One-to-many: outlet -> reaches
-- A reach can appear in multiple outlets, enabling many-to-many overall.
CREATE TABLE IF NOT EXISTS outlet_reaches (
    outlet_id TEXT NOT NULL,
    reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
    repository_name TEXT NOT NULL, -- optional: where the mapping comes from
    exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
    FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Useful views:

-- View: station_reach_pairs
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
-- BUG FIX: removed the trailing comma after r.repository_name, which is a
-- syntax error in SQLite (the file claims SQLite compatibility).
CREATE VIEW IF NOT EXISTS station_reach_pairs AS
SELECT
    s.outlet_id,
    s.station_id,
    s.station_origin,
    r.reach_id,
    r.exclude,
    r.repository_name
FROM outlet_stations s
JOIN outlet_reaches r
    ON s.outlet_id = r.outlet_id;

"""
354
+
355
+ #row = modl_db.MODL_DB.iloc[0]
356
+
357
+ #info = etlWISKI.info(row['station_id'])
358
+
359
+ #modl_db.MODL_DB.query('source == "equis"')
360
+
361
+ # outlet_dict = {'stations': {'wiski': ['E66050001'],
362
+ # 'equis': ['S002-118']},
363
+ # 'reaches': {'Clearwater': [650]}
364
+
365
+
366
+
367
+
368
+ # station_ids = ['S002-118']
369
+ # #station_ids = ['E66050001']
370
+ # reach_ids = [650]
371
+ # flow_station_ids = ['E66050001']