mpcaHydro 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
mpcaHydro/data_manager.py CHANGED
@@ -5,6 +5,7 @@ Created on Fri Jun 3 10:01:14 2022
5
5
  @author: mfratki
6
6
  """
7
7
 
8
+ from copy import replace
8
9
  import pandas as pd
9
10
  #from abc import abstractmethod
10
11
  from pathlib import Path
@@ -64,88 +65,115 @@ def constituent_summary(db_path):
64
65
  return res.fetch_df()
65
66
 
66
67
 
68
+
69
+
67
70
  class dataManager():
68
71
 
69
- def __init__(self,folderpath, oracle_user = None, oracle_password =None):
72
+ def __init__(self,folderpath, oracle_username = None, oracle_password =None, reset = False):
70
73
 
71
74
  self.data = {}
72
75
  self.folderpath = Path(folderpath)
73
76
  self.db_path = self.folderpath.joinpath('observations.duckdb')
74
-
75
- self.oracle_user = oracle_user
77
+ self.oracle_username = oracle_username
76
78
  self.oracle_password = oracle_password
77
- warehouse.init_db(self.db_path,reset = False)
78
- self.xref = xref
79
- self.outlets = outlets
79
+
80
+ if not self.db_path.exists() or reset:
81
+ self._build_warehouse()
82
+
83
+ self.xref = xref #TODO: implement xref manager class
84
+ self.outlets = outlets #TODO: implement outlets manager class
80
85
  self.reports = reportManager(self.db_path)
81
86
 
82
87
 
83
88
  def connect_to_oracle(self):
84
89
  assert (self.credentials_exist(), 'Oracle credentials not found. Set ORACLE_USER and ORACLE_PASSWORD environment variables or use swd as station_origin')
85
- equis.connect(user = self.oracle_user, password = self.oracle_password)
90
+ equis.connect(user = self.oracle_username, password = self.oracle_password)
86
91
 
87
92
  def credentials_exist(self):
88
- if (self.oracle_user is not None) & (self.oracle_password is not None):
93
+ if (self.oracle_username is not None) & (self.oracle_password is not None):
89
94
  return True
90
95
  else:
91
96
  return False
92
97
 
93
98
  def _build_warehouse(self):
94
- build_warehouse(self.folderpath)
99
+ warehouse.init_db(self.db_path.as_posix(),True)
95
100
 
96
- def download_station_data(self,station_id,station_origin,overwrite=True,to_csv = False,filter_qc_codes = True, start_year = 1996, end_year = 2030,baseflow_method = 'Boughton'):
97
- '''
98
- Method to download data for a specific station and load it into the warehouse.
99
-
100
- :param self: Description
101
- :param station_id: Station identifier
102
- :param station_origin: source of station data: wiski, equis, or swd
103
- :param overwrite: Whether to overwrite existing data
104
- :param to_csv: Whether to export data to CSV
105
- :param filter_qc_codes: Whether to filter quality control codes
106
- :param start_year: Start year for data download
107
- :param end_year: End year for data download
108
- :param baseflow_method: Method for baseflow calculation
109
- '''
110
- with duckdb.connect(self.db_path,read_only=False) as con:
111
- if overwrite:
112
- warehouse.drop_station_id(con,station_id,station_origin)
113
- warehouse.update_views(con)
101
+ def _process_wiski_data(self,filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
102
+ with warehouse.connect(self.db_path,read_only = False) as con:
103
+ df = con.execute("SELECT * FROM staging.wiski").df()
104
+ df_transformed = wiski.transform(df, filter_qc_codes, data_codes, baseflow_method)
105
+ warehouse.load_df_to_table(con,df_transformed, 'analytics.wiski')
106
+ warehouse.update_views(con)
114
107
 
115
- if station_origin == 'wiski':
116
- df = wiski.download([station_id],start_year = start_year, end_year = end_year)
117
- warehouse.load_df_to_staging(con,df, 'wiski_raw', replace = overwrite)
118
- warehouse.load_df_to_analytics(con,wiski.transform(df,filter_qc_codes = filter_qc_codes,baseflow_method = baseflow_method),'wiski') # method includes normalization
119
-
120
- elif station_origin == 'equis':
121
- assert (self.credentials_exist(), 'Oracle credentials not found. Set ORACLE_USER and ORACLE_PASSWORD environment variables or use swd as station_origin')
122
- df = equis.download([station_id])
123
- warehouse.load_df_to_staging(con,df, 'equis_raw',replace = overwrite)
124
- warehouse.load_df_to_analytics(con,equis.transform(df),'equis')
125
-
126
- elif station_origin == 'swd':
127
- df = etlSWD.download(station_id)
128
- warehouse.load_df_to_staging(con,df, 'swd_raw', replace = overwrite)
129
- warehouse.load_df_to_analytics(con,etlSWD.transform(df),'swd')
130
- else:
131
- raise ValueError('station_origin must be wiski, equis, or swd')
132
-
133
- with duckdb.connect(self.db_path,read_only=False) as con:
108
+ def _process_equis_data(self):
109
+ with warehouse.connect(self.db_path,read_only = False) as con:
110
+ df = con.execute("SELECT * FROM staging.equis").df()
111
+ df_transformed = equis.transform(df)
112
+ warehouse.load_df_to_table(con,df_transformed, 'analytics.equis')
134
113
  warehouse.update_views(con)
135
114
 
136
- if to_csv:
137
- self.to_csv(station_id)
138
-
115
+ def _process_data(self,filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
116
+ self._process_wiski_data(filter_qc_codes, data_codes, baseflow_method)
117
+ self._process_equis_data()
118
+
119
+ def _update_views(self):
120
+ with warehouse.connect(self.db_path,read_only = False) as con:
121
+ warehouse.update_views(con)
122
+
123
+ def _download_wiski_data(self,station_ids,start_year = 1996, end_year = 2030, filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
124
+ with warehouse.connect(self.db_path,read_only = False) as con:
125
+ df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
126
+ if not df.empty:
127
+ warehouse.load_df_to_table(con,df, 'staging.wiski')
128
+ warehouse.load_df_to_table(con,wiski.transform(df, filter_qc_codes,data_codes,baseflow_method), 'analytics.wiski')
129
+ warehouse.update_views(con)
130
+ else:
131
+ print('No data neccesary for HSPF calibration available from wiski for stations:',station_ids)
132
+
133
+ def _download_equis_data(self,station_ids):
134
+ if self.credentials_exist():
135
+ self.connect_to_oracle()
136
+ print('Connected to Oracle database.')
137
+ with warehouse.connect(self.db_path,read_only = False) as con:
138
+ df = equis.download(station_ids)
139
+ if not df.empty:
140
+ warehouse.load_df_to_table(con,df, 'staging.equis')
141
+ warehouse.load_df_to_table(con,equis.transform(df.copy()), 'analytics.equis')
142
+ warehouse.update_views(con)
143
+ else:
144
+ print('No data neccesary for HSPF calibration available from equis for stations:',station_ids)
145
+ else:
146
+ raise ValueError('Oracle credentials not found. Set ORACLE_USER and ORACLE_PASSWORD environment variables or use swd as station_origin')
147
+
148
+
149
+ def _get_equis_template(self):
150
+ with duckdb.connect(self.db_path,read_only=True) as con:
151
+ query = '''
152
+ SELECT *
153
+ FROM staging.equis
154
+ LIMIT 0'''
155
+ df = con.execute(query).fetch_df().to_csv(self.folderpath.joinpath('equis_template.csv'), index=False)
139
156
  return df
140
157
 
141
- def get_outlets(self):
158
+ def _get_wiski_template(self):
159
+ with duckdb.connect(self.db_path,read_only=True) as con:
160
+ query = '''
161
+ SELECT *
162
+ FROM staging.wiski
163
+ LIMIT 0'''
164
+ df = con.execute(query).fetch_df().to_csv(self.folderpath.joinpath('wiski_template.csv'), index=False)
165
+ return df
166
+
167
+ def get_outlets(self,model_name):
142
168
  with duckdb.connect(self.db_path,read_only=True) as con:
143
169
  query = '''
144
170
  SELECT *
145
171
  FROM outlets.station_reach_pairs
172
+ WHERE repository_name = ?
146
173
  ORDER BY outlet_id'''
147
- df = con.execute(query).fetch_df()
174
+ df = con.execute(query,[model_name]).fetch_df()
148
175
  return df
176
+
149
177
  def get_station_ids(self,station_origin = None):
150
178
  with duckdb.connect(self.db_path,read_only=True) as con:
151
179
  if station_origin is None:
@@ -163,9 +191,7 @@ class dataManager():
163
191
  return df['station_id'].to_list()
164
192
 
165
193
 
166
- def get_station_data(self,station_ids,constituent,agg_period = None):
167
-
168
-
194
+ def get_observation_data(self,station_ids,constituent,agg_period = None):
169
195
  with duckdb.connect(self.db_path,read_only=True) as con:
170
196
  query = '''
171
197
  SELECT *
@@ -184,9 +210,9 @@ class dataManager():
184
210
  df.attrs['agg_period'] = agg_period
185
211
 
186
212
  df.rename(columns={'value': 'observed'}, inplace=True)
187
- return df
213
+ return df.dropna(subset=['observed'])
188
214
 
189
- def get_outlet_data(self,outlet_id,constituent,agg_period = 'D'):
215
+ def get_outlet_data(self,outlet_id,constituent,agg_period = 'D',to_csv = False):
190
216
  with duckdb.connect(self.db_path,read_only=True) as con:
191
217
  query = '''
192
218
  SELECT *
@@ -207,16 +233,35 @@ class dataManager():
207
233
  df.rename(columns={'value': 'observed',
208
234
  'flow_value': 'observed_flow',
209
235
  'baseflow_value': 'observed_baseflow'}, inplace=True)
210
- return df
211
-
236
+ return df.dropna(subset=['observed'])
212
237
 
238
+ def get_raw_data(self,station_id,station_origin, to_csv = False):
239
+ with duckdb.connect(self.db_path,read_only=True) as con:
240
+ if station_origin.lower() == 'equis':
241
+ query = '''
242
+ SELECT *
243
+ FROM staging.equis_raw
244
+ WHERE station_id = ?'''
245
+ elif station_origin.lower() == 'wiski':
246
+ query = '''
247
+ SELECT *
248
+ FROM staging.wiski_raw
249
+ WHERE station_id = ?'''
250
+ else:
251
+ raise ValueError(f'Station origin {station_origin} not recognized. Valid options are equis or wiski.')
252
+
253
+ df = con.execute(query,[station_id]).fetch_df()
254
+
255
+ if to_csv:
256
+ df.to_csv(self.folderpath.joinpath(f'{station_id}_raw.csv'), index=False)
257
+ return df
213
258
 
214
- def to_csv(self,station_id,folderpath = None):
259
+ def to_csv(self,station_id ,station_origin,folderpath = None):
215
260
  if folderpath is None:
216
261
  folderpath = self.folderpath
217
262
  else:
218
263
  folderpath = Path(folderpath)
219
- df = self._load(station_id)
264
+ df = self.get_station_data([station_id],constituent = 'Q',agg_period = None)
220
265
  if len(df) > 0:
221
266
  df.to_csv(folderpath.joinpath(station_id + '.csv'))
222
267
  else:
mpcaHydro/etlSWD.py CHANGED
@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
26
26
  # return df
27
27
  import requests
28
28
 
29
- def _download(station_no):
29
+ def _download(station_id):
30
30
  # Replace {station_no} in the URL with the actual station number
31
- url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
32
-
31
+ #url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
32
+ url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
33
+
33
34
  try:
34
35
  # Send a GET request to the URL
35
- response = requests.get(url)
36
+ params = {
37
+ 'stationId': station_id,
38
+ 'format': 'json'
39
+ }
40
+ response = requests.get(url,params = params)
36
41
  response.raise_for_status() # Raise exception for HTTP errors
37
42
  # Parse the JSON data
38
- if response.json()['recordCount'] == 0:
39
- return pd.DataFrame(columns = response.json()['column_names'])
40
- else:
41
- return pd.DataFrame(response.json()['data'])
43
+ return pd.DataFrame(response.json()['data'])
42
44
 
43
45
  except requests.exceptions.RequestException as e:
44
46
  print(f"An error occurred: {e}")
@@ -46,14 +48,18 @@ def _download(station_no):
46
48
 
47
49
 
48
50
 
49
- def download(station_no):
51
+ def download(station_ids):
50
52
  #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
51
- df = _download(station_no)
52
- if df.empty:
53
- return df
54
- else:
55
- df['station_id'] = station_no
56
- return transform(df)
53
+ dfs = []
54
+ for station_id in station_ids:
55
+ df = _download(station_id)
56
+ if not df.empty:
57
+ df['station_id'] = station_id
58
+ dfs.append(df)
59
+
60
+ return pd.concat(dfs, ignore_index=True)
61
+
62
+
57
63
 
58
64
  def info(station_no):
59
65
  #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
mpcaHydro/outlets.py CHANGED
@@ -14,17 +14,18 @@ import duckdb
14
14
 
15
15
  #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
16
16
 
17
-
18
17
  _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
19
- stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
18
+ stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
20
19
  stations_wiski['source'] = 'wiski'
21
20
  _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
22
- stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
21
+ stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
23
22
  stations_equis['source'] = 'equis'
24
23
  stations_equis['wplmn_flag'] = 0
25
24
 
26
25
 
27
- DB_PATH = str(Path(__file__).resolve().parent/'data\\outlets.duckdb')
26
+
27
+
28
+ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
28
29
 
29
30
  MODL_DB = pd.concat([stations_wiski,stations_equis])
30
31
  MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
@@ -34,64 +35,69 @@ MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True
34
35
  def _reload():
35
36
  global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
36
37
  _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
37
- stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
38
+ stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
38
39
  stations_wiski['source'] = 'wiski'
39
40
  _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
40
- stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
41
+ stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
41
42
  stations_equis['source'] = 'equis'
42
43
  stations_equis['wplmn_flag'] = 0
43
44
 
45
+
44
46
  MODL_DB = pd.concat([stations_wiski,stations_equis])
45
47
  MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
46
48
  MODL_DB = MODL_DB.dropna(subset='opnids')
47
49
  MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
48
50
 
49
51
 
50
- def get_model_db(model_name: str):
51
- return MODL_DB.query('repository_name == @model_name')
52
-
53
52
  def split_opnids(opnids: list):
54
- return [abs(int(float(j))) for i in opnids for j in i]
53
+ return [int(float(j)) for i in opnids for j in i]
54
+
55
+ def get_model_db(model_name: str):
56
+ return MODL_DB.query('repo_name == @model_name')
55
57
 
56
58
  def valid_models():
57
- return MODL_DB['repository_name'].unique().tolist()
59
+ return MODL_DB['repo_name'].unique().tolist()
60
+
61
+ def equis_stations(model_name):
62
+ return _stations_equis.query('repo_name == @model_name')['station_id'].tolist()
63
+
64
+ def wiski_stations(model_name):
65
+ return _stations_wiski.query('repo_name == @model_name')['station_id'].tolist()
66
+
67
+ def wplmn_stations(model_name):
68
+ return MODL_DB.query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
58
69
 
59
70
  def wplmn_station_opnids(model_name):
60
- opnids = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
71
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
61
72
  return split_opnids(opnids)
62
73
 
63
74
  def wiski_station_opnids(model_name):
64
- opnids = MODL_DB.query('repository_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
75
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
65
76
  return split_opnids(opnids)
66
77
 
67
78
  def equis_station_opnids(model_name):
68
- opnids = MODL_DB.query('repository_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
79
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
69
80
  return split_opnids(opnids)
70
81
 
71
82
  def station_opnids(model_name):
72
- opnids = MODL_DB.query('repository_name == @model_name')['opnids'].str.split(',').to_list()
83
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name')['opnids'].str.split(',').to_list()
73
84
  return split_opnids(opnids)
74
85
 
75
- def equis_stations(model_name):
76
- return MODL_DB.query('repository_name == @model_name and source == "equis"')['station_id'].tolist()
77
-
78
- def wiski_stations(model_name):
79
- return MODL_DB.query('repository_name == @model_name and source == "wiski"')['station_id'].tolist()
86
+ def mapped_equis_stations(model_name):
87
+ return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['station_id'].tolist()
80
88
 
81
- def wplmn_stations(model_name):
82
- return MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
89
+ def mapped_wiski_stations(model_name):
90
+ return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['station_id'].tolist()
83
91
 
84
92
  def outlets(model_name):
85
- return [group for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
93
+ return [group for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
86
94
 
87
95
  def outlet_stations(model_name):
88
- return [group['station_id'].to_list() for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
96
+ return [group['station_id'].to_list() for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
89
97
 
90
- def _split_opnids(opnids: list):
91
- return [int(float(j)) for i in opnids for j in i]
92
98
 
93
99
  def connect(db_path, read_only=True):
94
- Path(db_path).parent.mkdir(parents=True, exist_ok=True)
100
+ #Path(db_path).parent.mkdir(parents=True, exist_ok=True)
95
101
  return duckdb.connect(db_path,read_only=read_only)
96
102
 
97
103
 
@@ -103,7 +109,7 @@ def init_db(db_path: str,reset: bool = False):
103
109
  if reset and db_path.exists():
104
110
  db_path.unlink()
105
111
 
106
- with connect(db_path.as_posix()) as con:
112
+ with connect(db_path.as_posix(),False) as con:
107
113
  con.execute(OUTLETS_SCHEMA)
108
114
 
109
115
 
@@ -202,7 +208,7 @@ def build_outlet_db(db_path: str = None):
202
208
  if db_path is None:
203
209
  db_path = DB_PATH
204
210
  init_db(db_path,reset=True)
205
- with connect(db_path) as con:
211
+ with connect(db_path,False) as con:
206
212
  build_outlets(con)
207
213
 
208
214
 
@@ -212,43 +218,35 @@ def build_outlets(con, model_name: str = None):
212
218
  else:
213
219
  modl_db = MODL_DB
214
220
 
215
- for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
216
- repo_name = group['repository_name'].iloc[0]
221
+ for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repo_name'])):
222
+ repo_name = group['repo_name'].iloc[0]
217
223
  add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
218
224
 
219
- opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
225
+ opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
220
226
 
221
227
  for opnid in opnids:
222
- if opnid < 0:
223
- exclude = 1
224
- else:
225
- exclude = 0
226
- add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
228
+ add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
227
229
 
228
230
  for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
229
231
  add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
230
232
 
231
233
 
232
234
  def create_outlet_schema(con, model_name : str):
233
- for index, (_, group) in enumerate(modl_db.outlets(model_name)):
234
- repo_name = group['repository_name'].iloc[0]
235
+ for index, (_, group) in enumerate(outlets(model_name)):
236
+ repo_name = group['repo_name'].iloc[0]
235
237
  add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
236
238
 
237
- opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
239
+ opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
238
240
 
239
241
  for opnid in opnids:
240
- if opnid < 0:
241
- exclude = 1
242
- else:
243
- exclude = 0
244
- add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
242
+ add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
245
243
 
246
244
  for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
247
245
  add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
248
246
 
249
247
 
250
248
  def add_outlet(con,
251
- outlet_id: str,
249
+ outlet_id: int,
252
250
  repository_name: str,
253
251
  outlet_name = None,
254
252
  notes = None):
@@ -256,15 +254,15 @@ def add_outlet(con,
256
254
  Insert an outlet. repository_name is required.
257
255
  """
258
256
  con.execute(
259
- "INSERT INTO outlets.outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
257
+ "INSERT INTO outlets.outlet_groups (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
260
258
  [outlet_id, repository_name, outlet_name, notes]
261
259
  )
262
260
 
263
261
  def add_station(con,
264
- outlet_id: str,
265
- station_id: str,
262
+ outlet_id: int,
263
+ station_id: int,
266
264
  station_origin: str,
267
- true_opnid: str,
265
+ true_opnid: int,
268
266
  repository_name: str,
269
267
  comments = None):
270
268
  """
@@ -281,19 +279,17 @@ def add_station(con,
281
279
  )
282
280
 
283
281
  def add_reach(con,
284
- outlet_id: str,
285
- reach_id: str,
286
- repository_name: str,
287
- exclude: int = 0):
282
+ outlet_id: int,
283
+ reach_id: int,
284
+ repository_name: str):
288
285
  """
289
286
  Insert a reach membership for an outlet.
290
287
  - repository_name is required and participates in the PK (reach_id, repository_name).
291
- - exclude = 1 to mark a reach as excluded from association views.
292
288
  """
293
289
  con.execute(
294
- """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name, exclude)
295
- VALUES (?, ?, ?, ?)""",
296
- [outlet_id, reach_id, repository_name, int(exclude)]
290
+ """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name)
291
+ VALUES (?, ?, ?)""",
292
+ [outlet_id, reach_id, repository_name]
297
293
  )
298
294
 
299
295
 
@@ -303,8 +299,10 @@ OUTLETS_SCHEMA = """-- schema.sql
303
299
 
304
300
  -- Table 1: outlets
305
301
  -- Represents a logical grouping that ties stations and reaches together.
306
- CREATE TABLE IF NOT EXISTS outlets (
307
- outlet_id TEXT PRIMARY KEY,
302
+ CREATE SCHEMA IF NOT EXISTS outlets;
303
+
304
+ CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
305
+ outlet_id INTEGER PRIMARY KEY,
308
306
  repository_name TEXT NOT NULL,
309
307
  outlet_name TEXT,
310
308
  notes TEXT -- optional: general notes about the outlet grouping
@@ -312,42 +310,40 @@ CREATE TABLE IF NOT EXISTS outlets (
312
310
 
313
311
  -- Table 2: outlet_stations
314
312
  -- One-to-many: outlet -> stations
315
- CREATE TABLE IF NOT EXISTS outlet_stations (
316
- outlet_id TEXT NOT NULL,
313
+ CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
314
+ outlet_id INTEGER NOT NULL,
317
315
  station_id TEXT NOT NULL,
318
316
  station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
319
317
  repository_name TEXT NOT NULL, -- repository model the station is physically located in
320
- true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
318
+ true_opnid INTEGER NOT NULL, -- The specific reach the station physically sits on (optional)
321
319
  comments TEXT, -- Per-station comments, issues, etc.
322
320
  CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
323
- FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
321
+ FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
324
322
  );
325
323
 
326
324
  -- Table 3: outlet_reaches
327
325
  -- One-to-many: outlet -> reaches
328
326
  -- A reach can appear in multiple outlets, enabling many-to-many overall.
329
- CREATE TABLE IF NOT EXISTS outlet_reaches (
330
- outlet_id TEXT NOT NULL,
331
- reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
327
+ CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
328
+ outlet_id INTEGER NOT NULL,
329
+ reach_id INTEGER NOT NULL, -- model reach identifier (aka opind)
332
330
  repository_name TEXT NOT NULL, -- optional: where the mapping comes from
333
- exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
334
- FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
331
+ FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
335
332
  );
336
333
 
337
334
  -- Useful views:
338
335
 
339
336
  -- View: station_reach_pairs
340
337
  -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
341
- CREATE VIEW IF NOT EXISTS station_reach_pairs AS
338
+ CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
342
339
  SELECT
343
340
  s.outlet_id,
344
341
  s.station_id,
345
342
  s.station_origin,
346
343
  r.reach_id,
347
- r.exclude,
348
- r.repository_name,
349
- FROM outlet_stations s
350
- JOIN outlet_reaches r
344
+ r.repository_name
345
+ FROM outlets.outlet_stations AS s
346
+ JOIN outlets.outlet_reaches AS r
351
347
  ON s.outlet_id = r.outlet_id;
352
348
 
353
349
  """
mpcaHydro/reports.py CHANGED
@@ -43,7 +43,7 @@ def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
43
43
  query = '''
44
44
  SELECT *,
45
45
  FROM
46
- staging.wiski_qc_count
46
+ reports.wiski_qc_count
47
47
  ORDER BY
48
48
  station_no,
49
49
  parametertype_name