mpcaHydro 2.1.0__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mpcaHydro/data/WISKI_QUALITY_CODES.csv +71 -0
- mpcaHydro/data/outlet.duckdb +0 -0
- mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcaHydro/data_manager.py +172 -292
- mpcaHydro/equis.py +31 -22
- mpcaHydro/etlSWD.py +21 -15
- mpcaHydro/outlets.py +367 -0
- mpcaHydro/reports.py +80 -0
- mpcaHydro/warehouse.py +525 -17
- mpcaHydro/warehouseManager.py +55 -0
- mpcaHydro/{WISKI.py → wiski.py} +97 -17
- mpcaHydro/xref.py +74 -0
- {mpcahydro-2.1.0.dist-info → mpcahydro-2.2.1.dist-info}/METADATA +3 -1
- mpcahydro-2.2.1.dist-info/RECORD +23 -0
- mpcahydro-2.1.0.dist-info/RECORD +0 -15
- {mpcahydro-2.1.0.dist-info → mpcahydro-2.2.1.dist-info}/WHEEL +0 -0
mpcaHydro/equis.py
CHANGED
|
@@ -164,26 +164,25 @@ def as_utc_offset(naive_dt: Union[datetime, str], tz_label: str, target_offset:
|
|
|
164
164
|
aware_src = naive.replace(tzinfo=src_tz)
|
|
165
165
|
|
|
166
166
|
# convert the instant to fixed UTC-6
|
|
167
|
-
return aware_src.astimezone(target_offset)
|
|
167
|
+
return aware_src.astimezone(target_offset).tz_localize(None)
|
|
168
168
|
|
|
169
169
|
|
|
170
170
|
def normalize_columns(df):
|
|
171
171
|
'''Select relevant columns from Equis data.'''
|
|
172
172
|
return df[['SYS_LOC_CODE',
|
|
173
|
+
'constituent',
|
|
174
|
+
'CAS_RN',
|
|
173
175
|
'datetime',
|
|
174
176
|
'RESULT_NUMERIC',
|
|
175
177
|
'RESULT_UNIT',
|
|
176
|
-
'constituent'
|
|
177
178
|
]].rename(columns={
|
|
178
179
|
'SYS_LOC_CODE':'station_id',
|
|
179
180
|
'RESULT_NUMERIC':'value',
|
|
180
|
-
'RESULT_UNIT':'unit'
|
|
181
|
+
'RESULT_UNIT':'unit',
|
|
182
|
+
'CAS_RN':'cas_rn'
|
|
181
183
|
})
|
|
182
184
|
|
|
183
|
-
|
|
184
|
-
'''Replace non-detect results with 0 in Equis data.'''
|
|
185
|
-
df.loc[df['RESULT_NUMERIC'].isna(), 'RESULT_NUMERIC'] = 0
|
|
186
|
-
return df
|
|
185
|
+
|
|
187
186
|
|
|
188
187
|
def normalize_timezone(df):
|
|
189
188
|
'''Normalize datetime to UTC in Equis data.'''
|
|
@@ -194,27 +193,27 @@ def normalize_timezone(df):
|
|
|
194
193
|
except Exception:
|
|
195
194
|
return pd.NaT
|
|
196
195
|
|
|
197
|
-
df['datetime'] = df.apply(_conv, axis=1)
|
|
196
|
+
df.loc[:,'datetime'] = df.apply(_conv, axis=1)
|
|
198
197
|
return df
|
|
199
198
|
|
|
200
199
|
def convert_units(df):
|
|
201
200
|
'''Convert units in Equis data to standard units.'''
|
|
202
201
|
# Convert ug/L to mg/L
|
|
203
|
-
df['
|
|
202
|
+
df['unit'] = df['unit'].str.lower()
|
|
204
203
|
|
|
205
|
-
mask_ugL = df['
|
|
206
|
-
df.loc[mask_ugL, '
|
|
207
|
-
df.loc[mask_ugL, '
|
|
204
|
+
mask_ugL = df['unit'] == 'ug/l'
|
|
205
|
+
df.loc[mask_ugL, 'value'] = df.loc[mask_ugL, 'value'] / 1000
|
|
206
|
+
df.loc[mask_ugL, 'unit'] = 'mg/l'
|
|
208
207
|
|
|
209
208
|
# Convert mg/g to mg/L (assuming density of 1 g/mL)
|
|
210
|
-
mask_mgg = df['
|
|
211
|
-
df.loc[mask_mgg, '
|
|
212
|
-
df.loc[mask_mgg, '
|
|
209
|
+
mask_mgg = df['unit'] == 'mg/g'
|
|
210
|
+
df.loc[mask_mgg, 'value'] = df.loc[mask_mgg, 'value'] * 1000
|
|
211
|
+
df.loc[mask_mgg, 'unit'] = 'mg/l'
|
|
213
212
|
|
|
214
213
|
# Convert deg C to degF
|
|
215
|
-
mask_degC = df['
|
|
216
|
-
df.loc[mask_degC, '
|
|
217
|
-
df.loc[mask_degC, '
|
|
214
|
+
mask_degC = df['unit'].isin(['deg c', 'degc'])
|
|
215
|
+
df.loc[mask_degC, 'value'] = (df.loc[mask_degC, 'value'] * 9/5) + 32
|
|
216
|
+
df.loc[mask_degC, 'unit'] = 'degf'
|
|
218
217
|
|
|
219
218
|
return df
|
|
220
219
|
|
|
@@ -232,15 +231,25 @@ def average_results(df):
|
|
|
232
231
|
value=('value', 'mean')
|
|
233
232
|
).reset_index()
|
|
234
233
|
|
|
234
|
+
def replace_nondetects(df):
|
|
235
|
+
'''Replace non-detect results with 0 in Equis data.'''
|
|
236
|
+
df.loc[df['value'].isna(), 'value'] = 0
|
|
237
|
+
return df
|
|
238
|
+
|
|
239
|
+
def normalize(df):
|
|
240
|
+
'''Normalize Equis data: select relevant columns.'''
|
|
241
|
+
df = map_constituents(df)
|
|
242
|
+
df = normalize_timezone(df)
|
|
243
|
+
df = normalize_columns(df)
|
|
244
|
+
df = convert_units(df)
|
|
245
|
+
return df
|
|
246
|
+
|
|
235
247
|
def transform(df):
|
|
236
248
|
'''Transform Equis data: handle non-detects, convert units, map constituents.'''
|
|
237
249
|
|
|
250
|
+
df = normalize(df)
|
|
238
251
|
df = replace_nondetects(df)
|
|
239
252
|
if not df.empty:
|
|
240
|
-
df = normalize_timezone(df)
|
|
241
|
-
df = convert_units(df)
|
|
242
|
-
df = map_constituents(df)
|
|
243
|
-
df = normalize_columns(df)
|
|
244
253
|
df = average_results(df)
|
|
245
254
|
return df
|
|
246
255
|
|
mpcaHydro/etlSWD.py
CHANGED
|
@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
|
|
|
26
26
|
# return df
|
|
27
27
|
import requests
|
|
28
28
|
|
|
29
|
-
def _download(
|
|
29
|
+
def _download(station_id):
|
|
30
30
|
# Replace {station_no} in the URL with the actual station number
|
|
31
|
-
url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
|
|
32
|
-
|
|
31
|
+
#url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
|
|
32
|
+
url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
|
|
33
|
+
|
|
33
34
|
try:
|
|
34
35
|
# Send a GET request to the URL
|
|
35
|
-
|
|
36
|
+
params = {
|
|
37
|
+
'stationId': station_id,
|
|
38
|
+
'format': 'json'
|
|
39
|
+
}
|
|
40
|
+
response = requests.get(url,params = params)
|
|
36
41
|
response.raise_for_status() # Raise exception for HTTP errors
|
|
37
42
|
# Parse the JSON data
|
|
38
|
-
|
|
39
|
-
return pd.DataFrame(columns = response.json()['column_names'])
|
|
40
|
-
else:
|
|
41
|
-
return pd.DataFrame(response.json()['data'])
|
|
43
|
+
return pd.DataFrame(response.json()['data'])
|
|
42
44
|
|
|
43
45
|
except requests.exceptions.RequestException as e:
|
|
44
46
|
print(f"An error occurred: {e}")
|
|
@@ -46,14 +48,18 @@ def _download(station_no):
|
|
|
46
48
|
|
|
47
49
|
|
|
48
50
|
|
|
49
|
-
def download(
|
|
51
|
+
def download(station_ids):
|
|
50
52
|
#df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
dfs = []
|
|
54
|
+
for station_id in station_ids:
|
|
55
|
+
df = _download(station_id)
|
|
56
|
+
if not df.empty:
|
|
57
|
+
df['station_id'] = station_id
|
|
58
|
+
dfs.append(df)
|
|
59
|
+
|
|
60
|
+
return pd.concat(dfs, ignore_index=True)
|
|
61
|
+
|
|
62
|
+
|
|
57
63
|
|
|
58
64
|
def info(station_no):
|
|
59
65
|
#df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
|
mpcaHydro/outlets.py
ADDED
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Created on Thu May 1 09:51:51 2025
|
|
4
|
+
|
|
5
|
+
@author: mfratki
|
|
6
|
+
"""
|
|
7
|
+
#import sqlite3
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import geopandas as gpd
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import duckdb
|
|
12
|
+
#from hspf_tools.calibrator import etlWISKI, etlSWD
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
#stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
|
|
16
|
+
|
|
17
|
+
_stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
|
|
18
|
+
stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
|
|
19
|
+
stations_wiski['source'] = 'wiski'
|
|
20
|
+
_stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
|
|
21
|
+
stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
|
|
22
|
+
stations_equis['source'] = 'equis'
|
|
23
|
+
stations_equis['wplmn_flag'] = 0
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
|
|
29
|
+
|
|
30
|
+
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
31
|
+
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
32
|
+
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
33
|
+
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
34
|
+
|
|
35
|
+
def _reload():
|
|
36
|
+
global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
|
|
37
|
+
_stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
|
|
38
|
+
stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
|
|
39
|
+
stations_wiski['source'] = 'wiski'
|
|
40
|
+
_stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
|
|
41
|
+
stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
|
|
42
|
+
stations_equis['source'] = 'equis'
|
|
43
|
+
stations_equis['wplmn_flag'] = 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
47
|
+
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
48
|
+
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
49
|
+
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def split_opnids(opnids: list):
|
|
53
|
+
return [int(float(j)) for i in opnids for j in i]
|
|
54
|
+
|
|
55
|
+
def get_model_db(model_name: str):
|
|
56
|
+
return MODL_DB.query('repo_name == @model_name')
|
|
57
|
+
|
|
58
|
+
def valid_models():
|
|
59
|
+
return MODL_DB['repo_name'].unique().tolist()
|
|
60
|
+
|
|
61
|
+
def equis_stations(model_name):
|
|
62
|
+
return _stations_equis.query('repo_name == @model_name')['station_id'].tolist()
|
|
63
|
+
|
|
64
|
+
def wiski_stations(model_name):
|
|
65
|
+
return _stations_wiski.query('repo_name == @model_name')['station_id'].tolist()
|
|
66
|
+
|
|
67
|
+
def wplmn_stations(model_name):
|
|
68
|
+
return MODL_DB.query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
|
|
69
|
+
|
|
70
|
+
def wplmn_station_opnids(model_name):
|
|
71
|
+
opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
|
|
72
|
+
return split_opnids(opnids)
|
|
73
|
+
|
|
74
|
+
def wiski_station_opnids(model_name):
|
|
75
|
+
opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
|
|
76
|
+
return split_opnids(opnids)
|
|
77
|
+
|
|
78
|
+
def equis_station_opnids(model_name):
|
|
79
|
+
opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
|
|
80
|
+
return split_opnids(opnids)
|
|
81
|
+
|
|
82
|
+
def station_opnids(model_name):
|
|
83
|
+
opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name')['opnids'].str.split(',').to_list()
|
|
84
|
+
return split_opnids(opnids)
|
|
85
|
+
|
|
86
|
+
def mapped_equis_stations(model_name):
|
|
87
|
+
return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['station_id'].tolist()
|
|
88
|
+
|
|
89
|
+
def mapped_wiski_stations(model_name):
|
|
90
|
+
return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['station_id'].tolist()
|
|
91
|
+
|
|
92
|
+
def outlets(model_name):
|
|
93
|
+
return [group for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
|
|
94
|
+
|
|
95
|
+
def outlet_stations(model_name):
|
|
96
|
+
return [group['station_id'].to_list() for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def connect(db_path, read_only=True):
|
|
100
|
+
#Path(db_path).parent.mkdir(parents=True, exist_ok=True)
|
|
101
|
+
return duckdb.connect(db_path,read_only=read_only)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def init_db(db_path: str,reset: bool = False):
|
|
105
|
+
"""
|
|
106
|
+
Initialize the DuckDB database: create staging and analytics schemas
|
|
107
|
+
"""
|
|
108
|
+
db_path = Path(db_path)
|
|
109
|
+
if reset and db_path.exists():
|
|
110
|
+
db_path.unlink()
|
|
111
|
+
|
|
112
|
+
with connect(db_path.as_posix(),False) as con:
|
|
113
|
+
con.execute(OUTLETS_SCHEMA)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Accessors:
|
|
118
|
+
def get_outlets_by_model(model_name: str):
|
|
119
|
+
with connect(DB_PATH) as con:
|
|
120
|
+
df = con.execute(
|
|
121
|
+
"""
|
|
122
|
+
SELECT r.*
|
|
123
|
+
FROM outlets.station_reach_pairs r
|
|
124
|
+
WHERE r.repository_name = ?
|
|
125
|
+
""",
|
|
126
|
+
[model_name]
|
|
127
|
+
).fetchdf()
|
|
128
|
+
return df
|
|
129
|
+
|
|
130
|
+
def get_outlets_by_reach(reach_id: int, model_name: str):
|
|
131
|
+
"""
|
|
132
|
+
Return all outlet rows for outlets that include the given reach_id in the given model_name.
|
|
133
|
+
"""
|
|
134
|
+
with connect(DB_PATH) as con:
|
|
135
|
+
df = con.execute(
|
|
136
|
+
"""
|
|
137
|
+
SELECT r.*
|
|
138
|
+
FROM outlets.station_reach_pairs r
|
|
139
|
+
WHERE r.reach_id = ? AND r.repository_name = ?
|
|
140
|
+
""",
|
|
141
|
+
[reach_id, model_name]).fetchdf()
|
|
142
|
+
return df
|
|
143
|
+
|
|
144
|
+
def get_outlets_by_station(station_id: str, station_origin: str):
|
|
145
|
+
"""
|
|
146
|
+
Return all outlet rows for outlets that include the given station_id and station_origin.
|
|
147
|
+
"""
|
|
148
|
+
with connect(DB_PATH) as con:
|
|
149
|
+
|
|
150
|
+
df = con.execute(
|
|
151
|
+
"""
|
|
152
|
+
SELECT r.*
|
|
153
|
+
FROM outlets.station_reach_pairs r
|
|
154
|
+
WHERE r.station_id = ? AND r.station_origin = ?
|
|
155
|
+
""",
|
|
156
|
+
[station_id, station_origin]).fetchdf()
|
|
157
|
+
return df
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class OutletGateway:
|
|
162
|
+
def __init__(self, model_name: str):
|
|
163
|
+
self.model_name = model_name
|
|
164
|
+
self.db_path = DB_PATH
|
|
165
|
+
self.modl_db = get_model_db(model_name)
|
|
166
|
+
|
|
167
|
+
# Legacy methods to access functions
|
|
168
|
+
def wplmn_station_opnids(self):
|
|
169
|
+
return wplmn_station_opnids(self.model_name)
|
|
170
|
+
|
|
171
|
+
def wiski_station_opnids(self):
|
|
172
|
+
return wiski_station_opnids(self.model_name)
|
|
173
|
+
|
|
174
|
+
def equis_station_opnids(self):
|
|
175
|
+
return equis_station_opnids(self.model_name)
|
|
176
|
+
|
|
177
|
+
def station_opnids(self):
|
|
178
|
+
return station_opnids(self.model_name)
|
|
179
|
+
|
|
180
|
+
def equis_stations(self):
|
|
181
|
+
return equis_stations(self.model_name)
|
|
182
|
+
|
|
183
|
+
def wiski_stations(self):
|
|
184
|
+
return wiski_stations(self.model_name)
|
|
185
|
+
|
|
186
|
+
def wplmn_stations(self):
|
|
187
|
+
return wplmn_stations(self.model_name)
|
|
188
|
+
|
|
189
|
+
def outlets(self):
|
|
190
|
+
return outlets(self.model_name)
|
|
191
|
+
|
|
192
|
+
def outlet_stations(self):
|
|
193
|
+
return outlet_stations(self.model_name)
|
|
194
|
+
|
|
195
|
+
# Accessors for outlets
|
|
196
|
+
def get_outlets(self):
|
|
197
|
+
return get_outlets_by_model(self.model_name)
|
|
198
|
+
|
|
199
|
+
def get_outlets_by_reach(self, reach_id: int):
|
|
200
|
+
return get_outlets_by_reach(reach_id, self.model_name)
|
|
201
|
+
|
|
202
|
+
def get_outlets_by_station(self, station_id: str, station_origin: str):
|
|
203
|
+
assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
|
|
204
|
+
return get_outlets_by_station(station_id, station_origin)
|
|
205
|
+
|
|
206
|
+
# constructors:
|
|
207
|
+
def build_outlet_db(db_path: str = None):
|
|
208
|
+
if db_path is None:
|
|
209
|
+
db_path = DB_PATH
|
|
210
|
+
init_db(db_path,reset=True)
|
|
211
|
+
with connect(db_path,False) as con:
|
|
212
|
+
build_outlets(con)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def build_outlets(con, model_name: str = None):
|
|
216
|
+
if model_name is not None:
|
|
217
|
+
modl_db = get_model_db(model_name)
|
|
218
|
+
else:
|
|
219
|
+
modl_db = MODL_DB
|
|
220
|
+
|
|
221
|
+
for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repo_name'])):
|
|
222
|
+
repo_name = group['repo_name'].iloc[0]
|
|
223
|
+
add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
|
|
224
|
+
|
|
225
|
+
opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
|
|
226
|
+
|
|
227
|
+
for opnid in opnids:
|
|
228
|
+
add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
|
|
229
|
+
|
|
230
|
+
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
231
|
+
add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def create_outlet_schema(con, model_name : str):
|
|
235
|
+
for index, (_, group) in enumerate(outlets(model_name)):
|
|
236
|
+
repo_name = group['repo_name'].iloc[0]
|
|
237
|
+
add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
|
|
238
|
+
|
|
239
|
+
opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
|
|
240
|
+
|
|
241
|
+
for opnid in opnids:
|
|
242
|
+
add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
|
|
243
|
+
|
|
244
|
+
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
245
|
+
add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def add_outlet(con,
|
|
249
|
+
outlet_id: int,
|
|
250
|
+
repository_name: str,
|
|
251
|
+
outlet_name = None,
|
|
252
|
+
notes = None):
|
|
253
|
+
"""
|
|
254
|
+
Insert an outlet. repository_name is required.
|
|
255
|
+
"""
|
|
256
|
+
con.execute(
|
|
257
|
+
"INSERT INTO outlets.outlet_groups (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
|
|
258
|
+
[outlet_id, repository_name, outlet_name, notes]
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
def add_station(con,
|
|
262
|
+
outlet_id: int,
|
|
263
|
+
station_id: int,
|
|
264
|
+
station_origin: str,
|
|
265
|
+
true_opnid: int,
|
|
266
|
+
repository_name: str,
|
|
267
|
+
comments = None):
|
|
268
|
+
"""
|
|
269
|
+
Insert a station membership for an outlet.
|
|
270
|
+
Constraints:
|
|
271
|
+
- PRIMARY KEY (station_id, station_origin): unique per origin across all outlets.
|
|
272
|
+
- true_opnid and repository_name are required per schema.
|
|
273
|
+
"""
|
|
274
|
+
con.execute(
|
|
275
|
+
"""INSERT INTO outlets.outlet_stations
|
|
276
|
+
(outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
|
|
277
|
+
VALUES (?, ?, ?, ?, ?, ?)""",
|
|
278
|
+
[outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
def add_reach(con,
|
|
282
|
+
outlet_id: int,
|
|
283
|
+
reach_id: int,
|
|
284
|
+
repository_name: str):
|
|
285
|
+
"""
|
|
286
|
+
Insert a reach membership for an outlet.
|
|
287
|
+
- repository_name is required and participates in the PK (reach_id, repository_name).
|
|
288
|
+
"""
|
|
289
|
+
con.execute(
|
|
290
|
+
"""INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name)
|
|
291
|
+
VALUES (?, ?, ?)""",
|
|
292
|
+
[outlet_id, reach_id, repository_name]
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
OUTLETS_SCHEMA = """-- schema.sql
|
|
297
|
+
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
|
|
298
|
+
-- Compatible with DuckDB and SQLite.
|
|
299
|
+
|
|
300
|
+
-- Table 1: outlets
|
|
301
|
+
-- Represents a logical grouping that ties stations and reaches together.
|
|
302
|
+
CREATE SCHEMA IF NOT EXISTS outlets;
|
|
303
|
+
|
|
304
|
+
CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
|
|
305
|
+
outlet_id INTEGER PRIMARY KEY,
|
|
306
|
+
repository_name TEXT NOT NULL,
|
|
307
|
+
outlet_name TEXT,
|
|
308
|
+
notes TEXT -- optional: general notes about the outlet grouping
|
|
309
|
+
);
|
|
310
|
+
|
|
311
|
+
-- Table 2: outlet_stations
|
|
312
|
+
-- One-to-many: outlet -> stations
|
|
313
|
+
CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
|
|
314
|
+
outlet_id INTEGER NOT NULL,
|
|
315
|
+
station_id TEXT NOT NULL,
|
|
316
|
+
station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
|
|
317
|
+
repository_name TEXT NOT NULL, -- repository model the station is physically located in
|
|
318
|
+
true_opnid INTEGER NOT NULL, -- The specific reach the station physically sits on (optional)
|
|
319
|
+
comments TEXT, -- Per-station comments, issues, etc.
|
|
320
|
+
CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
|
|
321
|
+
FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
|
|
322
|
+
);
|
|
323
|
+
|
|
324
|
+
-- Table 3: outlet_reaches
|
|
325
|
+
-- One-to-many: outlet -> reaches
|
|
326
|
+
-- A reach can appear in multiple outlets, enabling many-to-many overall.
|
|
327
|
+
CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
|
|
328
|
+
outlet_id INTEGER NOT NULL,
|
|
329
|
+
reach_id INTEGER NOT NULL, -- model reach identifier (aka opnid)
|
|
330
|
+
repository_name TEXT NOT NULL, -- required: where the mapping comes from
|
|
331
|
+
FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
|
|
332
|
+
);
|
|
333
|
+
|
|
334
|
+
-- Useful views:
|
|
335
|
+
|
|
336
|
+
-- View: station_reach_pairs
|
|
337
|
+
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
|
|
338
|
+
CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
|
|
339
|
+
SELECT
|
|
340
|
+
s.outlet_id,
|
|
341
|
+
s.station_id,
|
|
342
|
+
s.station_origin,
|
|
343
|
+
r.reach_id,
|
|
344
|
+
r.repository_name
|
|
345
|
+
FROM outlets.outlet_stations AS s
|
|
346
|
+
JOIN outlets.outlet_reaches AS r
|
|
347
|
+
ON s.outlet_id = r.outlet_id;
|
|
348
|
+
|
|
349
|
+
"""
|
|
350
|
+
|
|
351
|
+
#row = modl_db.MODL_DB.iloc[0]
|
|
352
|
+
|
|
353
|
+
#info = etlWISKI.info(row['station_id'])
|
|
354
|
+
|
|
355
|
+
#modl_db.MODL_DB.query('source == "equis"')
|
|
356
|
+
|
|
357
|
+
# outlet_dict = {'stations': {'wiski': ['E66050001'],
|
|
358
|
+
# 'equis': ['S002-118']},
|
|
359
|
+
# 'reaches': {'Clearwater': [650]}
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
# station_ids = ['S002-118']
|
|
365
|
+
# #station_ids = ['E66050001']
|
|
366
|
+
# reach_ids = [650]
|
|
367
|
+
# flow_station_ids = ['E66050001']
|
mpcaHydro/reports.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import duckdb
|
|
3
|
+
import glob
|
|
4
|
+
|
|
5
|
+
#TODO ensure all reports are actually in the reports schema
|
|
6
|
+
|
|
7
|
+
class reportManager():
|
|
8
|
+
def __init__(self,db_path:Path):
|
|
9
|
+
self.db_path = db_path
|
|
10
|
+
|
|
11
|
+
def wiski_qc_counts(self):
|
|
12
|
+
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
13
|
+
return wiski_qc_counts(con)
|
|
14
|
+
|
|
15
|
+
def constituent_summary(self,constituent: str = None):
|
|
16
|
+
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
17
|
+
return constituent_summary(con,constituent)
|
|
18
|
+
|
|
19
|
+
def station_reach_pairs(self):
|
|
20
|
+
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
21
|
+
return station_reach_pairs(con)
|
|
22
|
+
|
|
23
|
+
def outlet_summary(self):
|
|
24
|
+
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
25
|
+
return outlet_summary(con)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def outlet_summary(con: duckdb.DuckDBPyConnection):
|
|
30
|
+
query = '''
|
|
31
|
+
SELECT *,
|
|
32
|
+
FROM
|
|
33
|
+
reports.outlet_constituent_summary
|
|
34
|
+
ORDER BY
|
|
35
|
+
outlet_id,
|
|
36
|
+
constituent
|
|
37
|
+
'''
|
|
38
|
+
df = con.execute(query).fetch_df()
|
|
39
|
+
return df
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
|
|
43
|
+
query = '''
|
|
44
|
+
SELECT *,
|
|
45
|
+
FROM
|
|
46
|
+
reports.wiski_qc_count
|
|
47
|
+
ORDER BY
|
|
48
|
+
station_no,
|
|
49
|
+
parametertype_name
|
|
50
|
+
'''
|
|
51
|
+
df = con.execute(query).fetch_df()
|
|
52
|
+
return df
|
|
53
|
+
|
|
54
|
+
def constituent_summary(con: duckdb.DuckDBPyConnection,constituent: str = None):
|
|
55
|
+
|
|
56
|
+
query = '''
|
|
57
|
+
SELECT *,
|
|
58
|
+
FROM
|
|
59
|
+
reports.constituent_summary
|
|
60
|
+
ORDER BY
|
|
61
|
+
station_id,
|
|
62
|
+
station_origin,
|
|
63
|
+
constituent
|
|
64
|
+
'''
|
|
65
|
+
df = con.execute(query).fetch_df()
|
|
66
|
+
if constituent is not None:
|
|
67
|
+
df = df[df['constituent'] == constituent]
|
|
68
|
+
return df
|
|
69
|
+
|
|
70
|
+
def station_reach_pairs(con: duckdb.DuckDBPyConnection):
|
|
71
|
+
query = '''
|
|
72
|
+
SELECT *,
|
|
73
|
+
FROM
|
|
74
|
+
reports.station_reach_pairs
|
|
75
|
+
ORDER BY
|
|
76
|
+
outlet_id,
|
|
77
|
+
station_id
|
|
78
|
+
'''
|
|
79
|
+
df = con.execute(query).fetch_df()
|
|
80
|
+
return df
|