mpcaHydro 2.2.7__tar.gz → 2.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/PKG-INFO +1 -1
- mpcahydro-2.2.9/demo.py +226 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/pyproject.toml +1 -1
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/outlet.duckdb +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/equis.py +8 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/outlets.py +70 -24
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/swd.py +21 -15
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse.py +57 -3
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse_functions.py +3 -2
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/wiski.py +9 -0
- mpcahydro-2.2.9/tests/integration/test_dataManager.py +61 -0
- mpcahydro-2.2.9/tests/integration/test_warehouse.duckdb +0 -0
- mpcahydro-2.2.9/tests/unit/test_equis.py +19 -0
- mpcahydro-2.2.7/tests/pixi.toml +0 -25
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/.gitattributes +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/.gitignore +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/README.md +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/__init__.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/csg.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/pywisk.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/reports.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/analytics_tables.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/outlets_schema.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/schemas.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/staging_tables.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_analytics.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_outlets.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_reports.sql +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/sql_loader.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/xref.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/README.md +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/conftest.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/test_data_manager.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/test_data_manager_integration.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/test_equis_integration.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/test_warehouse.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/test_wiski.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/integration/test_wiski_integration.py +0 -0
- {mpcahydro-2.2.7 → mpcahydro-2.2.9}/tests/test_data_manager_functions.py +0 -0
mpcahydro-2.2.9/demo.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#%%
|
|
2
|
+
from mpcaHydro.data_manager import dataManager
|
|
3
|
+
from pyhcal.repository import Repository
|
|
4
|
+
from mpcaHydro import outlets
|
|
5
|
+
import duckdb
|
|
6
|
+
from mpcaHydro import equis, warehouse, wiski
|
|
7
|
+
from hspf.hspfModel import hspfModel
|
|
8
|
+
from hspf.uci import UCI
|
|
9
|
+
from mpcaHydro import etlSWD
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
#%%
|
|
13
|
+
'''
|
|
14
|
+
New approach. Directly load to warehouse from downloads.
|
|
15
|
+
Store raw and processed data in warehouse. For large timeseries I could store
|
|
16
|
+
as parquet files. The transformations using pandas take a bit of time. I imagine doing them
|
|
17
|
+
within duckdb would be faster.
|
|
18
|
+
|
|
19
|
+
'''
|
|
20
|
+
|
|
21
|
+
# with warehouse.connect(db_path) as con:
|
|
22
|
+
# df = con.execute("SELECT * FROM staging.wiski").df()
|
|
23
|
+
# df = wiski.transform(df,filter_qc_codes = False)
|
|
24
|
+
|
|
25
|
+
#%%
|
|
26
|
+
import os  # used only for the credential lookup at the end of this cell

# Run configuration for the demo.
model_name = 'Nemadji'
db_path = f'C:/Users/mfratki/Documents/{model_name}.duckdb'
# start_year / end_year are read as module-level globals by download_wiski_data below.
start_year = 1996
end_year = 2030
replace = True
filter_qc_codes = True

# Station lists for the selected model, taken from the packaged outlets module.
equis_stations = outlets.equis_stations(model_name)
wiski_stations = outlets.wiski_stations(model_name)

# Credentials must never be committed to source or shipped in a release.
# They are read from the environment; a missing variable raises KeyError so
# the problem is visible before any connection attempt.
equis.connect(os.environ['MPCA_ORACLE_USERNAME'], password = os.environ['MPCA_ORACLE_PASSWORD'])

# reset = True is kept from the original call.
warehouse.init_db(db_path,reset = True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
#%% Old approach. Store as individual processed station files then load to warehouse
|
|
39
|
+
#df_equis = equis.download(equis_stations)
|
|
40
|
+
#df_wiski = wiski.download(wiski_stations,start_year = start_year, end_year = end_year)
|
|
41
|
+
|
|
42
|
+
#%% equis
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def download_equis_data(db_path,station_ids,replace = False):
    '''Download EQuIS data for ``station_ids`` and load it into the warehouse.

    The downloaded frame is written to ``staging.equis`` and the result of
    ``equis.transform`` to ``analytics.equis``. ``replace`` is forwarded to
    ``warehouse.load_df_to_table`` for both tables. When the download is
    empty nothing is written and a message is printed instead.
    '''
    # Download before opening the write connection so the database is not
    # held open for writing while waiting on the remote source.
    df = equis.download(station_ids)
    if df.empty:
        print('No data neccesary for HSPF calibration available from equis for stations:',station_ids)
        return

    with warehouse.connect(db_path,read_only = False) as con:
        warehouse.load_df_to_table(con,df, 'staging.equis',replace = replace)
        warehouse.load_df_to_table(con,equis.transform(df), 'analytics.equis',replace = replace)
|
|
55
|
+
|
|
56
|
+
def download_wiski_data(db_path,station_ids,replace = False):
    '''Download WISKI data for ``station_ids`` and load it into the warehouse.

    The year range comes from the module-level ``start_year`` and ``end_year``
    variables. The downloaded frame is written to ``staging.wiski`` and the
    result of ``wiski.transform`` to ``analytics.wiski``. ``replace`` is
    forwarded to ``warehouse.load_df_to_table`` for both tables. When the
    download is empty nothing is written and a message is printed instead.
    '''
    # Download before opening the write connection so the database is not
    # held open for writing while waiting on the remote source.
    df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
    if df.empty:
        print('No data neccesary for HSPF calibration available from wiski for stations:',station_ids)
        return

    with warehouse.connect(db_path,read_only = False) as con:
        warehouse.load_df_to_table(con,df, 'staging.wiski', replace = replace)
        warehouse.load_df_to_table(con,wiski.transform(df), 'analytics.wiski',replace = replace)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Add to warehouse from custom df. Must contain required normalized columns.
|
|
67
|
+
with warehouse.connect(db_path,read_only = False) as con:
|
|
68
|
+
if replace:
|
|
69
|
+
warehouse.drop_station_id(con,station_id,station_origin='equis')
|
|
70
|
+
warehouse.add_to_table(con,df, 'staging','equis_normalized')
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
|
|
74
|
+
df = equis.normalize(df.copy())
|
|
75
|
+
warehouse.add_to_table(con,df, 'staging','equis_normalized')
|
|
76
|
+
df = equis.transform(df)
|
|
77
|
+
warehouse.add_to_table(con,df, 'analytics','equis')
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
#%% swd
|
|
82
|
+
|
|
83
|
+
df = etlSWD.download(equis_stations)
|
|
84
|
+
|
|
85
|
+
with warehouse.connect(db_path,read_only = False) as con:
|
|
86
|
+
warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
|
|
87
|
+
df = equis.normalize(df.copy())
|
|
88
|
+
warehouse.add_to_table(con,df, 'staging','equis_normalized')
|
|
89
|
+
df = equis.transform(df)
|
|
90
|
+
warehouse.add_to_table(con,df, 'analytics','equis')
|
|
91
|
+
#%% wiski
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
if station_origin == 'wiski':
|
|
96
|
+
df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
|
|
97
|
+
warehouse.load_df_to_staging(con,df, 'wiski_raw', replace = replace)
|
|
98
|
+
df = wiski.normalize(df.copy())
|
|
99
|
+
warehouse.add_to_table(con,df, 'staging','wiski_normalized')
|
|
100
|
+
df = wiski.transform(df,filter_qc_codes = filter_qc_codes)
|
|
101
|
+
warehouse.add_to_table(con,df, 'analytics','wiski') # method includes normalization
|
|
102
|
+
|
|
103
|
+
if station_origin == 'swd':
|
|
104
|
+
df = pd.concat([etlSWD.download(station_id) for station_id in station_ids])
|
|
105
|
+
warehouse.load_df_to_staging(con,df, 'equis_raw', replace = replace)
|
|
106
|
+
df = etlSWD.transform(df.copy())
|
|
107
|
+
warehouse.add_to_table(con,df, 'analytics','equis')
|
|
108
|
+
warehouse.update_views(con)
|
|
109
|
+
|
|
110
|
+
with warehouse.connect(db_path) as con:
|
|
111
|
+
warehouse.update_views(con)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
#%%
|
|
115
|
+
|
|
116
|
+
import requests
|
|
117
|
+
url = 'http://ifrshiny.seas.umich.edu/mglp/'
|
|
118
|
+
requests.get(url)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
db_path = 'C:/Users/mfratki/Documents/Rum.duckdb'
|
|
123
|
+
modl_db.build_outlet_db(db_path)
|
|
124
|
+
con = duckdb.connect(db_path)
|
|
125
|
+
con.execute("SELECT * FROM station_reach_pairs").df()
|
|
126
|
+
con.execute('SELECT * FROM station_reach_pairs WHERE outlet_id = 76').df()
|
|
127
|
+
|
|
128
|
+
# Need to remove duplicates from MODL_DB
|
|
129
|
+
modl_db.MODL_DB.loc[modl_db.MODL_DB.duplicated(['station_id','source'])]
|
|
130
|
+
|
|
131
|
+
#%%
|
|
132
|
+
dm = dataManager('C:/Users/mfratki/Documents/')
|
|
133
|
+
dm._build_warehouse()
|
|
134
|
+
equis_stations = modl_db.equis_stations('Nemadji')
|
|
135
|
+
wiski_stations = modl_db.wiski_stations('Nemadji')
|
|
136
|
+
|
|
137
|
+
#%% Old approach. Store as individual processed station files then load to warehouse
|
|
138
|
+
for station_id in equis_stations:
|
|
139
|
+
dm._download_station_data(station_id,'equis', True)
|
|
140
|
+
|
|
141
|
+
for station_id in wiski_stations:
|
|
142
|
+
dm._download_station_data(station_id,'wiski', True)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
#%% Adding HSPF outputs to warehouse
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
con = duckdb.connect(db_path)
|
|
166
|
+
|
|
167
|
+
model_name = 'Nemadji'
|
|
168
|
+
outlets = [group for _, group in modl_db.MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
|
|
169
|
+
|
|
170
|
+
for outlet in outlets:
|
|
171
|
+
1+1
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
dfs = []
|
|
175
|
+
for constituent in ['Q','TSS','TP','N','OP','TKN']:
|
|
176
|
+
opnids = modl_db.split_opnids([opnid.split(',') for opnid in set(outlet['opnids'].tolist())])
|
|
177
|
+
for opnid in opnids:
|
|
178
|
+
df = mod.hbns.get_reach_constituent(constituent,opnids,time_step='h')
|
|
179
|
+
df.columns = ['value']
|
|
180
|
+
df['constituent'] = constituent
|
|
181
|
+
df['operation'] = operation
|
|
182
|
+
df['opnid'] = opnid
|
|
183
|
+
dfs.append(df)
|
|
184
|
+
|
|
185
|
+
df = pd.concat(dfs).reset_index()
|
|
186
|
+
df['model_name'] = model_name
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
station_ids = ['H05018001','S006-214','S015-102']
|
|
191
|
+
target_constituent = 'TSS'
|
|
192
|
+
flow_constituent = 'Q'
|
|
193
|
+
|
|
194
|
+
# build placeholders for the IN list (one ? per station id)
|
|
195
|
+
placeholders = ','.join(['?'] * len(station_ids))
|
|
196
|
+
|
|
197
|
+
sql = f'''
|
|
198
|
+
SELECT o.*, f.datetime AS flow_datetime, f.value AS flow, f.baseflow, f.station_id AS flow_station_id, f.station_origin AS flow_station_origin
|
|
199
|
+
FROM analytics.observations o
|
|
200
|
+
JOIN analytics.observations f
|
|
201
|
+
ON o.datetime = f.datetime
|
|
202
|
+
WHERE o.constituent = ?
|
|
203
|
+
AND o.station_id IN ({placeholders})
|
|
204
|
+
AND f.constituent = ?;
|
|
205
|
+
'''
|
|
206
|
+
|
|
207
|
+
# parameter order must match the ? positions in the query
|
|
208
|
+
params = [target_constituent] + station_ids + [flow_constituent]
|
|
209
|
+
|
|
210
|
+
df = con.execute(sql, params).df()
|
|
211
|
+
|
|
212
|
+
outlet_id: station_ids
|
|
213
|
+
|
|
214
|
+
outlet_id: opnid
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
outlets = []
|
|
218
|
+
for index, (_, group) in enumerate(modl_db.MODL_DB.groupby(by = ['opnids','repository_name'])):
|
|
219
|
+
group['outlet_id'] = index
|
|
220
|
+
group.reset_index(drop=True, inplace=True)
|
|
221
|
+
outlets.append(group)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
for _, row in group.iterrows():
|
|
225
|
+
opnids = group.split_opnids(row['opnids'].str.split(',').to_list())
|
|
226
|
+
row*len(opnids)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -265,6 +265,13 @@ def replace_nondetects(df):
|
|
|
265
265
|
df.loc[df['value'].isna(), 'value'] = 0
|
|
266
266
|
return df
|
|
267
267
|
|
|
268
|
+
def filter_years(df, start_year=1996, end_year=None):
    '''Keep only the Equis samples whose ``datetime`` year lies in the requested range.

    Rows with a year earlier than ``start_year`` are dropped. When ``end_year``
    is given, rows with a later year are dropped as well; ``None`` leaves the
    range open-ended. Both bounds are inclusive.
    '''
    sample_years = df['datetime'].dt.year
    in_range = sample_years >= start_year
    if end_year is not None:
        in_range = in_range & (sample_years <= end_year)
    return df[in_range]
|
|
274
|
+
|
|
268
275
|
def normalize(df):
|
|
269
276
|
'''Normalize Equis data: select relevant columns.'''
|
|
270
277
|
df = map_constituents(df)
|
|
@@ -278,6 +285,7 @@ def transform(df):
|
|
|
278
285
|
|
|
279
286
|
df = normalize(df)
|
|
280
287
|
df = replace_nondetects(df)
|
|
288
|
+
df = filter_years(df)
|
|
281
289
|
if not df.empty:
|
|
282
290
|
df = average_results(df)
|
|
283
291
|
return df
|
|
@@ -31,7 +31,15 @@ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
|
|
|
31
31
|
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
32
32
|
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
33
33
|
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
34
|
+
MODL_DB = MODL_DB.dropna(subset = 'repo_name')
|
|
34
35
|
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
36
|
+
# Add outlet_id column to MODL_DB based on enumerate grouping
|
|
37
|
+
outlet_id_map = {}
|
|
38
|
+
for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
|
|
39
|
+
for idx in group.index:
|
|
40
|
+
outlet_id_map[idx] = int(outlet_id)
|
|
41
|
+
MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
|
|
42
|
+
|
|
35
43
|
|
|
36
44
|
def _reload():
|
|
37
45
|
global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
|
|
@@ -47,7 +55,14 @@ def _reload():
|
|
|
47
55
|
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
48
56
|
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
49
57
|
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
58
|
+
MODL_DB = MODL_DB.dropna(subset = 'repo_name')
|
|
50
59
|
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
60
|
+
# Add outlet_id column to MODL_DB based on enumerate grouping
|
|
61
|
+
outlet_id_map = {}
|
|
62
|
+
for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
|
|
63
|
+
for idx in group.index:
|
|
64
|
+
outlet_id_map[idx] = int(outlet_id)
|
|
65
|
+
MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
|
|
51
66
|
|
|
52
67
|
|
|
53
68
|
def split_opnids(opnids: list):
|
|
@@ -144,7 +159,7 @@ def get_outlets_by_reach(reach_id: int, model_name: str):
|
|
|
144
159
|
""",
|
|
145
160
|
[reach_id, model_name]).fetchdf()
|
|
146
161
|
return df
|
|
147
|
-
|
|
162
|
+
|
|
148
163
|
def get_outlets_by_station(station_id: str, station_origin: str):
|
|
149
164
|
"""
|
|
150
165
|
Return all outlet rows for outlets that include the given reach_id in the given model_name.
|
|
@@ -160,6 +175,47 @@ def get_outlets_by_station(station_id: str, station_origin: str):
|
|
|
160
175
|
[station_id, station_origin]).fetchdf()
|
|
161
176
|
return df
|
|
162
177
|
|
|
178
|
+
def get_station_opnids(station_id: str, station_origin: str):
    """
    Look up the reach IDs paired with one station.

    Queries ``outlets.station_reach_pairs`` in the database at ``DB_PATH`` for
    rows matching both ``station_id`` and ``station_origin`` and returns the
    ``reach_id`` column as a list (one entry per matching row).
    """
    query = """
        SELECT r.reach_id
        FROM outlets.station_reach_pairs r
        WHERE r.station_id = ? AND r.station_origin = ?
        """
    with connect(DB_PATH) as con:
        pairs = con.execute(query, [station_id, station_origin]).fetchdf()
    return pairs['reach_id'].tolist()
|
|
191
|
+
|
|
192
|
+
def get_outlet_opnids(outlet_id: int):
    """
    Look up the distinct reach IDs that belong to one outlet.

    Queries ``outlets.station_reach_pairs`` in the database at ``DB_PATH`` for
    rows with the given ``outlet_id`` and returns the ``reach_id`` values with
    duplicates removed. The list is built from a set, so its order is not
    guaranteed.
    """
    query = """
        SELECT r.reach_id
        FROM outlets.station_reach_pairs r
        WHERE r.outlet_id = ?
        """
    with connect(DB_PATH) as con:
        pairs = con.execute(query, [outlet_id]).fetchdf()
    reach_ids = pairs['reach_id'].tolist()
    return list(set(reach_ids))
|
|
205
|
+
|
|
206
|
+
def get_outlet_stations(outlet_id: int):
    """
    Look up the stations that belong to one outlet.

    Queries ``outlets.station_reach_pairs`` in the database at ``DB_PATH`` for
    rows with the given ``outlet_id`` and returns one dict per distinct
    (``station_id``, ``station_origin``) pair.
    """
    query = """
        SELECT r.station_id, r.station_origin
        FROM outlets.station_reach_pairs r
        WHERE r.outlet_id = ?
        """
    with connect(DB_PATH) as con:
        pairs = con.execute(query, [outlet_id]).fetchdf()
    unique_stations = pairs[['station_id', 'station_origin']].drop_duplicates()
    return unique_stations.to_dict(orient='records')
|
|
163
219
|
|
|
164
220
|
|
|
165
221
|
class OutletGateway:
|
|
@@ -179,7 +235,7 @@ class OutletGateway:
|
|
|
179
235
|
return equis_station_opnids(self.model_name)
|
|
180
236
|
|
|
181
237
|
def station_opnids(self):
|
|
182
|
-
return
|
|
238
|
+
return mapped_station_opnids(self.model_name)
|
|
183
239
|
|
|
184
240
|
def equis_stations(self):
|
|
185
241
|
return equis_stations(self.model_name)
|
|
@@ -207,6 +263,12 @@ class OutletGateway:
|
|
|
207
263
|
assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
|
|
208
264
|
return get_outlets_by_station(station_id, station_origin)
|
|
209
265
|
|
|
266
|
+
def get_outlet_opnids(self, outlet_id: int):
|
|
267
|
+
return get_outlet_opnids(outlet_id)
|
|
268
|
+
|
|
269
|
+
def get_outlet_stations(self, outlet_id: int):
|
|
270
|
+
return get_outlet_stations(outlet_id)
|
|
271
|
+
|
|
210
272
|
# constructors:
|
|
211
273
|
def build_outlet_db(db_path: str = None):
|
|
212
274
|
if db_path is None:
|
|
@@ -222,31 +284,15 @@ def build_outlets(con, model_name: str = None):
|
|
|
222
284
|
else:
|
|
223
285
|
modl_db = MODL_DB
|
|
224
286
|
|
|
225
|
-
for
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
287
|
+
for outlet_id in modl_db['outlet_id'].unique():
|
|
288
|
+
group = modl_db.query('outlet_id == @outlet_id')
|
|
289
|
+
repo_name = group['repo_name'].iloc[0]
|
|
290
|
+
add_outlet(con, outlet_id = int(outlet_id), outlet_name = None, repository_name = repo_name, notes = None)
|
|
229
291
|
opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
|
|
230
|
-
|
|
231
292
|
for opnid in opnids:
|
|
232
|
-
add_reach(con, outlet_id =
|
|
233
|
-
|
|
234
|
-
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
235
|
-
add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def create_outlet_schema(con, model_name : str):
|
|
239
|
-
for index, (_, group) in enumerate(outlets(model_name)):
|
|
240
|
-
repo_name = group['repo_name'].iloc[0]
|
|
241
|
-
add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
|
|
242
|
-
|
|
243
|
-
opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
|
|
244
|
-
|
|
245
|
-
for opnid in opnids:
|
|
246
|
-
add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
|
|
247
|
-
|
|
293
|
+
add_reach(con, outlet_id = int(outlet_id), reach_id = int(opnid), repository_name = repo_name)
|
|
248
294
|
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
249
|
-
add_station(con, outlet_id =
|
|
295
|
+
add_station(con, outlet_id = int(outlet_id), station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
250
296
|
|
|
251
297
|
|
|
252
298
|
def add_outlet(con,
|
|
@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
|
|
|
26
26
|
# return df
|
|
27
27
|
import requests
|
|
28
28
|
|
|
29
|
-
def _download(
|
|
29
|
+
def _download(station_id):
|
|
30
30
|
# Replace {station_no} in the URL with the actual station number
|
|
31
|
-
url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
|
|
32
|
-
|
|
31
|
+
#url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
|
|
32
|
+
url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
|
|
33
|
+
|
|
33
34
|
try:
|
|
34
35
|
# Send a GET request to the URL
|
|
35
|
-
|
|
36
|
+
params = {
|
|
37
|
+
'stationId': station_id,
|
|
38
|
+
'format': 'json'
|
|
39
|
+
}
|
|
40
|
+
response = requests.get(url,params = params)
|
|
36
41
|
response.raise_for_status() # Raise exception for HTTP errors
|
|
37
42
|
# Parse the JSON data
|
|
38
|
-
|
|
39
|
-
return pd.DataFrame(columns = response.json()['column_names'])
|
|
40
|
-
else:
|
|
41
|
-
return pd.DataFrame(response.json()['data'])
|
|
43
|
+
return pd.DataFrame(response.json()['data'])
|
|
42
44
|
|
|
43
45
|
except requests.exceptions.RequestException as e:
|
|
44
46
|
print(f"An error occurred: {e}")
|
|
@@ -46,14 +48,18 @@ def _download(station_no):
|
|
|
46
48
|
|
|
47
49
|
|
|
48
50
|
|
|
49
|
-
def download(
|
|
51
|
+
def download(station_ids):
    '''Download results for several stations and combine them into one frame.

    Each station is fetched with ``_download``. Non-empty results are tagged
    with a ``station_id`` column and concatenated with a fresh index.

    Returns an empty ``DataFrame`` when ``station_ids`` is empty or no station
    returned any rows, so callers can test ``df.empty`` instead of handling the
    ``ValueError`` that ``pd.concat`` raises for an empty list.
    '''
    frames = []
    for station_id in station_ids:
        df = _download(station_id)
        # NOTE(review): the failure path of _download is not visible from here;
        # the None check guards against it returning nothing after an error.
        if df is None or df.empty:
            continue
        df['station_id'] = station_id
        frames.append(df)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
|
|
61
|
+
|
|
62
|
+
|
|
57
63
|
|
|
58
64
|
def info(station_no):
|
|
59
65
|
#df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
|
|
@@ -28,6 +28,23 @@ def init_db(db_path: str,reset: bool = False):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
def validate_schemas(con: duckdb.DuckDBPyConnection):
    """Check that every schema the warehouse relies on exists.

    Reads the schema names from ``information_schema.schemata`` and raises
    ``ValueError`` naming the missing ones when any of ``staging``,
    ``analytics``, ``mappings``, ``outlets`` or ``reports`` is absent.
    """
    required = {'staging', 'analytics', 'mappings', 'outlets', 'reports'}
    rows = con.execute("SELECT schema_name FROM information_schema.schemata").fetchall()
    present = {row[0] for row in rows}
    missing_schemas = required - present
    if not missing_schemas:
        return
    raise ValueError(f"Missing schemas: {missing_schemas}")
|
|
39
|
+
|
|
40
|
+
def validate_tables(con: duckdb.DuckDBPyConnection, schema: str, expected_tables: set):
    """Check that ``schema`` contains every table in ``expected_tables``.

    Reads the table names for ``schema`` from ``information_schema.tables``
    and raises ``ValueError`` naming the missing ones when any are absent.
    """
    rows = con.execute(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = ?",
        [schema],
    ).fetchall()
    present = {row[0] for row in rows}
    missing_tables = expected_tables - present
    if not missing_tables:
        return
    raise ValueError(f"Missing tables in {schema} schema: {missing_tables}")
|
|
47
|
+
|
|
31
48
|
def create_schemas(con: duckdb.DuckDBPyConnection):
|
|
32
49
|
"""Create staging, analytics, hspf, and reports schemas if they do not exist."""
|
|
33
50
|
con.execute(sql_loader.get_schemas_sql())
|
|
@@ -96,12 +113,49 @@ def create_mapping_tables(con: duckdb.DuckDBPyConnection):
|
|
|
96
113
|
else:
|
|
97
114
|
print(f"Warning: WISKI_QUALITY_CODES.csv not found at {wiski_qc_csv_path}")
|
|
98
115
|
|
|
99
|
-
|
|
116
|
+
|
|
117
|
+
def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
    """
    Copy the tables and views of an external outlets DuckDB database into ``con``.

    The source database is attached under the alias ``outlets_db``, each of its
    tables is copied with ``CREATE TABLE ... AS SELECT``, each view is recreated
    from its definition with the ``outlets_db.`` prefix removed, and the source
    is detached again. Detaching happens even when copying fails.

    Args:
        con: Open connection to the destination database.
        outlets_db_path: Path of the DuckDB file holding the outlet definitions.
    """
    create_schemas(con)

    # Double any single quote so a path containing an apostrophe cannot end
    # the SQL string literal early.
    escaped_path = str(outlets_db_path).replace("'", "''")
    con.execute(f"ATTACH DATABASE '{escaped_path}' AS outlets_db;")
    try:
        # -- Step 1: Copy all tables --
        tables = con.execute("SHOW TABLES FROM outlets_db").fetchall()
        print(f"Tables in the source database: {tables}")

        for table in tables:
            table_name = table[0]  # first column holds the table name
            con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM outlets_db.{table_name}")

        # -- Step 2: Copy all views --
        # NOTE(review): the SHOW VIEWS / SHOW CREATE VIEW statements are kept
        # exactly as written; confirm that the DuckDB version in use accepts them.
        views = con.execute("SHOW VIEWS FROM outlets_db").fetchall()
        print(f"Views in the source database: {views}")

        for view in views:
            view_name = view[0]  # first column holds the view name
            create_view_sql = con.execute(f"SHOW CREATE VIEW outlets_db.{view_name}").fetchone()[0]
            # Drop the alias prefix so the view refers to the copied tables.
            create_view_sql = create_view_sql.replace("outlets_db.", "")
            con.execute(create_view_sql)
    finally:
        # The alias is already attached at this point, so it is not attached a
        # second time before detaching.
        con.execute("DETACH 'outlets_db'")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def create_outlets_tables(con: duckdb.DuckDBPyConnection, model_name: str = None):
|
|
100
155
|
"""Create tables in the outlets schema to define outlet-station-reach relationships."""
|
|
101
156
|
con.execute(sql_loader.get_outlets_schema_sql())
|
|
102
157
|
con.execute(sql_loader.get_views_outlets_sql())
|
|
103
|
-
outlets.build_outlets(con)
|
|
104
|
-
|
|
158
|
+
outlets.build_outlets(con, model_name=model_name)
|
|
105
159
|
|
|
106
160
|
def create_filtered_wiski_view(con: duckdb.DuckDBPyConnection, data_codes: list):
|
|
107
161
|
"""Create a view filtering WISKI data based on specified data codes."""
|
|
@@ -101,7 +101,8 @@ def download_wiski_data(
|
|
|
101
101
|
if overwrite:
|
|
102
102
|
warehouse.drop_station_data(con, station_ids, 'wiski')
|
|
103
103
|
warehouse.add_df_to_table(con, df, 'staging', 'wiski')
|
|
104
|
-
|
|
104
|
+
if not df_transformed.empty:
|
|
105
|
+
warehouse.add_df_to_table(con, df_transformed, 'analytics', 'wiski')
|
|
105
106
|
warehouse.update_views(con)
|
|
106
107
|
else:
|
|
107
108
|
print('No data necessary for HSPF calibration from wiski for:', station_ids)
|
|
@@ -351,7 +352,7 @@ def station_reach_pairs(con: duckdb.DuckDBPyConnection):
|
|
|
351
352
|
query = '''
|
|
352
353
|
SELECT *,
|
|
353
354
|
FROM
|
|
354
|
-
|
|
355
|
+
outlets.station_reach_pairs
|
|
355
356
|
ORDER BY
|
|
356
357
|
outlet_id,
|
|
357
358
|
station_id
|
|
@@ -336,6 +336,14 @@ def filter_quality_codes(df, data_codes):
|
|
|
336
336
|
'''
|
|
337
337
|
return df.loc[df['quality_code'].isin(data_codes)]
|
|
338
338
|
|
|
339
|
+
def filter_years(df, start_year=1996, end_year=None):
    '''Filter WISKI data to include only records within a certain year range.

    Rows whose ``datetime`` year is earlier than ``start_year`` are dropped.
    When ``end_year`` is given, rows with a later year are dropped as well;
    ``None`` leaves the range open-ended. Both bounds are inclusive.
    '''
    years = df['datetime'].dt.year
    keep = years >= start_year
    if end_year is not None:
        keep &= years <= end_year
    return df[keep]
|
|
345
|
+
|
|
346
|
+
|
|
339
347
|
def average_results(df):
|
|
340
348
|
#df['datetime'] = pd.to_datetime(df.loc[:,'datetime'])
|
|
341
349
|
df.loc[:,'datetime'] = df.loc[:,'datetime'].dt.round('h')
|
|
@@ -392,6 +400,7 @@ def transform(df, filter_qc_codes = True, data_codes = None, baseflow_method = '
|
|
|
392
400
|
data_codes = DATA_CODES
|
|
393
401
|
df = filter_quality_codes(df, data_codes)
|
|
394
402
|
df = average_results(df)
|
|
403
|
+
df = filter_years(df, start_year=1996)
|
|
395
404
|
df = calculate_baseflow(df, method = baseflow_method)
|
|
396
405
|
df['station_origin'] = 'wiski'
|
|
397
406
|
#df.set_index('datetime',inplace=True)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#%% Imports
|
|
2
|
+
from mpcaHydro.data_manager import dataManager
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import duckdb
|
|
5
|
+
THIS_DIR = Path(__file__).parent
|
|
6
|
+
WISKI_STATIONS = ['E05011002']
|
|
7
|
+
EQUIS_STATIONS = ['S001-235','S005-115']
|
|
8
|
+
|
|
9
|
+
#%%
|
|
10
|
+
def test_build_warehouse():
|
|
11
|
+
dm = dataManager(THIS_DIR)
|
|
12
|
+
dm._build_warehouse()
|
|
13
|
+
|
|
14
|
+
test_build_warehouse()
|
|
15
|
+
# %%
|
|
16
|
+
def test_equis_data_download():
    '''Build a fresh data manager, connect to Oracle and download the EQuIS test stations.

    Oracle credentials are read from the MPCA_ORACLE_USERNAME and
    MPCA_ORACLE_PASSWORD environment variables so that no secret is stored in
    the repository or shipped in a release; a missing variable raises KeyError.
    '''
    import os  # local import: this function is the only user in the file

    dm = dataManager(THIS_DIR,
                     oracle_username = os.environ['MPCA_ORACLE_USERNAME'],
                     oracle_password = os.environ['MPCA_ORACLE_PASSWORD'],
                     reset=True)

    dm.connect_to_oracle()
    dm._download_equis_data(EQUIS_STATIONS)
|
|
24
|
+
|
|
25
|
+
test_equis_data_download()
|
|
26
|
+
#%%
|
|
27
|
+
def test_wiski_data_download():
|
|
28
|
+
dm = dataManager(THIS_DIR, reset=True)
|
|
29
|
+
dm._download_wiski_data(WISKI_STATIONS)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
test_wiski_data_download()
|
|
33
|
+
|
|
34
|
+
#%%
|
|
35
|
+
dm = dataManager(THIS_DIR, reset=False)
|
|
36
|
+
with duckdb.connect(dm.db_path, read_only=True) as con:
|
|
37
|
+
df = con.execute('SELECT * FROM analytics.outlet_observations').fetch_df()
|
|
38
|
+
assert(df['outlet_id'].isnull().sum() == 0)
|
|
39
|
+
|
|
40
|
+
with duckdb.connect(dm.db_path, read_only=True) as con:
|
|
41
|
+
df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
|
|
42
|
+
assert(df['outlet_id'].isnull().sum() == 0)
|
|
43
|
+
assert(df['value'].isnull().sum() == 0)
|
|
44
|
+
# %%
|
|
45
|
+
dm = dataManager(THIS_DIR, reset=False)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_wiski_download():
|
|
49
|
+
dm = dataManager(THIS_DIR, reset=False)
|
|
50
|
+
wiski_stations = WISKI_STATIONS
|
|
51
|
+
dm._download_wiski_data(wiski_stations)
|
|
52
|
+
return dm
|
|
53
|
+
|
|
54
|
+
test_wiski_download()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
with duckdb.connect(dm.db_path, read_only=True) as con:
|
|
58
|
+
df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
|
|
59
|
+
assert(df['outlet_id'].isnull().sum() == 0)
|
|
60
|
+
|
|
61
|
+
# %%
|
|
Binary file
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
|
|
2
|
+
#%%
|
|
3
|
+
from mpcaHydro import equis
|
|
4
|
+
from mpcaHydro import outlets
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
#%%
|
|
9
|
+
import os  # used only for the credential lookup below

model_name = 'Rum'
equis_stations = outlets.equis_stations(model_name)
# Credentials come from the environment so no secret is stored in the test
# file; a missing variable raises KeyError before any connection attempt.
equis.connect(os.environ['MPCA_ORACLE_USERNAME'], password = os.environ['MPCA_ORACLE_PASSWORD'])
|
|
12
|
+
|
|
13
|
+
df = equis.download(equis_stations)
|
|
14
|
+
|
|
15
|
+
df_normalized = equis.normalize(df.copy())
|
|
16
|
+
expected_columns = ['station_id', 'constituent', 'cas_rn', 'datetime', 'value', 'unit']
|
|
17
|
+
|
|
18
|
+
assert all(col in df_normalized.columns for col in expected_columns)
|
|
19
|
+
# %%
|
mpcahydro-2.2.7/tests/pixi.toml
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
[workspace]
|
|
2
|
-
channels = ["https://prefix.dev/conda-forge"]
|
|
3
|
-
platforms = ["linux-64", "osx-64", "win-64"]
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
[dependencies]
|
|
7
|
-
requests = "*"
|
|
8
|
-
pandas = "*"
|
|
9
|
-
time = "*"
|
|
10
|
-
pathlib = "*"
|
|
11
|
-
spyder = "*"
|
|
12
|
-
jupyter = "*"
|
|
13
|
-
|
|
14
|
-
[package]
|
|
15
|
-
name = "mpcaHydro"
|
|
16
|
-
version = "0.1.0"
|
|
17
|
-
|
|
18
|
-
[package.build]
|
|
19
|
-
backend = { name = "pixi-build-python", version = "0.1.*" }
|
|
20
|
-
|
|
21
|
-
[package.run-dependencies]
|
|
22
|
-
requests = "*"
|
|
23
|
-
pandas = "*"
|
|
24
|
-
time = "*"
|
|
25
|
-
pathlib = "*"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|