mpcaHydro 2.2.6__tar.gz → 2.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/PKG-INFO +1 -1
- mpcahydro-2.2.8/demo.py +226 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/pyproject.toml +1 -1
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/outlets.py +14 -1
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/pywisk.py +5 -5
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/swd.py +21 -15
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/warehouse.py +55 -0
- mpcahydro-2.2.8/tests/integration/test_dataManager.py +61 -0
- mpcahydro-2.2.8/tests/integration/test_warehouse.duckdb +0 -0
- mpcahydro-2.2.8/tests/unit/test_equis.py +19 -0
- mpcahydro-2.2.6/tests/pixi.toml +0 -25
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/.gitattributes +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/.gitignore +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/README.md +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/__init__.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/csg.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/outlet.duckdb +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/equis.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/reports.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/analytics_tables.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/outlets_schema.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/schemas.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/staging_tables.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/views_analytics.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/views_outlets.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/views_reports.sql +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql_loader.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/warehouse_functions.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/wiski.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/xref.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/README.md +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/conftest.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_data_manager.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_data_manager_integration.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_equis_integration.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_warehouse.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_wiski.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_wiski_integration.py +0 -0
- {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/test_data_manager_functions.py +0 -0
mpcahydro-2.2.8/demo.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#%%
|
|
2
|
+
from mpcaHydro.data_manager import dataManager
|
|
3
|
+
from pyhcal.repository import Repository
|
|
4
|
+
from mpcaHydro import outlets
|
|
5
|
+
import duckdb
|
|
6
|
+
from mpcaHydro import equis, warehouse, wiski
|
|
7
|
+
from hspf.hspfModel import hspfModel
|
|
8
|
+
from hspf.uci import UCI
|
|
9
|
+
from mpcaHydro import etlSWD
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
#%%
|
|
13
|
+
'''
|
|
14
|
+
New approach. Directly load to warehouse from downloads.
|
|
15
|
+
Store raw and processed data in warehouse. For large timeseries I could store
|
|
16
|
+
as parquet files. The transformations using pandas take a bit of time. I imagine doing them
|
|
17
|
+
within duckdb would be faster.
|
|
18
|
+
|
|
19
|
+
'''
|
|
20
|
+
|
|
21
|
+
# with warehouse.connect(db_path) as con:
|
|
22
|
+
# df = con.execute("SELECT * FROM staging.wiski").df()
|
|
23
|
+
# df = wiski.transform(df,filter_qc_codes = False)
|
|
24
|
+
|
|
25
|
+
#%%
|
|
26
|
+
# Run configuration for the demo: which model to build, the warehouse file,
# the download window and the station lists taken from the outlets module.
import getpass
import os

model_name = 'Nemadji'
db_path = f'C:/Users/mfratki/Documents/{model_name}.duckdb'
start_year = 1996
end_year = 2030
replace = True
filter_qc_codes = True
equis_stations = outlets.equis_stations(model_name)
wiski_stations = outlets.wiski_stations(model_name)

# Credentials must never be committed with the script. The password is read
# from the ORACLE_PASSWORD environment variable, falling back to an
# interactive prompt; the user name can be overridden with ORACLE_USERNAME.
# NOTE(review): the password that used to be hard-coded here has been
# published with the package and should be rotated.
oracle_username = os.environ.get('ORACLE_USERNAME', 'MFRATKI')
oracle_password = os.environ.get('ORACLE_PASSWORD') or getpass.getpass(
    f'Oracle password for {oracle_username}: ')
equis.connect(oracle_username, password = oracle_password)
warehouse.init_db(db_path,reset = True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
#%% Old approach. Store as indvidual processed station files then load to warehouse
|
|
39
|
+
#df_equis = equis.download(equis_stations)
|
|
40
|
+
#df_wiski = wiski.download(wiski_stations,start_year = start_year, end_year = end_year)
|
|
41
|
+
|
|
42
|
+
#%% equis
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def download_equis_data(db_path,station_ids,replace = False):
    """Download EQuIS data for ``station_ids`` and load it into the warehouse.

    The downloaded frame is written to ``staging.equis`` and its transformed
    form to ``analytics.equis``; ``replace`` is forwarded to both loads. When
    the download comes back empty nothing is written and a notice is printed.
    """
    with warehouse.connect(db_path,read_only = False) as con:
        raw = equis.download(station_ids)
        if raw.empty:
            print('No data neccesary for HSPF calibration available from equis for stations:',station_ids)
            return
        # Raw rows first, then the transformed rows derived from the same frame.
        warehouse.load_df_to_table(con,raw, 'staging.equis',replace = replace)
        transformed = equis.transform(raw)
        warehouse.load_df_to_table(con,transformed, 'analytics.equis',replace = replace)
|
|
55
|
+
|
|
56
|
+
def download_wiski_data(db_path,station_ids,replace = False):
    """Download WISKI data for ``station_ids`` and load it into the warehouse.

    The download window comes from the module-level ``start_year`` and
    ``end_year``. The downloaded frame is written to ``staging.wiski`` and its
    transformed form to ``analytics.wiski``; ``replace`` is forwarded to both
    loads. When the download comes back empty nothing is written and a notice
    is printed.
    """
    with warehouse.connect(db_path,read_only = False) as con:
        raw = wiski.download(station_ids,start_year = start_year, end_year = end_year)
        if raw.empty:
            print('No data neccesary for HSPF calibration available from wiski for stations:',station_ids)
            return
        # Raw rows first, then the transformed rows derived from the same frame.
        warehouse.load_df_to_table(con,raw, 'staging.wiski', replace = replace)
        transformed = wiski.transform(raw)
        warehouse.load_df_to_table(con,transformed, 'analytics.wiski',replace = replace)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Add to warehouse from custom df. Must contain required normalized columns.
|
|
67
|
+
with warehouse.connect(db_path,read_only = False) as con:
|
|
68
|
+
if replace:
|
|
69
|
+
warehouse.drop_station_id(con,station_id,station_origin='equis')
|
|
70
|
+
warehouse.add_to_table(con,df, 'staging','equis_normalized')
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
|
|
74
|
+
df = equis.normalize(df.copy())
|
|
75
|
+
warehouse.add_to_table(con,df, 'staging','equis_normalized')
|
|
76
|
+
df = equis.transform(df)
|
|
77
|
+
warehouse.add_to_table(con,df, 'analytics','equis')
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
#%% swd
|
|
82
|
+
|
|
83
|
+
df = etlSWD.download(equis_stations)
|
|
84
|
+
|
|
85
|
+
with warehouse.connect(db_path,read_only = False) as con:
|
|
86
|
+
warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
|
|
87
|
+
df = equis.normalize(df.copy())
|
|
88
|
+
warehouse.add_to_table(con,df, 'staging','equis_normalized')
|
|
89
|
+
df = equis.transform(df)
|
|
90
|
+
warehouse.add_to_table(con,df, 'analytics','equis')
|
|
91
|
+
#%% wiski
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
if station_origin == 'wiski':
|
|
96
|
+
df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
|
|
97
|
+
warehouse.load_df_to_staging(con,df, 'wiski_raw', replace = replace)
|
|
98
|
+
df = wiski.normalize(df.copy())
|
|
99
|
+
warehouse.add_to_table(con,df, 'staging','wiski_normalized')
|
|
100
|
+
df = wiski.transform(df,filter_qc_codes = filter_qc_codes)
|
|
101
|
+
warehouse.add_to_table(con,df, 'analytics','wiski') # method includes normalization
|
|
102
|
+
|
|
103
|
+
if station_origin == 'swd':
|
|
104
|
+
df = pd.concat([etlSWD.download(station_id) for station_id in station_ids])
|
|
105
|
+
warehouse.load_df_to_staging(con,df, 'equis_raw', replace = replace)
|
|
106
|
+
df = etlSWD.transform(df.copy())
|
|
107
|
+
warehouse.add_to_table(con,df, 'analytics','equis')
|
|
108
|
+
warehouse.update_views(con)
|
|
109
|
+
|
|
110
|
+
with warehouse.connect(db_path) as con:
|
|
111
|
+
warehouse.update_views(con)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
#%%
|
|
115
|
+
|
|
116
|
+
import requests
|
|
117
|
+
url = 'http://ifrshiny.seas.umich.edu/mglp/'
|
|
118
|
+
requests.get(url)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
db_path = 'C:/Users/mfratki/Documents/Rum.duckdb'
|
|
123
|
+
modl_db.build_outlet_db(db_path)
|
|
124
|
+
con = duckdb.connect(db_path)
|
|
125
|
+
con.execute("SELECT * FROM station_reach_pairs").df()
|
|
126
|
+
con.execute('SELECT * FROM station_reach_pairs WHERE outlet_id = 76').df()
|
|
127
|
+
|
|
128
|
+
# Need to remove duplicates from MODL_DB
|
|
129
|
+
modl_db.MODL_DB.loc[modl_db.MODL_DB.duplicated(['station_id','source'])]
|
|
130
|
+
|
|
131
|
+
#%%
|
|
132
|
+
dm = dataManager('C:/Users/mfratki/Documents/')
|
|
133
|
+
dm._build_warehouse()
|
|
134
|
+
equis_stations = modl_db.equis_stations('Nemadji')
|
|
135
|
+
wiski_stations = modl_db.wiski_stations('Nemadji')
|
|
136
|
+
|
|
137
|
+
#%% Old approach. Store as indvidual processed station files then load to warehouse
|
|
138
|
+
for station_id in equis_stations:
|
|
139
|
+
dm._download_station_data(station_id,'equis', True)
|
|
140
|
+
|
|
141
|
+
for station_id in wiski_stations:
|
|
142
|
+
dm._download_station_data(station_id,'wiski', True)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
#%% Adding HSPF outputs to warehouse
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
con = duckdb.connect(db_path)
|
|
166
|
+
|
|
167
|
+
model_name = 'Nemadji'
|
|
168
|
+
outlets = [group for _, group in modl_db.MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
|
|
169
|
+
|
|
170
|
+
for outlet in outlets:
|
|
171
|
+
1+1
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
dfs = []
|
|
175
|
+
for constituent in ['Q','TSS','TP','N','OP','TKN']:
|
|
176
|
+
opnids = modl_db.split_opnids([opnid.split(',') for opnid in set(outlet['opnids'].tolist())])
|
|
177
|
+
for opnid in opnids:
|
|
178
|
+
df = mod.hbns.get_reach_constituent(constituent,opnids,time_step='h')
|
|
179
|
+
df.columns = ['value']
|
|
180
|
+
df['constituent'] = constituent
|
|
181
|
+
df['operation'] = operation
|
|
182
|
+
df['opnid'] = opnid
|
|
183
|
+
dfs.append(df)
|
|
184
|
+
|
|
185
|
+
df = pd.concat(dfs).reset_index()
|
|
186
|
+
df['model_name'] = model_name
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
station_ids = ['H05018001','S006-214','S015-102']
|
|
191
|
+
target_constituent = 'TSS'
|
|
192
|
+
flow_constituent = 'Q'
|
|
193
|
+
|
|
194
|
+
# build placeholders for the IN list (one ? per station id)
|
|
195
|
+
placeholders = ','.join(['?'] * len(station_ids))
|
|
196
|
+
|
|
197
|
+
sql = f'''
|
|
198
|
+
SELECT o.*, f.datetime AS flow_datetime, f.value AS flow, f.baseflow, f.station_id AS flow_station_id, f.station_origin AS flow_station_origin
|
|
199
|
+
FROM analytics.observations o
|
|
200
|
+
JOIN analytics.observations f
|
|
201
|
+
ON o.datetime = f.datetime
|
|
202
|
+
WHERE o.constituent = ?
|
|
203
|
+
AND o.station_id IN ({placeholders})
|
|
204
|
+
AND f.constituent = ?;
|
|
205
|
+
'''
|
|
206
|
+
|
|
207
|
+
# parameter order must match the ? positions in the query
|
|
208
|
+
params = [target_constituent] + station_ids + [flow_constituent]
|
|
209
|
+
|
|
210
|
+
df = con.execute(sql, params).df()
|
|
211
|
+
|
|
212
|
+
outlet_id: station_ids
|
|
213
|
+
|
|
214
|
+
outlet_id: opnid
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
outlets = []
|
|
218
|
+
for index, (_, group) in enumerate(modl_db.MODL_DB.groupby(by = ['opnids','repository_name'])):
|
|
219
|
+
group['outlet_id'] = index
|
|
220
|
+
group.reset_index(drop=True, inplace=True)
|
|
221
|
+
outlets.append(group)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
for _, row in group.iterrows():
|
|
225
|
+
opnids = group.split_opnids(row['opnids'].str.split(',').to_list())
|
|
226
|
+
row*len(opnids)
|
|
@@ -144,7 +144,7 @@ def get_outlets_by_reach(reach_id: int, model_name: str):
|
|
|
144
144
|
""",
|
|
145
145
|
[reach_id, model_name]).fetchdf()
|
|
146
146
|
return df
|
|
147
|
-
|
|
147
|
+
|
|
148
148
|
def get_outlets_by_station(station_id: str, station_origin: str):
|
|
149
149
|
"""
|
|
150
150
|
Return all outlet rows for outlets that include the given reach_id in the given model_name.
|
|
@@ -160,6 +160,19 @@ def get_outlets_by_station(station_id: str, station_origin: str):
|
|
|
160
160
|
[station_id, station_origin]).fetchdf()
|
|
161
161
|
return df
|
|
162
162
|
|
|
163
|
+
def get_station_opnids(station_id: str, station_origin: str):
    """
    List the ``reach_id`` values paired with a station in ``outlets.station_reach_pairs``.

    Rows are selected where both ``station_id`` and ``station_origin`` match the
    arguments; the ``reach_id`` column of the result is returned as a plain list.
    """
    query = """
        SELECT r.reach_id
        FROM outlets.station_reach_pairs r
        WHERE r.station_id = ? AND r.station_origin = ?
        """
    with connect(DB_PATH) as con:
        pairs = con.execute(query, [station_id, station_origin]).fetchdf()
    reach_ids = pairs['reach_id']
    return reach_ids.tolist()
|
|
163
176
|
|
|
164
177
|
|
|
165
178
|
class OutletGateway:
|
|
@@ -13,7 +13,7 @@ import time
|
|
|
13
13
|
CERT_PATH = str(Path(__file__).resolve().parent/'data\\wiskiweb01.pca.state.mn.us.crt')
|
|
14
14
|
#TODO: Use this url to make sure web service is working https://wiskiweb01.pca.state.mn.us/
|
|
15
15
|
class Service():
|
|
16
|
-
base_url = '
|
|
16
|
+
base_url = 'http://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
|
|
17
17
|
base_dict = {
|
|
18
18
|
'datasource': '0',
|
|
19
19
|
'service': 'kisters',
|
|
@@ -30,7 +30,7 @@ class Service():
|
|
|
30
30
|
try:
|
|
31
31
|
# Using requests.head() to fetch headers is faster than requests.get()
|
|
32
32
|
# as it doesn't download the full content
|
|
33
|
-
response = requests.head('
|
|
33
|
+
response = requests.head('http://wiskiweb01.pca.state.mn.us', timeout=timeout)
|
|
34
34
|
|
|
35
35
|
# raise_for_status() raises an HTTPError for 4xx or 5xx status codes
|
|
36
36
|
response.raise_for_status()
|
|
@@ -53,7 +53,7 @@ class Service():
|
|
|
53
53
|
|
|
54
54
|
def _requestTypes(self):
|
|
55
55
|
url = self.url({'request': 'getrequestinfo'})
|
|
56
|
-
return requests.get(url
|
|
56
|
+
return requests.get(url).json()[0]
|
|
57
57
|
|
|
58
58
|
def getRequests(self):
|
|
59
59
|
return list(self._requestTypes()['Requests'].keys())
|
|
@@ -72,7 +72,7 @@ class Service():
|
|
|
72
72
|
|
|
73
73
|
def info(self,request_type):
|
|
74
74
|
url = self.url({'request': 'getrequestinfo'})
|
|
75
|
-
response = requests.get(url
|
|
75
|
+
response = requests.get(url)
|
|
76
76
|
get_requests = response.json()
|
|
77
77
|
return get_requests[0]['Requests'].keys()
|
|
78
78
|
|
|
@@ -95,7 +95,7 @@ class Service():
|
|
|
95
95
|
|
|
96
96
|
def get_json(self,args_dict):
|
|
97
97
|
# Download request
|
|
98
|
-
self.response = requests.get(self.url(args_dict)
|
|
98
|
+
self.response = requests.get(self.url(args_dict))
|
|
99
99
|
if self.response.status_code != 200:
|
|
100
100
|
print('Error: ' + self.response.json()['message'])
|
|
101
101
|
self.response.raise_for_status() # raises exception when not a 2xx response
|
|
@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
|
|
|
26
26
|
# return df
|
|
27
27
|
import requests
|
|
28
28
|
|
|
29
|
-
def _download(
|
|
29
|
+
def _download(station_id):
|
|
30
30
|
# Replace {station_no} in the URL with the actual station number
|
|
31
|
-
url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
|
|
32
|
-
|
|
31
|
+
#url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
|
|
32
|
+
url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
|
|
33
|
+
|
|
33
34
|
try:
|
|
34
35
|
# Send a GET request to the URL
|
|
35
|
-
|
|
36
|
+
params = {
|
|
37
|
+
'stationId': station_id,
|
|
38
|
+
'format': 'json'
|
|
39
|
+
}
|
|
40
|
+
response = requests.get(url,params = params)
|
|
36
41
|
response.raise_for_status() # Raise exception for HTTP errors
|
|
37
42
|
# Parse the JSON data
|
|
38
|
-
|
|
39
|
-
return pd.DataFrame(columns = response.json()['column_names'])
|
|
40
|
-
else:
|
|
41
|
-
return pd.DataFrame(response.json()['data'])
|
|
43
|
+
return pd.DataFrame(response.json()['data'])
|
|
42
44
|
|
|
43
45
|
except requests.exceptions.RequestException as e:
|
|
44
46
|
print(f"An error occurred: {e}")
|
|
@@ -46,14 +48,18 @@ def _download(station_no):
|
|
|
46
48
|
|
|
47
49
|
|
|
48
50
|
|
|
49
|
-
def download(
|
|
51
|
+
def download(station_ids):
    """Download results for one or more stations and stack them into one frame.

    Parameters:
        station_ids: an iterable of station identifiers. A single identifier
            passed as a bare string is also accepted and treated as a
            one-element list.

    Returns:
        A DataFrame holding the rows returned by ``_download`` for every
        station that produced data, with a ``station_id`` column added.
        An empty DataFrame is returned when no station produced data.
    """
    #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')

    # A bare string would otherwise be iterated character by character.
    if isinstance(station_ids, str):
        station_ids = [station_ids]

    dfs = []
    for station_id in station_ids:
        df = _download(station_id)
        # NOTE(review): the failure path of _download is not fully visible
        # from here; a None result is treated the same as "no data".
        if df is None or df.empty:
            continue
        df['station_id'] = station_id
        dfs.append(df)

    # pd.concat raises ValueError on an empty list, so hand back an empty
    # frame instead; callers can keep testing ``df.empty``.
    if not dfs:
        return pd.DataFrame()
    return pd.concat(dfs, ignore_index=True)
|
|
61
|
+
|
|
62
|
+
|
|
57
63
|
|
|
58
64
|
def info(station_no):
|
|
59
65
|
#df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
|
|
@@ -28,6 +28,23 @@ def init_db(db_path: str,reset: bool = False):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
def validate_schemas(con: duckdb.DuckDBPyConnection):
    """Raise ``ValueError`` when a schema the warehouse expects is absent.

    The schema names listed in ``information_schema.schemata`` are compared
    against staging, analytics, mappings, outlets and reports.
    """
    required = {'staging', 'analytics', 'mappings', 'outlets', 'reports'}
    rows = con.execute("SELECT schema_name FROM information_schema.schemata").fetchall()
    # The query selects a single column, so every row is a one-element tuple.
    present = {schema_name for (schema_name,) in rows}
    missing_schemas = required.difference(present)
    if not missing_schemas:
        return
    raise ValueError(f"Missing schemas: {missing_schemas}")
|
|
39
|
+
|
|
40
|
+
def validate_tables(con: duckdb.DuckDBPyConnection, schema: str, expected_tables: set):
    """Validate that a schema contains the expected tables.

    Parameters:
        con: open DuckDB connection to inspect.
        schema: schema name matched against ``information_schema.tables``.
        expected_tables: table names that must exist in ``schema``; any
            iterable of names is accepted.

    Raises:
        ValueError: when one or more expected tables are missing.
    """
    # Plain string: the query has no interpolated parts, the schema is bound
    # through the ``?`` parameter.
    result = con.execute(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = ?",
        [schema]).fetchall()
    existing_tables = {row[0] for row in result}
    # Coerce to a set so a list or tuple of names does not break the difference.
    missing_tables = set(expected_tables) - existing_tables
    if missing_tables:
        raise ValueError(f"Missing tables in {schema} schema: {missing_tables}")
|
|
47
|
+
|
|
31
48
|
def create_schemas(con: duckdb.DuckDBPyConnection):
|
|
32
49
|
"""Create staging, analytics, hspf, and reports schemas if they do not exist."""
|
|
33
50
|
con.execute(sql_loader.get_schemas_sql())
|
|
@@ -96,6 +113,44 @@ def create_mapping_tables(con: duckdb.DuckDBPyConnection):
|
|
|
96
113
|
else:
|
|
97
114
|
print(f"Warning: WISKI_QUALITY_CODES.csv not found at {wiski_qc_csv_path}")
|
|
98
115
|
|
|
116
|
+
|
|
117
|
+
def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
    """
    Copy the tables and views of an external outlets DuckDB file into ``con``.

    The file at ``outlets_db_path`` is attached under the alias ``outlets_db``,
    every table it lists is copied with ``CREATE TABLE ... AS SELECT``, every
    view is recreated from its definition with the ``outlets_db.`` prefix
    removed, and the alias is detached again, also when copying fails.
    """
    create_schemas(con)

    # Double any single quote so the path cannot terminate the SQL literal.
    quoted_path = str(outlets_db_path).replace("'", "''")
    con.execute(f"ATTACH DATABASE '{quoted_path}' AS outlets_db;")
    try:
        # -- Step 1: Copy all tables --
        tables = con.execute("SHOW TABLES FROM outlets_db").fetchall()
        print(f"Tables in the source database: {tables}")

        for table in tables:
            table_name = table[0]  # Extract table name
            con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM outlets_db.{table_name}")  # Copy table contents

        # -- Step 2: Copy all views --
        # NOTE(review): SHOW VIEWS / SHOW CREATE VIEW look like MySQL syntax;
        # confirm DuckDB accepts them, otherwise read the view names and their
        # definitions from the duckdb_views() metadata function instead.
        views = con.execute("SHOW VIEWS FROM outlets_db").fetchall()
        print(f"Views in the source database: {views}")

        for view in views:
            view_name = view[0]  # Extract view name

            # Get the CREATE VIEW statement for the view
            create_view_sql = con.execute(f"SHOW CREATE VIEW outlets_db.{view_name}").fetchone()[0]

            # Recreate the view in the destination database (remove the `outlets_db.` prefix if present)
            create_view_sql = create_view_sql.replace("outlets_db.", "")
            con.execute(create_view_sql)
    finally:
        # The alias is attached exactly once above; attaching it a second time
        # while it is still in use fails. DETACH takes the alias as an
        # identifier, not as a quoted string.
        con.execute("DETACH outlets_db")
|
|
152
|
+
|
|
153
|
+
|
|
99
154
|
def create_outlets_tables(con: duckdb.DuckDBPyConnection):
|
|
100
155
|
"""Create tables in the outlets schema to define outlet-station-reach relationships."""
|
|
101
156
|
con.execute(sql_loader.get_outlets_schema_sql())
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#%% Imports
|
|
2
|
+
from mpcaHydro.data_manager import dataManager
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import duckdb
|
|
5
|
+
THIS_DIR = Path(__file__).parent
|
|
6
|
+
WISKI_STATIONS = ['E05011002']
|
|
7
|
+
EQUIS_STATIONS = ['S001-235','S005-115']
|
|
8
|
+
|
|
9
|
+
#%%
|
|
10
|
+
def test_build_warehouse():
|
|
11
|
+
dm = dataManager(THIS_DIR)
|
|
12
|
+
dm._build_warehouse()
|
|
13
|
+
|
|
14
|
+
test_build_warehouse()
|
|
15
|
+
# %%
|
|
16
|
+
def test_equis_data_download():
    """Build a fresh warehouse, connect to Oracle and download the EQuIS test stations.

    The Oracle password is read from the ORACLE_PASSWORD environment variable
    (a KeyError is raised when it is unset); ORACLE_USERNAME optionally
    overrides the default user name.
    """
    # Local import keeps this block self-contained.
    import os

    # Credentials must not live in the repository.
    # NOTE(review): the password that used to be hard-coded here has been
    # published with the package and should be rotated.
    dm = dataManager(THIS_DIR,
                     oracle_username = os.environ.get('ORACLE_USERNAME', 'MFRATKI'),
                     oracle_password = os.environ['ORACLE_PASSWORD'],
                     reset=True)

    dm.connect_to_oracle()
    dm._download_equis_data(EQUIS_STATIONS)
|
|
24
|
+
|
|
25
|
+
test_equis_data_download()
|
|
26
|
+
#%%
|
|
27
|
+
def test_wiski_data_download():
|
|
28
|
+
dm = dataManager(THIS_DIR, reset=True)
|
|
29
|
+
dm._download_wiski_data(WISKI_STATIONS)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
test_wiski_data_download()
|
|
33
|
+
|
|
34
|
+
#%%
|
|
35
|
+
dm = dataManager(THIS_DIR, reset=False)
|
|
36
|
+
with duckdb.connect(dm.db_path, read_only=True) as con:
|
|
37
|
+
df = con.execute('SELECT * FROM analytics.outlet_observations').fetch_df()
|
|
38
|
+
assert(df['outlet_id'].isnull().sum() == 0)
|
|
39
|
+
|
|
40
|
+
with duckdb.connect(dm.db_path, read_only=True) as con:
|
|
41
|
+
df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
|
|
42
|
+
assert(df['outlet_id'].isnull().sum() == 0)
|
|
43
|
+
assert(df['value'].isnull().sum() == 0)
|
|
44
|
+
# %%
|
|
45
|
+
dm = dataManager(THIS_DIR, reset=False)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_wiski_download():
|
|
49
|
+
dm = dataManager(THIS_DIR, reset=False)
|
|
50
|
+
wiski_stations = WISKI_STATIONS
|
|
51
|
+
dm._download_wiski_data(wiski_stations)
|
|
52
|
+
return dm
|
|
53
|
+
|
|
54
|
+
test_wiski_download()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
with duckdb.connect(dm.db_path, read_only=True) as con:
|
|
58
|
+
df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
|
|
59
|
+
assert(df['outlet_id'].isnull().sum() == 0)
|
|
60
|
+
|
|
61
|
+
# %%
|
|
Binary file
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
|
|
2
|
+
#%%
|
|
3
|
+
from mpcaHydro import equis
|
|
4
|
+
from mpcaHydro import outlets
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
#%%
|
|
9
|
+
# Download the EQuIS stations of one model and check that normalization
# yields the columns the rest of the pipeline relies on.
import os

model_name = 'Rum'
equis_stations = outlets.equis_stations(model_name)

# Credentials come from the environment instead of the source file:
# ORACLE_PASSWORD is required, ORACLE_USERNAME optionally overrides the default.
# NOTE(review): the password that used to be hard-coded here has been
# published with the package and should be rotated.
equis.connect(os.environ.get('ORACLE_USERNAME', 'MFRATKI'),
              password = os.environ['ORACLE_PASSWORD'])

df = equis.download(equis_stations)

df_normalized = equis.normalize(df.copy())
expected_columns = ['station_id', 'constituent', 'cas_rn', 'datetime', 'value', 'unit']

# Name the missing columns so a failure is actionable.
missing_columns = [col for col in expected_columns if col not in df_normalized.columns]
assert not missing_columns, f"Normalized EQuIS frame is missing columns: {missing_columns}"
|
|
19
|
+
# %%
|
mpcahydro-2.2.6/tests/pixi.toml
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
[workspace]
|
|
2
|
-
channels = ["https://prefix.dev/conda-forge"]
|
|
3
|
-
platforms = ["linux-64", "osx-64", "win-64"]
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
[dependencies]
|
|
7
|
-
requests = "*"
|
|
8
|
-
pandas = "*"
|
|
9
|
-
time = "*"
|
|
10
|
-
pathlib = "*"
|
|
11
|
-
spyder = "*"
|
|
12
|
-
jupyter = "*"
|
|
13
|
-
|
|
14
|
-
[package]
|
|
15
|
-
name = "mpcaHydro"
|
|
16
|
-
version = "0.1.0"
|
|
17
|
-
|
|
18
|
-
[package.build]
|
|
19
|
-
backend = { name = "pixi-build-python", version = "0.1.*" }
|
|
20
|
-
|
|
21
|
-
[package.run-dependencies]
|
|
22
|
-
requests = "*"
|
|
23
|
-
pandas = "*"
|
|
24
|
-
time = "*"
|
|
25
|
-
pathlib = "*"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|