mpcaHydro 2.2.8__tar.gz → 2.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/PKG-INFO +1 -1
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/pyproject.toml +1 -1
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/outlet.duckdb +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/equis.py +8 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/outlets.py +56 -23
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse.py +2 -3
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse_functions.py +3 -2
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/wiski.py +9 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/.gitattributes +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/.gitignore +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/README.md +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/demo.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/__init__.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/csg.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/pywisk.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/reports.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/analytics_tables.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/outlets_schema.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/schemas.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/staging_tables.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_analytics.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_outlets.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_reports.sql +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql_loader.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/swd.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/xref.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/README.md +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/conftest.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_dataManager.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_data_manager.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_data_manager_integration.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_equis_integration.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_warehouse.duckdb +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_warehouse.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_wiski.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_wiski_integration.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/test_data_manager_functions.py +0 -0
- {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/unit/test_equis.py +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -265,6 +265,13 @@ def replace_nondetects(df):
|
|
|
265
265
|
df.loc[df['value'].isna(), 'value'] = 0
|
|
266
266
|
return df
|
|
267
267
|
|
|
268
|
+
def filter_years(df, start_year=1996, end_year=None):
|
|
269
|
+
'''Filter Equis data to include only samples within a certain year range.'''
|
|
270
|
+
df = df[df['datetime'].dt.year >= start_year]
|
|
271
|
+
if end_year is not None:
|
|
272
|
+
df = df[df['datetime'].dt.year <= end_year]
|
|
273
|
+
return df
|
|
274
|
+
|
|
268
275
|
def normalize(df):
|
|
269
276
|
'''Normalize Equis data: select relevant columns.'''
|
|
270
277
|
df = map_constituents(df)
|
|
@@ -278,6 +285,7 @@ def transform(df):
|
|
|
278
285
|
|
|
279
286
|
df = normalize(df)
|
|
280
287
|
df = replace_nondetects(df)
|
|
288
|
+
df = filter_years(df)
|
|
281
289
|
if not df.empty:
|
|
282
290
|
df = average_results(df)
|
|
283
291
|
return df
|
|
@@ -31,7 +31,15 @@ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
|
|
|
31
31
|
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
32
32
|
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
33
33
|
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
34
|
+
MODL_DB = MODL_DB.dropna(subset = 'repo_name')
|
|
34
35
|
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
36
|
+
# Add outlet_id column to MODL_DB based on enumerate grouping
|
|
37
|
+
outlet_id_map = {}
|
|
38
|
+
for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
|
|
39
|
+
for idx in group.index:
|
|
40
|
+
outlet_id_map[idx] = int(outlet_id)
|
|
41
|
+
MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
|
|
42
|
+
|
|
35
43
|
|
|
36
44
|
def _reload():
|
|
37
45
|
global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
|
|
@@ -47,7 +55,14 @@ def _reload():
|
|
|
47
55
|
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
48
56
|
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
49
57
|
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
58
|
+
MODL_DB = MODL_DB.dropna(subset = 'repo_name')
|
|
50
59
|
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
60
|
+
# Add outlet_id column to MODL_DB based on enumerate grouping
|
|
61
|
+
outlet_id_map = {}
|
|
62
|
+
for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
|
|
63
|
+
for idx in group.index:
|
|
64
|
+
outlet_id_map[idx] = int(outlet_id)
|
|
65
|
+
MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
|
|
51
66
|
|
|
52
67
|
|
|
53
68
|
def split_opnids(opnids: list):
|
|
@@ -174,6 +189,34 @@ def get_station_opnids(station_id: str, station_origin: str):
|
|
|
174
189
|
[station_id, station_origin]).fetchdf()
|
|
175
190
|
return df['reach_id'].tolist()
|
|
176
191
|
|
|
192
|
+
def get_outlet_opnids(outlet_id: int):
|
|
193
|
+
"""
|
|
194
|
+
Return all model reach IDs (opnids) associated with the given outlet ID.
|
|
195
|
+
"""
|
|
196
|
+
with connect(DB_PATH) as con:
|
|
197
|
+
df = con.execute(
|
|
198
|
+
"""
|
|
199
|
+
SELECT r.reach_id
|
|
200
|
+
FROM outlets.station_reach_pairs r
|
|
201
|
+
WHERE r.outlet_id = ?
|
|
202
|
+
""",
|
|
203
|
+
[outlet_id]).fetchdf()
|
|
204
|
+
return list(set(df['reach_id'].tolist()))
|
|
205
|
+
|
|
206
|
+
def get_outlet_stations(outlet_id: int):
|
|
207
|
+
"""
|
|
208
|
+
Return all station IDs and origins associated with the given outlet ID.
|
|
209
|
+
"""
|
|
210
|
+
with connect(DB_PATH) as con:
|
|
211
|
+
df = con.execute(
|
|
212
|
+
"""
|
|
213
|
+
SELECT r.station_id, r.station_origin
|
|
214
|
+
FROM outlets.station_reach_pairs r
|
|
215
|
+
WHERE r.outlet_id = ?
|
|
216
|
+
""",
|
|
217
|
+
[outlet_id]).fetchdf()
|
|
218
|
+
return df[['station_id', 'station_origin']].drop_duplicates().to_dict(orient='records')
|
|
219
|
+
|
|
177
220
|
|
|
178
221
|
class OutletGateway:
|
|
179
222
|
def __init__(self, model_name: str):
|
|
@@ -192,7 +235,7 @@ class OutletGateway:
|
|
|
192
235
|
return equis_station_opnids(self.model_name)
|
|
193
236
|
|
|
194
237
|
def station_opnids(self):
|
|
195
|
-
return
|
|
238
|
+
return mapped_station_opnids(self.model_name)
|
|
196
239
|
|
|
197
240
|
def equis_stations(self):
|
|
198
241
|
return equis_stations(self.model_name)
|
|
@@ -220,6 +263,12 @@ class OutletGateway:
|
|
|
220
263
|
assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
|
|
221
264
|
return get_outlets_by_station(station_id, station_origin)
|
|
222
265
|
|
|
266
|
+
def get_outlet_opnids(self, outlet_id: int):
|
|
267
|
+
return get_outlet_opnids(outlet_id)
|
|
268
|
+
|
|
269
|
+
def get_outlet_stations(self, outlet_id: int):
|
|
270
|
+
return get_outlet_stations(outlet_id)
|
|
271
|
+
|
|
223
272
|
# constructors:
|
|
224
273
|
def build_outlet_db(db_path: str = None):
|
|
225
274
|
if db_path is None:
|
|
@@ -235,31 +284,15 @@ def build_outlets(con, model_name: str = None):
|
|
|
235
284
|
else:
|
|
236
285
|
modl_db = MODL_DB
|
|
237
286
|
|
|
238
|
-
for
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
287
|
+
for outlet_id in modl_db['outlet_id'].unique():
|
|
288
|
+
group = modl_db.query('outlet_id == @outlet_id')
|
|
289
|
+
repo_name = group['repo_name'].iloc[0]
|
|
290
|
+
add_outlet(con, outlet_id = int(outlet_id), outlet_name = None, repository_name = repo_name, notes = None)
|
|
242
291
|
opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
|
|
243
|
-
|
|
244
292
|
for opnid in opnids:
|
|
245
|
-
add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
|
|
246
|
-
|
|
247
|
-
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
248
|
-
add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
def create_outlet_schema(con, model_name : str):
|
|
252
|
-
for index, (_, group) in enumerate(outlets(model_name)):
|
|
253
|
-
repo_name = group['repo_name'].iloc[0]
|
|
254
|
-
add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
|
|
255
|
-
|
|
256
|
-
opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
|
|
257
|
-
|
|
258
|
-
for opnid in opnids:
|
|
259
|
-
add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
|
|
260
|
-
|
|
293
|
+
add_reach(con, outlet_id = int(outlet_id), reach_id = int(opnid), repository_name = repo_name)
|
|
261
294
|
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
262
|
-
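add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])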
|
|
295
|
+
add_station(con, outlet_id = int(outlet_id), station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
263
296
|
|
|
264
297
|
|
|
265
298
|
def add_outlet(con,
|
|
@@ -151,12 +151,11 @@ def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
|
|
|
151
151
|
con.execute("DETACH 'outlets_db'")
|
|
152
152
|
|
|
153
153
|
|
|
154
|
-
def create_outlets_tables(con: duckdb.DuckDBPyConnection):
|
|
154
|
+
def create_outlets_tables(con: duckdb.DuckDBPyConnection, model_name: str = None):
|
|
155
155
|
"""Create tables in the outlets schema to define outlet-station-reach relationships."""
|
|
156
156
|
con.execute(sql_loader.get_outlets_schema_sql())
|
|
157
157
|
con.execute(sql_loader.get_views_outlets_sql())
|
|
158
|
-
outlets.build_outlets(con)
|
|
159
|
-
|
|
158
|
+
outlets.build_outlets(con, model_name=model_name)
|
|
160
159
|
|
|
161
160
|
def create_filtered_wiski_view(con: duckdb.DuckDBPyConnection, data_codes: list):
|
|
162
161
|
"""Create a view filtering WISKI data based on specified data codes."""
|
|
@@ -101,7 +101,8 @@ def download_wiski_data(
|
|
|
101
101
|
if overwrite:
|
|
102
102
|
warehouse.drop_station_data(con, station_ids, 'wiski')
|
|
103
103
|
warehouse.add_df_to_table(con, df, 'staging', 'wiski')
|
|
104
|
-
|
|
104
|
+
if not df_transformed.empty:
|
|
105
|
+
warehouse.add_df_to_table(con, df_transformed, 'analytics', 'wiski')
|
|
105
106
|
warehouse.update_views(con)
|
|
106
107
|
else:
|
|
107
108
|
print('No data necessary for HSPF calibration from wiski for:', station_ids)
|
|
@@ -351,7 +352,7 @@ def station_reach_pairs(con: duckdb.DuckDBPyConnection):
|
|
|
351
352
|
query = '''
|
|
352
353
|
SELECT *,
|
|
353
354
|
FROM
|
|
354
|
-
|
|
355
|
+
outlets.station_reach_pairs
|
|
355
356
|
ORDER BY
|
|
356
357
|
outlet_id,
|
|
357
358
|
station_id
|
|
@@ -336,6 +336,14 @@ def filter_quality_codes(df, data_codes):
|
|
|
336
336
|
'''
|
|
337
337
|
return df.loc[df['quality_code'].isin(data_codes)]
|
|
338
338
|
|
|
339
|
+
def filter_years(df, start_year=1996, end_year=None):
|
|
340
|
+
'''Filter Equis data to include only samples within a certain year range.'''
|
|
341
|
+
df = df[df['datetime'].dt.year >= start_year]
|
|
342
|
+
if end_year is not None:
|
|
343
|
+
df = df[df['datetime'].dt.year <= end_year]
|
|
344
|
+
return df
|
|
345
|
+
|
|
346
|
+
|
|
339
347
|
def average_results(df):
|
|
340
348
|
#df['datetime'] = pd.to_datetime(df.loc[:,'datetime'])
|
|
341
349
|
df.loc[:,'datetime'] = df.loc[:,'datetime'].dt.round('h')
|
|
@@ -392,6 +400,7 @@ def transform(df, filter_qc_codes = True, data_codes = None, baseflow_method = '
|
|
|
392
400
|
data_codes = DATA_CODES
|
|
393
401
|
df = filter_quality_codes(df, data_codes)
|
|
394
402
|
df = average_results(df)
|
|
403
|
+
df = filter_years(df, start_year=1996)
|
|
395
404
|
df = calculate_baseflow(df, method = baseflow_method)
|
|
396
405
|
df['station_origin'] = 'wiski'
|
|
397
406
|
#df.set_index('datetime',inplace=True)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|