mpcaHydro 2.0.6__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mpcaHydro/{wiski.py → WISKI.py} +12 -40
- mpcaHydro/data_manager.py +287 -122
- mpcaHydro/equis.py +22 -31
- mpcaHydro/warehouse.py +11 -389
- {mpcahydro-2.0.6.dist-info → mpcahydro-2.1.0.dist-info}/METADATA +1 -3
- mpcahydro-2.1.0.dist-info/RECORD +15 -0
- mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -71
- mpcaHydro/data/outlets.duckdb +0 -0
- mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcaHydro/outlets.py +0 -371
- mpcaHydro/reports.py +0 -80
- mpcaHydro/warehouseManager.py +0 -47
- mpcaHydro/xref.py +0 -74
- mpcahydro-2.0.6.dist-info/RECORD +0 -23
- {mpcahydro-2.0.6.dist-info → mpcahydro-2.1.0.dist-info}/WHEEL +0 -0
mpcaHydro/outlets.py
DELETED
|
@@ -1,371 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
Created on Thu May 1 09:51:51 2025
|
|
4
|
-
|
|
5
|
-
@author: mfratki
|
|
6
|
-
"""
|
|
7
|
-
#import sqlite3
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
import geopandas as gpd
|
|
10
|
-
import pandas as pd
|
|
11
|
-
import duckdb
|
|
12
|
-
#from hspf_tools.calibrator import etlWISKI, etlSWD
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
#stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
_stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
|
|
19
|
-
stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
|
|
20
|
-
stations_wiski['source'] = 'wiski'
|
|
21
|
-
_stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
|
|
22
|
-
stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
|
|
23
|
-
stations_equis['source'] = 'equis'
|
|
24
|
-
stations_equis['wplmn_flag'] = 0
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
DB_PATH = str(Path(__file__).resolve().parent/'data\\outlets.duckdb')
|
|
28
|
-
|
|
29
|
-
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
30
|
-
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
31
|
-
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
32
|
-
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
33
|
-
|
|
34
|
-
def _reload():
|
|
35
|
-
global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
|
|
36
|
-
_stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
|
|
37
|
-
stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
|
|
38
|
-
stations_wiski['source'] = 'wiski'
|
|
39
|
-
_stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
|
|
40
|
-
stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
|
|
41
|
-
stations_equis['source'] = 'equis'
|
|
42
|
-
stations_equis['wplmn_flag'] = 0
|
|
43
|
-
|
|
44
|
-
MODL_DB = pd.concat([stations_wiski,stations_equis])
|
|
45
|
-
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
|
|
46
|
-
MODL_DB = MODL_DB.dropna(subset='opnids')
|
|
47
|
-
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def get_model_db(model_name: str):
|
|
51
|
-
return MODL_DB.query('repository_name == @model_name')
|
|
52
|
-
|
|
53
|
-
def split_opnids(opnids: list):
|
|
54
|
-
return [abs(int(float(j))) for i in opnids for j in i]
|
|
55
|
-
|
|
56
|
-
def valid_models():
|
|
57
|
-
return MODL_DB['repository_name'].unique().tolist()
|
|
58
|
-
|
|
59
|
-
def wplmn_station_opnids(model_name):
|
|
60
|
-
opnids = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
|
|
61
|
-
return split_opnids(opnids)
|
|
62
|
-
|
|
63
|
-
def wiski_station_opnids(model_name):
|
|
64
|
-
opnids = MODL_DB.query('repository_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
|
|
65
|
-
return split_opnids(opnids)
|
|
66
|
-
|
|
67
|
-
def equis_station_opnids(model_name):
|
|
68
|
-
opnids = MODL_DB.query('repository_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
|
|
69
|
-
return split_opnids(opnids)
|
|
70
|
-
|
|
71
|
-
def station_opnids(model_name):
|
|
72
|
-
opnids = MODL_DB.query('repository_name == @model_name')['opnids'].str.split(',').to_list()
|
|
73
|
-
return split_opnids(opnids)
|
|
74
|
-
|
|
75
|
-
def equis_stations(model_name):
|
|
76
|
-
return MODL_DB.query('repository_name == @model_name and source == "equis"')['station_id'].tolist()
|
|
77
|
-
|
|
78
|
-
def wiski_stations(model_name):
|
|
79
|
-
return MODL_DB.query('repository_name == @model_name and source == "wiski"')['station_id'].tolist()
|
|
80
|
-
|
|
81
|
-
def wplmn_stations(model_name):
|
|
82
|
-
return MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
|
|
83
|
-
|
|
84
|
-
def outlets(model_name):
|
|
85
|
-
return [group for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
|
|
86
|
-
|
|
87
|
-
def outlet_stations(model_name):
|
|
88
|
-
return [group['station_id'].to_list() for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
|
|
89
|
-
|
|
90
|
-
def _split_opnids(opnids: list):
|
|
91
|
-
return [int(float(j)) for i in opnids for j in i]
|
|
92
|
-
|
|
93
|
-
def connect(db_path, read_only=True):
|
|
94
|
-
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
|
|
95
|
-
return duckdb.connect(db_path,read_only=read_only)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def init_db(db_path: str,reset: bool = False):
|
|
99
|
-
"""
|
|
100
|
-
Initialize the DuckDB database: create staging and analytics schemas
|
|
101
|
-
"""
|
|
102
|
-
db_path = Path(db_path)
|
|
103
|
-
if reset and db_path.exists():
|
|
104
|
-
db_path.unlink()
|
|
105
|
-
|
|
106
|
-
with connect(db_path.as_posix()) as con:
|
|
107
|
-
con.execute(OUTLETS_SCHEMA)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
# Accessors:
|
|
112
|
-
def get_outlets_by_model(model_name: str):
|
|
113
|
-
with connect(DB_PATH) as con:
|
|
114
|
-
df = con.execute(
|
|
115
|
-
"""
|
|
116
|
-
SELECT r.*
|
|
117
|
-
FROM outlets.station_reach_pairs r
|
|
118
|
-
WHERE r.repository_name = ?
|
|
119
|
-
""",
|
|
120
|
-
[model_name]
|
|
121
|
-
).fetchdf()
|
|
122
|
-
return df
|
|
123
|
-
|
|
124
|
-
def get_outlets_by_reach(reach_id: int, model_name: str):
|
|
125
|
-
"""
|
|
126
|
-
Return all outlet rows for outlets that include the given reach_id in the given model_name.
|
|
127
|
-
"""
|
|
128
|
-
with connect(DB_PATH) as con:
|
|
129
|
-
df = con.execute(
|
|
130
|
-
"""
|
|
131
|
-
SELECT r.*
|
|
132
|
-
FROM outlets.station_reach_pairs r
|
|
133
|
-
WHERE r.reach_id = ? AND r.repository_name = ?
|
|
134
|
-
""",
|
|
135
|
-
[reach_id, model_name]).fetchdf()
|
|
136
|
-
return df
|
|
137
|
-
|
|
138
|
-
def get_outlets_by_station(station_id: str, station_origin: str):
|
|
139
|
-
"""
|
|
140
|
-
Return all outlet rows for outlets that include the given reach_id in the given model_name.
|
|
141
|
-
"""
|
|
142
|
-
with connect(DB_PATH) as con:
|
|
143
|
-
|
|
144
|
-
df = con.execute(
|
|
145
|
-
"""
|
|
146
|
-
SELECT r.*
|
|
147
|
-
FROM outlets.station_reach_pairs r
|
|
148
|
-
WHERE r.station_id = ? AND r.station_origin = ?
|
|
149
|
-
""",
|
|
150
|
-
[station_id, station_origin]).fetchdf()
|
|
151
|
-
return df
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
class OutletGateway:
|
|
156
|
-
def __init__(self, model_name: str):
|
|
157
|
-
self.model_name = model_name
|
|
158
|
-
self.db_path = DB_PATH
|
|
159
|
-
self.modl_db = get_model_db(model_name)
|
|
160
|
-
|
|
161
|
-
# Legacy methods to access functions
|
|
162
|
-
def wplmn_station_opnids(self):
|
|
163
|
-
return wplmn_station_opnids(self.model_name)
|
|
164
|
-
|
|
165
|
-
def wiski_station_opnids(self):
|
|
166
|
-
return wiski_station_opnids(self.model_name)
|
|
167
|
-
|
|
168
|
-
def equis_station_opnids(self):
|
|
169
|
-
return equis_station_opnids(self.model_name)
|
|
170
|
-
|
|
171
|
-
def station_opnids(self):
|
|
172
|
-
return station_opnids(self.model_name)
|
|
173
|
-
|
|
174
|
-
def equis_stations(self):
|
|
175
|
-
return equis_stations(self.model_name)
|
|
176
|
-
|
|
177
|
-
def wiski_stations(self):
|
|
178
|
-
return wiski_stations(self.model_name)
|
|
179
|
-
|
|
180
|
-
def wplmn_stations(self):
|
|
181
|
-
return wplmn_stations(self.model_name)
|
|
182
|
-
|
|
183
|
-
def outlets(self):
|
|
184
|
-
return outlets(self.model_name)
|
|
185
|
-
|
|
186
|
-
def outlet_stations(self):
|
|
187
|
-
return outlet_stations(self.model_name)
|
|
188
|
-
|
|
189
|
-
# Accessors for outlets
|
|
190
|
-
def get_outlets(self):
|
|
191
|
-
return get_outlets_by_model(self.model_name)
|
|
192
|
-
|
|
193
|
-
def get_outlets_by_reach(self, reach_id: int):
|
|
194
|
-
return get_outlets_by_reach(reach_id, self.model_name)
|
|
195
|
-
|
|
196
|
-
def get_outlets_by_station(self, station_id: str, station_origin: str):
|
|
197
|
-
assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
|
|
198
|
-
return get_outlets_by_station(station_id, station_origin)
|
|
199
|
-
|
|
200
|
-
# constructors:
|
|
201
|
-
def build_outlet_db(db_path: str = None):
|
|
202
|
-
if db_path is None:
|
|
203
|
-
db_path = DB_PATH
|
|
204
|
-
init_db(db_path,reset=True)
|
|
205
|
-
with connect(db_path) as con:
|
|
206
|
-
build_outlets(con)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
def build_outlets(con, model_name: str = None):
|
|
210
|
-
if model_name is not None:
|
|
211
|
-
modl_db = get_model_db(model_name)
|
|
212
|
-
else:
|
|
213
|
-
modl_db = MODL_DB
|
|
214
|
-
|
|
215
|
-
for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
|
|
216
|
-
repo_name = group['repository_name'].iloc[0]
|
|
217
|
-
add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
|
|
218
|
-
|
|
219
|
-
opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
|
|
220
|
-
|
|
221
|
-
for opnid in opnids:
|
|
222
|
-
if opnid < 0:
|
|
223
|
-
exclude = 1
|
|
224
|
-
else:
|
|
225
|
-
exclude = 0
|
|
226
|
-
add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
|
|
227
|
-
|
|
228
|
-
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
229
|
-
add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def create_outlet_schema(con, model_name : str):
|
|
233
|
-
for index, (_, group) in enumerate(modl_db.outlets(model_name)):
|
|
234
|
-
repo_name = group['repository_name'].iloc[0]
|
|
235
|
-
add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
|
|
236
|
-
|
|
237
|
-
opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
|
|
238
|
-
|
|
239
|
-
for opnid in opnids:
|
|
240
|
-
if opnid < 0:
|
|
241
|
-
exclude = 1
|
|
242
|
-
else:
|
|
243
|
-
exclude = 0
|
|
244
|
-
add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
|
|
245
|
-
|
|
246
|
-
for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
|
|
247
|
-
add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def add_outlet(con,
|
|
251
|
-
outlet_id: str,
|
|
252
|
-
repository_name: str,
|
|
253
|
-
outlet_name = None,
|
|
254
|
-
notes = None):
|
|
255
|
-
"""
|
|
256
|
-
Insert an outlet. repository_name is required.
|
|
257
|
-
"""
|
|
258
|
-
con.execute(
|
|
259
|
-
"INSERT INTO outlets.outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
|
|
260
|
-
[outlet_id, repository_name, outlet_name, notes]
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
def add_station(con,
|
|
264
|
-
outlet_id: str,
|
|
265
|
-
station_id: str,
|
|
266
|
-
station_origin: str,
|
|
267
|
-
true_opnid: str,
|
|
268
|
-
repository_name: str,
|
|
269
|
-
comments = None):
|
|
270
|
-
"""
|
|
271
|
-
Insert a station membership for an outlet.
|
|
272
|
-
Constraints:
|
|
273
|
-
- PRIMARY KEY (station_id, station_origin): unique per origin across all outlets.
|
|
274
|
-
- true_opnid and true_opnid_repository_name are required per schema.
|
|
275
|
-
"""
|
|
276
|
-
con.execute(
|
|
277
|
-
"""INSERT INTO outlets.outlet_stations
|
|
278
|
-
(outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
|
|
279
|
-
VALUES (?, ?, ?, ?, ?, ?)""",
|
|
280
|
-
[outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
|
|
281
|
-
)
|
|
282
|
-
|
|
283
|
-
def add_reach(con,
|
|
284
|
-
outlet_id: str,
|
|
285
|
-
reach_id: str,
|
|
286
|
-
repository_name: str,
|
|
287
|
-
exclude: int = 0):
|
|
288
|
-
"""
|
|
289
|
-
Insert a reach membership for an outlet.
|
|
290
|
-
- repository_name is required and participates in the PK (reach_id, repository_name).
|
|
291
|
-
- exclude = 1 to mark a reach as excluded from association views.
|
|
292
|
-
"""
|
|
293
|
-
con.execute(
|
|
294
|
-
"""INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name, exclude)
|
|
295
|
-
VALUES (?, ?, ?, ?)""",
|
|
296
|
-
[outlet_id, reach_id, repository_name, int(exclude)]
|
|
297
|
-
)
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
OUTLETS_SCHEMA = """-- schema.sql
|
|
301
|
-
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
|
|
302
|
-
-- Compatible with DuckDB and SQLite.
|
|
303
|
-
|
|
304
|
-
-- Table 1: outlets
|
|
305
|
-
-- Represents a logical grouping that ties stations and reaches together.
|
|
306
|
-
CREATE TABLE IF NOT EXISTS outlets (
|
|
307
|
-
outlet_id TEXT PRIMARY KEY,
|
|
308
|
-
repository_name TEXT NOT NULL,
|
|
309
|
-
outlet_name TEXT,
|
|
310
|
-
notes TEXT -- optional: general notes about the outlet grouping
|
|
311
|
-
);
|
|
312
|
-
|
|
313
|
-
-- Table 2: outlet_stations
|
|
314
|
-
-- One-to-many: outlet -> stations
|
|
315
|
-
CREATE TABLE IF NOT EXISTS outlet_stations (
|
|
316
|
-
outlet_id TEXT NOT NULL,
|
|
317
|
-
station_id TEXT NOT NULL,
|
|
318
|
-
station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
|
|
319
|
-
repository_name TEXT NOT NULL, -- repository model the station is physically located in
|
|
320
|
-
true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
|
|
321
|
-
comments TEXT, -- Per-station comments, issues, etc.
|
|
322
|
-
CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
|
|
323
|
-
FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
|
|
324
|
-
);
|
|
325
|
-
|
|
326
|
-
-- Table 3: outlet_reaches
|
|
327
|
-
-- One-to-many: outlet -> reaches
|
|
328
|
-
-- A reach can appear in multiple outlets, enabling many-to-many overall.
|
|
329
|
-
CREATE TABLE IF NOT EXISTS outlet_reaches (
|
|
330
|
-
outlet_id TEXT NOT NULL,
|
|
331
|
-
reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
|
|
332
|
-
repository_name TEXT NOT NULL, -- optional: where the mapping comes from
|
|
333
|
-
exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
|
|
334
|
-
FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
|
|
335
|
-
);
|
|
336
|
-
|
|
337
|
-
-- Useful views:
|
|
338
|
-
|
|
339
|
-
-- View: station_reach_pairs
|
|
340
|
-
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
|
|
341
|
-
CREATE VIEW IF NOT EXISTS station_reach_pairs AS
|
|
342
|
-
SELECT
|
|
343
|
-
s.outlet_id,
|
|
344
|
-
s.station_id,
|
|
345
|
-
s.station_origin,
|
|
346
|
-
r.reach_id,
|
|
347
|
-
r.exclude,
|
|
348
|
-
r.repository_name,
|
|
349
|
-
FROM outlet_stations s
|
|
350
|
-
JOIN outlet_reaches r
|
|
351
|
-
ON s.outlet_id = r.outlet_id;
|
|
352
|
-
|
|
353
|
-
"""
|
|
354
|
-
|
|
355
|
-
#row = modl_db.MODL_DB.iloc[0]
|
|
356
|
-
|
|
357
|
-
#info = etlWISKI.info(row['station_id'])
|
|
358
|
-
|
|
359
|
-
#modl_db.MODL_DB.query('source == "equis"')
|
|
360
|
-
|
|
361
|
-
# outlet_dict = {'stations': {'wiski': ['E66050001'],
|
|
362
|
-
# 'equis': ['S002-118']},
|
|
363
|
-
# 'reaches': {'Clearwater': [650]}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
# station_ids = ['S002-118']
|
|
369
|
-
# #station_ids = ['E66050001']
|
|
370
|
-
# reach_ids = [650]
|
|
371
|
-
# flow_station_ids = ['E66050001']
|
mpcaHydro/reports.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
import duckdb
|
|
3
|
-
import glob
|
|
4
|
-
|
|
5
|
-
#TODO ensure all reports are actually in the reports schema
|
|
6
|
-
|
|
7
|
-
class reportManager():
|
|
8
|
-
def __init__(self,db_path:Path):
|
|
9
|
-
self.db_path = db_path
|
|
10
|
-
|
|
11
|
-
def wiski_qc_counts(self):
|
|
12
|
-
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
13
|
-
return wiski_qc_counts(con)
|
|
14
|
-
|
|
15
|
-
def constituent_summary(self,constituent: str = None):
|
|
16
|
-
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
17
|
-
return constituent_summary(con,constituent)
|
|
18
|
-
|
|
19
|
-
def station_reach_pairs(self):
|
|
20
|
-
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
21
|
-
return station_reach_pairs(con)
|
|
22
|
-
|
|
23
|
-
def outlet_summary(self):
|
|
24
|
-
with duckdb.connect(self.db_path,read_only=True) as con:
|
|
25
|
-
return outlet_summary(con)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def outlet_summary(con: duckdb.DuckDBPyConnection):
|
|
30
|
-
query = '''
|
|
31
|
-
SELECT *,
|
|
32
|
-
FROM
|
|
33
|
-
reports.outlet_constituent_summary
|
|
34
|
-
ORDER BY
|
|
35
|
-
outlet_id,
|
|
36
|
-
constituent
|
|
37
|
-
'''
|
|
38
|
-
df = con.execute(query).fetch_df()
|
|
39
|
-
return df
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
|
|
43
|
-
query = '''
|
|
44
|
-
SELECT *,
|
|
45
|
-
FROM
|
|
46
|
-
staging.wiski_qc_count
|
|
47
|
-
ORDER BY
|
|
48
|
-
station_no,
|
|
49
|
-
parametertype_name
|
|
50
|
-
'''
|
|
51
|
-
df = con.execute(query).fetch_df()
|
|
52
|
-
return df
|
|
53
|
-
|
|
54
|
-
def constituent_summary(con: duckdb.DuckDBPyConnection,constituent: str = None):
|
|
55
|
-
|
|
56
|
-
query = '''
|
|
57
|
-
SELECT *,
|
|
58
|
-
FROM
|
|
59
|
-
reports.constituent_summary
|
|
60
|
-
ORDER BY
|
|
61
|
-
station_id,
|
|
62
|
-
station_origin,
|
|
63
|
-
constituent
|
|
64
|
-
'''
|
|
65
|
-
df = con.execute(query).fetch_df()
|
|
66
|
-
if constituent is not None:
|
|
67
|
-
df = df[df['constituent'] == constituent]
|
|
68
|
-
return df
|
|
69
|
-
|
|
70
|
-
def station_reach_pairs(con: duckdb.DuckDBPyConnection):
|
|
71
|
-
query = '''
|
|
72
|
-
SELECT *,
|
|
73
|
-
FROM
|
|
74
|
-
reports.station_reach_pairs
|
|
75
|
-
ORDER BY
|
|
76
|
-
outlet_id,
|
|
77
|
-
station_id
|
|
78
|
-
'''
|
|
79
|
-
df = con.execute(query).fetch_df()
|
|
80
|
-
return df
|
mpcaHydro/warehouseManager.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import pandas as pd
|
|
3
|
-
#from abc import abstractmethod
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from mpcaHydro import equis, wiski, warehouse
|
|
6
|
-
import duckdb
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
#%%
|
|
13
|
-
'''
|
|
14
|
-
This modules contains classes and functions to manage data downloads and storage into a local data warehouse.
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
'''
|
|
18
|
-
|
|
19
|
-
def get_db_path(warehouse_path:Path,db_name:str = 'observations')->Path:
|
|
20
|
-
'''
|
|
21
|
-
Constructs the full path to the database file within the warehouse directory.
|
|
22
|
-
|
|
23
|
-
Parameters:
|
|
24
|
-
warehouse_path (Path): The path to the warehouse directory.
|
|
25
|
-
db_name (str): The name of the database file.
|
|
26
|
-
|
|
27
|
-
Returns:
|
|
28
|
-
Path: The full path to the database file.
|
|
29
|
-
'''
|
|
30
|
-
return Path(warehouse_path) / db_name
|
|
31
|
-
|
|
32
|
-
def construct_database(db_path:Path,db_name:str = 'observations')->Path:
|
|
33
|
-
'''
|
|
34
|
-
Constructs the full path to the database file within the warehouse directory.
|
|
35
|
-
|
|
36
|
-
Parameters:
|
|
37
|
-
warehouse_path (Path): The path to the warehouse directory.
|
|
38
|
-
db_name (str): The name of the database file.
|
|
39
|
-
|
|
40
|
-
Returns:
|
|
41
|
-
Path: The full path to the database file.
|
|
42
|
-
'''
|
|
43
|
-
db_path = Path(db_path) / db_name
|
|
44
|
-
warehouse.init_db(warehouse_path=db_path)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
mpcaHydro/xref.py
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
WISKI_EQUIS_XREF = pd.read_csv(Path(__file__).parent/'data/WISKI_EQUIS_XREF.csv')
|
|
5
|
-
#WISKI_EQUIS_XREF = pd.read_csv('C:/Users/mfratki/Documents/GitHub/hspf_tools/WISKI_EQUIS_XREF.csv')
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def are_lists_identical(nested_list):
|
|
9
|
-
# Sort each sublist
|
|
10
|
-
sorted_sublists = [sorted(sublist) for sublist in nested_list]
|
|
11
|
-
# Compare all sublists to the first one
|
|
12
|
-
return all(sublist == sorted_sublists[0] for sublist in sorted_sublists)
|
|
13
|
-
|
|
14
|
-
def get_wiski_stations():
|
|
15
|
-
return list(WISKI_EQUIS_XREF['WISKI_STATION_NO'].unique())
|
|
16
|
-
|
|
17
|
-
def get_equis_stations():
|
|
18
|
-
return list(WISKI_EQUIS_XREF['EQUIS_STATION_ID'].unique())
|
|
19
|
-
|
|
20
|
-
def wiski_equis_alias(wiski_station_id):
|
|
21
|
-
equis_ids = list(set(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WISKI_STATION_NO'] == wiski_station_id,'WISKI_EQUIS_ID'].to_list()))
|
|
22
|
-
equis_ids = [equis_id for equis_id in equis_ids if not pd.isna(equis_id)]
|
|
23
|
-
if len(equis_ids) == 0:
|
|
24
|
-
return []
|
|
25
|
-
elif len(equis_ids) > 1:
|
|
26
|
-
print(f'Too Many Equis Stations for {wiski_station_id}')
|
|
27
|
-
raise
|
|
28
|
-
else:
|
|
29
|
-
return equis_ids[0]
|
|
30
|
-
|
|
31
|
-
def wiski_equis_associations(wiski_station_id):
|
|
32
|
-
equis_ids = list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WISKI_STATION_NO'] == wiski_station_id,'EQUIS_STATION_ID'].unique())
|
|
33
|
-
equis_ids = [equis_id for equis_id in equis_ids if not pd.isna(equis_id)]
|
|
34
|
-
if len(equis_ids) == 0:
|
|
35
|
-
return []
|
|
36
|
-
else:
|
|
37
|
-
return equis_ids
|
|
38
|
-
|
|
39
|
-
def equis_wiski_associations(equis_station_id):
|
|
40
|
-
wiski_ids = list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['EQUIS_STATION_ID'] == equis_station_id,'WISKI_STATION_NO'].unique())
|
|
41
|
-
wiski_ids = [wiski_id for wiski_id in wiski_ids if not pd.isna(wiski_id)]
|
|
42
|
-
if len(wiski_ids) == 0:
|
|
43
|
-
return []
|
|
44
|
-
else:
|
|
45
|
-
return wiski_ids
|
|
46
|
-
|
|
47
|
-
def equis_wiski_alias(equis_station_id):
|
|
48
|
-
wiski_ids = list(set(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WISKI_EQUIS_ID'] == equis_station_id,'WISKI_STATION_NO'].to_list()))
|
|
49
|
-
wiski_ids = [wiski_id for wiski_id in wiski_ids if not pd.isna(wiski_id)]
|
|
50
|
-
if len(wiski_ids) == 0:
|
|
51
|
-
return []
|
|
52
|
-
elif len(wiski_ids) > 1:
|
|
53
|
-
print(f'Too Many WISKI Stations for {equis_station_id}')
|
|
54
|
-
raise ValueError(f'Too Many WISKI Stations for {equis_station_id}')
|
|
55
|
-
else:
|
|
56
|
-
return wiski_ids[0]
|
|
57
|
-
|
|
58
|
-
def _equis_wiski_associations(equis_station_ids):
|
|
59
|
-
wiski_stations = [equis_wiski_associations(equis_station_id) for equis_station_id in equis_station_ids]
|
|
60
|
-
if are_lists_identical(wiski_stations):
|
|
61
|
-
return wiski_stations[0]
|
|
62
|
-
else:
|
|
63
|
-
return []
|
|
64
|
-
|
|
65
|
-
def _stations_by_wid(wid_no,station_origin):
|
|
66
|
-
if station_origin in ['wiski','wplmn']:
|
|
67
|
-
station_col = 'WISKI_STATION_NO'
|
|
68
|
-
elif station_origin in ['equis','swd']:
|
|
69
|
-
station_col = 'EQUIS_STATION_ID'
|
|
70
|
-
else:
|
|
71
|
-
raise
|
|
72
|
-
|
|
73
|
-
return list(WISKI_EQUIS_XREF.loc[WISKI_EQUIS_XREF['WID'] == wid_no,station_col].unique())
|
|
74
|
-
|
mpcahydro-2.0.6.dist-info/RECORD
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
mpcaHydro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
mpcaHydro/data_manager.py,sha256=nTRDWuR3fyJU2v9HDLKiY7TT5Sp4QVn7uHSdGRac_uQ,9280
|
|
3
|
-
mpcaHydro/equis.py,sha256=G4pCjfdDggLTrL0lQeGKAqmPsyO4p-sb1DS4KycUInc,18012
|
|
4
|
-
mpcaHydro/etlCSG.py,sha256=5QT6V2dHvNKC9r5-dspt-NpOmECP2LFw1Lyq1zdkqps,2630
|
|
5
|
-
mpcaHydro/etlSWD.py,sha256=FvFP5lIOxtzF3eEgUDGw-C2BJTRfxXxkbt3hYl8PCZQ,6367
|
|
6
|
-
mpcaHydro/etlWISKI.py,sha256=S1dNUe08Y0riJNBaEJDfgmewR8CwPtIaB_3Vw8JujkM,22201
|
|
7
|
-
mpcaHydro/etlWPLMN.py,sha256=avLJvWRRxsG35w--OVq0qSCrFjO6G2x0aQ31d9kcYHg,4179
|
|
8
|
-
mpcaHydro/outlets.py,sha256=VuV4m12RzD4_BAznzj9hRWr06gt_IOsWyx3i1wFMIac,13924
|
|
9
|
-
mpcaHydro/pywisk.py,sha256=kaxJCPCZHy9oEo9VnoSmFC58qm1sX9fVbtp6nXs7934,13290
|
|
10
|
-
mpcaHydro/reports.py,sha256=luM7Q5wAJheSImlhaWen9IqUFMWZX9U7DI2rsTEtzWY,2047
|
|
11
|
-
mpcaHydro/warehouse.py,sha256=_L-MQ0DTsgrob4lUY6Kzb-C4oD6y3laoIbZ_q5zFhr4,21731
|
|
12
|
-
mpcaHydro/warehouseManager.py,sha256=Ades6CfPyrpwGUaALpzAvQ_1rPKVZbuSmTPyBA-lCqA,1169
|
|
13
|
-
mpcaHydro/wiski.py,sha256=c7M3m8Qd8ddA8LrylmEimilWjaEpZl1kJkFlc63pWi8,11749
|
|
14
|
-
mpcaHydro/xref.py,sha256=TxMVtUhHKVJ3wZ-445KJaKSoTU3B0VlmSVgT9hX4lLk,3085
|
|
15
|
-
mpcaHydro/data/EQUIS_PARAMETER_XREF.csv,sha256=XZPrcZan9irSqFd4UasnPo_NQBcjyFodi0q3FGQphjI,5667
|
|
16
|
-
mpcaHydro/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
|
|
17
|
-
mpcaHydro/data/WISKI_QUALITY_CODES.csv,sha256=PvKBMAUj6pmbhaMUUkAOk0CnxM6RN5oIaXY7MJ_x4T8,9812
|
|
18
|
-
mpcaHydro/data/outlets.duckdb,sha256=QIyUJu0K60QeFVfFoWxU8ramsppzYl0VIS_mJ7WzQJ0,2109440
|
|
19
|
-
mpcaHydro/data/stations_EQUIS.gpkg,sha256=SLjjhWWau5Wx17PXogX_kj4cCwIaGgsJwaHqxuNFguo,2031616
|
|
20
|
-
mpcaHydro/data/stations_wiski.gpkg,sha256=No-iVHnngs-SejjrPxFNV-kh55FcmvpIx_QwdJAGnKI,909312
|
|
21
|
-
mpcahydro-2.0.6.dist-info/METADATA,sha256=vbCtovxCAQkwGJ9n-bU2HVUmZsWTPIk38k1XPcUtpDI,591
|
|
22
|
-
mpcahydro-2.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
23
|
-
mpcahydro-2.0.6.dist-info/RECORD,,
|
|
File without changes
|