mpcaHydro 2.0.6__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- mpcaHydro/{wiski.py → WISKI.py} +12 -40
- mpcaHydro/data_manager.py +287 -122
- mpcaHydro/equis.py +22 -31
- mpcaHydro/warehouse.py +11 -389
- {mpcahydro-2.0.6.dist-info → mpcahydro-2.1.0.dist-info}/METADATA +1 -3
- mpcahydro-2.1.0.dist-info/RECORD +15 -0
- mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -71
- mpcaHydro/data/outlets.duckdb +0 -0
- mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcaHydro/outlets.py +0 -371
- mpcaHydro/reports.py +0 -80
- mpcaHydro/warehouseManager.py +0 -47
- mpcaHydro/xref.py +0 -74
- mpcahydro-2.0.6.dist-info/RECORD +0 -23
- {mpcahydro-2.0.6.dist-info → mpcahydro-2.1.0.dist-info}/WHEEL +0 -0
mpcaHydro/warehouse.py
CHANGED
@@ -1,7 +1,6 @@
 import duckdb
 import pandas as pd
 from pathlib import Path
-from mpcaHydro import outlets
 
 def init_db(db_path: str,reset: bool = False):
     """
@@ -15,14 +14,14 @@ def init_db(db_path: str,reset: bool = False):
     # Create all schemas
     create_schemas(con)
 
-    # Create tables
-
-
-
+    # Create tables for observational data
+    # Wrapped in try/except as they depend on tables that may not exist yet
+    try:
+        create_combined_observations_view(con)
+        create_constituent_summary_report(con)
+    except duckdb.CatalogException as e:
+        print(f"Could not create observation views, likely because backing tables don't exist yet. This is safe to ignore on first run. Details: {e}")
 
-    # Create views
-    #update_views(con)
-
 
 def create_schemas(con: duckdb.DuckDBPyConnection):
     """
@@ -31,245 +30,8 @@ def create_schemas(con: duckdb.DuckDBPyConnection):
     con.execute("CREATE SCHEMA IF NOT EXISTS staging")
     con.execute("CREATE SCHEMA IF NOT EXISTS analytics")
     con.execute("CREATE SCHEMA IF NOT EXISTS reports")
-    con.execute("CREATE SCHEMA IF NOT EXISTS outlets")
-    con.execute("CREATE SCHEMA IF NOT EXISTS mappings")
+    con.execute("CREATE SCHEMA IF NOT EXISTS hspf")
 
-def create_analytics_tables(con: duckdb.DuckDBPyConnection):
-    """
-    Create necessary tables in the analytics schema.
-    """
-    con.execute("""
-        CREATE TABLE IF NOT EXISTS analytics.equis (
-            datetime TIMESTAMP,
-            value DOUBLE,
-            station_id TEXT,
-            station_origin TEXT,
-            constituent TEXT,
-            unit TEXT
-        );
-    """)
-    con.execute("""
-        CREATE TABLE IF NOT EXISTS analytics.wiski (
-            datetime TIMESTAMP,
-            value DOUBLE,
-            station_id TEXT,
-            station_origin TEXT,
-            constituent TEXT,
-            unit TEXT
-        );
-    """)
-
-def create_mapping_tables(con: duckdb.DuckDBPyConnection):
-    """
-    Create and populate tables in the mappings schema from Python dicts and CSVs.
-    """
-    # WISKI parametertype_id -> constituent
-    wiski_parametertype_map = {
-        '11522': 'TP',
-        '11531': 'TP',
-        '11532': 'TSS',
-        '11523': 'TSS',
-        '11526': 'N',
-        '11519': 'N',
-        '11520': 'OP',
-        '11528': 'OP',
-        '11530': 'TKN',
-        '11521': 'TKN',
-        '11500': 'Q',
-        '11504': 'WT',
-        '11533': 'DO',
-        '11507': 'WL'
-    }
-    df_wiski_params = pd.DataFrame(wiski_parametertype_map.items(), columns=['parametertype_id', 'constituent'])
-    con.execute("CREATE TABLE IF NOT EXISTS mappings.wiski_parametertype AS SELECT * FROM df_wiski_params")
-
-    # EQuIS cas_rn -> constituent
-    equis_casrn_map = {
-        '479-61-8': 'CHLA',
-        'CHLA-CORR': 'CHLA',
-        'BOD': 'BOD',
-        'NO2NO3': 'N',
-        '14797-55-8': 'NO3',
-        '14797-65-0': 'NO2',
-        '14265-44-2': 'OP',
-        'N-KJEL': 'TKN',
-        'PHOSPHATE-P': 'TP',
-        '7723-14-0': 'TP',
-        'SOLIDS-TSS': 'TSS',
-        'TEMP-W': 'WT',
-        '7664-41-7': 'NH3'
-    }
-    df_equis_cas = pd.DataFrame(equis_casrn_map.items(), columns=['cas_rn', 'constituent'])
-    con.execute("CREATE TABLE IF NOT EXISTS mappings.equis_casrn AS SELECT * FROM df_equis_cas")
-
-    # Load station cross-reference from CSV
-    # Assumes this script is run from a location where this relative path is valid
-    xref_csv_path = Path(__file__).parent / 'data/WISKI_EQUIS_XREF.csv'
-    if xref_csv_path.exists():
-        con.execute(f"CREATE TABLE IF NOT EXISTS mappings.station_xref AS SELECT * FROM read_csv_auto('{xref_csv_path.as_posix()}')")
-    else:
-        print(f"Warning: WISKI_EQUIS_XREF.csv not found at {xref_csv_path}")
-
-    # Load wiski_quality_codes from CSV
-    wiski_qc_csv_path = Path(__file__).parent / 'data/WISKI_QUALITY_CODES.csv'
-    if wiski_qc_csv_path.exists():
-        con.execute(f"CREATE TABLE IF NOT EXISTS mappings.wiski_quality_codes AS SELECT * FROM read_csv_auto('{wiski_qc_csv_path.as_posix()}')")
-    else:
-        print(f"Warning: WISKI_QUALITY_CODES.csv not found at {wiski_qc_csv_path}")
-
-def create_outlets_tables(con: duckdb.DuckDBPyConnection):
-    """
-    Create tables in the outlets schema to define outlet-station-reach relationships.
-    """
-    con.execute("""-- schema.sql
-    -- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
-    -- Compatible with DuckDB and SQLite.
-
-    -- Table 1: outlets
-    -- Represents a logical grouping that ties stations and reaches together.
-    CREATE TABLE IF NOT EXISTS outlets.outlets (
-        outlet_id TEXT PRIMARY KEY,
-        repository_name TEXT NOT NULL,
-        outlet_name TEXT,
-        notes TEXT -- optional: general notes about the outlet grouping
-    );
-
-    -- Table 2: outlet_stations
-    -- One-to-many: outlet -> stations
-    CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
-        outlet_id TEXT NOT NULL,
-        station_id TEXT NOT NULL,
-        station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
-        repository_name TEXT NOT NULL, -- repository model the station is physically located in
-        true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
-        comments TEXT, -- Per-station comments, issues, etc.
-        CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
-        FOREIGN KEY (outlet_id) REFERENCES outlets.outlets(outlet_id)
-    );
-
-    -- Table 3: outlet_reaches
-    -- One-to-many: outlet -> reaches
-    -- A reach can appear in multiple outlets, enabling many-to-many overall.
-    CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
-        outlet_id TEXT NOT NULL,
-        reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
-        repository_name TEXT NOT NULL, -- optional: where the mapping comes from
-        exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
-        FOREIGN KEY (outlet_id) REFERENCES outlets.outlets(outlet_id)
-    );
-
-    -- Useful views:
-
-    -- View: station_reach_pairs
-    -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
-    CREATE VIEW IF NOT EXISTS outlets.station_reach_pairs AS
-    SELECT
-        s.outlet_id,
-        s.station_id,
-        s.station_origin,
-        r.reach_id,
-        r.exclude,
-        r.repository_name,
-    FROM outlets.outlet_stations s
-    JOIN outlets.outlet_reaches r
-        ON s.outlet_id = r.outlet_id;
-
-    """)
-
-def create_normalized_wiski_view(con: duckdb.DuckDBPyConnection):
-    """
-    Create a view in the database that contains normalized WISKI data.
-    Units converted to standard units.
-    columns renamed.
-    constituents mapped.
-    """
-    con.execute("""
-    -- Create a single view with all transformations
-    CREATE OR REPLACE VIEW analytics.wiski_normalized AS
-    SELECT
-
-        -- Convert °C to °F and keep other values unchanged
-        CASE
-            WHEN LOWER(ts_unitsymbol) = '°c' THEN (value * 9.0 / 5.0) + 32
-            WHEN ts_unitsymbol = 'kg' THEN value * 2.20462 -- Convert kg to lb
-            ELSE value
-        END AS value,
-
-        -- Normalize units
-        CASE
-            WHEN LOWER(ts_unitsymbol) = '°c' THEN 'degf' -- Normalize °C to degF
-            WHEN ts_unitsymbol = 'kg' THEN 'lb' -- Normalize kg to lb
-            WHEN ts_unitsymbol = 'ft³/s' THEN 'cfs' -- Rename ft³/s to cfs
-            ELSE ts_unitsymbol
-        END AS unit,
-
-        -- Normalize column names
-        station_no AS station_id, -- Rename station_no to station_id
-        Timestamp AS datetime, -- Rename Timestamp to datetime
-        "Quality Code" AS quality_code, -- Rename Quality Code to quality_code
-        "Quality Code Name" AS quality_code_name, -- Rename Quality Code Name to quality_code_name
-        parametertype_id, -- Keeps parametertype_id as is
-        constituent -- Keeps constituent as is
-    FROM staging.wiski_raw;""")
-
-
-def create_filtered_wiski_view(con: duckdb.DuckDBPyConnection, data_codes: list):
-    """
-    Create a view in the database that filters WISKI data based on specified data codes.
-    """
-    query = f"""
-        CREATE OR REPLACE VIEW analytics.wiski_filtered AS
-        SELECT *
-        FROM analytics.wiski_normalized
-        WHERE quality_code IN ({placeholders});
-    """
-
-    placeholders = ', '.join(['?'] * len(data_codes))
-    query = query.format(placeholders=placeholders)
-    con.execute(query, data_codes)
-
-
-def create_aggregated_wiski_view(con: duckdb.DuckDBPyConnection):
-    """
-    Create a view in the database that aggregates WISKI data by hour, station, and constituent.
-    """
-    con.execute("""
-    CREATE OR REPLACE Table analytics.wiski_aggregated AS
-    SELECT
-        station_id,
-        constituent,
-        time_bucket(INTERVAL '1 hour', datetime) AS hour_start,
-        AVG(value) AS value,
-        unit
-    FROM analytics.wiski_normalized
-    GROUP BY
-        station_id,
-        constituent,
-        hour_start,
-        unit;
-    """)
-
-def create_staging_qc_count_view(con: duckdb.DuckDBPyConnection):
-    """
-    Create a view in staging schema that counts quality codes for each station and constituent.
-    """
-    con.execute("""
-    CREATE OR REPLACE VIEW staging.wiski_qc_count AS (
-    SELECT
-        w.station_no,
-        w.parametertype_name,
-        w."Quality Code",
-        w."Quality Code Name",
-        COUNT(w."Quality Code") AS count
-    FROM staging.wiski_raw w
-    GROUP BY
-        w."Quality Code",w."Quality Code Name",w.parametertype_name, w.station_no
-    );
-    """)
-    # ORDER BY
-    # w.station_no,w.parametertype_name, w."Quality Code"
-    # )
-    # """)
 
 def create_combined_observations_view(con: duckdb.DuckDBPyConnection):
     """
@@ -285,87 +47,6 @@ def create_combined_observations_view(con: duckdb.DuckDBPyConnection):
     """)
 
 
-def create_outlet_observations_view(con: duckdb.DuckDBPyConnection):
-    """
-    Create a view in analytics schema that links observations to model reaches via outlets.
-    """
-    con.execute("""
-    CREATE OR REPLACE VIEW analytics.outlet_observations AS
-    SELECT
-        o.datetime,
-        os.outlet_id,
-        o.constituent,
-        AVG(o.value) AS value,
-        COUNT(o.value) AS count
-    FROM
-        analytics.observations AS o
-    LEFT JOIN
-        outlets.outlet_stations AS os ON o.station_id = os.station_id AND o.station_origin = os.station_origin
-    GROUP BY
-        os.outlet_id,
-        o.constituent,
-        o.datetime; -- Group by the truncated date
-    """)
-    # ORDER BY
-    # os.outlet_id,
-    # o.constituent,
-    # datetime);
-
-
-
-def create_outlet_observations_with_flow_view(con: duckdb.DuckDBPyConnection):
-
-    con.execute("""
-    CREATE OR REPLACE VIEW analytics.outlet_observations_with_flow AS
-    WITH baseflow_data AS (
-        SELECT
-            outlet_id,
-            datetime,
-            "value" AS baseflow_value
-        FROM
-            analytics.outlet_observations
-        WHERE
-            (constituent = 'QB')),
-    flow_data AS (
-        SELECT
-            outlet_id,
-            datetime,
-            "value" AS flow_value
-        FROM
-            analytics.outlet_observations
-        WHERE
-            (constituent = 'Q')),
-    constituent_data AS (
-        SELECT
-            outlet_id,
-            datetime,
-            constituent,
-            "value",
-            count
-        FROM
-            analytics.outlet_observations
-        WHERE
-            (constituent NOT IN ('Q', 'QB')))
-    SELECT
-        constituent_data.outlet_id,
-        constituent_data.constituent,
-        constituent_data.datetime,
-        constituent_data."value",
-        flow_data.flow_value,
-        baseflow_data.baseflow_value
-    FROM
-        constituent_data
-    FULL JOIN flow_data ON
-        (((constituent_data.outlet_id = flow_data.outlet_id)
-        AND (constituent_data.datetime = flow_data.datetime)))
-    LEFT JOIN baseflow_data ON
-        (((constituent_data.outlet_id = baseflow_data.outlet_id)
-        AND (constituent_data.datetime = baseflow_data.datetime)));""")
-    # ORDER BY
-    # constituent_data.outlet_id,
-    # constituent_data.datetime;
-    #
-
 def create_constituent_summary_report(con: duckdb.DuckDBPyConnection):
     """
     Create a constituent summary report in the reports schema that groups observations by constituent and station.
@@ -385,54 +66,11 @@
         FROM
             analytics.observations
         GROUP BY
-            constituent,station_id,station_origin
-
-
-    # ORDER BY
-    # constituent,sample_count;''')
-
-def create_outlet_summary_report(con: duckdb.DuckDBPyConnection):
-    con.execute("""
-    CREATE VIEW reports.outlet_constituent_summary AS
-    SELECT
-        outlet_id,
-        constituent,
-        count_star() AS sample_count,
-        avg("value") AS average_value,
-        min("value") AS min_value,
-        max("value") AS max_value,
-        "year"(min(datetime)) AS start_date,
-        "year"(max(datetime)) AS end_date
-    FROM
-        analytics.outlet_observations
-    GROUP BY
-        constituent,
-        outlet_id
-    """)
+            constituent,station_id,station_origin
+        ORDER BY
+            constituent,sample_count;''')
 
-
-
-def drop_station_id(con: duckdb.DuckDBPyConnection, station_id: str,station_origin: str):
-    """
-    Drop all data for a specific station from staging and analytics schemas.
-    """
-    con.execute(f"DELETE FROM staging.equis_raw WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    con.execute(f"DELETE FROM staging.wiski_raw WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    con.execute(f"DELETE FROM analytics.equis WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    con.execute(f"DELETE FROM analytics.wiski WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    update_views(con)
 
-def update_views(con: duckdb.DuckDBPyConnection):
-    """
-    Update all views in the database.
-    """
-    create_staging_qc_count_view(con)
-    create_combined_observations_view(con)
-    create_constituent_summary_report(con)
-    create_outlet_observations_view(con)
-    create_outlet_observations_with_flow_view(con)
-    create_outlet_summary_report(con)
-
 def connect(db_path: str, read_only: bool = False) -> duckdb.DuckDBPyConnection:
     """
     Returns a DuckDB connection to the given database path.
@@ -468,22 +106,6 @@ def load_df_to_staging(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_n
     con.execute(f"CREATE TABLE staging.{table_name} AS SELECT * FROM tmp_df")
     con.unregister("tmp_df")
 
-def add_df_to_staging(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str):
-    """
-    Append a pandas DataFrame into a staging table. This will create the staging
-    table if it does not exist.
-    """
-    # register pandas DF and create table if not exists
-    con.register("tmp_df", df)
-    con.execute(f"""
-        CREATE TABLE IF NOT EXISTS staging.{table_name} AS
-        SELECT * FROM tmp_df
-    """)
-    con.execute(f"""
-        INSERT INTO staging.{table_name}
-        SELECT * FROM tmp_df
-    """)
-    con.unregister("tmp_df")
 
 def load_csv_to_staging(con: duckdb.DuckDBPyConnection, csv_path: str, table_name: str, replace: bool = True, **read_csv_kwargs):
     """
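
For orientation, here is a minimal sketch of the 2.1.0 initialization flow implied by the diff above: on a fresh database the new try/except lets init_db succeed before any data is staged, and a later re-run builds the views. The sample DataFrame columns and table name are illustrative assumptions, not taken from the package docs.

import pandas as pd
from mpcaHydro import warehouse

# First run on a fresh file: schemas are created; the try/except in init_db
# swallows the duckdb.CatalogException because the tables backing the
# observation views don't exist yet.
warehouse.init_db("hydro.duckdb")

# Stage some raw data (columns here are illustrative only), then re-run
# init_db so the observation views can be built once their backing tables exist.
con = warehouse.connect("hydro.duckdb")
df = pd.DataFrame({
    "datetime": pd.to_datetime(["2024-05-01 10:00"]),
    "value": [12.3],
    "station_id": ["H12345"],
    "station_origin": ["wiski"],
    "constituent": ["Q"],
    "unit": ["cfs"],
})
warehouse.load_df_to_staging(con, df, "wiski_raw")
con.close()
warehouse.init_db("hydro.duckdb")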
{mpcahydro-2.0.6.dist-info → mpcahydro-2.1.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mpcaHydro
-Version: 2.0.6
+Version: 2.1.0
 Summary: Python package for downloading MPCA hydrology data
 Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
 Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -10,9 +10,7 @@ Keywords: Hydrology,MPCA
 Classifier: Development Status :: 3 - Alpha
 Classifier: Programming Language :: Python
 Requires-Python: >=3.8
-Requires-Dist: baseflow
 Requires-Dist: duckdb
-Requires-Dist: oracledb
 Requires-Dist: pandas
 Requires-Dist: pathlib
 Requires-Dist: requests
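
Since baseflow and oracledb are dropped from Requires-Dist, environments that still import them must install them separately after upgrading. A quick standard-library check of what the installed distribution declares:

from importlib.metadata import requires

# Declared dependencies of the installed distribution; for 2.1.0 this should
# no longer include baseflow or oracledb.
print(requires("mpcaHydro"))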
mpcahydro-2.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@
+mpcaHydro/WISKI.py,sha256=VWPwmDGv6rKFHgiQrcn4fAyx-h43Tyf8Vjtewa49yj4,11022
+mpcaHydro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mpcaHydro/data_manager.py,sha256=LofBoNfhfStQlzD8EK1in_15BtNv_8q8057cjK4TmbA,15611
+mpcaHydro/equis.py,sha256=R4BEAkj9I6hVhSmd4WjjMLyQXBcOL5B2YIZjwm4EtqE,17943
+mpcaHydro/etlCSG.py,sha256=5QT6V2dHvNKC9r5-dspt-NpOmECP2LFw1Lyq1zdkqps,2630
+mpcaHydro/etlSWD.py,sha256=FvFP5lIOxtzF3eEgUDGw-C2BJTRfxXxkbt3hYl8PCZQ,6367
+mpcaHydro/etlWISKI.py,sha256=S1dNUe08Y0riJNBaEJDfgmewR8CwPtIaB_3Vw8JujkM,22201
+mpcaHydro/etlWPLMN.py,sha256=avLJvWRRxsG35w--OVq0qSCrFjO6G2x0aQ31d9kcYHg,4179
+mpcaHydro/pywisk.py,sha256=kaxJCPCZHy9oEo9VnoSmFC58qm1sX9fVbtp6nXs7934,13290
+mpcaHydro/warehouse.py,sha256=Rn8onCs9R-EnU9XNA1Gy53B-PAIhW_YaCLBDO29zyMY,7786
+mpcaHydro/data/EQUIS_PARAMETER_XREF.csv,sha256=XZPrcZan9irSqFd4UasnPo_NQBcjyFodi0q3FGQphjI,5667
+mpcaHydro/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
+mpcahydro-2.1.0.dist-info/METADATA,sha256=LyNxLeMkwdtq1ox-2ygVqPjqUPk5rFCseONZfziJEwg,543
+mpcahydro-2.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+mpcahydro-2.1.0.dist-info/RECORD,,
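
Each RECORD line follows the wheel convention: the file path, "sha256=" plus the urlsafe-base64 (unpadded) SHA-256 of the file, and its size in bytes. A small sketch that recomputes an entry, assuming the wheel has been unpacked into the current directory:

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Recompute a RECORD line: urlsafe base64 of the SHA-256 digest with the
    # trailing "=" padding stripped, followed by the file size in bytes.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

print(record_entry("mpcaHydro/warehouse.py"))
# Expected to match the RECORD line above:
# mpcaHydro/warehouse.py,sha256=Rn8onCs9R-EnU9XNA1Gy53B-PAIhW_YaCLBDO29zyMY,7786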
mpcaHydro/data/WISKI_QUALITY_CODES.csv
DELETED
@@ -1,71 +0,0 @@
-quality_code,Text,Description,Active
-0,Unchecked,Unchecked data in progress or data that is not quality coded as part of the workup. Default coding for shifts so the quality codes from Level and Ratings are used for discharges. ,1
-3,Instantaneous,Instantaneous groundwater measurements or sampled date for load stations.,1
-5,Excellent,Discharge measurements that are excellent.,1
-8,Reliable Interpolation,The value of the data point is an interpolation between adjacent points. Code used for filling gaps less than 4 hours or with no change in data trend likely based on reference information.,1
-10,Good,Time series data that tracks well and requires no corrections or corrections of very small magnitude or timeseries data that has been reviewed and accepted for precipitation and groundwater level. Also used for discharge measurements and rating points. ,1
-15,Fair,Time series data that tracks fairly well and requires some corrections of relatively small magnitude. Also used for discharge measurements and rating points. ,1
-20,Poor,Time series data that tracks poorly and requires significant or many corrections. Also used for discharge measurements and rating points. ,1
-27,Questionable,"Timeseries data or discharge measurements that are questionable due to operator error, equipment error, etc). Extra scrutiny should be used for these data. ",1
-28,Unknown data quality,"Unknown quality of time series data, ratings or discharge measurements.",1
-29,Modeled,"Time-series data, rating point or discharge from a reliable mathematical and\or computer model. ",1
-34,Estimated,"Time-series data estimated from reference traces, models or extrapolation of the rating curve using supporting data and up to two times the maximum measured discharge.",1
-35,Unreliable,Time-series data computed with a rating extrapolated without supporting data or beyond two times the maximum measured discharge without a model.,1
-36,Threshold Exceedance,"Time-series data may be beyond the measuring limits of the monitoring equipment, or outside the bounds of historical extremes.",1
-40,Default import code,WISKI default coding for gaugings. ,1
-45,Approved Ext Data,"External data that has been graded externally as ""Approved"".",1
-48,Unknown Ext Data,External data that has been graded internally as “Unknown”.,1
-49,Estimated Ext Data,External data that has been graded externally as “Estimated.” Typically this is finalized ice data.,1
-50,Provisional Ext Data,External data that has been graded internally or externally as “Provisional”.,1
-80,Ice - Estimated,Ice affected time series data. Discharge computed with ice affected stage data is considered estimated.,1
-199,199-Logger Unknown,Initial code for data coming to the system from the logger.,1
-200,200,Initial code for data coming to the system from telemetry or default coding for WISKI timeseries. ,1
-228,Info Parameter,This parameter is collected for informational purposes only. Data has been through a cursory check only. This is stored in the database and available upon request.,1
-255,---,System assigned code for gaps in the data set. Records with null values. ,1
-1,Continuous Data,~Discontinued~ Good TS data that requires no correction.,0
-2,Edited Data,~Discontinued~ TS data that has been edited. Typically used when spikes are removed or when points are edited manual for datum corrections.,0
-3,Instantaneous Data,Final WQ data.,0
-4,Questionable data,~Discontinued~,0
-5,Excellent measurment,Used to indicated discharge measurements that are excellent as well as excellent sections of the rating.,0
-10,Good measurement,Used to indicated discharge measurements and sections of the rating that are good and time series data that tracks well and requires no corrections or corrections of very small magnitude.,0
-12,Modeled measurement,~Discontinued~ Rating point or discharge was obtained from a relizble mathematical and/or computer model. After 3/1/11 use QC148.,0
-15,Fair measurement,Used to indicated discharge measurements and sections of the rating that are fair and time series data that tracks fairly well and requires some corrections of relatively small magnitude.,0
-20,Poor measurement,Used to indicated discharge measurements and sections of the rating that are poor and time series data that tracks poorly and requires significant or many corrections.,0
-25,Unknown measurement,Measurement data not available.,0
-27,Questionable data,"Flow measurement is very poor and should be given extra scrutiny or time series data that is questionable due to operator error, equipment error, etc.",0
-30,Good Archived Daily Value,This code is used for archived daily value data that is considered “Good”.,0
-31,Fair Archived Daily Value,This code is used for archived daily value data that is considered “Fair”.,0
-32,Poor Archived Daily Value,This code is used for archived daily value data that is considered “Poor”.,0
-33,Unknown Archived Daily Value,This code is used for archived daily value data that has unknown quality based on lack of documentation.,0
-34,Estimated Archived Daily Value,This code is used for archived daily value data that has been estimated.,0
-35,Unreliable Archived Daily Value,This code is used for archived daily value data that is unreliable based on the quality of the supporting time series data and/or rating.,0
-45,Good External Data,This code is used for external data that has been graded internally as “Good”.,0
-46,Fair External Data,This code is used for external data that has been graded internally as “Fair”.,0
-47,Poor External Data,This code is used for external data that has been graded internally as “Poor”.,0
-48,Unknown External Data,This code is used for external data that has been graded internally as “Unknown”,0
-49,Estimated External Data,This code is used for external data that has been graded externally as “Estimated.” Typically this is finalized ice data.,0
-50,Provisional External Data,This code is used for external data that has been graded internally as “Provisional”,0
-51,Telemetry data - DCP,This code is used for time-series data when imported into hydstra using an automated telemetry method that accesses a DCP through the GOES network. The “questionable measurement” flag is set through the shef code that accompanies the DCP data.,0
-60,Above rating,~Discontinued~,0
-70,Estimated Data,Value of the data point is estimated.,0
-76,Reliable interpolation,Value of the data point is an interpolation between adjacent points. ,0
-80,Ice,"(DISCONTINUED) Used to indicate ice conditions when the data should not be exported. Use in conjunction with 80 to code 232.00 values, run USDAY to compute daily flow, then recode 232.00 80 values to 180 so unit value export cannot occur.",0
-82,Linear interpolation across a gap in records,~Discontinued~ Points that were added to fill a gap in the data record. The points fall on a straight line between the end points of the gap. This code was changed to 8 in WISKI.,0
-103,Provisional Instantaneous Data,Provisional WQ data.,0
-130,Good Provisional Daily Value,This code is used for archived daily value data that is considered “Good” but Provisional because there is only one year of gaging measurements.,0
-131,Fair Provisional Daily Value,This code is used for archived daily value data that is considered “Fair” but Provisional because there is only one year of gaging measurements.,0
-132,Poor Provisional Daily Value,This code is used for archived daily value data that is considered “Poor” but Provisional because there is only one year of gaging measurements.,0
-133,Unknown Provisional Archived Daily Value,This code is used for archived daily value data that has unknown quality based on lack of documentation but Provisional because there is only one year of gaging measurements.,0
-134,Estimated Provisional Archived Daily Value,This code is used for archived daily value data that has been estimated but Provisional because there is only one year of gaging measurements.,0
-135,Unreliable Provisional Archived Daily Value,This code is used for archived daily value data that is unreliable based on the quality of the supporting time series data and/or rating but Provisional because there is only one year of gaging measurements.,0
-140,Data not yet checked,This code is used for time-series data when it is initially imported into hydstra using manual import methods. ,0
-141,Telemetry data - not yet checked,This code is used for time-series data when it is imported into hydstra using an automated telemetry method.,0
-148,Modeled measurement,Rating point or discharge was obtained from a reliable mathematical and/or computer model.,0
-149,Extrapolated rating point,Rating point accurately extrapolated using supporting data and is less than two times the maxiumum measured discharge.,0
-150,Over-extrapolated rating point,Rating point extrapolated without supporting data or beyone two times the maximum measured discharge without a mathematical model.,0
-151,Data Missing,"This code is used to flag the end of a period of missing time-series data, before the next good data value.",0
-160,Above rating,~Discontinued~,0
-169,Datalogger Hardware Error Code 6999,"This code is used to indicate that a time-series point had a value of 6999 or -6999, a typical hardware error code, and the value was changed.",0
-170,Estimated Data,"Used to indicate estimated data when the data should not be exported. Often used in conjunction with 70 to code 232.00 values, run USDAY to compute daily flow, then recode 232.00 70 values to 170 so unit value export can not occur.",0
-180,Ice,Used to indicate ice conditions.,0
-255,Data Missing,This code is used when data is exported and does not exist for a given time period.,0
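
The quality-code lookup table no longer ships in the wheel. A workflow that still needs it can register a local copy of the CSV the same way the deleted create_mapping_tables did; the file path and database name here are hypothetical:

import duckdb

con = duckdb.connect("hydro.duckdb")
con.execute("CREATE SCHEMA IF NOT EXISTS mappings")
# read_csv_auto infers column names and types from the CSV header and rows.
con.execute(
    "CREATE TABLE IF NOT EXISTS mappings.wiski_quality_codes "
    "AS SELECT * FROM read_csv_auto('WISKI_QUALITY_CODES.csv')"
)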
mpcaHydro/data/outlets.duckdb
DELETED
Binary file
mpcaHydro/data/stations_EQUIS.gpkg
DELETED
Binary file
mpcaHydro/data/stations_wiski.gpkg
DELETED
Binary file