mpcaHydro 2.2.3__tar.gz → 2.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/PKG-INFO +1 -1
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/pyproject.toml +1 -1
- mpcahydro-2.2.4/src/mpcaHydro/__init__.py +4 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/data/outlet.duckdb +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcahydro-2.2.4/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +48 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/equis.py +49 -27
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/outlets.py +8 -58
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/pywisk.py +14 -7
- mpcahydro-2.2.4/src/mpcaHydro/sql/analytics_tables.sql +20 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql/outlets_schema.sql +53 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql/schemas.sql +8 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql/staging_tables.sql +90 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql/views_analytics.sql +117 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql/views_outlets.sql +15 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql/views_reports.sql +56 -0
- mpcahydro-2.2.4/src/mpcaHydro/sql_loader.py +56 -0
- mpcahydro-2.2.3/src/mpcaHydro/etlSWD.py → mpcahydro-2.2.4/src/mpcaHydro/swd.py +15 -21
- mpcahydro-2.2.4/src/mpcaHydro/warehouse.py +313 -0
- mpcahydro-2.2.4/src/mpcaHydro/warehouse_functions.py +527 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/wiski.py +44 -3
- mpcahydro-2.2.4/tests/integration/README.md +48 -0
- mpcahydro-2.2.4/tests/integration/conftest.py +17 -0
- mpcahydro-2.2.4/tests/integration/test.json +258 -0
- mpcahydro-2.2.4/tests/integration/test.sqlite +0 -0
- mpcahydro-2.2.4/tests/integration/test_data_manager.py +120 -0
- mpcahydro-2.2.4/tests/integration/test_data_manager_integration.py +220 -0
- mpcahydro-2.2.4/tests/integration/test_equis_integration.py +165 -0
- mpcahydro-2.2.4/tests/integration/test_warehouse.py +216 -0
- mpcahydro-2.2.4/tests/integration/test_wiski.py +33 -0
- mpcahydro-2.2.4/tests/integration/test_wiski_integration.py +165 -0
- mpcahydro-2.2.4/tests/pixi.toml +25 -0
- mpcahydro-2.2.4/tests/test_data_manager_functions.py +194 -0
- mpcahydro-2.2.3/ERROR.FIL +0 -6
- mpcahydro-2.2.3/demo.py +0 -226
- mpcahydro-2.2.3/src/mpcaHydro/__init__.py +0 -0
- mpcahydro-2.2.3/src/mpcaHydro/data_manager.py +0 -293
- mpcahydro-2.2.3/src/mpcaHydro/etlWISKI.py +0 -624
- mpcahydro-2.2.3/src/mpcaHydro/etlWPLMN.py +0 -104
- mpcahydro-2.2.3/src/mpcaHydro/warehouse.py +0 -711
- mpcahydro-2.2.3/src/mpcaHydro/warehouseManager.py +0 -55
- mpcahydro-2.2.3/tests/integration/observations.duckdb +0 -0
- mpcahydro-2.2.3/tests/integration/test_dataManager.py +0 -61
- mpcahydro-2.2.3/tests/integration/test_warehouse.duckdb +0 -0
- mpcahydro-2.2.3/tests/integration/test_warehouse.py +0 -113
- mpcahydro-2.2.3/tests/unit/test_equis.py +0 -19
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/.gitattributes +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/.gitignore +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/README.md +0 -0
- /mpcahydro-2.2.3/src/mpcaHydro/etlCSG.py → /mpcahydro-2.2.4/src/mpcaHydro/csg.py +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/reports.py +0 -0
- {mpcahydro-2.2.3 → mpcahydro-2.2.4}/src/mpcaHydro/xref.py +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
|
2
|
+
MIIIjzCCBvegAwIBAgIQIOC9Vbo5TbuTGYi1z42GyjANBgkqhkiG9w0BAQsFADBg
|
|
3
|
+
MQswCQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTcwNQYDVQQD
|
|
4
|
+
Ey5TZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gQ0EgRVYgUjM2
|
|
5
|
+
MB4XDTI1MDkyNTAwMDAwMFoXDTI2MDkyNTIzNTk1OVowgbAxGjAYBgNVBAUTEUdv
|
|
6
|
+
dmVybm1lbnQgRW50aXR5MRMwEQYLKwYBBAGCNzwCAQMTAlVTMRowGAYDVQQPExFH
|
|
7
|
+
b3Zlcm5tZW50IEVudGl0eTELMAkGA1UEBhMCVVMxEjAQBgNVBAgTCU1pbm5lc290
|
|
8
|
+
YTEbMBkGA1UEChMSU3RhdGUgb2YgTWlubmVzb3RhMSMwIQYDVQQDExp3aXNraXdl
|
|
9
|
+
YjAxLnBjYS5zdGF0ZS5tbi51czCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoC
|
|
10
|
+
ggIBAJ8aX33B4Jq37RF0QcjYrsxKwjiFLDcgrLGzsUCm/WJUuvsX3pbU8rfpYte8
|
|
11
|
+
BuI6XyudiPpwqPezSInc3hFCbZzYmdcy/RgjsQ1Uqsfj+0NB3A44CHGh6NHY59X0
|
|
12
|
+
thAroUgnz1gcnHfHiHvJPHwYOAnhbVYel8hBBfIq2wqnk2B1Z99DM9u/Q9/ZGpIg
|
|
13
|
+
2HvbWg+8YjoMVQl3vH1jycE0i3VVTLWr5OfkV1jhl/rdFSq+Ott8Ipmni0J0GBox
|
|
14
|
+
EHNKaTbBwB5EzV6a67uiiA6x5rkNEIM0oMCGWdAvm5hT2gzwVwj+kvSgQHsls4jy
|
|
15
|
+
Gyjxlcy7N3A43dxTs/203jiGk9M55C9JEf9RbmA0muDxn7OlAmEzCyMY20BdlGBI
|
|
16
|
+
fBkPtByIE2MOOIz6sG0PdhO57+7nIvSMftgJfjlhszwtMrpLLQr2gOqk5/XFOqw4
|
|
17
|
+
SSXRQC24mKQ78q1OIohhfeLJdecAkPGcb0uSYQdpOA8RO0hgBGvsFBu/8zn/J3WM
|
|
18
|
+
Pn2wYvE13xz39XQ6G3ySH/MqC7cY9FlzM+RMk6qQyionZ9O7lC6tFufyJNX1E9Ps
|
|
19
|
+
W9E4enrunXHG5k+5glA/43hmhs8CbaeK4xdv1ZuJj7TvLLPAZ3+ojh6tzyV2iK66
|
|
20
|
+
YRzWOW3IaXIZ1/hoWlFHr++WcWJi72k3C85TdUITvxJKoqgrAgMBAAGjggNyMIID
|
|
21
|
+
bjAfBgNVHSMEGDAWgBSYLV4ej+tU9Ln/VZWtTMd+pJiuezAdBgNVHQ4EFgQUaW8x
|
|
22
|
+
VX6iXFQFTWYYY6ZZZyCBl9owDgYDVR0PAQH/BAQDAgWgMAwGA1UdEwEB/wQCMAAw
|
|
23
|
+
HQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMEkGA1UdIARCMEAwNQYMKwYB
|
|
24
|
+
BAGyMQECAQUBMCUwIwYIKwYBBQUHAgEWF2h0dHBzOi8vc2VjdGlnby5jb20vQ1BT
|
|
25
|
+
MAcGBWeBDAEBMFQGA1UdHwRNMEswSaBHoEWGQ2h0dHA6Ly9jcmwuc2VjdGlnby5j
|
|
26
|
+
b20vU2VjdGlnb1B1YmxpY1NlcnZlckF1dGhlbnRpY2F0aW9uQ0FFVlIzNi5jcmww
|
|
27
|
+
gYQGCCsGAQUFBwEBBHgwdjBPBggrBgEFBQcwAoZDaHR0cDovL2NydC5zZWN0aWdv
|
|
28
|
+
LmNvbS9TZWN0aWdvUHVibGljU2VydmVyQXV0aGVudGljYXRpb25DQUVWUjM2LmNy
|
|
29
|
+
dDAjBggrBgEFBQcwAYYXaHR0cDovL29jc3Auc2VjdGlnby5jb20wRQYDVR0RBD4w
|
|
30
|
+
PIIad2lza2l3ZWIwMS5wY2Euc3RhdGUubW4udXOCHnd3dy53aXNraXdlYjAxLnBj
|
|
31
|
+
YS5zdGF0ZS5tbi51czCCAX4GCisGAQQB1nkCBAIEggFuBIIBagFoAHcA2AlVO5RP
|
|
32
|
+
ev/IFhlvlE+Fq7D4/F6HVSYPFdEucrtFSxQAAAGZgmRjOAAABAMASDBGAiEAmmM+
|
|
33
|
+
KMiBitsTZegFbO80sXyZSvBejMJ1zoyOKAtKLmwCIQDfbCgMVJoTyhmqTyTHSd/w
|
|
34
|
+
OmlNc/v9YCiBaYgq3mpvwwB2AK9niDtXsE7dj6bZfvYuqOuBCsdxYPAkXlXWDC/n
|
|
35
|
+
hYc6AAABmYJkY6MAAAQDAEcwRQIgQduMFxbWG6okiji3zTQsobhymG4Hj06i3q95
|
|
36
|
+
/+xIJLUCIQC8DYbjvaUudd6TgiJWoKMYf9pYIirfPJmID129oLkAZQB1AKyrMHBs
|
|
37
|
+
6+yEMfQT0vSRXxEeQiRDsfKmjE88KzunHgLDAAABmYJkYswAAAQDAEYwRAIgCyMR
|
|
38
|
+
cyQpwfFfIpp5qTTjuZVK1IvnGoIWvWTYbQkojzYCIAM+XXKTC7a91IwdC51PidF2
|
|
39
|
+
Pr21+PSOptD7pN8C9pD7MA0GCSqGSIb3DQEBCwUAA4IBgQBqfDG3CPDoY1tE56fp
|
|
40
|
+
gGcqVlXknE5ttqTqDzJtQUagf6OAQ52d8McwsWsV7IIjpMXCNikJFw7crE45ICg8
|
|
41
|
+
FCaqUkpQb9Du+OhNUJNHQiC82gPd4xo3VTK3mM2wrhesXrOhenpzhz0z5TRFXX2J
|
|
42
|
+
8dl2k/sC0fz7QBzw1iCmucG+4f3rvQdr5zfHVqblSC0k6fP6N7cVSCB3d+nMZiHP
|
|
43
|
+
2FhHJnWHzLqPynarKj7aBugCwv+/jAkT8xfyzYhf4X7IH1L5uCHKB+phHj1/AqG2
|
|
44
|
+
n2aVDooyfCYqdAwFiUCtg8iEeXvXBc9Kcov81H94oq1yuZrQOy6lrJ8UpnDO5Zlu
|
|
45
|
+
C4jzg/mSgDioLmSrIQYFEp8R77xbjRBMb0mqko/Hvp5bysb5XTWSgmJrR+3/b/0E
|
|
46
|
+
jkIBv5LdEuxNrd28L1rneDEztIVbTc7bq/V9fQlq6j4YJ+BMh442DMrGnRweicD5
|
|
47
|
+
PFFFejW+IfQT8Z9GwApcgGXNne3SzfAd5PtJqxdXC1KZkbk=
|
|
48
|
+
-----END CERTIFICATE-----
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from datetime import datetime, timezone, timedelta
|
|
4
4
|
import pandas as pd
|
|
5
|
-
from typing import Union
|
|
5
|
+
from typing import Union, Optional
|
|
6
6
|
import oracledb
|
|
7
7
|
import duckdb
|
|
8
8
|
|
|
@@ -33,10 +33,18 @@ def connect(user: str, password: str, host: str = "DELTAT", port: int = 1521, si
|
|
|
33
33
|
sid=sid)
|
|
34
34
|
return CONNECTION
|
|
35
35
|
|
|
36
|
-
def close_connection():
|
|
37
|
-
'''Close the
|
|
36
|
+
def close_connection(connection: Optional[oracledb.Connection] = None):
|
|
37
|
+
'''Close the Oracle database connection.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
connection: Optional connection to close. If not provided, closes global CONNECTION.
|
|
41
|
+
'''
|
|
38
42
|
global CONNECTION
|
|
39
|
-
if
|
|
43
|
+
if connection is not None:
|
|
44
|
+
connection.close()
|
|
45
|
+
if connection is CONNECTION:
|
|
46
|
+
CONNECTION = None
|
|
47
|
+
elif CONNECTION is not None:
|
|
40
48
|
CONNECTION.close()
|
|
41
49
|
CONNECTION = None
|
|
42
50
|
|
|
@@ -80,11 +88,32 @@ def to_dataframe(odb_cursor):
|
|
|
80
88
|
return df
|
|
81
89
|
|
|
82
90
|
#%% Query for station locations with HSPF related constituents
|
|
83
|
-
|
|
84
|
-
def
|
|
91
|
+
|
|
92
|
+
def info(station_ids, connection: Optional[oracledb.Connection] = None):
|
|
93
|
+
'''Get information for given station IDs from Oracle database.'''
|
|
94
|
+
conn = connection if connection is not None else CONNECTION
|
|
95
|
+
if conn is None:
|
|
96
|
+
raise ValueError("No connection provided and global CONNECTION is not set. Call connect() first or pass a connection.")
|
|
97
|
+
|
|
98
|
+
df = normalize(download(station_ids, connection=conn)).drop_duplicates(subset=['station_id','constituent'])
|
|
99
|
+
return df
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def download(station_ids, connection: Optional[oracledb.Connection] = None):
|
|
85
105
|
'''Download data for given station IDs from Oracle database.
|
|
86
106
|
This grabs data from the Data access Layer (DAL) equis result view for
|
|
87
|
-
river/stream locations and HSPF related constituents only.
|
|
107
|
+
river/stream locations and HSPF related constituents only.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
station_ids: List of station IDs to download
|
|
111
|
+
connection: Optional Oracle connection. If not provided, uses global CONNECTION.
|
|
112
|
+
'''
|
|
113
|
+
conn = connection if connection is not None else CONNECTION
|
|
114
|
+
if conn is None:
|
|
115
|
+
raise ValueError("No connection provided and global CONNECTION is not set. Call connect() first or pass a connection.")
|
|
116
|
+
|
|
88
117
|
placeholders, binds = make_placeholders(station_ids)
|
|
89
118
|
query = f"""
|
|
90
119
|
SELECT
|
|
@@ -122,7 +151,7 @@ SELECT
|
|
|
122
151
|
AND mpca_dal.eq_sample.sample_method IN ('G-EVT', 'G', 'FIELDMSROBS', 'LKSURF1M', 'LKSURF2M', 'LKSURFOTH')
|
|
123
152
|
AND mpca_dal.mv_eq_result.sys_loc_code IN ({placeholders})
|
|
124
153
|
"""
|
|
125
|
-
with
|
|
154
|
+
with conn.cursor() as cursor:
|
|
126
155
|
cursor.execute(query,binds)
|
|
127
156
|
return to_dataframe(cursor)
|
|
128
157
|
|
|
@@ -441,8 +470,16 @@ def transform(df):
|
|
|
441
470
|
# GROUP BY station_id, DATE_TRUNC('hour', datetime), constituent, unit
|
|
442
471
|
# """ )
|
|
443
472
|
|
|
444
|
-
def fetch_station_locations():
|
|
445
|
-
'''Fetch station location data for stations with HSPF related constituents.
|
|
473
|
+
def fetch_station_locations(connection: Optional[oracledb.Connection] = None):
|
|
474
|
+
'''Fetch station location data for stations with HSPF related constituents.
|
|
475
|
+
|
|
476
|
+
Args:
|
|
477
|
+
connection: Optional Oracle connection. If not provided, uses global CONNECTION.
|
|
478
|
+
'''
|
|
479
|
+
conn = connection if connection is not None else CONNECTION
|
|
480
|
+
if conn is None:
|
|
481
|
+
raise ValueError("No connection provided and global CONNECTION is not set. Call connect() first or pass a connection.")
|
|
482
|
+
|
|
446
483
|
query ="""SELECT DISTINCT
|
|
447
484
|
m.SYS_LOC_CODE,
|
|
448
485
|
stn.LONGITUDE,
|
|
@@ -467,22 +504,7 @@ def fetch_station_locations():
|
|
|
467
504
|
'TEMP-W',
|
|
468
505
|
'7664-41-7')
|
|
469
506
|
"""
|
|
470
|
-
with
|
|
507
|
+
with conn.cursor() as cursor:
|
|
471
508
|
cursor.execute(query)
|
|
472
509
|
df = to_dataframe(cursor)
|
|
473
|
-
|
|
474
|
-
# dups = set(df.loc[df['SYS_LOC_CODE'].isin(df.loc[df['SYS_LOC_CODE'].duplicated()]['SYS_LOC_CODE']),'SYS_LOC_CODE'].to_list())
|
|
475
|
-
# for dup in dups:
|
|
476
|
-
# #percent difference between lat/long values
|
|
477
|
-
# sub = df.loc[df['SYS_LOC_CODE'] == dup]
|
|
478
|
-
# lat_diff = abs(sub['LATITUDE'].max() - sub['LATITUDE'].min()) / ((sub['LATITUDE'].max() + sub['LATITUDE'].min()) / 2) * 100
|
|
479
|
-
# long_diff = abs(sub['LONGITUDE'].max() - sub['LONGITUDE'].min()) / ((sub['LONGITUDE'].max() + sub['LONGITUDE'].min()) / 2) * 100
|
|
480
|
-
# print(f'Duplicate station {dup} has {lat_diff:.6f}% latitude difference')
|
|
481
|
-
# print(f'Duplicate station {dup} has {long_diff:.6f}% longitude difference')
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
# geometry = gpd.points_from_xy(df['LONGITUDE'], df['LATITUDE'])
|
|
485
|
-
# gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")
|
|
486
|
-
# filename = 'EQ_STATION_' + str(date.today()) + '.gpkg'
|
|
487
|
-
# gdf.to_file(save_path.joinpath(filename), driver = 'GPKG')
|
|
488
|
-
# gdf.rename(columns={'SYS_LOC_CODE':'station_id'}, inplace=True)
|
|
510
|
+
return df
|
|
@@ -9,6 +9,7 @@ from pathlib import Path
|
|
|
9
9
|
import geopandas as gpd
|
|
10
10
|
import pandas as pd
|
|
11
11
|
import duckdb
|
|
12
|
+
from mpcaHydro.sql_loader import get_outlets_schema_sql
|
|
12
13
|
#from hspf_tools.calibrator import etlWISKI, etlSWD
|
|
13
14
|
|
|
14
15
|
|
|
@@ -79,10 +80,14 @@ def equis_station_opnids(model_name):
|
|
|
79
80
|
opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
|
|
80
81
|
return split_opnids(opnids)
|
|
81
82
|
|
|
82
|
-
def
|
|
83
|
-
opnids = MODL_DB.dropna(subset=['opnids']).query('
|
|
83
|
+
def mapped_station_opnids(station_id, station_origin):
|
|
84
|
+
opnids = MODL_DB.dropna(subset=['opnids']).query('station_id == @station_id and source == @station_origin')['opnids'].str.split(',').to_list()
|
|
84
85
|
return split_opnids(opnids)
|
|
85
86
|
|
|
87
|
+
def mapped_stations(model_name,station_origin):
|
|
88
|
+
assert(station_origin in ['wiski', 'equis'])
|
|
89
|
+
return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == @station_origin')['station_id'].tolist()
|
|
90
|
+
|
|
86
91
|
def mapped_equis_stations(model_name):
|
|
87
92
|
return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['station_id'].tolist()
|
|
88
93
|
|
|
@@ -108,9 +113,8 @@ def init_db(db_path: str,reset: bool = False):
|
|
|
108
113
|
db_path = Path(db_path)
|
|
109
114
|
if reset and db_path.exists():
|
|
110
115
|
db_path.unlink()
|
|
111
|
-
|
|
112
116
|
with connect(db_path.as_posix(),False) as con:
|
|
113
|
-
con.execute(
|
|
117
|
+
con.execute(get_outlets_schema_sql())
|
|
114
118
|
|
|
115
119
|
|
|
116
120
|
|
|
@@ -293,60 +297,6 @@ def add_reach(con,
|
|
|
293
297
|
)
|
|
294
298
|
|
|
295
299
|
|
|
296
|
-
OUTLETS_SCHEMA = """-- schema.sql
|
|
297
|
-
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
|
|
298
|
-
-- Compatible with DuckDB and SQLite.
|
|
299
|
-
|
|
300
|
-
-- Table 1: outlets
|
|
301
|
-
-- Represents a logical grouping that ties stations and reaches together.
|
|
302
|
-
CREATE SCHEMA IF NOT EXISTS outlets;
|
|
303
|
-
|
|
304
|
-
CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
|
|
305
|
-
outlet_id INTEGER PRIMARY KEY,
|
|
306
|
-
repository_name TEXT NOT NULL,
|
|
307
|
-
outlet_name TEXT,
|
|
308
|
-
notes TEXT -- optional: general notes about the outlet grouping
|
|
309
|
-
);
|
|
310
|
-
|
|
311
|
-
-- Table 2: outlet_stations
|
|
312
|
-
-- One-to-many: outlet -> stations
|
|
313
|
-
CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
|
|
314
|
-
outlet_id INTEGER NOT NULL,
|
|
315
|
-
station_id TEXT NOT NULL,
|
|
316
|
-
station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
|
|
317
|
-
repository_name TEXT NOT NULL, -- repository model the station is physically located in
|
|
318
|
-
true_opnid INTEGER NOT NULL, -- The specific reach the station physically sits on (optional)
|
|
319
|
-
comments TEXT, -- Per-station comments, issues, etc.
|
|
320
|
-
CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
|
|
321
|
-
FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
|
|
322
|
-
);
|
|
323
|
-
|
|
324
|
-
-- Table 3: outlet_reaches
|
|
325
|
-
-- One-to-many: outlet -> reaches
|
|
326
|
-
-- A reach can appear in multiple outlets, enabling many-to-many overall.
|
|
327
|
-
CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
|
|
328
|
-
outlet_id INTEGER NOT NULL,
|
|
329
|
-
reach_id INTEGER NOT NULL, -- model reach identifier (aka opind)
|
|
330
|
-
repository_name TEXT NOT NULL, -- optional: where the mapping comes from
|
|
331
|
-
FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
|
|
332
|
-
);
|
|
333
|
-
|
|
334
|
-
-- Useful views:
|
|
335
|
-
|
|
336
|
-
-- View: station_reach_pairs
|
|
337
|
-
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
|
|
338
|
-
CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
|
|
339
|
-
SELECT
|
|
340
|
-
s.outlet_id,
|
|
341
|
-
s.station_id,
|
|
342
|
-
s.station_origin,
|
|
343
|
-
r.reach_id,
|
|
344
|
-
r.repository_name
|
|
345
|
-
FROM outlets.outlet_stations AS s
|
|
346
|
-
JOIN outlets.outlet_reaches AS r
|
|
347
|
-
ON s.outlet_id = r.outlet_id;
|
|
348
|
-
|
|
349
|
-
"""
|
|
350
300
|
|
|
351
301
|
#row = modl_db.MODL_DB.iloc[0]
|
|
352
302
|
|
|
@@ -4,14 +4,16 @@ Created on Mon Jul 10 16:18:03 2023
|
|
|
4
4
|
|
|
5
5
|
@author: mfratki
|
|
6
6
|
"""
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
import requests
|
|
8
9
|
from requests.exceptions import ConnectionError, Timeout, HTTPError, RequestException
|
|
9
10
|
import pandas as pd
|
|
10
11
|
import time
|
|
11
12
|
|
|
13
|
+
# Path to the TLS certificate shipped in this package's data/ directory; it is
# handed to requests through the verify= argument.
# The components are joined with pathlib's "/" operator rather than a literal
# backslash, because a backslash is only a path separator on Windows.
CERT_PATH = str(Path(__file__).resolve().parent / 'data' / 'wiskiweb01.pca.state.mn.us.crt')
|
|
12
14
|
#TODO: Use this url to make sure web service is working https://wiskiweb01.pca.state.mn.us/
|
|
13
15
|
class Service():
|
|
14
|
-
base_url = '
|
|
16
|
+
base_url = 'https://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
|
|
15
17
|
base_dict = {
|
|
16
18
|
'datasource': '0',
|
|
17
19
|
'service': 'kisters',
|
|
@@ -28,7 +30,7 @@ class Service():
|
|
|
28
30
|
try:
|
|
29
31
|
# Using requests.head() to fetch headers is faster than requests.get()
|
|
30
32
|
# as it doesn't download the full content
|
|
31
|
-
response = requests.head('
|
|
33
|
+
# Verify against the bundled certificate (CERT_PATH), the same way the
# requests.get() calls in this module do, so the availability check does not
# fail on TLS verification while the data requests succeed.
response = requests.head('https://wiskiweb01.pca.state.mn.us', timeout=timeout, verify=CERT_PATH)
|
|
32
34
|
|
|
33
35
|
# raise_for_status() raises an HTTPError for 4xx or 5xx status codes
|
|
34
36
|
response.raise_for_status()
|
|
@@ -51,7 +53,7 @@ class Service():
|
|
|
51
53
|
|
|
52
54
|
def _requestTypes(self):
|
|
53
55
|
url = self.url({'request': 'getrequestinfo'})
|
|
54
|
-
return requests.get(url).json()[0]
|
|
56
|
+
return requests.get(url,verify=CERT_PATH).json()[0]
|
|
55
57
|
|
|
56
58
|
def getRequests(self):
|
|
57
59
|
return list(self._requestTypes()['Requests'].keys())
|
|
@@ -70,7 +72,7 @@ class Service():
|
|
|
70
72
|
|
|
71
73
|
def info(self,request_type):
|
|
72
74
|
url = self.url({'request': 'getrequestinfo'})
|
|
73
|
-
response = requests.get(url)
|
|
75
|
+
response = requests.get(url, verify=CERT_PATH)
|
|
74
76
|
get_requests = response.json()
|
|
75
77
|
return get_requests[0]['Requests'].keys()
|
|
76
78
|
|
|
@@ -93,7 +95,7 @@ class Service():
|
|
|
93
95
|
|
|
94
96
|
def get_json(self,args_dict):
|
|
95
97
|
# Download request
|
|
96
|
-
self.response = requests.get(self.url(args_dict))
|
|
98
|
+
self.response = requests.get(self.url(args_dict), verify=CERT_PATH)
|
|
97
99
|
if self.response.status_code != 200:
|
|
98
100
|
print('Error: ' + self.response.json()['message'])
|
|
99
101
|
self.response.raise_for_status() # raises exception when not a 2xx response
|
|
@@ -192,7 +194,10 @@ def construct_aggregation(interval, aggregation_type):
|
|
|
192
194
|
return f'aggregate({interval}~{aggregation_type})'
|
|
193
195
|
|
|
194
196
|
def validate_aggregation_type(aggregation_type):
|
|
195
|
-
|
|
197
|
+
if aggregation_type.startswith('perc-'):
|
|
198
|
+
validate_percentile(aggregation_type)
|
|
199
|
+
else:
|
|
200
|
+
assert(aggregation_type in VALID_AGGREGATION_TYPES)
|
|
196
201
|
return True
|
|
197
202
|
|
|
198
203
|
def validate_percentile(aggregation_type):
|
|
@@ -297,7 +302,8 @@ def get_stations(
|
|
|
297
302
|
return df
|
|
298
303
|
|
|
299
304
|
def get_ts_ids(
|
|
300
|
-
station_nos,
|
|
305
|
+
station_nos = None,
|
|
306
|
+
ts_ids = None,
|
|
301
307
|
parametertype_id = None,
|
|
302
308
|
stationparameter_no = None,
|
|
303
309
|
stationgroup_id = None,
|
|
@@ -317,6 +323,7 @@ def get_ts_ids(
|
|
|
317
323
|
|
|
318
324
|
args ={'request': 'getTimeseriesList',
|
|
319
325
|
'station_no': station_nos,
|
|
326
|
+
'ts_id': ts_ids,
|
|
320
327
|
'parametertype_id': parametertype_id,
|
|
321
328
|
'stationparameter_no': stationparameter_no,
|
|
322
329
|
'ts_name' : ts_name,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
-- analytics_tables.sql
|
|
2
|
+
-- Create tables in the analytics schema for processed/transformed data
|
|
3
|
+
|
|
4
|
+
CREATE TABLE IF NOT EXISTS analytics.equis (
|
|
5
|
+
datetime TIMESTAMP,
|
|
6
|
+
value DOUBLE,
|
|
7
|
+
station_id TEXT,
|
|
8
|
+
station_origin TEXT,
|
|
9
|
+
constituent TEXT,
|
|
10
|
+
unit TEXT
|
|
11
|
+
);
|
|
12
|
+
|
|
13
|
+
CREATE TABLE IF NOT EXISTS analytics.wiski (
|
|
14
|
+
datetime TIMESTAMP,
|
|
15
|
+
value DOUBLE,
|
|
16
|
+
station_id TEXT,
|
|
17
|
+
station_origin TEXT,
|
|
18
|
+
constituent TEXT,
|
|
19
|
+
unit TEXT
|
|
20
|
+
);
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
-- outlets_schema.sql
|
|
2
|
+
-- Schema for managing associations between model reaches and observation stations via outlets
|
|
3
|
+
-- Written for DuckDB. Not SQLite-compatible as written: SQLite has no CREATE SCHEMA or CREATE OR REPLACE VIEW.
|
|
4
|
+
|
|
5
|
+
CREATE SCHEMA IF NOT EXISTS outlets;
|
|
6
|
+
|
|
7
|
+
-- Table 1: outlet_groups
|
|
8
|
+
-- Represents a logical grouping that ties stations and reaches together
|
|
9
|
+
CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
|
|
10
|
+
outlet_id INTEGER PRIMARY KEY,
|
|
11
|
+
repository_name TEXT NOT NULL,
|
|
12
|
+
outlet_name TEXT,
|
|
13
|
+
notes TEXT
|
|
14
|
+
);
|
|
15
|
+
|
|
16
|
+
-- Table 2: outlet_stations
|
|
17
|
+
-- One-to-many: outlet -> stations
|
|
18
|
+
CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
|
|
19
|
+
outlet_id INTEGER NOT NULL,
|
|
20
|
+
station_id TEXT NOT NULL,
|
|
21
|
+
station_origin TEXT NOT NULL,
|
|
22
|
+
repository_name TEXT NOT NULL,
|
|
23
|
+
true_opnid INTEGER NOT NULL,
|
|
24
|
+
comments TEXT,
|
|
25
|
+
CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
|
|
26
|
+
FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
|
|
27
|
+
);
|
|
28
|
+
|
|
29
|
+
-- Table 3: outlet_reaches
|
|
30
|
+
-- One-to-many: outlet -> reaches
|
|
31
|
+
-- A reach can appear in multiple outlets, enabling many-to-many overall
|
|
32
|
+
CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
|
|
33
|
+
outlet_id INTEGER NOT NULL,
|
|
34
|
+
reach_id INTEGER NOT NULL,
|
|
35
|
+
repository_name TEXT NOT NULL,
|
|
36
|
+
FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
|
|
37
|
+
);
|
|
38
|
+
|
|
39
|
+
-- Useful views:
|
|
40
|
+
|
|
41
|
+
-- View: station_reach_pairs
|
|
42
|
+
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
|
|
43
|
+
CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
|
|
44
|
+
SELECT
|
|
45
|
+
s.outlet_id,
|
|
46
|
+
s.station_id,
|
|
47
|
+
s.station_origin,
|
|
48
|
+
r.reach_id,
|
|
49
|
+
r.repository_name
|
|
50
|
+
FROM outlets.outlet_stations AS s
|
|
51
|
+
JOIN outlets.outlet_reaches AS r
|
|
52
|
+
ON s.outlet_id = r.outlet_id;
|
|
53
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- schemas.sql
|
|
2
|
+
-- Create all database schemas for the data warehouse
|
|
3
|
+
|
|
4
|
+
CREATE SCHEMA IF NOT EXISTS staging;
|
|
5
|
+
CREATE SCHEMA IF NOT EXISTS analytics;
|
|
6
|
+
CREATE SCHEMA IF NOT EXISTS reports;
|
|
7
|
+
CREATE SCHEMA IF NOT EXISTS outlets;
|
|
8
|
+
CREATE SCHEMA IF NOT EXISTS mappings;
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
-- staging_tables.sql
|
|
2
|
+
-- Create tables in the staging schema for raw data from external sources
|
|
3
|
+
|
|
4
|
+
CREATE TABLE IF NOT EXISTS staging.equis(
|
|
5
|
+
LATITUDE DOUBLE,
|
|
6
|
+
LONGITUDE DOUBLE,
|
|
7
|
+
WID_LIST VARCHAR,
|
|
8
|
+
SAMPLE_METHOD VARCHAR,
|
|
9
|
+
SAMPLE_REMARK VARCHAR,
|
|
10
|
+
FACILITY_ID BIGINT,
|
|
11
|
+
FACILITY_NAME VARCHAR,
|
|
12
|
+
FACILITY_TYPE VARCHAR,
|
|
13
|
+
SYS_LOC_CODE VARCHAR,
|
|
14
|
+
LOC_NAME VARCHAR,
|
|
15
|
+
LOC_TYPE VARCHAR,
|
|
16
|
+
LOC_TYPE_2 VARCHAR,
|
|
17
|
+
TASK_CODE VARCHAR,
|
|
18
|
+
SAMPLE_ID BIGINT,
|
|
19
|
+
SYS_SAMPLE_CODE VARCHAR,
|
|
20
|
+
TEST_ID BIGINT,
|
|
21
|
+
ANALYTE_TYPE VARCHAR,
|
|
22
|
+
ANALYTE_TYPE_DESC VARCHAR,
|
|
23
|
+
ANALYTIC_METHOD VARCHAR,
|
|
24
|
+
PREFERRED_NAME VARCHAR,
|
|
25
|
+
PARAMETER VARCHAR,
|
|
26
|
+
CAS_RN VARCHAR,
|
|
27
|
+
CHEMICAL_NAME VARCHAR,
|
|
28
|
+
GTLT VARCHAR,
|
|
29
|
+
RESULT_TEXT VARCHAR,
|
|
30
|
+
RESULT_NUMERIC DOUBLE,
|
|
31
|
+
RESULT_UNIT VARCHAR,
|
|
32
|
+
STAT_TYPE INTEGER,
|
|
33
|
+
VALUE_TYPE VARCHAR,
|
|
34
|
+
DETECT_FLAG VARCHAR,
|
|
35
|
+
DETECT_DESC VARCHAR,
|
|
36
|
+
RESULT_REMARK VARCHAR,
|
|
37
|
+
RESULT_TYPE_CODE VARCHAR,
|
|
38
|
+
METHOD_DETECTION_LIMIT VARCHAR,
|
|
39
|
+
REPORTING_DETECTION_LIMIT VARCHAR,
|
|
40
|
+
QUANTITATION_LIMIT INTEGER,
|
|
41
|
+
LAB_QUALIFIERS VARCHAR,
|
|
42
|
+
INTERPRETED_QUALIFIERS VARCHAR,
|
|
43
|
+
REPORTABLE_RESULT VARCHAR,
|
|
44
|
+
APPROVAL_CODE VARCHAR,
|
|
45
|
+
SENSITIVE_NOTPUBLIC VARCHAR,
|
|
46
|
+
TEST_TYPE VARCHAR,
|
|
47
|
+
DILUTION_FACTOR DOUBLE,
|
|
48
|
+
FRACTION VARCHAR,
|
|
49
|
+
BASIS VARCHAR,
|
|
50
|
+
TEMP_BASIS VARCHAR,
|
|
51
|
+
TEST_REMARK VARCHAR,
|
|
52
|
+
ANALYSIS_DATE_TIME TIMESTAMP_NS,
|
|
53
|
+
ANALYSIS_DATE VARCHAR,
|
|
54
|
+
ANALYSIS_TIME VARCHAR,
|
|
55
|
+
ANALYSIS_DATE_TIMEZONE VARCHAR,
|
|
56
|
+
COMPANY_NAME VARCHAR,
|
|
57
|
+
LAB_NAME_CODE VARCHAR,
|
|
58
|
+
LAB_SAMPLE_ID VARCHAR,
|
|
59
|
+
SAMPLE_TYPE_GROUP VARCHAR,
|
|
60
|
+
SAMPLE_TYPE_CODE VARCHAR,
|
|
61
|
+
SAMPLE_TYPE_DESC VARCHAR,
|
|
62
|
+
MEDIUM_CODE VARCHAR,
|
|
63
|
+
MATRIX_CODE VARCHAR,
|
|
64
|
+
START_DEPTH DOUBLE,
|
|
65
|
+
DEPTH_UNIT VARCHAR,
|
|
66
|
+
SAMPLE_DATE_TIME TIMESTAMP_NS,
|
|
67
|
+
SAMPLE_DATE VARCHAR,
|
|
68
|
+
SAMPLE_TIME VARCHAR,
|
|
69
|
+
SAMPLE_DATE_TIMEZONE VARCHAR,
|
|
70
|
+
EBATCH DOUBLE
|
|
71
|
+
);
|
|
72
|
+
|
|
73
|
+
CREATE TABLE IF NOT EXISTS staging.wiski(
|
|
74
|
+
"Timestamp" VARCHAR,
|
|
75
|
+
"Value" DOUBLE,
|
|
76
|
+
"Quality Code" BIGINT,
|
|
77
|
+
"Quality Code Name" VARCHAR,
|
|
78
|
+
ts_unitsymbol VARCHAR,
|
|
79
|
+
ts_name VARCHAR,
|
|
80
|
+
ts_id VARCHAR,
|
|
81
|
+
station_no VARCHAR,
|
|
82
|
+
station_name VARCHAR,
|
|
83
|
+
station_latitude VARCHAR,
|
|
84
|
+
station_longitude VARCHAR,
|
|
85
|
+
parametertype_id VARCHAR,
|
|
86
|
+
parametertype_name VARCHAR,
|
|
87
|
+
stationparameter_no VARCHAR,
|
|
88
|
+
stationparameter_name VARCHAR,
|
|
89
|
+
wplmn_flag BIGINT
|
|
90
|
+
);
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
-- views_analytics.sql
|
|
2
|
+
-- Views for the analytics schema
|
|
3
|
+
|
|
4
|
+
-- View: wiski_normalized
|
|
5
|
+
-- Normalized WISKI data with unit conversions and column renames
|
|
6
|
+
-- CREATE OR REPLACE VIEW analytics.wiski_normalized AS
|
|
7
|
+
-- SELECT
|
|
8
|
+
-- -- Convert °C to °F and keep other values unchanged
|
|
9
|
+
-- CASE
|
|
10
|
+
-- WHEN LOWER(ts_unitsymbol) = '°c' THEN (value * 9.0 / 5.0) + 32
|
|
11
|
+
-- WHEN ts_unitsymbol = 'kg' THEN value * 2.20462
|
|
12
|
+
-- ELSE value
|
|
13
|
+
-- END AS value,
|
|
14
|
+
|
|
15
|
+
-- -- Normalize units
|
|
16
|
+
-- CASE
|
|
17
|
+
-- WHEN LOWER(ts_unitsymbol) = '°c' THEN 'degf'
|
|
18
|
+
-- WHEN ts_unitsymbol = 'kg' THEN 'lb'
|
|
19
|
+
-- WHEN ts_unitsymbol = 'ft³/s' THEN 'cfs'
|
|
20
|
+
-- ELSE ts_unitsymbol
|
|
21
|
+
-- END AS unit,
|
|
22
|
+
|
|
23
|
+
-- -- Normalize column names
|
|
24
|
+
-- station_no AS station_id,
|
|
25
|
+
-- Timestamp AS datetime,
|
|
26
|
+
-- "Quality Code" AS quality_code,
|
|
27
|
+
-- "Quality Code Name" AS quality_code_name,
|
|
28
|
+
-- parametertype_id,
|
|
29
|
+
-- constituent
|
|
30
|
+
-- FROM staging.wiski;
|
|
31
|
+
|
|
32
|
+
-- View: observations
|
|
33
|
+
-- Combined observations from equis and wiski processed tables
|
|
34
|
+
CREATE OR REPLACE VIEW analytics.observations AS
|
|
35
|
+
SELECT datetime, value, station_id, station_origin, constituent, unit
|
|
36
|
+
FROM analytics.equis
|
|
37
|
+
UNION ALL
|
|
38
|
+
SELECT datetime, value, station_id, station_origin, constituent, unit
|
|
39
|
+
FROM analytics.wiski;
|
|
40
|
+
|
|
41
|
+
-- View: outlet_observations
|
|
42
|
+
-- Links observations to model reaches via outlets
|
|
43
|
+
CREATE OR REPLACE VIEW analytics.outlet_observations AS
|
|
44
|
+
SELECT
|
|
45
|
+
o.datetime,
|
|
46
|
+
os.outlet_id,
|
|
47
|
+
o.constituent,
|
|
48
|
+
AVG(o.value) AS value,
|
|
49
|
+
COUNT(o.value) AS count
|
|
50
|
+
FROM
|
|
51
|
+
analytics.observations AS o
|
|
52
|
+
INNER JOIN
|
|
53
|
+
outlets.outlet_stations AS os
|
|
54
|
+
ON o.station_id = os.station_id AND o.station_origin = os.station_origin
|
|
55
|
+
WHERE os.outlet_id IS NOT NULL
|
|
56
|
+
GROUP BY
|
|
57
|
+
os.outlet_id,
|
|
58
|
+
o.constituent,
|
|
59
|
+
o.datetime;
|
|
60
|
+
|
|
61
|
+
-- View: outlet_observations_with_flow
|
|
62
|
+
-- Outlet observations joined with flow and baseflow data
|
|
63
|
+
CREATE OR REPLACE VIEW analytics.outlet_observations_with_flow AS
|
|
64
|
+
WITH
|
|
65
|
+
baseflow_data AS (
|
|
66
|
+
SELECT
|
|
67
|
+
outlet_id,
|
|
68
|
+
datetime,
|
|
69
|
+
"value" AS baseflow_value
|
|
70
|
+
FROM
|
|
71
|
+
analytics.outlet_observations
|
|
72
|
+
WHERE
|
|
73
|
+
constituent = 'QB'
|
|
74
|
+
),
|
|
75
|
+
|
|
76
|
+
flow_data AS (
|
|
77
|
+
SELECT
|
|
78
|
+
outlet_id,
|
|
79
|
+
datetime,
|
|
80
|
+
"value" AS flow_value
|
|
81
|
+
FROM
|
|
82
|
+
analytics.outlet_observations
|
|
83
|
+
WHERE
|
|
84
|
+
constituent = 'Q'
|
|
85
|
+
),
|
|
86
|
+
|
|
87
|
+
constituent_data AS (
|
|
88
|
+
SELECT
|
|
89
|
+
outlet_id,
|
|
90
|
+
datetime,
|
|
91
|
+
constituent,
|
|
92
|
+
"value",
|
|
93
|
+
count
|
|
94
|
+
FROM
|
|
95
|
+
analytics.outlet_observations
|
|
96
|
+
WHERE
|
|
97
|
+
constituent NOT IN ('Q', 'QB')
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
SELECT
|
|
101
|
+
c.outlet_id,
|
|
102
|
+
c.constituent,
|
|
103
|
+
c.datetime,
|
|
104
|
+
c."value",
|
|
105
|
+
c.count,
|
|
106
|
+
f.flow_value,
|
|
107
|
+
b.baseflow_value
|
|
108
|
+
FROM
|
|
109
|
+
constituent_data AS c
|
|
110
|
+
LEFT JOIN
|
|
111
|
+
flow_data AS f
|
|
112
|
+
ON c.outlet_id = f.outlet_id
|
|
113
|
+
AND c.datetime = f.datetime
|
|
114
|
+
LEFT JOIN
|
|
115
|
+
baseflow_data AS b
|
|
116
|
+
ON c.outlet_id = b.outlet_id
|
|
117
|
+
AND c.datetime = b.datetime;
|