mpcaHydro 2.2.2__tar.gz → 2.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/PKG-INFO +1 -1
  2. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/pyproject.toml +1 -1
  3. mpcahydro-2.2.4/src/mpcaHydro/__init__.py +4 -0
  4. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/data/outlet.duckdb +0 -0
  5. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
  6. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
  7. mpcahydro-2.2.4/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +48 -0
  8. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/equis.py +49 -27
  9. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/outlets.py +8 -58
  10. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/pywisk.py +14 -7
  11. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/reports.py +3 -3
  12. mpcahydro-2.2.4/src/mpcaHydro/sql/analytics_tables.sql +20 -0
  13. mpcahydro-2.2.4/src/mpcaHydro/sql/outlets_schema.sql +53 -0
  14. mpcahydro-2.2.4/src/mpcaHydro/sql/schemas.sql +8 -0
  15. mpcahydro-2.2.4/src/mpcaHydro/sql/staging_tables.sql +90 -0
  16. mpcahydro-2.2.4/src/mpcaHydro/sql/views_analytics.sql +117 -0
  17. mpcahydro-2.2.4/src/mpcaHydro/sql/views_outlets.sql +15 -0
  18. mpcahydro-2.2.4/src/mpcaHydro/sql/views_reports.sql +56 -0
  19. mpcahydro-2.2.4/src/mpcaHydro/sql_loader.py +56 -0
  20. mpcahydro-2.2.2/src/mpcaHydro/etlSWD.py → mpcahydro-2.2.4/src/mpcaHydro/swd.py +15 -21
  21. mpcahydro-2.2.4/src/mpcaHydro/warehouse.py +313 -0
  22. mpcahydro-2.2.4/src/mpcaHydro/warehouse_functions.py +527 -0
  23. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/wiski.py +44 -3
  24. mpcahydro-2.2.4/tests/integration/README.md +48 -0
  25. mpcahydro-2.2.4/tests/integration/conftest.py +17 -0
  26. mpcahydro-2.2.4/tests/integration/test.json +258 -0
  27. mpcahydro-2.2.2/src/mpcaHydro/data/stations_wiski.gpkg-shm → mpcahydro-2.2.4/tests/integration/test.sqlite +0 -0
  28. mpcahydro-2.2.4/tests/integration/test_data_manager.py +120 -0
  29. mpcahydro-2.2.4/tests/integration/test_data_manager_integration.py +220 -0
  30. mpcahydro-2.2.4/tests/integration/test_equis_integration.py +165 -0
  31. mpcahydro-2.2.4/tests/integration/test_warehouse.py +216 -0
  32. mpcahydro-2.2.4/tests/integration/test_wiski.py +33 -0
  33. mpcahydro-2.2.4/tests/integration/test_wiski_integration.py +165 -0
  34. mpcahydro-2.2.4/tests/pixi.toml +25 -0
  35. mpcahydro-2.2.4/tests/test_data_manager_functions.py +194 -0
  36. mpcahydro-2.2.2/ERROR.FIL +0 -6
  37. mpcahydro-2.2.2/demo.py +0 -226
  38. mpcahydro-2.2.2/src/mpcaHydro/__init__.py +0 -0
  39. mpcahydro-2.2.2/src/mpcaHydro/data/stations_wiski.gpkg-wal +0 -0
  40. mpcahydro-2.2.2/src/mpcaHydro/data_manager.py +0 -290
  41. mpcahydro-2.2.2/src/mpcaHydro/etlWISKI.py +0 -624
  42. mpcahydro-2.2.2/src/mpcaHydro/etlWPLMN.py +0 -104
  43. mpcahydro-2.2.2/src/mpcaHydro/warehouse.py +0 -711
  44. mpcahydro-2.2.2/src/mpcaHydro/warehouseManager.py +0 -55
  45. mpcahydro-2.2.2/tests/integration/observations.duckdb +0 -0
  46. mpcahydro-2.2.2/tests/integration/test_dataManager.py +0 -61
  47. mpcahydro-2.2.2/tests/integration/test_warehouse.duckdb +0 -0
  48. mpcahydro-2.2.2/tests/integration/test_warehouse.py +0 -113
  49. mpcahydro-2.2.2/tests/unit/test_equis.py +0 -19
  50. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/.gitattributes +0 -0
  51. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/.gitignore +0 -0
  52. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/README.md +0 -0
  53. mpcahydro-2.2.2/src/mpcaHydro/etlCSG.py → mpcahydro-2.2.4/src/mpcaHydro/csg.py +0 -0
  54. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
  55. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
  56. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
  57. {mpcahydro-2.2.2 → mpcahydro-2.2.4}/src/mpcaHydro/xref.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mpcaHydro
3
- Version: 2.2.2
3
+ Version: 2.2.4
4
4
  Summary: Python package for downloading MPCA hydrology data
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "mpcaHydro"
7
7
  urls = { "Homepage" = "https://github.com/mfratkin1/mpcaHydro" } # ? Add this!
8
- version = "2.2.2"
8
+ version = "2.2.4"
9
9
  dependencies = [
10
10
  "pandas",
11
11
  "requests",
@@ -0,0 +1,4 @@
1
+ # mpcaHydro - Modules for downloading hydrology data from MPCA servers and databases
2
+
3
+ # Modules are imported lazily to avoid issues with missing data files
4
+ # Use: from mpcaHydro import calibration_config
@@ -0,0 +1,48 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIIjzCCBvegAwIBAgIQIOC9Vbo5TbuTGYi1z42GyjANBgkqhkiG9w0BAQsFADBg
3
+ MQswCQYDVQQGEwJHQjEYMBYGA1UEChMPU2VjdGlnbyBMaW1pdGVkMTcwNQYDVQQD
4
+ Ey5TZWN0aWdvIFB1YmxpYyBTZXJ2ZXIgQXV0aGVudGljYXRpb24gQ0EgRVYgUjM2
5
+ MB4XDTI1MDkyNTAwMDAwMFoXDTI2MDkyNTIzNTk1OVowgbAxGjAYBgNVBAUTEUdv
6
+ dmVybm1lbnQgRW50aXR5MRMwEQYLKwYBBAGCNzwCAQMTAlVTMRowGAYDVQQPExFH
7
+ b3Zlcm5tZW50IEVudGl0eTELMAkGA1UEBhMCVVMxEjAQBgNVBAgTCU1pbm5lc290
8
+ YTEbMBkGA1UEChMSU3RhdGUgb2YgTWlubmVzb3RhMSMwIQYDVQQDExp3aXNraXdl
9
+ YjAxLnBjYS5zdGF0ZS5tbi51czCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoC
10
+ ggIBAJ8aX33B4Jq37RF0QcjYrsxKwjiFLDcgrLGzsUCm/WJUuvsX3pbU8rfpYte8
11
+ BuI6XyudiPpwqPezSInc3hFCbZzYmdcy/RgjsQ1Uqsfj+0NB3A44CHGh6NHY59X0
12
+ thAroUgnz1gcnHfHiHvJPHwYOAnhbVYel8hBBfIq2wqnk2B1Z99DM9u/Q9/ZGpIg
13
+ 2HvbWg+8YjoMVQl3vH1jycE0i3VVTLWr5OfkV1jhl/rdFSq+Ott8Ipmni0J0GBox
14
+ EHNKaTbBwB5EzV6a67uiiA6x5rkNEIM0oMCGWdAvm5hT2gzwVwj+kvSgQHsls4jy
15
+ Gyjxlcy7N3A43dxTs/203jiGk9M55C9JEf9RbmA0muDxn7OlAmEzCyMY20BdlGBI
16
+ fBkPtByIE2MOOIz6sG0PdhO57+7nIvSMftgJfjlhszwtMrpLLQr2gOqk5/XFOqw4
17
+ SSXRQC24mKQ78q1OIohhfeLJdecAkPGcb0uSYQdpOA8RO0hgBGvsFBu/8zn/J3WM
18
+ Pn2wYvE13xz39XQ6G3ySH/MqC7cY9FlzM+RMk6qQyionZ9O7lC6tFufyJNX1E9Ps
19
+ W9E4enrunXHG5k+5glA/43hmhs8CbaeK4xdv1ZuJj7TvLLPAZ3+ojh6tzyV2iK66
20
+ YRzWOW3IaXIZ1/hoWlFHr++WcWJi72k3C85TdUITvxJKoqgrAgMBAAGjggNyMIID
21
+ bjAfBgNVHSMEGDAWgBSYLV4ej+tU9Ln/VZWtTMd+pJiuezAdBgNVHQ4EFgQUaW8x
22
+ VX6iXFQFTWYYY6ZZZyCBl9owDgYDVR0PAQH/BAQDAgWgMAwGA1UdEwEB/wQCMAAw
23
+ HQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMEkGA1UdIARCMEAwNQYMKwYB
24
+ BAGyMQECAQUBMCUwIwYIKwYBBQUHAgEWF2h0dHBzOi8vc2VjdGlnby5jb20vQ1BT
25
+ MAcGBWeBDAEBMFQGA1UdHwRNMEswSaBHoEWGQ2h0dHA6Ly9jcmwuc2VjdGlnby5j
26
+ b20vU2VjdGlnb1B1YmxpY1NlcnZlckF1dGhlbnRpY2F0aW9uQ0FFVlIzNi5jcmww
27
+ gYQGCCsGAQUFBwEBBHgwdjBPBggrBgEFBQcwAoZDaHR0cDovL2NydC5zZWN0aWdv
28
+ LmNvbS9TZWN0aWdvUHVibGljU2VydmVyQXV0aGVudGljYXRpb25DQUVWUjM2LmNy
29
+ dDAjBggrBgEFBQcwAYYXaHR0cDovL29jc3Auc2VjdGlnby5jb20wRQYDVR0RBD4w
30
+ PIIad2lza2l3ZWIwMS5wY2Euc3RhdGUubW4udXOCHnd3dy53aXNraXdlYjAxLnBj
31
+ YS5zdGF0ZS5tbi51czCCAX4GCisGAQQB1nkCBAIEggFuBIIBagFoAHcA2AlVO5RP
32
+ ev/IFhlvlE+Fq7D4/F6HVSYPFdEucrtFSxQAAAGZgmRjOAAABAMASDBGAiEAmmM+
33
+ KMiBitsTZegFbO80sXyZSvBejMJ1zoyOKAtKLmwCIQDfbCgMVJoTyhmqTyTHSd/w
34
+ OmlNc/v9YCiBaYgq3mpvwwB2AK9niDtXsE7dj6bZfvYuqOuBCsdxYPAkXlXWDC/n
35
+ hYc6AAABmYJkY6MAAAQDAEcwRQIgQduMFxbWG6okiji3zTQsobhymG4Hj06i3q95
36
+ /+xIJLUCIQC8DYbjvaUudd6TgiJWoKMYf9pYIirfPJmID129oLkAZQB1AKyrMHBs
37
+ 6+yEMfQT0vSRXxEeQiRDsfKmjE88KzunHgLDAAABmYJkYswAAAQDAEYwRAIgCyMR
38
+ cyQpwfFfIpp5qTTjuZVK1IvnGoIWvWTYbQkojzYCIAM+XXKTC7a91IwdC51PidF2
39
+ Pr21+PSOptD7pN8C9pD7MA0GCSqGSIb3DQEBCwUAA4IBgQBqfDG3CPDoY1tE56fp
40
+ gGcqVlXknE5ttqTqDzJtQUagf6OAQ52d8McwsWsV7IIjpMXCNikJFw7crE45ICg8
41
+ FCaqUkpQb9Du+OhNUJNHQiC82gPd4xo3VTK3mM2wrhesXrOhenpzhz0z5TRFXX2J
42
+ 8dl2k/sC0fz7QBzw1iCmucG+4f3rvQdr5zfHVqblSC0k6fP6N7cVSCB3d+nMZiHP
43
+ 2FhHJnWHzLqPynarKj7aBugCwv+/jAkT8xfyzYhf4X7IH1L5uCHKB+phHj1/AqG2
44
+ n2aVDooyfCYqdAwFiUCtg8iEeXvXBc9Kcov81H94oq1yuZrQOy6lrJ8UpnDO5Zlu
45
+ C4jzg/mSgDioLmSrIQYFEp8R77xbjRBMb0mqko/Hvp5bysb5XTWSgmJrR+3/b/0E
46
+ jkIBv5LdEuxNrd28L1rneDEztIVbTc7bq/V9fQlq6j4YJ+BMh442DMrGnRweicD5
47
+ PFFFejW+IfQT8Z9GwApcgGXNne3SzfAd5PtJqxdXC1KZkbk=
48
+ -----END CERTIFICATE-----
@@ -2,7 +2,7 @@
2
2
 
3
3
  from datetime import datetime, timezone, timedelta
4
4
  import pandas as pd
5
- from typing import Union
5
+ from typing import Union, Optional
6
6
  import oracledb
7
7
  import duckdb
8
8
 
@@ -33,10 +33,18 @@ def connect(user: str, password: str, host: str = "DELTAT", port: int = 1521, si
33
33
  sid=sid)
34
34
  return CONNECTION
35
35
 
36
- def close_connection():
37
- '''Close the global Oracle database connection if it exists.'''
36
+ def close_connection(connection: Optional[oracledb.Connection] = None):
37
+ '''Close the Oracle database connection.
38
+
39
+ Args:
40
+ connection: Optional connection to close. If not provided, closes global CONNECTION.
41
+ '''
38
42
  global CONNECTION
39
- if CONNECTION:
43
+ if connection is not None:
44
+ connection.close()
45
+ if connection is CONNECTION:
46
+ CONNECTION = None
47
+ elif CONNECTION is not None:
40
48
  CONNECTION.close()
41
49
  CONNECTION = None
42
50
 
@@ -80,11 +88,32 @@ def to_dataframe(odb_cursor):
80
88
  return df
81
89
 
82
90
  #%% Query for station locations with HSPF related constituents
83
-
84
- def download(station_ids):
91
+
92
+ def info(station_ids, connection: Optional[oracledb.Connection] = None):
93
+ '''Get information for given station IDs from Oracle database.'''
94
+ conn = connection if connection is not None else CONNECTION
95
+ if conn is None:
96
+ raise ValueError("No connection provided and global CONNECTION is not set. Call connect() first or pass a connection.")
97
+
98
+ df = normalize(download(station_ids, connection=conn)).drop_duplicates(subset=['station_id','constituent'])
99
+ return df
100
+
101
+
102
+
103
+
104
+ def download(station_ids, connection: Optional[oracledb.Connection] = None):
85
105
  '''Download data for given station IDs from Oracle database.
86
106
  This grabs data from the Data access Layer (DAL) equis result view for
87
- river/stream locations and HSPF related constituents only.'''
107
+ river/stream locations and HSPF related constituents only.
108
+
109
+ Args:
110
+ station_ids: List of station IDs to download
111
+ connection: Optional Oracle connection. If not provided, uses global CONNECTION.
112
+ '''
113
+ conn = connection if connection is not None else CONNECTION
114
+ if conn is None:
115
+ raise ValueError("No connection provided and global CONNECTION is not set. Call connect() first or pass a connection.")
116
+
88
117
  placeholders, binds = make_placeholders(station_ids)
89
118
  query = f"""
90
119
  SELECT
@@ -122,7 +151,7 @@ SELECT
122
151
  AND mpca_dal.eq_sample.sample_method IN ('G-EVT', 'G', 'FIELDMSROBS', 'LKSURF1M', 'LKSURF2M', 'LKSURFOTH')
123
152
  AND mpca_dal.mv_eq_result.sys_loc_code IN ({placeholders})
124
153
  """
125
- with CONNECTION.cursor() as cursor:
154
+ with conn.cursor() as cursor:
126
155
  cursor.execute(query,binds)
127
156
  return to_dataframe(cursor)
128
157
 
@@ -441,8 +470,16 @@ def transform(df):
441
470
  # GROUP BY station_id, DATE_TRUNC('hour', datetime), constituent, unit
442
471
  # """ )
443
472
 
444
- def fetch_station_locations():
445
- '''Fetch station location data for stations with HSPF related constituents.'''
473
+ def fetch_station_locations(connection: Optional[oracledb.Connection] = None):
474
+ '''Fetch station location data for stations with HSPF related constituents.
475
+
476
+ Args:
477
+ connection: Optional Oracle connection. If not provided, uses global CONNECTION.
478
+ '''
479
+ conn = connection if connection is not None else CONNECTION
480
+ if conn is None:
481
+ raise ValueError("No connection provided and global CONNECTION is not set. Call connect() first or pass a connection.")
482
+
446
483
  query ="""SELECT DISTINCT
447
484
  m.SYS_LOC_CODE,
448
485
  stn.LONGITUDE,
@@ -467,22 +504,7 @@ def fetch_station_locations():
467
504
  'TEMP-W',
468
505
  '7664-41-7')
469
506
  """
470
- with CONNECTION.cursor() as cursor:
507
+ with conn.cursor() as cursor:
471
508
  cursor.execute(query)
472
509
  df = to_dataframe(cursor)
473
-
474
- # dups = set(df.loc[df['SYS_LOC_CODE'].isin(df.loc[df['SYS_LOC_CODE'].duplicated()]['SYS_LOC_CODE']),'SYS_LOC_CODE'].to_list())
475
- # for dup in dups:
476
- # #percent difference between lat/long values
477
- # sub = df.loc[df['SYS_LOC_CODE'] == dup]
478
- # lat_diff = abs(sub['LATITUDE'].max() - sub['LATITUDE'].min()) / ((sub['LATITUDE'].max() + sub['LATITUDE'].min()) / 2) * 100
479
- # long_diff = abs(sub['LONGITUDE'].max() - sub['LONGITUDE'].min()) / ((sub['LONGITUDE'].max() + sub['LONGITUDE'].min()) / 2) * 100
480
- # print(f'Duplicate station {dup} has {lat_diff:.6f}% latitude difference')
481
- # print(f'Duplicate station {dup} has {long_diff:.6f}% longitude difference')
482
-
483
-
484
- # geometry = gpd.points_from_xy(df['LONGITUDE'], df['LATITUDE'])
485
- # gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")
486
- # filename = 'EQ_STATION_' + str(date.today()) + '.gpkg'
487
- # gdf.to_file(save_path.joinpath(filename), driver = 'GPKG')
488
- # gdf.rename(columns={'SYS_LOC_CODE':'station_id'}, inplace=True)
510
+ return df
@@ -9,6 +9,7 @@ from pathlib import Path
9
9
  import geopandas as gpd
10
10
  import pandas as pd
11
11
  import duckdb
12
+ from mpcaHydro.sql_loader import get_outlets_schema_sql
12
13
  #from hspf_tools.calibrator import etlWISKI, etlSWD
13
14
 
14
15
 
@@ -79,10 +80,14 @@ def equis_station_opnids(model_name):
79
80
  opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
80
81
  return split_opnids(opnids)
81
82
 
82
- def station_opnids(model_name):
83
- opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name')['opnids'].str.split(',').to_list()
83
+ def mapped_station_opnids(station_id, station_origin):
84
+ opnids = MODL_DB.dropna(subset=['opnids']).query('station_id == @station_id and source == @station_origin')['opnids'].str.split(',').to_list()
84
85
  return split_opnids(opnids)
85
86
 
87
+ def mapped_stations(model_name,station_origin):
88
+ assert(station_origin in ['wiski', 'equis'])
89
+ return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == @station_origin')['station_id'].tolist()
90
+
86
91
  def mapped_equis_stations(model_name):
87
92
  return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['station_id'].tolist()
88
93
 
@@ -108,9 +113,8 @@ def init_db(db_path: str,reset: bool = False):
108
113
  db_path = Path(db_path)
109
114
  if reset and db_path.exists():
110
115
  db_path.unlink()
111
-
112
116
  with connect(db_path.as_posix(),False) as con:
113
- con.execute(OUTLETS_SCHEMA)
117
+ con.execute(get_outlets_schema_sql())
114
118
 
115
119
 
116
120
 
@@ -293,60 +297,6 @@ def add_reach(con,
293
297
  )
294
298
 
295
299
 
296
- OUTLETS_SCHEMA = """-- schema.sql
297
- -- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
298
- -- Compatible with DuckDB and SQLite.
299
-
300
- -- Table 1: outlets
301
- -- Represents a logical grouping that ties stations and reaches together.
302
- CREATE SCHEMA IF NOT EXISTS outlets;
303
-
304
- CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
305
- outlet_id INTEGER PRIMARY KEY,
306
- repository_name TEXT NOT NULL,
307
- outlet_name TEXT,
308
- notes TEXT -- optional: general notes about the outlet grouping
309
- );
310
-
311
- -- Table 2: outlet_stations
312
- -- One-to-many: outlet -> stations
313
- CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
314
- outlet_id INTEGER NOT NULL,
315
- station_id TEXT NOT NULL,
316
- station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
317
- repository_name TEXT NOT NULL, -- repository model the station is physically located in
318
- true_opnid INTEGER NOT NULL, -- The specific reach the station physically sits on (optional)
319
- comments TEXT, -- Per-station comments, issues, etc.
320
- CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
321
- FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
322
- );
323
-
324
- -- Table 3: outlet_reaches
325
- -- One-to-many: outlet -> reaches
326
- -- A reach can appear in multiple outlets, enabling many-to-many overall.
327
- CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
328
- outlet_id INTEGER NOT NULL,
329
- reach_id INTEGER NOT NULL, -- model reach identifier (aka opind)
330
- repository_name TEXT NOT NULL, -- optional: where the mapping comes from
331
- FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
332
- );
333
-
334
- -- Useful views:
335
-
336
- -- View: station_reach_pairs
337
- -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
338
- CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
339
- SELECT
340
- s.outlet_id,
341
- s.station_id,
342
- s.station_origin,
343
- r.reach_id,
344
- r.repository_name
345
- FROM outlets.outlet_stations AS s
346
- JOIN outlets.outlet_reaches AS r
347
- ON s.outlet_id = r.outlet_id;
348
-
349
- """
350
300
 
351
301
  #row = modl_db.MODL_DB.iloc[0]
352
302
 
@@ -4,14 +4,16 @@ Created on Mon Jul 10 16:18:03 2023
4
4
 
5
5
  @author: mfratki
6
6
  """
7
+ from pathlib import Path
7
8
  import requests
8
9
  from requests.exceptions import ConnectionError, Timeout, HTTPError, RequestException
9
10
  import pandas as pd
10
11
  import time
11
12
 
13
+ CERT_PATH = str(Path(__file__).resolve().parent/'data\\wiskiweb01.pca.state.mn.us.crt')
12
14
  #TODO: Use this url to make sure web service is working https://wiskiweb01.pca.state.mn.us/
13
15
  class Service():
14
- base_url = 'http://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
16
+ base_url = 'https://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
15
17
  base_dict = {
16
18
  'datasource': '0',
17
19
  'service': 'kisters',
@@ -28,7 +30,7 @@ class Service():
28
30
  try:
29
31
  # Using requests.head() to fetch headers is faster than requests.get()
30
32
  # as it doesn't download the full content
31
- response = requests.head('http://wiskiweb01.pca.state.mn.us', timeout=timeout)
33
+ response = requests.head('https://wiskiweb01.pca.state.mn.us', timeout=timeout)
32
34
 
33
35
  # raise_for_status() raises an HTTPError for 4xx or 5xx status codes
34
36
  response.raise_for_status()
@@ -51,7 +53,7 @@ class Service():
51
53
 
52
54
  def _requestTypes(self):
53
55
  url = self.url({'request': 'getrequestinfo'})
54
- return requests.get(url).json()[0]
56
+ return requests.get(url,verify=CERT_PATH).json()[0]
55
57
 
56
58
  def getRequests(self):
57
59
  return list(self._requestTypes()['Requests'].keys())
@@ -70,7 +72,7 @@ class Service():
70
72
 
71
73
  def info(self,request_type):
72
74
  url = self.url({'request': 'getrequestinfo'})
73
- response = requests.get(url)
75
+ response = requests.get(url, verify=CERT_PATH)
74
76
  get_requests = response.json()
75
77
  return get_requests[0]['Requests'].keys()
76
78
 
@@ -93,7 +95,7 @@ class Service():
93
95
 
94
96
  def get_json(self,args_dict):
95
97
  # Download request
96
- self.response = requests.get(self.url(args_dict))
98
+ self.response = requests.get(self.url(args_dict), verify=CERT_PATH)
97
99
  if self.response.status_code != 200:
98
100
  print('Error: ' + self.response.json()['message'])
99
101
  self.response.raise_for_status() # raises exception when not a 2xx response
@@ -192,7 +194,10 @@ def construct_aggregation(interval, aggregation_type):
192
194
  return f'aggregate({interval}~{aggregation_type})'
193
195
 
194
196
  def validate_aggregation_type(aggregation_type):
195
- assert(aggregation_type in VALID_AGGREGATION_TYPES or validate_percentile(aggregation_type))
197
+ if aggregation_type.startswith('perc-'):
198
+ validate_percentile(aggregation_type)
199
+ else:
200
+ assert(aggregation_type in VALID_AGGREGATION_TYPES)
196
201
  return True
197
202
 
198
203
  def validate_percentile(aggregation_type):
@@ -297,7 +302,8 @@ def get_stations(
297
302
  return df
298
303
 
299
304
  def get_ts_ids(
300
- station_nos,
305
+ station_nos = None,
306
+ ts_ids = None,
301
307
  parametertype_id = None,
302
308
  stationparameter_no = None,
303
309
  stationgroup_id = None,
@@ -317,6 +323,7 @@ def get_ts_ids(
317
323
 
318
324
  args ={'request': 'getTimeseriesList',
319
325
  'station_no': station_nos,
326
+ 'ts_id': ts_ids,
320
327
  'parametertype_id': parametertype_id,
321
328
  'stationparameter_no': stationparameter_no,
322
329
  'ts_name' : ts_name,
@@ -12,9 +12,9 @@ class reportManager():
12
12
  with duckdb.connect(self.db_path,read_only=True) as con:
13
13
  return wiski_qc_counts(con)
14
14
 
15
- def constituent_summary(self,constituent: str = None):
15
+ def station_summary(self,constituent: str = None):
16
16
  with duckdb.connect(self.db_path,read_only=True) as con:
17
- return constituent_summary(con,constituent)
17
+ return station_summary(con,constituent)
18
18
 
19
19
  def station_reach_pairs(self):
20
20
  with duckdb.connect(self.db_path,read_only=True) as con:
@@ -51,7 +51,7 @@ def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
51
51
  df = con.execute(query).fetch_df()
52
52
  return df
53
53
 
54
- def constituent_summary(con: duckdb.DuckDBPyConnection,constituent: str = None):
54
+ def station_summary(con: duckdb.DuckDBPyConnection,constituent: str = None):
55
55
 
56
56
  query = '''
57
57
  SELECT *,
@@ -0,0 +1,20 @@
1
+ -- analytics_tables.sql
2
+ -- Create tables in the analytics schema for processed/transformed data
3
+
4
+ CREATE TABLE IF NOT EXISTS analytics.equis (
5
+ datetime TIMESTAMP,
6
+ value DOUBLE,
7
+ station_id TEXT,
8
+ station_origin TEXT,
9
+ constituent TEXT,
10
+ unit TEXT
11
+ );
12
+
13
+ CREATE TABLE IF NOT EXISTS analytics.wiski (
14
+ datetime TIMESTAMP,
15
+ value DOUBLE,
16
+ station_id TEXT,
17
+ station_origin TEXT,
18
+ constituent TEXT,
19
+ unit TEXT
20
+ );
@@ -0,0 +1,53 @@
1
+ -- outlets_schema.sql
2
+ -- Schema for managing associations between model reaches and observation stations via outlets
3
+ -- Compatible with DuckDB and SQLite
4
+
5
+ CREATE SCHEMA IF NOT EXISTS outlets;
6
+
7
+ -- Table 1: outlet_groups
8
+ -- Represents a logical grouping that ties stations and reaches together
9
+ CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
10
+ outlet_id INTEGER PRIMARY KEY,
11
+ repository_name TEXT NOT NULL,
12
+ outlet_name TEXT,
13
+ notes TEXT
14
+ );
15
+
16
+ -- Table 2: outlet_stations
17
+ -- One-to-many: outlet -> stations
18
+ CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
19
+ outlet_id INTEGER NOT NULL,
20
+ station_id TEXT NOT NULL,
21
+ station_origin TEXT NOT NULL,
22
+ repository_name TEXT NOT NULL,
23
+ true_opnid INTEGER NOT NULL,
24
+ comments TEXT,
25
+ CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
26
+ FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
27
+ );
28
+
29
+ -- Table 3: outlet_reaches
30
+ -- One-to-many: outlet -> reaches
31
+ -- A reach can appear in multiple outlets, enabling many-to-many overall
32
+ CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
33
+ outlet_id INTEGER NOT NULL,
34
+ reach_id INTEGER NOT NULL,
35
+ repository_name TEXT NOT NULL,
36
+ FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
37
+ );
38
+
39
+ -- Useful views:
40
+
41
+ -- View: station_reach_pairs
42
+ -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
43
+ CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
44
+ SELECT
45
+ s.outlet_id,
46
+ s.station_id,
47
+ s.station_origin,
48
+ r.reach_id,
49
+ r.repository_name
50
+ FROM outlets.outlet_stations AS s
51
+ JOIN outlets.outlet_reaches AS r
52
+ ON s.outlet_id = r.outlet_id;
53
+
@@ -0,0 +1,8 @@
1
+ -- schemas.sql
2
+ -- Create all database schemas for the data warehouse
3
+
4
+ CREATE SCHEMA IF NOT EXISTS staging;
5
+ CREATE SCHEMA IF NOT EXISTS analytics;
6
+ CREATE SCHEMA IF NOT EXISTS reports;
7
+ CREATE SCHEMA IF NOT EXISTS outlets;
8
+ CREATE SCHEMA IF NOT EXISTS mappings;
@@ -0,0 +1,90 @@
1
+ -- staging_tables.sql
2
+ -- Create tables in the staging schema for raw data from external sources
3
+
4
+ CREATE TABLE IF NOT EXISTS staging.equis(
5
+ LATITUDE DOUBLE,
6
+ LONGITUDE DOUBLE,
7
+ WID_LIST VARCHAR,
8
+ SAMPLE_METHOD VARCHAR,
9
+ SAMPLE_REMARK VARCHAR,
10
+ FACILITY_ID BIGINT,
11
+ FACILITY_NAME VARCHAR,
12
+ FACILITY_TYPE VARCHAR,
13
+ SYS_LOC_CODE VARCHAR,
14
+ LOC_NAME VARCHAR,
15
+ LOC_TYPE VARCHAR,
16
+ LOC_TYPE_2 VARCHAR,
17
+ TASK_CODE VARCHAR,
18
+ SAMPLE_ID BIGINT,
19
+ SYS_SAMPLE_CODE VARCHAR,
20
+ TEST_ID BIGINT,
21
+ ANALYTE_TYPE VARCHAR,
22
+ ANALYTE_TYPE_DESC VARCHAR,
23
+ ANALYTIC_METHOD VARCHAR,
24
+ PREFERRED_NAME VARCHAR,
25
+ PARAMETER VARCHAR,
26
+ CAS_RN VARCHAR,
27
+ CHEMICAL_NAME VARCHAR,
28
+ GTLT VARCHAR,
29
+ RESULT_TEXT VARCHAR,
30
+ RESULT_NUMERIC DOUBLE,
31
+ RESULT_UNIT VARCHAR,
32
+ STAT_TYPE INTEGER,
33
+ VALUE_TYPE VARCHAR,
34
+ DETECT_FLAG VARCHAR,
35
+ DETECT_DESC VARCHAR,
36
+ RESULT_REMARK VARCHAR,
37
+ RESULT_TYPE_CODE VARCHAR,
38
+ METHOD_DETECTION_LIMIT VARCHAR,
39
+ REPORTING_DETECTION_LIMIT VARCHAR,
40
+ QUANTITATION_LIMIT INTEGER,
41
+ LAB_QUALIFIERS VARCHAR,
42
+ INTERPRETED_QUALIFIERS VARCHAR,
43
+ REPORTABLE_RESULT VARCHAR,
44
+ APPROVAL_CODE VARCHAR,
45
+ SENSITIVE_NOTPUBLIC VARCHAR,
46
+ TEST_TYPE VARCHAR,
47
+ DILUTION_FACTOR DOUBLE,
48
+ FRACTION VARCHAR,
49
+ BASIS VARCHAR,
50
+ TEMP_BASIS VARCHAR,
51
+ TEST_REMARK VARCHAR,
52
+ ANALYSIS_DATE_TIME TIMESTAMP_NS,
53
+ ANALYSIS_DATE VARCHAR,
54
+ ANALYSIS_TIME VARCHAR,
55
+ ANALYSIS_DATE_TIMEZONE VARCHAR,
56
+ COMPANY_NAME VARCHAR,
57
+ LAB_NAME_CODE VARCHAR,
58
+ LAB_SAMPLE_ID VARCHAR,
59
+ SAMPLE_TYPE_GROUP VARCHAR,
60
+ SAMPLE_TYPE_CODE VARCHAR,
61
+ SAMPLE_TYPE_DESC VARCHAR,
62
+ MEDIUM_CODE VARCHAR,
63
+ MATRIX_CODE VARCHAR,
64
+ START_DEPTH DOUBLE,
65
+ DEPTH_UNIT VARCHAR,
66
+ SAMPLE_DATE_TIME TIMESTAMP_NS,
67
+ SAMPLE_DATE VARCHAR,
68
+ SAMPLE_TIME VARCHAR,
69
+ SAMPLE_DATE_TIMEZONE VARCHAR,
70
+ EBATCH DOUBLE
71
+ );
72
+
73
+ CREATE TABLE IF NOT EXISTS staging.wiski(
74
+ "Timestamp" VARCHAR,
75
+ "Value" DOUBLE,
76
+ "Quality Code" BIGINT,
77
+ "Quality Code Name" VARCHAR,
78
+ ts_unitsymbol VARCHAR,
79
+ ts_name VARCHAR,
80
+ ts_id VARCHAR,
81
+ station_no VARCHAR,
82
+ station_name VARCHAR,
83
+ station_latitude VARCHAR,
84
+ station_longitude VARCHAR,
85
+ parametertype_id VARCHAR,
86
+ parametertype_name VARCHAR,
87
+ stationparameter_no VARCHAR,
88
+ stationparameter_name VARCHAR,
89
+ wplmn_flag BIGINT
90
+ );