mpcaHydro 2.2.6__tar.gz → 2.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/PKG-INFO +1 -1
  2. mpcahydro-2.2.8/demo.py +226 -0
  3. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/pyproject.toml +1 -1
  4. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/outlets.py +14 -1
  5. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/pywisk.py +5 -5
  6. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/swd.py +21 -15
  7. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/warehouse.py +55 -0
  8. mpcahydro-2.2.8/tests/integration/test_dataManager.py +61 -0
  9. mpcahydro-2.2.8/tests/integration/test_warehouse.duckdb +0 -0
  10. mpcahydro-2.2.8/tests/unit/test_equis.py +19 -0
  11. mpcahydro-2.2.6/tests/pixi.toml +0 -25
  12. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/.gitattributes +0 -0
  13. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/.gitignore +0 -0
  14. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/README.md +0 -0
  15. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/__init__.py +0 -0
  16. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/csg.py +0 -0
  17. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
  18. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
  19. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
  20. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/outlet.duckdb +0 -0
  21. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
  22. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
  23. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +0 -0
  24. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/equis.py +0 -0
  25. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/reports.py +0 -0
  26. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/analytics_tables.sql +0 -0
  27. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/outlets_schema.sql +0 -0
  28. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/schemas.sql +0 -0
  29. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/staging_tables.sql +0 -0
  30. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/views_analytics.sql +0 -0
  31. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/views_outlets.sql +0 -0
  32. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql/views_reports.sql +0 -0
  33. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/sql_loader.py +0 -0
  34. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/warehouse_functions.py +0 -0
  35. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/wiski.py +0 -0
  36. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/src/mpcaHydro/xref.py +0 -0
  37. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/README.md +0 -0
  38. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/conftest.py +0 -0
  39. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_data_manager.py +0 -0
  40. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_data_manager_integration.py +0 -0
  41. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_equis_integration.py +0 -0
  42. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_warehouse.py +0 -0
  43. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_wiski.py +0 -0
  44. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/integration/test_wiski_integration.py +0 -0
  45. {mpcahydro-2.2.6 → mpcahydro-2.2.8}/tests/test_data_manager_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mpcaHydro
3
- Version: 2.2.6
3
+ Version: 2.2.8
4
4
  Summary: Python package for downloading MPCA hydrology data
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -0,0 +1,226 @@
1
+ #%%
2
+ from mpcaHydro.data_manager import dataManager
3
+ from pyhcal.repository import Repository
4
+ from mpcaHydro import outlets
5
+ import duckdb
6
+ from mpcaHydro import equis, warehouse, wiski
7
+ from hspf.hspfModel import hspfModel
8
+ from hspf.uci import UCI
9
+ from mpcaHydro import etlSWD
10
+
11
+
12
+ #%%
13
+ '''
14
+ New approach. Directly load to warehouse from downloads.
15
+ Store raw and processed data in warehouse. For large timeseries I could store
16
+ as parquet files. The transformations using pandas take a bit of time. I imagine doing them
17
+ within duckdb would be faster.
18
+
19
+ '''
20
+
21
+ # with warehouse.connect(db_path) as con:
22
+ # df = con.execute("SELECT * FROM staging.wiski").df()
23
+ # df = wiski.transform(df,filter_qc_codes = False)
24
+
25
+ #%%
26
# Run configuration for the demo: target model, warehouse file, download
# window, and load options.
import os

model_name = 'Nemadji'
db_path = f'C:/Users/mfratki/Documents/{model_name}.duckdb'
start_year = 1996
end_year = 2030
replace = True
filter_qc_codes = True
equis_stations = outlets.equis_stations(model_name)
wiski_stations = outlets.wiski_stations(model_name)

# Credentials come from the environment so they are never shipped inside the
# published package. A KeyError here means the variables are not set.
# NOTE(review): the password previously committed on this line has been
# published and should be rotated.
equis.connect(os.environ['MPCA_ORACLE_USERNAME'],
              password=os.environ['MPCA_ORACLE_PASSWORD'])
warehouse.init_db(db_path, reset=True)
36
+
37
+
38
+ #%% Old approach. Store as individual processed station files then load to warehouse
39
+ #df_equis = equis.download(equis_stations)
40
+ #df_wiski = wiski.download(wiski_stations,start_year = start_year, end_year = end_year)
41
+
42
+ #%% equis
43
+
44
+
45
+
46
+
47
def download_equis_data(db_path,station_ids,replace = False):
    """
    Download EQuIS data for ``station_ids`` and load it into the warehouse.

    The downloaded frame is written to ``staging.equis`` and its
    ``equis.transform`` output to ``analytics.equis``. When the download is
    empty nothing is written and a message is printed instead.
    ``replace`` is forwarded to ``warehouse.load_df_to_table``.
    """
    with warehouse.connect(db_path, read_only=False) as con:
        raw = equis.download(station_ids)
        if raw.empty:
            print('No data neccesary for HSPF calibration available from equis for stations:',station_ids)
            return
        warehouse.load_df_to_table(con, raw, 'staging.equis', replace=replace)
        transformed = equis.transform(raw)
        warehouse.load_df_to_table(con, transformed, 'analytics.equis', replace=replace)
55
+
56
def download_wiski_data(db_path,station_ids,replace = False):
    """
    Download WISKI data for ``station_ids`` and load it into the warehouse.

    The download window is taken from the module-level ``start_year`` and
    ``end_year``. The downloaded frame is written to ``staging.wiski`` and its
    ``wiski.transform`` output to ``analytics.wiski``. When the download is
    empty nothing is written and a message is printed instead.
    ``replace`` is forwarded to ``warehouse.load_df_to_table``.
    """
    with warehouse.connect(db_path, read_only=False) as con:
        raw = wiski.download(station_ids, start_year=start_year, end_year=end_year)
        if raw.empty:
            print('No data neccesary for HSPF calibration available from wiski for stations:',station_ids)
            return
        warehouse.load_df_to_table(con, raw, 'staging.wiski', replace=replace)
        transformed = wiski.transform(raw)
        warehouse.load_df_to_table(con, transformed, 'analytics.wiski', replace=replace)
64
+
65
+
66
+ # Add to warehouse from custom df. Must contain required normalized columns.
67
+ with warehouse.connect(db_path,read_only = False) as con:
68
+ if replace:
69
+ warehouse.drop_station_id(con,station_id,station_origin='equis')
70
+ warehouse.add_to_table(con,df, 'staging','equis_normalized')
71
+
72
+
73
+ warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
74
+ df = equis.normalize(df.copy())
75
+ warehouse.add_to_table(con,df, 'staging','equis_normalized')
76
+ df = equis.transform(df)
77
+ warehouse.add_to_table(con,df, 'analytics','equis')
78
+
79
+
80
+
81
+ #%% swd
82
+
83
+ df = etlSWD.download(equis_stations)
84
+
85
+ with warehouse.connect(db_path,read_only = False) as con:
86
+ warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
87
+ df = equis.normalize(df.copy())
88
+ warehouse.add_to_table(con,df, 'staging','equis_normalized')
89
+ df = equis.transform(df)
90
+ warehouse.add_to_table(con,df, 'analytics','equis')
91
+ #%% wiski
92
+
93
+
94
+
95
+ if station_origin == 'wiski':
96
+ df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
97
+ warehouse.load_df_to_staging(con,df, 'wiski_raw', replace = replace)
98
+ df = wiski.normalize(df.copy())
99
+ warehouse.add_to_table(con,df, 'staging','wiski_normalized')
100
+ df = wiski.transform(df,filter_qc_codes = filter_qc_codes)
101
+ warehouse.add_to_table(con,df, 'analytics','wiski') # method includes normalization
102
+
103
+ if station_origin == 'swd':
104
+ df = pd.concat([etlSWD.download(station_id) for station_id in station_ids])
105
+ warehouse.load_df_to_staging(con,df, 'equis_raw', replace = replace)
106
+ df = etlSWD.transform(df.copy())
107
+ warehouse.add_to_table(con,df, 'analytics','equis')
108
+ warehouse.update_views(con)
109
+
110
+ with warehouse.connect(db_path) as con:
111
+ warehouse.update_views(con)
112
+
113
+
114
+ #%%
115
+
116
+ import requests
117
+ url = 'http://ifrshiny.seas.umich.edu/mglp/'
118
+ requests.get(url)
119
+
120
+
121
+
122
+ db_path = 'C:/Users/mfratki/Documents/Rum.duckdb'
123
+ modl_db.build_outlet_db(db_path)
124
+ con = duckdb.connect(db_path)
125
+ con.execute("SELECT * FROM station_reach_pairs").df()
126
+ con.execute('SELECT * FROM station_reach_pairs WHERE outlet_id = 76').df()
127
+
128
+ # Need to remove duplicates from MODL_DB
129
+ modl_db.MODL_DB.loc[modl_db.MODL_DB.duplicated(['station_id','source'])]
130
+
131
+ #%%
132
+ dm = dataManager('C:/Users/mfratki/Documents/')
133
+ dm._build_warehouse()
134
+ equis_stations = modl_db.equis_stations('Nemadji')
135
+ wiski_stations = modl_db.wiski_stations('Nemadji')
136
+
137
+ #%% Old approach. Store as individual processed station files then load to warehouse
138
+ for station_id in equis_stations:
139
+ dm._download_station_data(station_id,'equis', True)
140
+
141
+ for station_id in wiski_stations:
142
+ dm._download_station_data(station_id,'wiski', True)
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+ #%% Adding HSPF outputs to warehouse
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+ con = duckdb.connect(db_path)
166
+
167
+ model_name = 'Nemadji'
168
+ outlets = [group for _, group in modl_db.MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
169
+
170
+ for outlet in outlets:
171
+ 1+1
172
+
173
+
174
+ dfs = []
175
+ for constituent in ['Q','TSS','TP','N','OP','TKN']:
176
+ opnids = modl_db.split_opnids([opnid.split(',') for opnid in set(outlet['opnids'].tolist())])
177
+ for opnid in opnids:
178
+ df = mod.hbns.get_reach_constituent(constituent,opnids,time_step='h')
179
+ df.columns = ['value']
180
+ df['constituent'] = constituent
181
+ df['operation'] = operation
182
+ df['opnid'] = opnid
183
+ dfs.append(df)
184
+
185
+ df = pd.concat(dfs).reset_index()
186
+ df['model_name'] = model_name
187
+
188
+
189
+
190
+ station_ids = ['H05018001','S006-214','S015-102']
191
+ target_constituent = 'TSS'
192
+ flow_constituent = 'Q'
193
+
194
+ # build placeholders for the IN list (one ? per station id)
195
+ placeholders = ','.join(['?'] * len(station_ids))
196
+
197
+ sql = f'''
198
+ SELECT o.*, f.datetime AS flow_datetime, f.value AS flow, f.baseflow, f.station_id AS flow_station_id, f.station_origin AS flow_station_origin
199
+ FROM analytics.observations o
200
+ JOIN analytics.observations f
201
+ ON o.datetime = f.datetime
202
+ WHERE o.constituent = ?
203
+ AND o.station_id IN ({placeholders})
204
+ AND f.constituent = ?;
205
+ '''
206
+
207
+ # parameter order must match the ? positions in the query
208
+ params = [target_constituent] + station_ids + [flow_constituent]
209
+
210
+ df = con.execute(sql, params).df()
211
+
212
+ outlet_id: station_ids
213
+
214
+ outlet_id: opnid
215
+
216
+
217
+ outlets = []
218
+ for index, (_, group) in enumerate(modl_db.MODL_DB.groupby(by = ['opnids','repository_name'])):
219
+ group['outlet_id'] = index
220
+ group.reset_index(drop=True, inplace=True)
221
+ outlets.append(group)
222
+
223
+
224
+ for _, row in group.iterrows():
225
+ opnids = group.split_opnids(row['opnids'].str.split(',').to_list())
226
+ row*len(opnids)
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "mpcaHydro"
7
7
  urls = { "Homepage" = "https://github.com/mfratkin1/mpcaHydro" } # ? Add this!
8
- version = "2.2.6"
8
+ version = "2.2.8"
9
9
  dependencies = [
10
10
  "pandas",
11
11
  "requests",
@@ -144,7 +144,7 @@ def get_outlets_by_reach(reach_id: int, model_name: str):
144
144
  """,
145
145
  [reach_id, model_name]).fetchdf()
146
146
  return df
147
-
147
+
148
148
  def get_outlets_by_station(station_id: str, station_origin: str):
149
149
  """
150
150
  Return all outlet rows for outlets that include the given station_id with the given station_origin.
@@ -160,6 +160,19 @@ def get_outlets_by_station(station_id: str, station_origin: str):
160
160
  [station_id, station_origin]).fetchdf()
161
161
  return df
162
162
 
163
def get_station_opnids(station_id: str, station_origin: str):
    """
    List the reach IDs paired with a station in outlets.station_reach_pairs.

    Rows are matched on both ``station_id`` and ``station_origin``; the
    ``reach_id`` column of the matching rows is returned as a plain list.
    """
    query = """
        SELECT r.reach_id
        FROM outlets.station_reach_pairs r
        WHERE r.station_id = ? AND r.station_origin = ?
        """
    with connect(DB_PATH) as con:
        matches = con.execute(query, [station_id, station_origin]).fetchdf()
    reach_ids = matches['reach_id']
    return reach_ids.tolist()
163
176
 
164
177
 
165
178
  class OutletGateway:
@@ -13,7 +13,7 @@ import time
13
13
  CERT_PATH = str(Path(__file__).resolve().parent/'data\\wiskiweb01.pca.state.mn.us.crt')
14
14
  #TODO: Use this url to make sure web service is working https://wiskiweb01.pca.state.mn.us/
15
15
  class Service():
16
- base_url = 'https://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
16
+ base_url = 'http://wiskiweb01.pca.state.mn.us/KiWIS/KiWIS?'
17
17
  base_dict = {
18
18
  'datasource': '0',
19
19
  'service': 'kisters',
@@ -30,7 +30,7 @@ class Service():
30
30
  try:
31
31
  # Using requests.head() to fetch headers is faster than requests.get()
32
32
  # as it doesn't download the full content
33
- response = requests.head('https://wiskiweb01.pca.state.mn.us', timeout=timeout)
33
+ response = requests.head('http://wiskiweb01.pca.state.mn.us', timeout=timeout)
34
34
 
35
35
  # raise_for_status() raises an HTTPError for 4xx or 5xx status codes
36
36
  response.raise_for_status()
@@ -53,7 +53,7 @@ class Service():
53
53
 
54
54
  def _requestTypes(self):
55
55
  url = self.url({'request': 'getrequestinfo'})
56
- return requests.get(url,verify=CERT_PATH).json()[0]
56
+ return requests.get(url).json()[0]
57
57
 
58
58
  def getRequests(self):
59
59
  return list(self._requestTypes()['Requests'].keys())
@@ -72,7 +72,7 @@ class Service():
72
72
 
73
73
  def info(self,request_type):
74
74
  url = self.url({'request': 'getrequestinfo'})
75
- response = requests.get(url, verify=CERT_PATH)
75
+ response = requests.get(url)
76
76
  get_requests = response.json()
77
77
  return get_requests[0]['Requests'].keys()
78
78
 
@@ -95,7 +95,7 @@ class Service():
95
95
 
96
96
  def get_json(self,args_dict):
97
97
  # Download request
98
- self.response = requests.get(self.url(args_dict), verify=CERT_PATH)
98
+ self.response = requests.get(self.url(args_dict))
99
99
  if self.response.status_code != 200:
100
100
  print('Error: ' + self.response.json()['message'])
101
101
  self.response.raise_for_status() # raises exception when not a 2xx response
@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
26
26
  # return df
27
27
  import requests
28
28
 
29
- def _download(station_no):
29
+ def _download(station_id):
30
30
  # Query the results endpoint; the station ID is passed as a request parameter
31
- url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
32
-
31
+ #url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
32
+ url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
33
+
33
34
  try:
34
35
  # Send a GET request to the URL
35
- response = requests.get(url)
36
+ params = {
37
+ 'stationId': station_id,
38
+ 'format': 'json'
39
+ }
40
+ response = requests.get(url,params = params)
36
41
  response.raise_for_status() # Raise exception for HTTP errors
37
42
  # Parse the JSON data
38
- if response.json()['recordCount'] == 0:
39
- return pd.DataFrame(columns = response.json()['column_names'])
40
- else:
41
- return pd.DataFrame(response.json()['data'])
43
+ return pd.DataFrame(response.json()['data'])
42
44
 
43
45
  except requests.exceptions.RequestException as e:
44
46
  print(f"An error occurred: {e}")
@@ -46,14 +48,18 @@ def _download(station_no):
46
48
 
47
49
 
48
50
 
49
def download(station_ids):
    """
    Download raw surface-water results for one or more stations.

    Parameters
    ----------
    station_ids : str or iterable of str
        Station IDs to query. A single ID given as a bare string is treated
        as a one-element list, so callers written against the former
        single-station signature keep working.

    Returns
    -------
    pandas.DataFrame
        The rows of every station that returned data, concatenated with a
        fresh index and tagged with a ``station_id`` column. An empty
        DataFrame when no station returned data.
    """
    #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
    if isinstance(station_ids, str):
        # Iterating a bare string would query one "station" per character.
        station_ids = [station_ids]

    dfs = []
    for station_id in station_ids:
        df = _download(station_id)
        # NOTE(review): _download's failure path is not visible here; the
        # None check guards against it returning nothing after a request error.
        if df is None or df.empty:
            continue
        df['station_id'] = station_id
        dfs.append(df)

    if not dfs:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(dfs, ignore_index=True)
61
+
62
+
57
63
 
58
64
  def info(station_no):
59
65
  #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
@@ -28,6 +28,23 @@ def init_db(db_path: str,reset: bool = False):
28
28
 
29
29
 
30
30
 
31
def validate_schemas(con: duckdb.DuckDBPyConnection):
    """Raise ValueError if any required schema is absent from the database.

    The required schemas are staging, analytics, mappings, outlets and
    reports; the schemas present are read from information_schema.schemata.
    """
    required = {'staging', 'analytics', 'mappings', 'outlets', 'reports'}
    rows = con.execute("SELECT schema_name FROM information_schema.schemata").fetchall()
    present = {row[0] for row in rows}
    missing_schemas = required - present
    if not missing_schemas:
        return
    raise ValueError(f"Missing schemas: {missing_schemas}")
39
+
40
def validate_tables(con: duckdb.DuckDBPyConnection, schema: str, expected_tables: set):
    """Raise ValueError if ``schema`` lacks any of ``expected_tables``.

    Parameters
    ----------
    con
        Open connection to the database being checked.
    schema
        Schema name looked up in information_schema.tables.
    expected_tables
        Names of the tables that must exist. Any iterable of names is
        accepted, not only a set.

    Raises
    ------
    ValueError
        When one or more expected tables are not listed for the schema.
    """
    # The schema is bound as a query parameter, so a plain string is enough.
    result = con.execute(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = ?",
        [schema],
    ).fetchall()
    existing_tables = {row[0] for row in result}
    missing_tables = set(expected_tables) - existing_tables
    if missing_tables:
        raise ValueError(f"Missing tables in {schema} schema: {missing_tables}")
47
+
31
48
  def create_schemas(con: duckdb.DuckDBPyConnection):
32
49
  """Create staging, analytics, hspf, and reports schemas if they do not exist."""
33
50
  con.execute(sql_loader.get_schemas_sql())
@@ -96,6 +113,44 @@ def create_mapping_tables(con: duckdb.DuckDBPyConnection):
96
113
  else:
97
114
  print(f"Warning: WISKI_QUALITY_CODES.csv not found at {wiski_qc_csv_path}")
98
115
 
116
+
117
def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
    """
    Copy the tables and views of an external outlets DuckDB file into ``con``.

    The file at ``outlets_db_path`` is attached under the alias
    ``outlets_db``. Each listed table is copied with CREATE TABLE ... AS
    SELECT, and each listed view is recreated from its CREATE VIEW statement
    with the ``outlets_db.`` prefix stripped. The alias is detached again
    afterwards, including when a copy step raises.
    """
    create_schemas(con)

    # Double any single quote so the path cannot terminate the SQL literal.
    quoted_path = str(outlets_db_path).replace("'", "''")
    con.execute(f"ATTACH DATABASE '{quoted_path}' AS outlets_db;")
    try:
        # -- Step 1: Copy all tables --
        tables = con.execute("SHOW TABLES FROM outlets_db").fetchall()
        print(f"Tables in the source database: {tables}")

        for table in tables:
            table_name = table[0]  # Extract table name
            con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM outlets_db.{table_name}")  # Copy table contents

        # -- Step 2: Copy all views --
        # NOTE(review): SHOW VIEWS / SHOW CREATE VIEW look like MySQL-style
        # statements; confirm the DuckDB version in use accepts them
        # (duckdb_views() exposes each view's SQL as an alternative).
        views = con.execute("SHOW VIEWS FROM outlets_db").fetchall()
        print(f"Views in the source database: {views}")

        for view in views:
            view_name = view[0]  # Extract view name

            # Get the CREATE VIEW statement for the view
            create_view_sql = con.execute(f"SHOW CREATE VIEW outlets_db.{view_name}").fetchone()[0]

            # Recreate the view in the destination database without the
            # source-alias prefix so it resolves against the copied tables.
            create_view_sql = create_view_sql.replace("outlets_db.", "")
            con.execute(create_view_sql)
    finally:
        # The alias is already attached above; attaching it a second time
        # before detaching (as the code did previously) is an error. DETACH
        # takes the alias as an identifier, not a quoted string.
        con.execute("DETACH outlets_db")
152
+
153
+
99
154
  def create_outlets_tables(con: duckdb.DuckDBPyConnection):
100
155
  """Create tables in the outlets schema to define outlet-station-reach relationships."""
101
156
  con.execute(sql_loader.get_outlets_schema_sql())
@@ -0,0 +1,61 @@
1
+ #%% Imports
2
+ from mpcaHydro.data_manager import dataManager
3
+ from pathlib import Path
4
+ import duckdb
5
+ THIS_DIR = Path(__file__).parent
6
+ WISKI_STATIONS = ['E05011002']
7
+ EQUIS_STATIONS = ['S001-235','S005-115']
8
+
9
+ #%%
10
+ def test_build_warehouse():
11
+ dm = dataManager(THIS_DIR)
12
+ dm._build_warehouse()
13
+
14
+ test_build_warehouse()
15
+ # %%
16
def test_equis_data_download():
    """Download EQuIS data for EQUIS_STATIONS through a freshly reset dataManager.

    Oracle credentials are read from the MPCA_ORACLE_USERNAME and
    MPCA_ORACLE_PASSWORD environment variables so they are not stored in the
    repository; a KeyError means they are not set.
    """
    import os

    dm = dataManager(THIS_DIR,
                     oracle_username=os.environ['MPCA_ORACLE_USERNAME'],
                     oracle_password=os.environ['MPCA_ORACLE_PASSWORD'],
                     reset=True)

    dm.connect_to_oracle()
    dm._download_equis_data(EQUIS_STATIONS)
24
+
25
+ test_equis_data_download()
26
+ #%%
27
+ def test_wiski_data_download():
28
+ dm = dataManager(THIS_DIR, reset=True)
29
+ dm._download_wiski_data(WISKI_STATIONS)
30
+
31
+
32
+ test_wiski_data_download()
33
+
34
+ #%%
35
+ dm = dataManager(THIS_DIR, reset=False)
36
+ with duckdb.connect(dm.db_path, read_only=True) as con:
37
+ df = con.execute('SELECT * FROM analytics.outlet_observations').fetch_df()
38
+ assert(df['outlet_id'].isnull().sum() == 0)
39
+
40
+ with duckdb.connect(dm.db_path, read_only=True) as con:
41
+ df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
42
+ assert(df['outlet_id'].isnull().sum() == 0)
43
+ assert(df['value'].isnull().sum() == 0)
44
+ # %%
45
+ dm = dataManager(THIS_DIR, reset=False)
46
+
47
+
48
+ def test_wiski_download():
49
+ dm = dataManager(THIS_DIR, reset=False)
50
+ wiski_stations = WISKI_STATIONS
51
+ dm._download_wiski_data(wiski_stations)
52
+ return dm
53
+
54
+ test_wiski_download()
55
+
56
+
57
+ with duckdb.connect(dm.db_path, read_only=True) as con:
58
+ df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
59
+ assert(df['outlet_id'].isnull().sum() == 0)
60
+
61
+ # %%
@@ -0,0 +1,19 @@
1
+
2
+ #%%
3
+ from mpcaHydro import equis
4
+ from mpcaHydro import outlets
5
+
6
+
7
+
8
+ #%%
9
# Check that equis.normalize yields the normalized column set for a model's
# EQuIS stations.
import os

model_name = 'Rum'
equis_stations = outlets.equis_stations(model_name)

# Credentials come from the environment so they are not stored in the
# repository; a KeyError here means the variables are not set.
equis.connect(os.environ['MPCA_ORACLE_USERNAME'],
              password=os.environ['MPCA_ORACLE_PASSWORD'])

df = equis.download(equis_stations)

df_normalized = equis.normalize(df.copy())
expected_columns = ['station_id', 'constituent', 'cas_rn', 'datetime', 'value', 'unit']

# Name the missing columns so a failure is actionable.
missing_columns = [col for col in expected_columns if col not in df_normalized.columns]
assert not missing_columns, f"Missing normalized columns: {missing_columns}"
19
+ # %%
@@ -1,25 +0,0 @@
1
- [workspace]
2
- channels = ["https://prefix.dev/conda-forge"]
3
- platforms = ["linux-64", "osx-64", "win-64"]
4
-
5
-
6
- [dependencies]
7
- requests = "*"
8
- pandas = "*"
9
- time = "*"
10
- pathlib = "*"
11
- spyder = "*"
12
- jupyter = "*"
13
-
14
- [package]
15
- name = "mpcaHydro"
16
- version = "0.1.0"
17
-
18
- [package.build]
19
- backend = { name = "pixi-build-python", version = "0.1.*" }
20
-
21
- [package.run-dependencies]
22
- requests = "*"
23
- pandas = "*"
24
- time = "*"
25
- pathlib = "*"
File without changes
File without changes
File without changes