mpcaHydro 2.1.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mpcaHydro/equis.py CHANGED
@@ -164,26 +164,25 @@ def as_utc_offset(naive_dt: Union[datetime, str], tz_label: str, target_offset:
164
164
  aware_src = naive.replace(tzinfo=src_tz)
165
165
 
166
166
  # convert the instant to fixed UTC-6
167
- return aware_src.astimezone(target_offset)
167
+ return aware_src.astimezone(target_offset).tz_localize(None)
168
168
 
169
169
 
170
170
  def normalize_columns(df):
171
171
  '''Select relevant columns from Equis data.'''
172
172
  return df[['SYS_LOC_CODE',
173
+ 'constituent',
174
+ 'CAS_RN',
173
175
  'datetime',
174
176
  'RESULT_NUMERIC',
175
177
  'RESULT_UNIT',
176
- 'constituent'
177
178
  ]].rename(columns={
178
179
  'SYS_LOC_CODE':'station_id',
179
180
  'RESULT_NUMERIC':'value',
180
- 'RESULT_UNIT':'unit'
181
+ 'RESULT_UNIT':'unit',
182
+ 'CAS_RN':'cas_rn'
181
183
  })
182
184
 
183
- def replace_nondetects(df):
184
- '''Replace non-detect results with 0 in Equis data.'''
185
- df.loc[df['RESULT_NUMERIC'].isna(), 'RESULT_NUMERIC'] = 0
186
- return df
185
+
187
186
 
188
187
  def normalize_timezone(df):
189
188
  '''Normalize datetime to UTC in Equis data.'''
@@ -194,27 +193,27 @@ def normalize_timezone(df):
194
193
  except Exception:
195
194
  return pd.NaT
196
195
 
197
- df['datetime'] = df.apply(_conv, axis=1)
196
+ df.loc[:,'datetime'] = df.apply(_conv, axis=1)
198
197
  return df
199
198
 
200
199
  def convert_units(df):
201
200
  '''Convert units in Equis data to standard units.'''
202
201
  # Convert ug/L to mg/L
203
- df['RESULT_UNIT'] = df['RESULT_UNIT'].str.lower()
202
+ df['unit'] = df['unit'].str.lower()
204
203
 
205
- mask_ugL = df['RESULT_UNIT'] == 'ug/l'
206
- df.loc[mask_ugL, 'RESULT_NUMERIC'] = df.loc[mask_ugL, 'RESULT_NUMERIC'] / 1000
207
- df.loc[mask_ugL, 'RESULT_UNIT'] = 'mg/l'
204
+ mask_ugL = df['unit'] == 'ug/l'
205
+ df.loc[mask_ugL, 'value'] = df.loc[mask_ugL, 'value'] / 1000
206
+ df.loc[mask_ugL, 'unit'] = 'mg/l'
208
207
 
209
208
  # Convert mg/g to mg/L (assuming density of 1 g/mL)
210
- mask_mgg = df['RESULT_UNIT'] == 'mg/g'
211
- df.loc[mask_mgg, 'RESULT_NUMERIC'] = df.loc[mask_mgg, 'RESULT_NUMERIC'] * 1000
212
- df.loc[mask_mgg, 'RESULT_UNIT'] = 'mg/l'
209
+ mask_mgg = df['unit'] == 'mg/g'
210
+ df.loc[mask_mgg, 'value'] = df.loc[mask_mgg, 'value'] * 1000
211
+ df.loc[mask_mgg, 'unit'] = 'mg/l'
213
212
 
214
213
  # Convert deg C to degF
215
- mask_degC = df['RESULT_UNIT'].isin(['deg c', 'degc'])
216
- df.loc[mask_degC, 'RESULT_NUMERIC'] = (df.loc[mask_degC, 'RESULT_NUMERIC'] * 9/5) + 32
217
- df.loc[mask_degC, 'RESULT_UNIT'] = 'degf'
214
+ mask_degC = df['unit'].isin(['deg c', 'degc'])
215
+ df.loc[mask_degC, 'value'] = (df.loc[mask_degC, 'value'] * 9/5) + 32
216
+ df.loc[mask_degC, 'unit'] = 'degf'
218
217
 
219
218
  return df
220
219
 
@@ -232,15 +231,25 @@ def average_results(df):
232
231
  value=('value', 'mean')
233
232
  ).reset_index()
234
233
 
234
+ def replace_nondetects(df):
235
+ '''Replace non-detect results with 0 in Equis data.'''
236
+ df.loc[df['value'].isna(), 'value'] = 0
237
+ return df
238
+
239
+ def normalize(df):
240
+ '''Normalize Equis data: map constituents, normalize timezone, select relevant columns, and convert units.'''
241
+ df = map_constituents(df)
242
+ df = normalize_timezone(df)
243
+ df = normalize_columns(df)
244
+ df = convert_units(df)
245
+ return df
246
+
235
247
  def transform(df):
236
248
  '''Transform Equis data: handle non-detects, convert units, map constituents.'''
237
249
 
250
+ df = normalize(df)
238
251
  df = replace_nondetects(df)
239
252
  if not df.empty:
240
- df = normalize_timezone(df)
241
- df = convert_units(df)
242
- df = map_constituents(df)
243
- df = normalize_columns(df)
244
253
  df = average_results(df)
245
254
  return df
246
255
 
mpcaHydro/etlSWD.py CHANGED
@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
26
26
  # return df
27
27
  import requests
28
28
 
29
- def _download(station_no):
29
+ def _download(station_id):
30
30
  # Replace {station_no} in the URL with the actual station number
31
- url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
32
-
31
+ #url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
32
+ url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
33
+
33
34
  try:
34
35
  # Send a GET request to the URL
35
- response = requests.get(url)
36
+ params = {
37
+ 'stationId': station_id,
38
+ 'format': 'json'
39
+ }
40
+ response = requests.get(url,params = params)
36
41
  response.raise_for_status() # Raise exception for HTTP errors
37
42
  # Parse the JSON data
38
- if response.json()['recordCount'] == 0:
39
- return pd.DataFrame(columns = response.json()['column_names'])
40
- else:
41
- return pd.DataFrame(response.json()['data'])
43
+ return pd.DataFrame(response.json()['data'])
42
44
 
43
45
  except requests.exceptions.RequestException as e:
44
46
  print(f"An error occurred: {e}")
@@ -46,14 +48,18 @@ def _download(station_no):
46
48
 
47
49
 
48
50
 
49
- def download(station_no):
51
+ def download(station_ids):
50
52
  #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
51
- df = _download(station_no)
52
- if df.empty:
53
- return df
54
- else:
55
- df['station_id'] = station_no
56
- return transform(df)
53
+ dfs = []
54
+ for station_id in station_ids:
55
+ df = _download(station_id)
56
+ if not df.empty:
57
+ df['station_id'] = station_id
58
+ dfs.append(df)
59
+
60
+ return pd.concat(dfs, ignore_index=True)
61
+
62
+
57
63
 
58
64
  def info(station_no):
59
65
  #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
mpcaHydro/outlets.py ADDED
@@ -0,0 +1,367 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Thu May 1 09:51:51 2025
4
+
5
+ @author: mfratki
6
+ """
7
+ #import sqlite3
8
+ from pathlib import Path
9
+ import geopandas as gpd
10
+ import pandas as pd
11
+ import duckdb
12
+ #from hspf_tools.calibrator import etlWISKI, etlSWD
13
+
14
+
15
+ #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
16
+
17
+ _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
18
+ stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
19
+ stations_wiski['source'] = 'wiski'
20
+ _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
21
+ stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
22
+ stations_equis['source'] = 'equis'
23
+ stations_equis['wplmn_flag'] = 0
24
+
25
+
26
+
27
+
28
+ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
29
+
30
+ MODL_DB = pd.concat([stations_wiski,stations_equis])
31
+ MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
32
+ MODL_DB = MODL_DB.dropna(subset='opnids')
33
+ MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
34
+
35
+ def _reload():
36
+ global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
37
+ _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
38
+ stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
39
+ stations_wiski['source'] = 'wiski'
40
+ _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
41
+ stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
42
+ stations_equis['source'] = 'equis'
43
+ stations_equis['wplmn_flag'] = 0
44
+
45
+
46
+ MODL_DB = pd.concat([stations_wiski,stations_equis])
47
+ MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
48
+ MODL_DB = MODL_DB.dropna(subset='opnids')
49
+ MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
50
+
51
+
52
+ def split_opnids(opnids: list):
53
+ return [int(float(j)) for i in opnids for j in i]
54
+
55
+ def get_model_db(model_name: str):
56
+ return MODL_DB.query('repo_name == @model_name')
57
+
58
+ def valid_models():
59
+ return MODL_DB['repo_name'].unique().tolist()
60
+
61
+ def equis_stations(model_name):
62
+ return _stations_equis.query('repo_name == @model_name')['station_id'].tolist()
63
+
64
+ def wiski_stations(model_name):
65
+ return _stations_wiski.query('repo_name == @model_name')['station_id'].tolist()
66
+
67
+ def wplmn_stations(model_name):
68
+ return MODL_DB.query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
69
+
70
+ def wplmn_station_opnids(model_name):
71
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
72
+ return split_opnids(opnids)
73
+
74
+ def wiski_station_opnids(model_name):
75
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
76
+ return split_opnids(opnids)
77
+
78
+ def equis_station_opnids(model_name):
79
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
80
+ return split_opnids(opnids)
81
+
82
+ def station_opnids(model_name):
83
+ opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name')['opnids'].str.split(',').to_list()
84
+ return split_opnids(opnids)
85
+
86
+ def mapped_equis_stations(model_name):
87
+ return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['station_id'].tolist()
88
+
89
+ def mapped_wiski_stations(model_name):
90
+ return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['station_id'].tolist()
91
+
92
+ def outlets(model_name):
93
+ return [group for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
94
+
95
+ def outlet_stations(model_name):
96
+ return [group['station_id'].to_list() for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
97
+
98
+
99
+ def connect(db_path, read_only=True):
100
+ #Path(db_path).parent.mkdir(parents=True, exist_ok=True)
101
+ return duckdb.connect(db_path,read_only=read_only)
102
+
103
+
104
+ def init_db(db_path: str,reset: bool = False):
105
+ """
106
+ Initialize the DuckDB database: create staging and analytics schemas
107
+ """
108
+ db_path = Path(db_path)
109
+ if reset and db_path.exists():
110
+ db_path.unlink()
111
+
112
+ with connect(db_path.as_posix(),False) as con:
113
+ con.execute(OUTLETS_SCHEMA)
114
+
115
+
116
+
117
+ # Accessors:
118
+ def get_outlets_by_model(model_name: str):
119
+ with connect(DB_PATH) as con:
120
+ df = con.execute(
121
+ """
122
+ SELECT r.*
123
+ FROM outlets.station_reach_pairs r
124
+ WHERE r.repository_name = ?
125
+ """,
126
+ [model_name]
127
+ ).fetchdf()
128
+ return df
129
+
130
+ def get_outlets_by_reach(reach_id: int, model_name: str):
131
+ """
132
+ Return all outlet rows for outlets that include the given reach_id in the given model_name.
133
+ """
134
+ with connect(DB_PATH) as con:
135
+ df = con.execute(
136
+ """
137
+ SELECT r.*
138
+ FROM outlets.station_reach_pairs r
139
+ WHERE r.reach_id = ? AND r.repository_name = ?
140
+ """,
141
+ [reach_id, model_name]).fetchdf()
142
+ return df
143
+
144
+ def get_outlets_by_station(station_id: str, station_origin: str):
145
+ """
146
+ Return all outlet rows for outlets that include the given reach_id in the given model_name.
147
+ """
148
+ with connect(DB_PATH) as con:
149
+
150
+ df = con.execute(
151
+ """
152
+ SELECT r.*
153
+ FROM outlets.station_reach_pairs r
154
+ WHERE r.station_id = ? AND r.station_origin = ?
155
+ """,
156
+ [station_id, station_origin]).fetchdf()
157
+ return df
158
+
159
+
160
+
161
+ class OutletGateway:
162
+ def __init__(self, model_name: str):
163
+ self.model_name = model_name
164
+ self.db_path = DB_PATH
165
+ self.modl_db = get_model_db(model_name)
166
+
167
+ # Legacy methods to access functions
168
+ def wplmn_station_opnids(self):
169
+ return wplmn_station_opnids(self.model_name)
170
+
171
+ def wiski_station_opnids(self):
172
+ return wiski_station_opnids(self.model_name)
173
+
174
+ def equis_station_opnids(self):
175
+ return equis_station_opnids(self.model_name)
176
+
177
+ def station_opnids(self):
178
+ return station_opnids(self.model_name)
179
+
180
+ def equis_stations(self):
181
+ return equis_stations(self.model_name)
182
+
183
+ def wiski_stations(self):
184
+ return wiski_stations(self.model_name)
185
+
186
+ def wplmn_stations(self):
187
+ return wplmn_stations(self.model_name)
188
+
189
+ def outlets(self):
190
+ return outlets(self.model_name)
191
+
192
+ def outlet_stations(self):
193
+ return outlet_stations(self.model_name)
194
+
195
+ # Accessors for outlets
196
+ def get_outlets(self):
197
+ return get_outlets_by_model(self.model_name)
198
+
199
+ def get_outlets_by_reach(self, reach_id: int):
200
+ return get_outlets_by_reach(reach_id, self.model_name)
201
+
202
+ def get_outlets_by_station(self, station_id: str, station_origin: str):
203
+ assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
204
+ return get_outlets_by_station(station_id, station_origin)
205
+
206
+ # constructors:
207
+ def build_outlet_db(db_path: str = None):
208
+ if db_path is None:
209
+ db_path = DB_PATH
210
+ init_db(db_path,reset=True)
211
+ with connect(db_path,False) as con:
212
+ build_outlets(con)
213
+
214
+
215
+ def build_outlets(con, model_name: str = None):
216
+ if model_name is not None:
217
+ modl_db = get_model_db(model_name)
218
+ else:
219
+ modl_db = MODL_DB
220
+
221
+ for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repo_name'])):
222
+ repo_name = group['repo_name'].iloc[0]
223
+ add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
224
+
225
+ opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
226
+
227
+ for opnid in opnids:
228
+ add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
229
+
230
+ for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
231
+ add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
232
+
233
+
234
+ def create_outlet_schema(con, model_name : str):
235
+ for index, (_, group) in enumerate(outlets(model_name)):
236
+ repo_name = group['repo_name'].iloc[0]
237
+ add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
238
+
239
+ opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
240
+
241
+ for opnid in opnids:
242
+ add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
243
+
244
+ for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
245
+ add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
246
+
247
+
248
+ def add_outlet(con,
249
+ outlet_id: int,
250
+ repository_name: str,
251
+ outlet_name = None,
252
+ notes = None):
253
+ """
254
+ Insert an outlet. repository_name is required.
255
+ """
256
+ con.execute(
257
+ "INSERT INTO outlets.outlet_groups (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
258
+ [outlet_id, repository_name, outlet_name, notes]
259
+ )
260
+
261
+ def add_station(con,
262
+ outlet_id: int,
263
+ station_id: int,
264
+ station_origin: str,
265
+ true_opnid: int,
266
+ repository_name: str,
267
+ comments = None):
268
+ """
269
+ Insert a station membership for an outlet.
270
+ Constraints:
271
+ - PRIMARY KEY (station_id, station_origin): unique per origin across all outlets.
272
+ - true_opnid and true_opnid_repository_name are required per schema.
273
+ """
274
+ con.execute(
275
+ """INSERT INTO outlets.outlet_stations
276
+ (outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
277
+ VALUES (?, ?, ?, ?, ?, ?)""",
278
+ [outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
279
+ )
280
+
281
+ def add_reach(con,
282
+ outlet_id: int,
283
+ reach_id: int,
284
+ repository_name: str):
285
+ """
286
+ Insert a reach membership for an outlet.
287
+ - repository_name is required and participates in the PK (reach_id, repository_name).
288
+ """
289
+ con.execute(
290
+ """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name)
291
+ VALUES (?, ?, ?)""",
292
+ [outlet_id, reach_id, repository_name]
293
+ )
294
+
295
+
296
+ OUTLETS_SCHEMA = """-- schema.sql
297
+ -- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
298
+ -- Compatible with DuckDB and SQLite.
299
+
300
+ -- Table 1: outlets
301
+ -- Represents a logical grouping that ties stations and reaches together.
302
+ CREATE SCHEMA IF NOT EXISTS outlets;
303
+
304
+ CREATE TABLE IF NOT EXISTS outlets.outlet_groups (
305
+ outlet_id INTEGER PRIMARY KEY,
306
+ repository_name TEXT NOT NULL,
307
+ outlet_name TEXT,
308
+ notes TEXT -- optional: general notes about the outlet grouping
309
+ );
310
+
311
+ -- Table 2: outlet_stations
312
+ -- One-to-many: outlet -> stations
313
+ CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
314
+ outlet_id INTEGER NOT NULL,
315
+ station_id TEXT NOT NULL,
316
+ station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
317
+ repository_name TEXT NOT NULL, -- repository model the station is physically located in
318
+ true_opnid INTEGER NOT NULL, -- The specific reach the station physically sits on (optional)
319
+ comments TEXT, -- Per-station comments, issues, etc.
320
+ CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
321
+ FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
322
+ );
323
+
324
+ -- Table 3: outlet_reaches
325
+ -- One-to-many: outlet -> reaches
326
+ -- A reach can appear in multiple outlets, enabling many-to-many overall.
327
+ CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
328
+ outlet_id INTEGER NOT NULL,
329
+ reach_id INTEGER NOT NULL, -- model reach identifier (aka opind)
330
+ repository_name TEXT NOT NULL, -- optional: where the mapping comes from
331
+ FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
332
+ );
333
+
334
+ -- Useful views:
335
+
336
+ -- View: station_reach_pairs
337
+ -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
338
+ CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
339
+ SELECT
340
+ s.outlet_id,
341
+ s.station_id,
342
+ s.station_origin,
343
+ r.reach_id,
344
+ r.repository_name
345
+ FROM outlets.outlet_stations AS s
346
+ JOIN outlets.outlet_reaches AS r
347
+ ON s.outlet_id = r.outlet_id;
348
+
349
+ """
350
+
351
+ #row = modl_db.MODL_DB.iloc[0]
352
+
353
+ #info = etlWISKI.info(row['station_id'])
354
+
355
+ #modl_db.MODL_DB.query('source == "equis"')
356
+
357
+ # outlet_dict = {'stations': {'wiski': ['E66050001'],
358
+ # 'equis': ['S002-118']},
359
+ # 'reaches': {'Clearwater': [650]}
360
+
361
+
362
+
363
+
364
+ # station_ids = ['S002-118']
365
+ # #station_ids = ['E66050001']
366
+ # reach_ids = [650]
367
+ # flow_station_ids = ['E66050001']
mpcaHydro/reports.py ADDED
@@ -0,0 +1,80 @@
1
+ from pathlib import Path
2
+ import duckdb
3
+ import glob
4
+
5
+ #TODO ensure all reports are actually in the reports schema
6
+
7
+ class reportManager():
8
+ def __init__(self,db_path:Path):
9
+ self.db_path = db_path
10
+
11
+ def wiski_qc_counts(self):
12
+ with duckdb.connect(self.db_path,read_only=True) as con:
13
+ return wiski_qc_counts(con)
14
+
15
+ def constituent_summary(self,constituent: str = None):
16
+ with duckdb.connect(self.db_path,read_only=True) as con:
17
+ return constituent_summary(con,constituent)
18
+
19
+ def station_reach_pairs(self):
20
+ with duckdb.connect(self.db_path,read_only=True) as con:
21
+ return station_reach_pairs(con)
22
+
23
+ def outlet_summary(self):
24
+ with duckdb.connect(self.db_path,read_only=True) as con:
25
+ return outlet_summary(con)
26
+
27
+
28
+
29
+ def outlet_summary(con: duckdb.DuckDBPyConnection):
30
+ query = '''
31
+ SELECT *,
32
+ FROM
33
+ reports.outlet_constituent_summary
34
+ ORDER BY
35
+ outlet_id,
36
+ constituent
37
+ '''
38
+ df = con.execute(query).fetch_df()
39
+ return df
40
+
41
+
42
+ def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
43
+ query = '''
44
+ SELECT *,
45
+ FROM
46
+ reports.wiski_qc_count
47
+ ORDER BY
48
+ station_no,
49
+ parametertype_name
50
+ '''
51
+ df = con.execute(query).fetch_df()
52
+ return df
53
+
54
+ def constituent_summary(con: duckdb.DuckDBPyConnection,constituent: str = None):
55
+
56
+ query = '''
57
+ SELECT *,
58
+ FROM
59
+ reports.constituent_summary
60
+ ORDER BY
61
+ station_id,
62
+ station_origin,
63
+ constituent
64
+ '''
65
+ df = con.execute(query).fetch_df()
66
+ if constituent is not None:
67
+ df = df[df['constituent'] == constituent]
68
+ return df
69
+
70
+ def station_reach_pairs(con: duckdb.DuckDBPyConnection):
71
+ query = '''
72
+ SELECT *,
73
+ FROM
74
+ reports.station_reach_pairs
75
+ ORDER BY
76
+ outlet_id,
77
+ station_id
78
+ '''
79
+ df = con.execute(query).fetch_df()
80
+ return df