mpcaHydro 2.2.8__tar.gz → 2.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/PKG-INFO +1 -1
  2. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/pyproject.toml +1 -1
  3. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/outlet.duckdb +0 -0
  4. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_EQUIS.gpkg +0 -0
  5. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_wiski.gpkg +0 -0
  6. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/equis.py +8 -0
  7. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/outlets.py +56 -23
  8. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse.py +2 -3
  9. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse_functions.py +3 -2
  10. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/wiski.py +9 -0
  11. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/.gitattributes +0 -0
  12. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/.gitignore +0 -0
  13. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/README.md +0 -0
  14. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/demo.py +0 -0
  15. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/__init__.py +0 -0
  16. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/csg.py +0 -0
  17. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/EQUIS_PARAMETER_XREF.csv +0 -0
  18. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/WISKI_EQUIS_XREF.csv +0 -0
  19. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/WISKI_QUALITY_CODES.csv +0 -0
  20. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/data/wiskiweb01.pca.state.mn.us.crt +0 -0
  21. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/pywisk.py +0 -0
  22. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/reports.py +0 -0
  23. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/analytics_tables.sql +0 -0
  24. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/outlets_schema.sql +0 -0
  25. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/schemas.sql +0 -0
  26. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/staging_tables.sql +0 -0
  27. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_analytics.sql +0 -0
  28. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_outlets.sql +0 -0
  29. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql/views_reports.sql +0 -0
  30. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/sql_loader.py +0 -0
  31. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/swd.py +0 -0
  32. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/src/mpcaHydro/xref.py +0 -0
  33. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/README.md +0 -0
  34. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/conftest.py +0 -0
  35. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_dataManager.py +0 -0
  36. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_data_manager.py +0 -0
  37. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_data_manager_integration.py +0 -0
  38. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_equis_integration.py +0 -0
  39. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_warehouse.duckdb +0 -0
  40. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_warehouse.py +0 -0
  41. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_wiski.py +0 -0
  42. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/integration/test_wiski_integration.py +0 -0
  43. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/test_data_manager_functions.py +0 -0
  44. {mpcahydro-2.2.8 → mpcahydro-2.2.9}/tests/unit/test_equis.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mpcaHydro
3
- Version: 2.2.8
3
+ Version: 2.2.9
4
4
  Summary: Python package for downloading MPCA hydrology data
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "mpcaHydro"
7
7
  urls = { "Homepage" = "https://github.com/mfratkin1/mpcaHydro" } # ? Add this!
8
- version = "2.2.8"
8
+ version = "2.2.9"
9
9
  dependencies = [
10
10
  "pandas",
11
11
  "requests",
@@ -265,6 +265,13 @@ def replace_nondetects(df):
265
265
  df.loc[df['value'].isna(), 'value'] = 0
266
266
  return df
267
267
 
268
+ def filter_years(df, start_year=1996, end_year=None):
269
+ '''Filter Equis data to include only samples within a certain year range.'''
270
+ df = df[df['datetime'].dt.year >= start_year]
271
+ if end_year is not None:
272
+ df = df[df['datetime'].dt.year <= end_year]
273
+ return df
274
+
268
275
  def normalize(df):
269
276
  '''Normalize Equis data: select relevant columns.'''
270
277
  df = map_constituents(df)
@@ -278,6 +285,7 @@ def transform(df):
278
285
 
279
286
  df = normalize(df)
280
287
  df = replace_nondetects(df)
288
+ df = filter_years(df)
281
289
  if not df.empty:
282
290
  df = average_results(df)
283
291
  return df
@@ -31,7 +31,15 @@ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
31
31
  MODL_DB = pd.concat([stations_wiski,stations_equis])
32
32
  MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
33
33
  MODL_DB = MODL_DB.dropna(subset='opnids')
34
+ MODL_DB = MODL_DB.dropna(subset = 'repo_name')
34
35
  MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
36
+ # Add outlet_id column to MODL_DB based on enumerate grouping
37
+ outlet_id_map = {}
38
+ for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
39
+ for idx in group.index:
40
+ outlet_id_map[idx] = int(outlet_id)
41
+ MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
42
+
35
43
 
36
44
  def _reload():
37
45
  global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
@@ -47,7 +55,14 @@ def _reload():
47
55
  MODL_DB = pd.concat([stations_wiski,stations_equis])
48
56
  MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
49
57
  MODL_DB = MODL_DB.dropna(subset='opnids')
58
+ MODL_DB = MODL_DB.dropna(subset = 'repo_name')
50
59
  MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
60
+ # Add outlet_id column to MODL_DB based on enumerate grouping
61
+ outlet_id_map = {}
62
+ for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
63
+ for idx in group.index:
64
+ outlet_id_map[idx] = int(outlet_id)
65
+ MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
51
66
 
52
67
 
53
68
  def split_opnids(opnids: list):
@@ -174,6 +189,34 @@ def get_station_opnids(station_id: str, station_origin: str):
174
189
  [station_id, station_origin]).fetchdf()
175
190
  return df['reach_id'].tolist()
176
191
 
192
+ def get_outlet_opnids(outlet_id: int):
193
+ """
194
+ Return all model reach IDs (opnids) associated with the given outlet ID.
195
+ """
196
+ with connect(DB_PATH) as con:
197
+ df = con.execute(
198
+ """
199
+ SELECT r.reach_id
200
+ FROM outlets.station_reach_pairs r
201
+ WHERE r.outlet_id = ?
202
+ """,
203
+ [outlet_id]).fetchdf()
204
+ return list(set(df['reach_id'].tolist()))
205
+
206
+ def get_outlet_stations(outlet_id: int):
207
+ """
208
+ Return all station IDs and origins associated with the given outlet ID.
209
+ """
210
+ with connect(DB_PATH) as con:
211
+ df = con.execute(
212
+ """
213
+ SELECT r.station_id, r.station_origin
214
+ FROM outlets.station_reach_pairs r
215
+ WHERE r.outlet_id = ?
216
+ """,
217
+ [outlet_id]).fetchdf()
218
+ return df[['station_id', 'station_origin']].drop_duplicates().to_dict(orient='records')
219
+
177
220
 
178
221
  class OutletGateway:
179
222
  def __init__(self, model_name: str):
@@ -192,7 +235,7 @@ class OutletGateway:
192
235
  return equis_station_opnids(self.model_name)
193
236
 
194
237
  def station_opnids(self):
195
- return station_opnids(self.model_name)
238
+ return mapped_station_opnids(self.model_name)
196
239
 
197
240
  def equis_stations(self):
198
241
  return equis_stations(self.model_name)
@@ -220,6 +263,12 @@ class OutletGateway:
220
263
  assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
221
264
  return get_outlets_by_station(station_id, station_origin)
222
265
 
266
+ def get_outlet_opnids(self, outlet_id: int):
267
+ return get_outlet_opnids(outlet_id)
268
+
269
+ def get_outlet_stations(self, outlet_id: int):
270
+ return get_outlet_stations(outlet_id)
271
+
223
272
  # constructors:
224
273
  def build_outlet_db(db_path: str = None):
225
274
  if db_path is None:
@@ -235,31 +284,15 @@ def build_outlets(con, model_name: str = None):
235
284
  else:
236
285
  modl_db = MODL_DB
237
286
 
238
- for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repo_name'])):
239
- repo_name = group['repo_name'].iloc[0]
240
- add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
241
-
287
+ for outlet_id in modl_db['outlet_id'].unique():
288
+ group = modl_db.query('outlet_id == @outlet_id')
289
+ repo_name = group['repo_name'].iloc[0]
290
+ add_outlet(con, outlet_id = int(outlet_id), outlet_name = None, repository_name = repo_name, notes = None)
242
291
  opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
243
-
244
292
  for opnid in opnids:
245
- add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
246
-
247
- for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
248
- add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
249
-
250
-
251
- def create_outlet_schema(con, model_name : str):
252
- for index, (_, group) in enumerate(outlets(model_name)):
253
- repo_name = group['repo_name'].iloc[0]
254
- add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
255
-
256
- opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
257
-
258
- for opnid in opnids:
259
- add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
260
-
293
+ add_reach(con, outlet_id = int(outlet_id), reach_id = int(opnid), repository_name = repo_name)
261
294
  for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
262
- add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
295
+ add_station(con, outlet_id = int(outlet_id), station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
263
296
 
264
297
 
265
298
  def add_outlet(con,
@@ -151,12 +151,11 @@ def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
151
151
  con.execute("DETACH 'outlets_db'")
152
152
 
153
153
 
154
- def create_outlets_tables(con: duckdb.DuckDBPyConnection):
154
+ def create_outlets_tables(con: duckdb.DuckDBPyConnection, model_name: str = None):
155
155
  """Create tables in the outlets schema to define outlet-station-reach relationships."""
156
156
  con.execute(sql_loader.get_outlets_schema_sql())
157
157
  con.execute(sql_loader.get_views_outlets_sql())
158
- outlets.build_outlets(con)
159
-
158
+ outlets.build_outlets(con, model_name=model_name)
160
159
 
161
160
  def create_filtered_wiski_view(con: duckdb.DuckDBPyConnection, data_codes: list):
162
161
  """Create a view filtering WISKI data based on specified data codes."""
@@ -101,7 +101,8 @@ def download_wiski_data(
101
101
  if overwrite:
102
102
  warehouse.drop_station_data(con, station_ids, 'wiski')
103
103
  warehouse.add_df_to_table(con, df, 'staging', 'wiski')
104
- warehouse.add_df_to_table(con, df_transformed, 'analytics', 'wiski')
104
+ if not df_transformed.empty:
105
+ warehouse.add_df_to_table(con, df_transformed, 'analytics', 'wiski')
105
106
  warehouse.update_views(con)
106
107
  else:
107
108
  print('No data necessary for HSPF calibration from wiski for:', station_ids)
@@ -351,7 +352,7 @@ def station_reach_pairs(con: duckdb.DuckDBPyConnection):
351
352
  query = '''
352
353
  SELECT *,
353
354
  FROM
354
- reports.station_reach_pairs
355
+ outlets.station_reach_pairs
355
356
  ORDER BY
356
357
  outlet_id,
357
358
  station_id
@@ -336,6 +336,14 @@ def filter_quality_codes(df, data_codes):
336
336
  '''
337
337
  return df.loc[df['quality_code'].isin(data_codes)]
338
338
 
339
+ def filter_years(df, start_year=1996, end_year=None):
340
+ '''Filter Equis data to include only samples within a certain year range.'''
341
+ df = df[df['datetime'].dt.year >= start_year]
342
+ if end_year is not None:
343
+ df = df[df['datetime'].dt.year <= end_year]
344
+ return df
345
+
346
+
339
347
  def average_results(df):
340
348
  #df['datetime'] = pd.to_datetime(df.loc[:,'datetime'])
341
349
  df.loc[:,'datetime'] = df.loc[:,'datetime'].dt.round('h')
@@ -392,6 +400,7 @@ def transform(df, filter_qc_codes = True, data_codes = None, baseflow_method = '
392
400
  data_codes = DATA_CODES
393
401
  df = filter_quality_codes(df, data_codes)
394
402
  df = average_results(df)
403
+ df = filter_years(df, start_year=1996)
395
404
  df = calculate_baseflow(df, method = baseflow_method)
396
405
  df['station_origin'] = 'wiski'
397
406
  #df.set_index('datetime',inplace=True)
File without changes
File without changes
File without changes
File without changes