pyhcal 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyhcal/calibrators.py CHANGED
@@ -9,10 +9,6 @@ from copy import deepcopy
9
9
  import subprocess
10
10
  #non-standard imports
11
11
  import pandas as pd
12
- pd.set_option('display.max_columns', None)
13
- pd.set_option('display.max_rows', None)
14
- # to reset this
15
- pd.reset_option('display.max_columns')
16
12
  import numpy as np
17
13
  from pathlib import Path
18
14
 
@@ -23,48 +19,26 @@ from hspf import helpers
23
19
  from mpcaHydro import data_manager as dm
24
20
  from pyhcal import metrics
25
21
  from pyhcal import figures
26
- from pyhcal.setup_utils import Builder
27
- #from hspf_tools.orm.monitoring_db import MonitoringDatabase
22
+ from pyhcal import setup_utils
28
23
 
29
24
 
30
- class calProject():
31
- #valid_models = Builder.valid_models()
32
- def __init__(self,project_location):
33
- self.project_location = Path(project_location)
34
-
35
-
36
- def new_project(self,model_name):
37
- return Builder(model_name) #self._builder.new_project(project_location,model_name)
38
-
39
- def load_project(self,model_name):
40
- if model_name in [f.name for f in self.project_location.iterdir() if f.is_dir()]:
41
- return calibrator(self.project_location.joinpath(model_name))
42
- else:
43
- answer = input("No calibration project for that model. Would you like to set on up? (yes or no")
44
- if answer.lower() in ["y","yes"]:
45
- self.new_project(model_name)
46
- return calibrator(self.project_location.joinpath(model_name))
47
- elif answer.lower() in ["n","no"]:
48
- return
49
- # Do other stuff
50
- else:
51
- print('please enter yes or no')
52
-
53
-
54
- def config_info(project_folder):
55
- project_path = Path(project_folder)
56
- info = {'project_path' : project_path,
57
- 'project_name' : project_path.name,
58
- 'model_path' : project_path.joinpath('model'),
59
- 'output_path' : project_path.joinpath('output'),
60
- 'start_date' : '1996-01-01',
61
- 'end_date' : '2100-01-01',
62
- }
63
- return info
25
+ def new_calibration(project_folder,model_name,download_station_data = True,run_model = True,convert_wdms = True):
26
+ return setup_utils.create_calibration_project(model_name,project_folder,download_station_data,run_model,convert_wdms)
64
27
 
65
28
 
29
+ def load(project_folder):
30
+ validate_project_folder(project_folder)
31
+ return calibrator(project_folder)
66
32
 
67
-
33
def validate_project_folder(project_folder):
    """Check that a calibration project folder has the expected layout.

    The folder itself, its ``model``, ``data`` and ``output`` sub-folders and
    a ``targets.csv`` file must all exist.

    Parameters
    ----------
    project_folder : str or path-like
        Root folder of the calibration project.

    Returns
    -------
    bool
        ``True`` when every required entry exists.

    Raises
    ------
    AssertionError
        Naming the first missing entry. The exception is raised explicitly
        (not via ``assert``) so the check still runs under ``python -O``.
    """
    project_path = Path(project_folder)
    required = (
        (project_path, 'Project folder does not exist'),
        (project_path.joinpath('model'), 'Model folder does not exist'),
        (project_path.joinpath('data'), 'Data folder does not exist'),
        (project_path.joinpath('output'), 'Output folder does not exist'),
        (project_path.joinpath('targets.csv'), 'targets.csv file does not exist in project folder'),
    )
    for path, message in required:
        if not path.exists():
            raise AssertionError(message)
    return True
41
+
68
42
  class calibrator:
69
43
  def __init__(self,project_folder):
70
44
  self.project_path = Path(project_folder)
@@ -72,13 +46,11 @@ class calibrator:
72
46
  self.model_path = self.project_path.joinpath('model')
73
47
  self.output_path = self.project_path.joinpath('output')
74
48
  self.run = None
75
- #self.winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
76
49
  self.start_date = '1996-01-01'
77
50
  self.end_date = '2100-01-01'
78
51
 
79
52
  # Load observational data into memory TODO: Convert to database?
80
53
  self.dm = dm.dataManager(self.project_path.joinpath('data'))
81
- #self.odm = MonitoringDatabase(cal.project_path.joinpath(cal.project_name))
82
54
 
83
55
  self.targets = None
84
56
  if self.project_path.joinpath('targets.csv').exists():
@@ -128,10 +100,7 @@ class calibrator:
128
100
  self.model.reports.wdms = self._wdms
129
101
  self.uci = deepcopy(self.model.uci) #uci to be manipulated
130
102
 
131
-
132
- # def setup_run(self, reach_ids = None, time_Step = 3,n = 1):
133
- # setup_utils.setup(self.uci,self.project_name,run = self.run,reach_ids = reach_ids,n = 1,time_step = 3)
134
-
103
+
135
104
  def run_model(self,name = None,overwrite_hbn = False): # NO STATE CHANGE
136
105
 
137
106
  if name is None:
@@ -146,8 +115,8 @@ class calibrator:
146
115
 
147
116
  uci_file = self.model_path.joinpath(name + '.uci').as_posix()
148
117
  self.uci.write(uci_file)
149
- winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
150
- subprocess.run([winHSPF,uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
118
+ #winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
119
+ subprocess.run([self.model.winHSPF,uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
151
120
 
152
121
 
153
122
  def get_simulated_output(self,reach_ids,constituent,time_step = 'YE'):
@@ -155,13 +124,13 @@ class calibrator:
155
124
  sim.name = 'simulated'
156
125
  return sim
157
126
 
158
- def get_observed_data(self,station_ids,constituent,time_step = 'YE'):
127
+ def get_observed_data(self,station_ids,constituent,time_step = 'YE',baseflow_percentage = None):
159
128
  obs = self.dm._get_data(station_ids,constituent,agg_period = time_step).sort_index(level = 'index')
160
129
  obs.name = 'observed'
161
130
  return obs
162
131
 
163
132
 
164
- def compare_simulated_observed(self,station_ids,reach_ids,constituent,time_step,flow_station_ids = None, dropna = False):
133
+ def compare_simulated_observed(self,station_ids,reach_ids,constituent,time_step,flow_station_ids = None, dropna = False, baseflow_percentage = None):
165
134
  obs = self.get_observed_data(station_ids,constituent,time_step)
166
135
  sim = self.get_simulated_output(reach_ids,constituent,time_step = time_step)
167
136
 
@@ -177,11 +146,21 @@ class calibrator:
177
146
  # matching flow data
178
147
  sim_flow = self.get_simulated_output(reach_ids,'Q',time_step)
179
148
  sim_flow.name = 'simulated_flow'
149
+ sim_flow.columns = ['simulated_flow']
180
150
  df = df.join(sim_flow,how = 'inner')
181
151
  obs_flow = self.get_observed_data(flow_station_ids,'Q',time_step)
182
152
  obs_flow.name = 'observed_flow'
153
+ obs_flow.columns = ['observed_flow']
183
154
  df = df.join(obs_flow,how='left')
184
- df.columns = ['simulated','observed','simulated_flow','observed_flow']
155
+
156
+ obs_baseflow = self.get_observed_data(flow_station_ids,'QB',time_step)
157
+ obs_baseflow.name = 'observed_baseflow'
158
+ obs_baseflow.columns = ['observed_baseflow']
159
+ df = df.join(obs_baseflow,how='left')
160
+ df.columns = ['simulated','observed','simulated_flow','observed_flow','observed_baseflow']
161
+
162
+ if baseflow_percentage is not None:
163
+ df = df.loc[(df['observed_baseflow']/df['observed_flow'] >= baseflow_percentage/100)]
185
164
 
186
165
  # Add metadata
187
166
  df.attrs['station_ids'] = station_ids
@@ -225,14 +204,7 @@ class calibrator:
225
204
  obs_flow.name = 'observed_flow'
226
205
  df = df.join(obs_flow,how='left')
227
206
  df.columns = ['simulated','observed','simulated_flow','observed_flow']
228
- # sim_flow = self.model.hbns.get_rchres_data('Q',reach_ids, 'cfs','D')
229
- # sim_flow.name = 'simulated_flow'
230
- # df = df.join(sim_flow,how = 'inner')
231
- # obs_flow = pd.concat([self.dm.get_data(station_id,'Q','cfs','D') for station_id in station_ids])
232
- # obs_flow.name = 'observed_flow'
233
- # df = df.join(obs_flow,how='left')
234
- # df.columns = ['simulated','observed','simulated_flow','observed_flow']
235
-
207
+ # Add metadata
236
208
  df.attrs['station_ids'] = station_ids
237
209
  df.attrs['reach_ids'] = reach_ids
238
210
  df.attrs['constituent'] = constituent
@@ -271,7 +243,35 @@ class calibrator:
271
243
 
272
244
  return df_agg
273
245
 
274
- def landcover(self,constituent):
246
+ def landcover(self,constituent,ref_landcover = None):
247
+ def normalize_group(group, ref_landcover):
248
+ if ref_landcover in group['LSID'].to_list():
249
+ ref_value = group.loc[group['LSID'] == ref_landcover, 'mean'].values[0]
250
+ group['mean_norm'] = group['mean'] / ref_value
251
+ else:
252
+ group['mean_norm'] = pd.NA
253
+ return group
254
+
255
+ if ref_landcover is None:
256
+ ref_landcover = self.targets['uci_name'][self.targets['dom_lc']==1].values[0]
257
+
258
+ df = self.model.hbns.get_perlnd_data(constituent)*2000 #tons/ac/yr to lbs/acr/year
259
+ df = df[(df.index >= self.start_date) & (df.index <= self.end_date)]
260
+ df = df.mean().rename('mean').to_frame()
261
+ df = pd.merge(df, self.uci.opnid_dict['PERLND'], left_index = True,right_on='TOPFST', how='inner')
262
+ df_normalized = pd.concat([normalize_group(group, ref_landcover) for _, group in list(df.groupby('metzone'))])
263
+
264
+ targets = self.targets.set_index('uci_name')[constituent]
265
+ targets = targets/targets.loc[ref_landcover]
266
+
267
+ df_normalized['target'] = targets.loc[df_normalized['LSID']].values/df_normalized['mean_norm']
268
+ df_normalized = df_normalized.fillna(1) #don't make any changes to 0 concentration perlands
269
+ #df_normalized = df_normalized.join(self.uci.table('PERLND','SED-PARM3'))
270
+ return df_normalized
271
+
272
+
273
+
274
+ def landcover_legacy(self,constituent):
275
275
  perlnd_names = self.model.uci.table('PERLND','GEN-INFO')['LSID']
276
276
  df = self.model.hbns.get_perlnd_data(constituent)*2000 #tons/ac/yr to lbs/acr/year
277
277
  df = df[(df.index >= self.start_date) & (df.index <= self.end_date)]
Binary file
Binary file
Binary file
pyhcal/modl_db.py CHANGED
@@ -8,74 +8,296 @@ Created on Thu May 1 09:51:51 2025
8
8
  from pathlib import Path
9
9
  import geopandas as gpd
10
10
  import pandas as pd
11
+ import duckdb
11
12
  #from hspf_tools.calibrator import etlWISKI, etlSWD
12
13
 
13
14
 
14
15
  #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
15
16
 
16
17
 
17
- stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg')).dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
18
+ _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
19
+ stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
18
20
  stations_wiski['source'] = 'wiski'
19
- stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg')).dropna(subset='opnids')[['id_code','true_opnid','opnids','comments','modeled','repository_name']]
21
+ _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
22
+ stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
20
23
  stations_equis['source'] = 'equis'
21
24
  stations_equis['wplmn_flag'] = 0
22
- stations_equis = stations_equis.rename(columns = {'id_code':'station_id'})
23
25
 
24
26
 
27
+ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlets.duckdb')
28
+
25
29
  MODL_DB = pd.concat([stations_wiski,stations_equis])
30
+ MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
31
+ MODL_DB = MODL_DB.dropna(subset='opnids')
32
+ MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
26
33
 
27
- database = """
28
- -- Stations/Locations table
29
- CREATE TABLE IF NOT EXISTS Station (
30
- stationPK INTEGER PRIMARY KEY AUTOINCREMENT,
31
- reachPK INTEGER REFERENCES Reach(reachPK),
32
- stationID TEXT NOT NULL,
33
- stationName TEXT,
34
- stationOrigin TEXT NOT NULL,
35
- latitude REAL,
36
- longitude REAL,
37
- stationType TEXT,
38
- UNIQUE(stationID, stationOrigin)
39
- );
40
-
41
- -- Station Associations table
42
- CREATE TABLE IF NOT EXISTS StationAssociations (
43
- stationPK INTEGER REFERENCES Station(stationPK),
44
- associationPK INTEGER REFERENCES Station(stationPK)
45
- );
46
-
47
- -- Station Aliases table
48
- CREATE TABLE IF NOT EXISTS StationAliases (
49
- stationPK INTEGER NOT NULL,
50
- aliasPK INTEGER NOT NULL,
51
- FOREIGN KEY (stationPK) REFERENCES Station(stationPK),
52
- FOREIGN KEY (aliasPK) REFERENCES Station(stationPK)
53
- );
54
-
55
- CREATE TABLE Reach (
56
- reachPK INTEGER PRIMARY KEY,
57
- modelName TEXT NOT NULL,
58
- reachID INTEGER NOT NULL,
59
- drainageArea FLOAT
60
- );
61
-
62
- CREATE TABLE Outlet (
63
- outletPK INTEGER PRIMARY KEY,
64
- outletName TEXT
65
- );
66
-
67
- -- Outlet-Station Associations table
68
- CREATE TABLE IF NOT EXISTS StationAssociations (
69
- outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
70
- stationPK INTEGER NOT NULL REFERENCES Station(reachPK)
71
- );
72
-
73
- -- Outlet-Reach Associations table
74
- CREATE TABLE IF NOT EXISTS StationAssociations (
75
- outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
76
- reachPK INTEGER NOT NULL REFERENCES Station(reachPK)
77
- exclude INTEGER NOT NULL
78
- );"""
34
def _reload():
    """Re-read the bundled station GeoPackages and rebuild the module tables.

    Rebinds the module-level ``_stations_wiski``, ``stations_wiski``,
    ``_stations_equis``, ``stations_equis`` and ``MODL_DB`` names. Rows
    without an ``opnids`` value (missing or blank) are dropped and duplicate
    ``(station_id, source)`` pairs are collapsed to their first occurrence.
    """
    global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB

    # Build the paths from components: a literal 'data\\file' only resolves on Windows.
    data_dir = Path(__file__).resolve().parent / 'data'
    shared_columns = ['station_id', 'true_opnid', 'opnids', 'comments', 'modeled', 'repository_name']

    _stations_wiski = gpd.read_file(str(data_dir / 'stations_wiski.gpkg'))
    # .copy() so the column assignments below act on an independent frame.
    stations_wiski = _stations_wiski.dropna(subset='opnids')[shared_columns + ['wplmn_flag']].copy()
    stations_wiski['source'] = 'wiski'

    _stations_equis = gpd.read_file(str(data_dir / 'stations_EQUIS.gpkg'))
    stations_equis = _stations_equis.dropna(subset='opnids')[shared_columns].copy()
    stations_equis['source'] = 'equis'
    stations_equis['wplmn_flag'] = 0

    MODL_DB = pd.concat([stations_wiski, stations_equis])
    # Treat whitespace-only opnids as missing before dropping them.
    MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('', pd.NA)
    MODL_DB = MODL_DB.dropna(subset='opnids')
    MODL_DB = MODL_DB.drop_duplicates(['station_id', 'source']).reset_index(drop=True)
48
+
49
+
50
+ def get_model_db(model_name: str):
51
+ return MODL_DB.query('repository_name == @model_name')
52
+
53
def split_opnids(opnids: list):
    """Flatten nested opnid tokens into a single list of non-negative ints.

    Each inner token is parsed as a float, truncated to int and stripped of
    its sign.
    """
    flattened = []
    for tokens in opnids:
        for token in tokens:
            flattened.append(abs(int(float(token))))
    return flattened
55
+
56
+ def valid_models():
57
+ return MODL_DB['repository_name'].unique().tolist()
58
+
59
+ def wplmn_station_opnids(model_name):
60
+ opnids = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
61
+ return split_opnids(opnids)
62
+
63
+ def wiski_station_opnids(model_name):
64
+ opnids = MODL_DB.query('repository_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
65
+ return split_opnids(opnids)
66
+
67
+ def equis_station_opnids(model_name):
68
+ opnids = MODL_DB.query('repository_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
69
+ return split_opnids(opnids)
70
+
71
+ def station_opnids(model_name):
72
+ opnids = MODL_DB.query('repository_name == @model_name')['opnids'].str.split(',').to_list()
73
+ return split_opnids(opnids)
74
+
75
+ def equis_stations(model_name):
76
+ return MODL_DB.query('repository_name == @model_name and source == "equis"')['station_id'].tolist()
77
+
78
+ def wiski_stations(model_name):
79
+ return MODL_DB.query('repository_name == @model_name and source == "wiski"')['station_id'].tolist()
80
+
81
+ def wplmn_stations(model_name):
82
+ return MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
83
+
84
+ def outlets(model_name):
85
+ return [group for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
86
+
87
+ def outlet_stations(model_name):
88
+ return [group['station_id'].to_list() for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
89
+
90
+ def _split_opnids(opnids: list):
91
+ return [int(float(j)) for i in opnids for j in i]
92
+
93
+ def connect(db_path):
94
+ Path(db_path).parent.mkdir(parents=True, exist_ok=True)
95
+ return duckdb.connect(db_path)
96
+
97
+
98
+ def init_db(db_path: str,reset: bool = False):
99
+ """
100
+ Initialize the DuckDB database: create staging and analytics schemas
101
+ """
102
+ db_path = Path(db_path)
103
+ if reset and db_path.exists():
104
+ db_path.unlink()
105
+
106
+ with connect(db_path.as_posix()) as con:
107
+ con.execute(OUTLETS_SCHEMA)
108
+
109
+
110
+
111
+ # Accessors:
112
+ def get_outlets_by_model(model_name: str):
113
+ with connect(DB_PATH) as con:
114
+ df = con.execute(
115
+ """
116
+ SELECT r.*
117
+ FROM station_reach_pairs r
118
+ WHERE r.repository_name = ?
119
+ """,
120
+ [model_name]
121
+ ).fetchdf()
122
+ return df
123
+
124
+ def get_outlets_by_reach(reach_id: int, model_name: str):
125
+ """
126
+ Return all outlet rows for outlets that include the given reach_id in the given model_name.
127
+ """
128
+ with connect(DB_PATH) as con:
129
+ df = con.execute(
130
+ """
131
+ SELECT r.*
132
+ FROM station_reach_pairs r
133
+ WHERE r.reach_id = ? AND r.repository_name = ?
134
+ """,
135
+ [reach_id, model_name]).fetchdf()
136
+ return df
137
+
138
def get_outlets_by_station(station_id: str, station_origin: str):
    """
    Return all ``station_reach_pairs`` rows for the given station.

    Rows are matched on both ``station_id`` and ``station_origin``; the
    result of the query against the database at ``DB_PATH`` is returned as a
    DataFrame.
    """
    with connect(DB_PATH) as con:
        df = con.execute(
            """
            SELECT r.*
            FROM station_reach_pairs r
            WHERE r.station_id = ? AND r.station_origin = ?
            """,
            [station_id, station_origin]).fetchdf()
    return df
152
+
153
+ # constructors:
154
+ def build_outlet_db(db_path: str = None):
155
+ if db_path is None:
156
+ db_path = DB_PATH
157
+ init_db(db_path,reset=True)
158
+ with connect(db_path) as con:
159
+ for index, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
160
+ repo_name = group['repository_name'].iloc[0]
161
+ add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
162
+
163
+ opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
164
+
165
+ for opnid in opnids:
166
+ if opnid < 0:
167
+ exclude = 1
168
+ else:
169
+ exclude = 0
170
+ add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
171
+
172
+ for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
173
+ add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
174
+
175
+
176
def create_outlet_schema(con, model_name : str):
    """Insert outlet, reach and station rows for a single model.

    One outlet is created per group returned by ``outlets(model_name)``; the
    outlet id is the group's position in that list. Negative opnids are
    stored as their absolute value with ``exclude = 1``.

    Parameters
    ----------
    con
        Open database connection passed through to ``add_outlet``,
        ``add_reach`` and ``add_station``.
    model_name : str
        Repository (model) name used to select the outlet groups.
    """
    # outlets() returns a plain list of DataFrames, so enumerate yields
    # (index, group) directly; it is called unqualified because this module
    # does not import itself.
    for index, group in enumerate(outlets(model_name)):
        repo_name = group['repository_name'].iloc[0]
        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)

        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))

        for opnid in opnids:
            exclude = 1 if opnid < 0 else 0
            add_reach(con, outlet_id = index, reach_id = abs(opnid), exclude = exclude, repository_name = repo_name)

        for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name = repo_name, comments = row['comments'])
192
+
193
+
194
def add_outlet(con,
               outlet_id: str,
               repository_name: str,
               outlet_name = None,
               notes = None):
    """
    Insert one row into the ``outlets`` table.

    ``outlet_id`` and ``repository_name`` must be supplied; ``outlet_name``
    and ``notes`` default to ``None``.
    """
    statement = "INSERT INTO outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)"
    values = [outlet_id, repository_name, outlet_name, notes]
    con.execute(statement, values)
206
+
207
def add_station(con,
                outlet_id: str,
                station_id: str,
                station_origin: str,
                true_opnid: str,
                repository_name: str,
                comments = None):
    """
    Insert a station membership row into ``outlet_stations``.

    Constraints declared by ``OUTLETS_SCHEMA``:
    - UNIQUE (station_id, station_origin): a station may appear only once
      per origin across all outlets.
    - outlet_id, true_opnid and repository_name are NOT NULL.
    ``comments`` is optional and defaults to ``None``.
    """
    con.execute(
        """INSERT INTO outlet_stations
        (outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
        VALUES (?, ?, ?, ?, ?, ?)""",
        [outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
    )
226
+
227
def add_reach(con,
              outlet_id: str,
              reach_id: str,
              repository_name: str,
              exclude: int = 0):
    """
    Insert a reach membership row into ``outlet_reaches``.

    - repository_name is required (NOT NULL in ``OUTLETS_SCHEMA``); the table
      declares no primary key, so the same reach may be attached to several
      outlets.
    - exclude = 1 marks the reach as excluded; the value is coerced with
      ``int()`` so booleans are stored as 0/1.
    """
    con.execute(
        """INSERT INTO outlet_reaches (outlet_id, reach_id, repository_name, exclude)
        VALUES (?, ?, ?, ?)""",
        [outlet_id, reach_id, repository_name, int(exclude)]
    )
242
+
243
+
244
# DDL for the outlet database. The view's select list must not end with a
# trailing comma: DuckDB tolerates one, SQLite rejects it, and the schema
# header states compatibility with both.
OUTLETS_SCHEMA = """-- schema.sql
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
-- Compatible with DuckDB and SQLite.

-- Table 1: outlets
-- Represents a logical grouping that ties stations and reaches together.
CREATE TABLE IF NOT EXISTS outlets (
  outlet_id TEXT PRIMARY KEY,
  repository_name TEXT NOT NULL,
  outlet_name TEXT,
  notes TEXT -- optional: general notes about the outlet grouping
);

-- Table 2: outlet_stations
-- One-to-many: outlet -> stations
CREATE TABLE IF NOT EXISTS outlet_stations (
  outlet_id TEXT NOT NULL,
  station_id TEXT NOT NULL,
  station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
  repository_name TEXT NOT NULL, -- repository model the station is physically located in
  true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
  comments TEXT, -- Per-station comments, issues, etc.
  CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
  FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Table 3: outlet_reaches
-- One-to-many: outlet -> reaches
-- A reach can appear in multiple outlets, enabling many-to-many overall.
CREATE TABLE IF NOT EXISTS outlet_reaches (
  outlet_id TEXT NOT NULL,
  reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
  repository_name TEXT NOT NULL, -- optional: where the mapping comes from
  exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
  FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Useful views:

-- View: station_reach_pairs
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
CREATE VIEW IF NOT EXISTS station_reach_pairs AS
SELECT
  s.outlet_id,
  s.station_id,
  s.station_origin,
  r.reach_id,
  r.exclude,
  r.repository_name
FROM outlet_stations s
JOIN outlet_reaches r
  ON s.outlet_id = r.outlet_id;

-- Example indexes (SQLite will accept CREATE INDEX; DuckDB treats them as metadata but it’s okay to define):
CREATE INDEX IF NOT EXISTS idx_outlet_stations_outlet ON outlet_stations(outlet_id);
CREATE INDEX IF NOT EXISTS idx_outlet_reaches_outlet ON outlet_reaches(outlet_id);
CREATE INDEX IF NOT EXISTS idx_station_reach_pairs_station ON outlet_stations(station_id);"""
79
301
 
80
302
 
81
303
  #row = modl_db.MODL_DB.iloc[0]
pyhcal/repository.py CHANGED
@@ -6,7 +6,7 @@ Created on Wed Nov 27 09:16:30 2024
6
6
  """
7
7
 
8
8
  import pandas as pd
9
- from pyhcal.modl_db import MODL_DB
9
+ from pyhcal import modl_db
10
10
  from pathlib import Path
11
11
  import shutil
12
12
 
@@ -20,7 +20,7 @@ class Repository():
20
20
  'USGS HUC-4':'string',
21
21
  'USGS HUC-2':'string'})
22
22
 
23
- MODL_DB = MODL_DB
23
+
24
24
 
25
25
 
26
26
 
@@ -38,7 +38,7 @@ class Repository():
38
38
 
39
39
  self.REPOSITORY_PATH = repository_path
40
40
  huc_directory = self.HUC_DIRECTORY.loc[self.HUC_DIRECTORY['Repository_HUC8 Name'] == model_name]
41
- self.modl_db = self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
41
+ self.modl_db = modl_db.get_model_db(model_name) #self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
42
42
  #self.modl_db = pd.concat([self.MODL_DB.loc[self.MODL_DB['repository_name'].str.startswith(huc8_id,na=False)] for huc8_id in huc8_ids])
43
43
  self.model_name = model_name
44
44
  self.huc8_ids = list(huc_directory['USGS HUC-8'])
@@ -48,6 +48,9 @@ class Repository():
48
48
  self.uci_file = self.repo_folder.joinpath('HSPF','.'.join([self.model_name,'uci']))
49
49
  self.wdm_files = [item for item in self.repo_folder.joinpath('HSPF').iterdir() if (item.name.endswith('.wdm')) | (item.name.endswith('.WDM'))]
50
50
  self.shapefiles = {item.name.split('.')[0].split('_')[-1]:item for item in self.repo_folder.joinpath('GIS').iterdir() if (item.name.endswith('.shp')) | (item.name.endswith('.SHP'))}
51
+ self.wiski_stations = modl_db.wiski_stations(model_name)
52
+ self.equis_stations = modl_db.equis_stations(model_name)
53
+
51
54
 
52
55
  def copy(self,copy_path):
53
56
  copy_path = Path(copy_path)
@@ -73,7 +76,7 @@ class Repository():
73
76
  files = [file for file in shapefile.parent.iterdir() if file.stem == shapefile.stem]
74
77
  [shutil.copyfile(file,Path(copy_path).joinpath(Path(file).name)) for file in files]
75
78
 
76
-
79
+
77
80
 
78
81
  def build_folders(trg_path):
79
82
 
pyhcal/setup_utils.py CHANGED
@@ -8,6 +8,7 @@ from mpcaHydro.data_manager import dataManager
8
8
  from hspf.wdmReader import readWDM
9
9
  from hspf.uci import UCI
10
10
  from pyhcal.repository import Repository
11
+ from pyhcal import modl_db
11
12
 
12
13
  import numpy as np
13
14
  import pandas as pd
@@ -38,6 +39,8 @@ class Builder():
38
39
  self.new_uci = None
39
40
  self.uci = None
40
41
  self.dm = None
42
+ self.calibration_reaches = modl_db.wplmn_station_opnids(model_name)
43
+
41
44
 
42
45
  def valid_models():
43
46
  return Repository.valid_models()
@@ -65,44 +68,33 @@ class Builder():
65
68
  self.download_station_data()
66
69
  self.uci.build_targets().to_csv(self.project_path.joinpath('targets.csv'))
67
70
 
68
- # if not self.project_path.joinpath('model',self.project_name + '_0-0.hbn').exists():
69
- # self.run_model()
70
-
71
- #TODO move to UCI class
72
71
 
73
72
  def format_uci(self,calibration_reaches = None):
74
-
75
-
73
+ if calibration_reaches is None:
74
+ calibration_reaches = self.calibration_reaches
75
+
76
76
  setup_files(self.uci,self.project_name,run = 0)
77
77
  setup_geninfo(self.uci)
78
-
79
- if calibration_reaches is None:
80
- calibration_reaches = self.repository.modl_db.loc[self.repository.modl_db['wplmn_flag'] == 1]['opnids'].str.split(',').to_list()
81
- calibration_reaches = [abs(int(j)) for i in calibration_reaches for j in i]
82
- #calibration_reaches = self.uci.network.station_order(calibration_reaches)[-1][0]
83
-
84
-
85
- self.uci.initialize(name = self.project_name + '_0')
86
- # for reaches in self._MODL_DB['opnids'].str.split('+').to_list():
87
- # [calibration_reaches.append(int(reach)) for reach in reaches if ~pd.isna(reach)]
88
-
78
+ self.uci.initialize(name = self.project_name + '_0')
89
79
  setup_binaryinfo(self.uci,reach_ids = calibration_reaches)
90
80
  setup_qualid(self.uci)
91
81
  self.uci.write(self.new_uci)
92
82
 
93
- # Download observation data
94
- # Sources/Databases WISKI and EQUIS (DELTA databases?)
95
- #TODO: use a single WISKI etl script for csg and wplmn data
96
-
97
-
98
- def download_station_data(self):#:,station_id,source_id):
99
- stations = self.repository.modl_db.drop_duplicates(subset = ['source','station_id'])
100
- if not stations.empty:
101
- for index, row in stations.iterrows():
102
- assert(row['source'].lower() in ['wiski','equis','swd'])
103
- station_id = row['station_id']
104
- self.dm.download_station_data(station_id,row['source'].lower())
105
-
83
+
84
def download_station_data(self, start_year=None, end_year=None):
    """Download observation data for every station tied to this model.

    Station ids are read from ``self.repository.equis_stations`` and
    ``self.repository.wiski_stations``. EQuIS stations are requested from
    ``'equis'`` when ``self.dm.credentials_exist()`` is true, otherwise from
    ``'swd'``. WISKI stations are always requested from ``'wiski'``.

    Parameters
    ----------
    start_year, end_year : optional
        Accepted for interface compatibility but not used by this method.
        They default to ``None`` so the argument-less call
        ``self.download_station_data()`` keeps working.
    """
    equis_stations = self.repository.equis_stations
    wiski_stations = self.repository.wiski_stations

    if len(equis_stations) > 0:
        # Only probe for credentials when there is something to download.
        station_origin = 'equis' if self.dm.credentials_exist() else 'swd'
        for station_id in equis_stations:
            self.dm.download_station_data(station_id, station_origin, True)

    for station_id in wiski_stations:
        self.dm.download_station_data(station_id, 'wiski', True)
+
106
98
 
107
99
  def convert_wdms(self):
108
100
  copy_path = Path(self.project_path.joinpath('model'))
@@ -111,11 +103,31 @@ class Builder():
111
103
  readWDM(wdm_file,
112
104
  copy_path.joinpath(wdm_file.name.replace('.wdm','.hdf5').replace('.WDM','hdf5')))
113
105
 
114
- def run_model(self):
106
+ def run_model(self, wait_for_completion=True):
107
+ #run_model(self.new_uci, wait_for_completion)
115
108
  # Run the uci file
116
109
  winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
117
110
  subprocess.run([winHSPF,self.new_uci]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
118
111
 
112
def run_model(uci_file, wait_for_completion=True):
    """Run WinHspfLt on a UCI file.

    Parameters
    ----------
    uci_file : str or pathlib.Path
        UCI file handed to the executable; converted to a POSIX-style path.
    wait_for_completion : bool, default True
        When true, block until the process exits (``subprocess.run``).
        When false, start the process with ``subprocess.Popen`` and return
        immediately.
    """
    # Local import: this block needs sys.platform and must not depend on a
    # module-level import it cannot see.
    import sys

    winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFlt\\WinHspfLt.exe'

    # Path() accepts both strings and Path objects, so either may be passed.
    args = [winHSPF, Path(uci_file).as_posix()]

    if wait_for_completion:
        subprocess.run(args)
        return

    if sys.platform.startswith('win'):
        # CREATE_NO_WINDOW only exists on Windows; it suppresses the console window.
        subprocess.Popen(args, creationflags=subprocess.CREATE_NO_WINDOW)
    else:
        subprocess.Popen(args)
119
131
 
120
132
 
121
133
 
@@ -134,11 +146,12 @@ def setup_files(uci,name,run,n = 5):
134
146
  table = uci.table('FILES',drop_comments = False)
135
147
  for index, row in table.iterrows():
136
148
  filename = Path(row['FILENAME'])
137
- if filename.suffix in ['.wdm','.ech','.out']:
149
+ if filename.suffix in ['.wdm','.ech','.out','.mut']:
138
150
  table.loc[index,'FILENAME'] = filename.name
139
- if filename.suffix in ['.hbn']:
151
+ elif filename.suffix in ['.hbn']:
140
152
  table.loc[index,'FILENAME'] = filename.name
141
- if filename.suffix in ['.plt']:
153
+ #if filename.suffix in ['.plt']:
154
+ else:
142
155
  table.drop(index,inplace = True)
143
156
 
144
157
  # Get new binary number and create new BINO rows
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyhcal
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: Python package for calibrating MPCA HSPF models
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/pyhcal
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -0,0 +1,15 @@
1
+ pyhcal/__init__.py,sha256=4TEpGD-PfEY8yK-od8DpEMA4_iQ-q9y0PBvROXSPdB0,94
2
+ pyhcal/calibrators.py,sha256=lDxvXpjLj1Xhp-NPCVUyyWeaMTZdP5FA9Bh-PuwrOHs,30383
3
+ pyhcal/figures.py,sha256=Iu7LaN_i2IuDA_nfxj-a8AkG-FTLZVicJ3-efIs5OiE,45534
4
+ pyhcal/metrics.py,sha256=GUGHd-op-g1Foj8wnS_JVURSms4ifcC0a5h8ketQ29I,17911
5
+ pyhcal/modl_db.py,sha256=z8trT387Gcbg15xAqAcW62c1dbgSSjymv3qOyH3Dyx8,12569
6
+ pyhcal/repository.py,sha256=VPHeSbrnFAG1F6tFXdnI2C72xm3dTPb7Z3rkPr--srI,4603
7
+ pyhcal/setup_utils.py,sha256=sIINj8_h-MgQhcCH_u95nFTJVw0QEgnR0fZgAitX1iQ,30398
8
+ pyhcal/data/HUC_Names.csv,sha256=UGmd3Q5E8DyFWggXzaXWpsRze7sFyrlpYqaYpMWAiGM,18946
9
+ pyhcal/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
10
+ pyhcal/data/outlets.duckdb,sha256=zB1t2NsnrUed-3HOT61DYmtOy89PZ9uWZAwmAEUhleY,2895872
11
+ pyhcal/data/stations_EQUIS.gpkg,sha256=SPyxGK5H3bbqMikv45n_ul-KULPNWyad9XcDq_9mXwM,2019328
12
+ pyhcal/data/stations_wiski.gpkg,sha256=vlh03SihjlQMIbn25rfPqOKQtJsSYS2FMR65zAznTQQ,905216
13
+ pyhcal-1.1.0.dist-info/METADATA,sha256=xRX7vV5LA1UxSsAZze6VMRzTJI6ePZ749Pdf8h-VQgw,560
14
+ pyhcal-1.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
15
+ pyhcal-1.1.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,14 +0,0 @@
1
- pyhcal/__init__.py,sha256=4TEpGD-PfEY8yK-od8DpEMA4_iQ-q9y0PBvROXSPdB0,94
2
- pyhcal/calibrators.py,sha256=PIQ32zVmZlgKCg3XTyEKfPyYFvtQYgdal1QOrVf918I,29958
3
- pyhcal/figures.py,sha256=Iu7LaN_i2IuDA_nfxj-a8AkG-FTLZVicJ3-efIs5OiE,45534
4
- pyhcal/metrics.py,sha256=GUGHd-op-g1Foj8wnS_JVURSms4ifcC0a5h8ketQ29I,17911
5
- pyhcal/modl_db.py,sha256=0NoWNNxfbF1fy43geOO4EJt-Uev6Og4Hw-KNn9xvAys,3163
6
- pyhcal/repository.py,sha256=4Ep6htWIAWEnLDs-z3gjCKpl7WEEl3t6Kt98rTS9QB8,4456
7
- pyhcal/setup_utils.py,sha256=Hl244HimWFVJYLXtplmWlXleiTQ6vN-PhY7WXyvZciY,29861
8
- pyhcal/data/HUC_Names.csv,sha256=UGmd3Q5E8DyFWggXzaXWpsRze7sFyrlpYqaYpMWAiGM,18946
9
- pyhcal/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
10
- pyhcal/data/stations_EQUIS.gpkg,sha256=KyWMRbNoSSMDB1IweoLhyFeKu1P-YoYpQbXna6VYp8I,3284992
11
- pyhcal/data/stations_wiski.gpkg,sha256=HlH5EwUo9qEEoxtbEpL0cJZHEW57XEUA-ROy4sN7eE4,892928
12
- pyhcal-1.0.1.dist-info/METADATA,sha256=N4mSt9_1dhUsS8i7_61Fv6JPFfaAnteTJrrr0qkBFDU,560
13
- pyhcal-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- pyhcal-1.0.1.dist-info/RECORD,,