pyhcal 1.0.2.tar.gz → 1.1.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyhcal-1.1.1/ERROR.FIL ADDED
@@ -0,0 +1,6 @@
+ 11:01:39.908 : LOG_MSG:ERROR.FIL OPENED
+ 11:01:39.909 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 11:01:39.910 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pyhcal
- Version: 1.0.2
+ Version: 1.1.1
  Summary: Python package for calibrating MPCA HSPF models
  Project-URL: Homepage, https://github.com/mfratkin1/pyhcal
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
  [project]
  name = "pyhcal"
  urls = { "Homepage" = "https://github.com/mfratkin1/pyhcal" } # ? Add this!
- version = "1.0.2"
+ version = "1.1.1"
  dependencies = [
      "hspf",
      "mpcaHydro",
@@ -0,0 +1,78 @@
+ 16:15:00.416 : LOG_MSG:ERROR.FIL OPENED
+ 16:15:00.417 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 16:15:00.417 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:06:13.144 : LOG_MSG:ERROR.FIL OPENED
+ 18:06:13.145 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:06:13.145 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:09:53.704 : LOG_MSG:ERROR.FIL OPENED
+ 18:09:53.705 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:09:53.705 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:19:02.896 : LOG_MSG:ERROR.FIL OPENED
+ 18:19:02.896 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:19:02.896 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:19:05.790 : LOG_MSG:ERROR.FIL OPENED
+ 18:19:05.790 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:19:05.790 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:32:46.775 : LOG_MSG:ERROR.FIL OPENED
+ 18:32:46.776 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:32:46.776 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:38:43.431 : LOG_MSG:ERROR.FIL OPENED
+ 18:38:43.432 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:38:43.432 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:44:23.798 : LOG_MSG:ERROR.FIL OPENED
+ 18:44:23.799 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:44:23.800 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:52:12.607 : LOG_MSG:ERROR.FIL OPENED
+ 18:52:12.608 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:52:12.608 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 18:59:18.608 : LOG_MSG:ERROR.FIL OPENED
+ 18:59:18.609 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 18:59:18.609 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 19:05:51.046 : LOG_MSG:ERROR.FIL OPENED
+ 19:05:51.047 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 19:05:51.047 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 19:14:05.088 : LOG_MSG:ERROR.FIL OPENED
+ 19:14:05.088 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 19:14:05.089 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
+ 19:18:33.712 : LOG_MSG:ERROR.FIL OPENED
+ 19:18:33.713 : HASS_ENT:F90_WDBOPNR:entr:WDMSFL,RWFLG: 100 1 C:\Users\mfratki\Documents\github\pyHSPF\src\hspf\bin\WinHSPFLt\hspfmsg.wdm
+ 19:18:33.713 : HASS_ENT:F90_WDBOPNR:exit:WDMSFL,RETCOD 100 0
+ FILBLK RETCOD 0
+ wdmfl 0 0 0 0
+ FILBLK RETCOD 0
@@ -9,11 +9,8 @@ from copy import deepcopy
  import subprocess
  #non-standard imports
  import pandas as pd
- pd.set_option('display.max_columns', None)
- pd.set_option('display.max_rows', None)
- # to reset this
- pd.reset_option('display.max_columns')
  import numpy as np
+ import geopandas as gpd
  from pathlib import Path

  #My packages
@@ -23,62 +20,39 @@ from hspf import helpers
  from mpcaHydro import data_manager as dm
  from pyhcal import metrics
  from pyhcal import figures
- from pyhcal.setup_utils import Builder
- #from hspf_tools.orm.monitoring_db import MonitoringDatabase
+ from pyhcal import setup_utils
+ from pyhcal.mappers import Mapper

+ def new_calibration(project_folder,model_name,download_station_data = True,run_model = True,convert_wdms = True):
+     return setup_utils.create_calibration_project(model_name,project_folder,download_station_data,run_model,convert_wdms)

- class calProject():
-     #valid_models = Builder.valid_models()
-     def __init__(self,project_location):
-         self.project_location = Path(project_location)
-
-
-     def new_project(self,model_name):
-         return Builder(model_name) #self._builder.new_project(project_location,model_name)
-
-     def load_project(self,model_name):
-         if model_name in [f.name for f in self.project_location.iterdir() if f.is_dir()]:
-             return calibrator(self.project_location.joinpath(model_name))
-         else:
-             answer = input("No calibration project for that model. Would you like to set on up? (yes or no")
-             if answer.lower() in ["y","yes"]:
-                 self.new_project(model_name)
-                 return calibrator(self.project_location.joinpath(model_name))
-             elif answer.lower() in ["n","no"]:
-                 return
-                 # Do other stuff
-             else:
-                 print('please enter yes or no')
-
-
- def config_info(project_folder):
-     project_path = Path(project_folder)
-     info = {'project_path' : project_path,
-             'project_name' : project_path.name,
-             'model_path' : project_path.joinpath('model'),
-             'output_path' : project_path.joinpath('output'),
-             'start_date' : '1996-01-01',
-             'end_date' : '2100-01-01',
-             }
-     return info

+ def load(project_folder):
+     validate_project_folder(project_folder)
+     return calibrator(project_folder)

-
-
+ def validate_project_folder(project_folder):
+     project_path = Path(project_folder)
+     assert project_path.exists(), 'Project folder does not exist'
+     assert project_path.joinpath('model').exists(), 'Model folder does not exist'
+     assert project_path.joinpath('data').exists(), 'Data folder does not exist'
+     assert project_path.joinpath('output').exists(), 'Output folder does not exist'
+     assert project_path.joinpath('targets.csv').exists(), 'targets.csv file does not exist in project folder'
+     return True
+
  class calibrator:
      def __init__(self,project_folder):
          self.project_path = Path(project_folder)
          self.project_name = self.project_path.name
          self.model_path = self.project_path.joinpath('model')
          self.output_path = self.project_path.joinpath('output')
+         self.gis_path = self.project_path.joinpath('gis')
          self.run = None
-         #self.winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
          self.start_date = '1996-01-01'
          self.end_date = '2100-01-01'

          # Load observational data into memory TODO: Convert to database?
          self.dm = dm.dataManager(self.project_path.joinpath('data'))
-         #self.odm = MonitoringDatabase(cal.project_path.joinpath(cal.project_name))

          self.targets = None
          if self.project_path.joinpath('targets.csv').exists():
@@ -86,25 +60,48 @@ class calibrator:

          self.MODL_DB = pd.read_csv(self.project_path.joinpath('_'.join([self.project_name ,'MODL_DB.csv'])))

+         # A lot of effort to try and include the subwatershed gdf if it exists. TODO: refactor
+         self.subwatershed_gdf_filepath = self.gis_path.joinpath('_'.join([self.project_name ,'Subwatersheds.shp']))
+         if self.subwatershed_gdf_filepath.exists():
+             self.subwatershed_gdf = gpd.read_file(self.subwatershed_gdf_filepath)
+             if 'SubID' in self.subwatershed_gdf.columns:
+                 self.subwatershed_gdf = self.subwatershed_gdf.set_index('SubID')
+             else:
+                 print("Warning: 'SubID' column not found in subwatershed shapefile. GIS operations will not function correctly.")
+         else:
+             self.subwatershed_gdf = None
+
          self.model = None
          self._wdms = None
          self.uci = None
-
+         self.mapper = None
      ## Input/Output methods
+
+
      def initialize(self,reach_ids,default = 4):

          self.uci.update_table(default,'RCHRES','BINARY-INFO',0,columns = ['HEATPR','HYDRPR','SEDPR','OXRXPR','NUTRPR','PLNKPR'],operator = 'set')
          self.uci.update_table(2,'RCHRES','BINARY-INFO',0,columns = ['HEATPR','HYDRPR','SEDPR','OXRXPR','NUTRPR','PLNKPR'],opnids = reach_ids,operator = 'set')

          self.uci.write(self.model.uci_file)
-         winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
-         subprocess.run([winHSPF,self.model.uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
+         subprocess.run([self.model.winHSPF,self.model.uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)

      def set_dates(self, start_date = '1996-01-01',end_date ='2100-01-01'):
          self.start_date = start_date
          self.end_date = end_date

-
+     def _load_gdf(self,catchment_id_column = 'SubID'):
+         self.subwatershed_gdf_filepath = self.gis_path.joinpath('_'.join([self.project_name ,'Subwatersheds.shp']))
+         if self.subwatershed_gdf_filepath.exists():
+             self.subwatershed_gdf = gpd.read_file(self.subwatershed_gdf_filepath)
+             self.subwatershed_gdf = self.subwatershed_gdf.set_index(catchment_id_column)
+         else:
+             self.subwatershed_gdf = None
+
+     def _update_mapper(self):
+         if self.subwatershed_gdf is not None:
+             self.mapper = Mapper(self.project_name,self.uci,self.subwatershed_gdf,hbn = self.model.hbns)
+
      def load_model(self,name):

          if isinstance(name,int): # Default approach
@@ -126,12 +123,11 @@

          self.model.wdms = self._wdms
          self.model.reports.wdms = self._wdms
-         self.uci = deepcopy(self.model.uci) #uci to be manipulated
+         self.uci = deepcopy(self.model.uci) #uci to be manipulated
+
+         if self.subwatershed_gdf is not None:
+             self.mapper = Mapper(self.project_name,self.uci,self.subwatershed_gdf,hbn = self.model.hbns)

-
-     # def setup_run(self, reach_ids = None, time_Step = 3,n = 1):
-     #     setup_utils.setup(self.uci,self.project_name,run = self.run,reach_ids = reach_ids,n = 1,time_step = 3)
-
      def run_model(self,name = None,overwrite_hbn = False): # NO STATE CHANGE

          if name is None:
@@ -146,8 +142,8 @@

          uci_file = self.model_path.joinpath(name + '.uci').as_posix()
          self.uci.write(uci_file)
-         winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
-         subprocess.run([winHSPF,uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
+         #winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
+         subprocess.run([self.model.winHSPF,uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)


      def get_simulated_output(self,reach_ids,constituent,time_step = 'YE'):
@@ -155,13 +151,13 @@
          sim.name = 'simulated'
          return sim

-     def get_observed_data(self,station_ids,constituent,time_step = 'YE'):
-         obs = self.dm._get_data(station_ids,constituent,agg_period = time_step).sort_index(level = 'index')
+     def get_observed_data(self,station_ids,constituent,time_step = 'YE',baseflow_percentage = None):
+         obs = self.dm.get_station_data(station_ids,constituent,agg_period = time_step)['observed'].sort_index(level = 'index')
          obs.name = 'observed'
          return obs


-     def compare_simulated_observed(self,station_ids,reach_ids,constituent,time_step,flow_station_ids = None, dropna = False):
+     def compare_simulated_observed(self,station_ids,reach_ids,constituent,time_step,flow_station_ids = None, dropna = False, baseflow_percentage = None):
          obs = self.get_observed_data(station_ids,constituent,time_step)
          sim = self.get_simulated_output(reach_ids,constituent,time_step = time_step)

@@ -177,11 +173,21 @@
          # matching flow data
          sim_flow = self.get_simulated_output(reach_ids,'Q',time_step)
          sim_flow.name = 'simulated_flow'
+         sim_flow.columns = ['simulated_flow']
          df = df.join(sim_flow,how = 'inner')
          obs_flow = self.get_observed_data(flow_station_ids,'Q',time_step)
          obs_flow.name = 'observed_flow'
+         obs_flow.columns = ['observed_flow']
          df = df.join(obs_flow,how='left')
-         df.columns = ['simulated','observed','simulated_flow','observed_flow']
+
+         obs_baseflow = self.get_observed_data(flow_station_ids,'QB',time_step)
+         obs_baseflow.name = 'observed_baseflow'
+         obs_baseflow.columns = ['observed_baseflow']
+         df = df.join(obs_baseflow,how='left')
+         df.columns = ['simulated','observed','simulated_flow','observed_flow','observed_baseflow']
+
+         if baseflow_percentage is not None:
+             df = df.loc[(df['observed_baseflow']/df['observed_flow'] >= baseflow_percentage/100)]

          # Add metadata
          df.attrs['station_ids'] = station_ids
@@ -225,14 +231,7 @@
          obs_flow.name = 'observed_flow'
          df = df.join(obs_flow,how='left')
          df.columns = ['simulated','observed','simulated_flow','observed_flow']
-         # sim_flow = self.model.hbns.get_rchres_data('Q',reach_ids, 'cfs','D')
-         # sim_flow.name = 'simulated_flow'
-         # df = df.join(sim_flow,how = 'inner')
-         # obs_flow = pd.concat([self.dm.get_data(station_id,'Q','cfs','D') for station_id in station_ids])
-         # obs_flow.name = 'observed_flow'
-         # df = df.join(obs_flow,how='left')
-         # df.columns = ['simulated','observed','simulated_flow','observed_flow']
-
+         # Add metadata
          df.attrs['station_ids'] = station_ids
          df.attrs['reach_ids'] = reach_ids
          df.attrs['constituent'] = constituent
@@ -271,7 +270,7 @@

          return df_agg

-     def landcover(cal,constituent,ref_landcover = None):
+     def landcover(self,constituent,ref_landcover = None):
          def normalize_group(group, ref_landcover):
              if ref_landcover in group['LSID'].to_list():
                  ref_value = group.loc[group['LSID'] == ref_landcover, 'mean'].values[0]
@@ -281,19 +280,20 @@
              return group

          if ref_landcover is None:
-             ref_landcover = cal.targets['uci_name'][cal.targets['dom_lc']==1].values[0]
+             ref_landcover = self.targets['uci_name'][self.targets['dom_lc']==1].values[0]

-         df = cal.model.hbns.get_perlnd_data(constituent)*2000 #tons/ac/yr to lbs/acr/year
-         df = df[(df.index >= cal.start_date) & (df.index <= cal.end_date)]
+         df = self.model.hbns.get_perlnd_data(constituent)*2000 #tons/ac/yr to lbs/ac/yr
+         df = df[(df.index >= self.start_date) & (df.index <= self.end_date)]
          df = df.mean().rename('mean').to_frame()
-         df = pd.merge(df, cal.uci.opnid_dict['PERLND'], left_index = True,right_on='TOPFST', how='inner')
+         df = pd.merge(df, self.uci.opnid_dict['PERLND'], left_index = True,right_on='TOPFST', how='inner')
          df_normalized = pd.concat([normalize_group(group, ref_landcover) for _, group in list(df.groupby('metzone'))])

-         targets = cal.targets.set_index('uci_name')[constituent]
+         targets = self.targets.set_index('uci_name')[constituent]
          targets = targets/targets.loc[ref_landcover]

          df_normalized['target'] = targets.loc[df_normalized['LSID']].values/df_normalized['mean_norm']
          df_normalized = df_normalized.fillna(1) #don't make any changes to 0 concentration perlands
+         #df_normalized = df_normalized.join(self.uci.table('PERLND','SED-PARM3'))
          return df_normalized

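The calibrator module's public surface changes in this release: the interactive calProject class gives way to module-level new_calibration(), load(), and validate_project_folder() helpers, and compare_simulated_observed() can now filter to baseflow-dominated periods. A minimal usage sketch, assuming this module lives at pyhcal.calibrator; the project path, IDs, and constituent code below are placeholders, not values from this diff:

from pyhcal import calibrator

cal = calibrator.load('C:/projects/example')     # validate_project_folder raises AssertionError on a bad layout
cal.load_model(0)                                # an integer name takes load_model's default-approach branch
df = cal.compare_simulated_observed(
    station_ids=['S000-000'],                    # placeholder station id
    reach_ids=[650],
    constituent='TSS',                           # placeholder constituent code
    time_step='YE',
    flow_station_ids=['S000-000'],
    baseflow_percentage=50,                      # keep rows where observed baseflow >= 50% of observed flow
)

With flow_station_ids given, the returned frame carries simulated/observed columns plus matching flow and baseflow columns, and df.attrs records the station IDs, reach IDs, and constituent used.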
@@ -0,0 +1,105 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Jan 27 11:20:47 2025
+
+ @author: mfratki
+ """
+
+ from pyhcal.repository import Repository
+ from hspf.uci import UCI
+ import geopandas as gpd
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ from pathlib import Path
+
+ # Try to load statewide subwatersheds from data folder,
+
+ # try:
+ #     SUBWATERSHEDS = gpd.read_file(str(Path(__file__).resolve().parent/'data\\statewide_subwatersheds.gpkg'))
+ # except:
+ #     print("Could not load statewide subwatersheds. Please ensure the file 'statewide_subwatersheds.gpkg' is located in the 'data' folder of the pyhcal package.")
+
+
+
+ class uciMapper():
+     def __init__(self,model_names,gis_layer,huc6 = False):
+         self.mappers = []
+
+         if huc6:
+             model_names = Repository.HUC_DIRECTORY.loc[Repository.HUC_DIRECTORY['Repository_HUC6 Name'].isin(model_names),'Repository_HUC8 Name']
+             model_names = model_names.loc[model_names.isin(Repository.valid_models())].values
+
+         for model_name in model_names:
+             repo = Repository(model_name)
+             uci = UCI(repo.uci_file)
+             #gis_layer = SUBWATERSHEDS.loc[SUBWATERSHEDS['repo_name'] == model_name,:]
+             #gis_layer.set_index('SubID',inplace=True)
+             self.mappers.append(Mapper(model_name,uci,gis_layer))
+
+     def map_parameter(self,operation,table_name,parameter,table_id=0):
+         table = self.join_table(operation,table_name,parameter,table_id)
+         fig, ax = plt.subplots()
+         #[table.plot(column = parameter,ax = ax) for table in tables]
+         table.plot(column = parameter,ax = ax,cmap='viridis',legend=True)
+         plt.title(parameter)
+
+     def join_table(self,operation,table_name,parameter,table_id=0):
+         tables = [mapper.join_table(operation,table_name,parameter,table_id) for mapper in self.mappers]
+         table = pd.concat(tables)
+         return table
+
+ class Mapper():
+     def __init__(self,model_name,uci,subwatershed_gdf,hbn = None):
+         self.model_name = model_name
+         self.uci = uci
+         self.hbn = hbn
+         # if subwatershed_gdf is None:
+         #     subwatershed_gdf = SUBWATERSHEDS.loc[SUBWATERSHEDS['repo_name'] == model_name,:]
+         #     subwatershed_gdf.set_index('SubID',inplace=True)
+
+         self.subwatershed_gdf = subwatershed_gdf
+         self.subwatersheds = uci.network.subwatersheds()
+         self.subwatershed_ids = list(set(self.subwatersheds.index))
+
+     def map_parameter(self,operation,table_name,parameter,table_id=0,weight_by_area = True):
+         fig, ax = plt.subplots()
+         self.join_table(operation,table_name,parameter,table_id).plot(column = parameter,ax = ax,cmap='viridis',legend=True)
+         plt.title(parameter)
+
+     def join_table(self,operation,table_name,parameter,table_id=0,weight_by_area = True):
+         table = self.uci.table(operation,table_name,table_id)
+         subwatersheds = self.uci.network.subwatersheds()
+         subwatersheds = subwatersheds.loc[subwatersheds['SVOL'] == 'PERLND'].reset_index(drop=False).set_index('SVOLNO').join(table,how = 'left')
+         subwatersheds.index.name = 'SVOLNO' # Sometimes the index name gets dropped. I'm guessing when there are missing joins.
+         subwatersheds = subwatersheds.reset_index('SVOLNO').set_index(['SVOLNO','TVOLNO','SVOL','MLNO'])
+
+         #weight by area factor:
+         if weight_by_area:
+             subwatersheds['weighted_param'] = subwatersheds['AFACTR']*subwatersheds[parameter]
+             subwatersheds = subwatersheds.groupby(subwatersheds.index.get_level_values('TVOLNO'))['weighted_param'].sum()/subwatersheds.groupby(subwatersheds.index.get_level_values('TVOLNO'))['AFACTR'].sum()
+             subwatersheds.name = parameter
+         else:
+             subwatersheds = subwatersheds.groupby(subwatersheds.index.get_level_values('TVOLNO'))[parameter].mean()
+             subwatersheds.name = parameter
+         return self.subwatershed_gdf.join(subwatersheds)
+
+     def map_flag(self):
+         raise NotImplementedError()
+
+     def map_output(self,operation,output_name,t_code=5,agg_func = 'mean'):
+         subwatersheds = self.subwatersheds.loc[(self.subwatersheds['SVOL'] == operation),:].copy()
+         opnids = list(subwatersheds['SVOLNO'].unique())
+         output = self.hbn.get_multiple_timeseries(operation,t_code,output_name,opnids = opnids).agg(agg_func)
+         if operation in ['PERLND','IMPLND']:
+             subwatersheds = pd.merge(subwatersheds,output.to_frame(output_name),right_index = True,left_on = 'SVOLNO')
+             subwatersheds['area_output'] = subwatersheds['AFACTR']*subwatersheds[output_name]
+             subwatersheds = subwatersheds[['AFACTR','area_output']].groupby(subwatersheds.index).sum()
+             subwatersheds[output_name] = subwatersheds['area_output']/subwatersheds['AFACTR']
+
+         fig, ax = plt.subplots()
+         #[table.plot(column = parameter,ax = ax) for table in tables]
+         self.subwatershed_gdf.join(subwatersheds).plot(column = output_name,ax = ax,cmap='viridis',legend=True)
+         plt.title(output_name)
+
+         #return self.subwatershed_gdf.join(subwatersheds)
+
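The new mappers module joins UCI parameter tables and HBN outputs onto a SubID-indexed subwatershed GeoDataFrame, area-weighting PERLND values by AFACTR before mapping; uciMapper fans the same join out across several models. A short sketch of the intended call pattern, assuming a UCI file and subwatershed shapefile are at hand; the paths and the PWAT-PARM2/LZSN choice are illustrative, not from this diff:

import geopandas as gpd
from hspf.uci import UCI
from pyhcal.mappers import Mapper

uci = UCI('C:/models/example/example.uci')                           # placeholder path
gdf = gpd.read_file('example_Subwatersheds.shp').set_index('SubID')  # placeholder shapefile

m = Mapper('example', uci, gdf)
m.map_parameter('PERLND', 'PWAT-PARM2', 'LZSN')  # choropleth of the AFACTR-weighted parameter per reach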
@@ -0,0 +1,319 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Thu May 1 09:51:51 2025
+
+ @author: mfratki
+ """
+ #import sqlite3
+ from pathlib import Path
+ import geopandas as gpd
+ import pandas as pd
+ import duckdb
+ #from hspf_tools.calibrator import etlWISKI, etlSWD
+
+
+ #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
+
+
+ _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
+ stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
+ stations_wiski['source'] = 'wiski'
+ _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
+ stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
+ stations_equis['source'] = 'equis'
+ stations_equis['wplmn_flag'] = 0
+
+
+ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlets.duckdb')
+
+ MODL_DB = pd.concat([stations_wiski,stations_equis])
+ MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
+ MODL_DB = MODL_DB.dropna(subset='opnids')
+ MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
+
+ def _reload():
+     global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
+     _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
+     stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
+     stations_wiski['source'] = 'wiski'
+     _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
+     stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
+     stations_equis['source'] = 'equis'
+     stations_equis['wplmn_flag'] = 0
+
+     MODL_DB = pd.concat([stations_wiski,stations_equis])
+     MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
+     MODL_DB = MODL_DB.dropna(subset='opnids')
+     MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
+
+
+ def get_model_db(model_name: str):
+     return MODL_DB.query('repository_name == @model_name')
+
+ def split_opnids(opnids: list):
+     return [abs(int(float(j))) for i in opnids for j in i]
+
+ def valid_models():
+     return MODL_DB['repository_name'].unique().tolist()
+
+ def wplmn_station_opnids(model_name):
+     opnids = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
+     return split_opnids(opnids)
+
+ def wiski_station_opnids(model_name):
+     opnids = MODL_DB.query('repository_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
+     return split_opnids(opnids)
+
+ def equis_station_opnids(model_name):
+     opnids = MODL_DB.query('repository_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
+     return split_opnids(opnids)
+
+ def station_opnids(model_name):
+     opnids = MODL_DB.query('repository_name == @model_name')['opnids'].str.split(',').to_list()
+     return split_opnids(opnids)
+
+ def equis_stations(model_name):
+     return MODL_DB.query('repository_name == @model_name and source == "equis"')['station_id'].tolist()
+
+ def wiski_stations(model_name):
+     return MODL_DB.query('repository_name == @model_name and source == "wiski"')['station_id'].tolist()
+
+ def wplmn_stations(model_name):
+     return MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
+
+ def outlets(model_name):
+     return [group for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
+
+ def outlet_stations(model_name):
+     return [group['station_id'].to_list() for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
+
+ def _split_opnids(opnids: list):
+     return [int(float(j)) for i in opnids for j in i]
+
+ def connect(db_path):
+     Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+     return duckdb.connect(db_path)
+
+
+ def init_db(db_path: str,reset: bool = False):
+     """
+     Initialize the DuckDB database: create the outlets tables and views defined in OUTLETS_SCHEMA
+     """
+     db_path = Path(db_path)
+     if reset and db_path.exists():
+         db_path.unlink()
+
+     with connect(db_path.as_posix()) as con:
+         con.execute(OUTLETS_SCHEMA)
+
+
+
+ # Accessors:
+ def get_outlets_by_model(model_name: str):
+     with connect(DB_PATH) as con:
+         df = con.execute(
+             """
+             SELECT r.*
+             FROM station_reach_pairs r
+             WHERE r.repository_name = ?
+             """,
+             [model_name]
+         ).fetchdf()
+     return df
+
+ def get_outlets_by_reach(reach_id: int, model_name: str):
+     """
+     Return all outlet rows for outlets that include the given reach_id in the given model_name.
+     """
+     with connect(DB_PATH) as con:
+         df = con.execute(
+             """
+             SELECT r.*
+             FROM station_reach_pairs r
+             WHERE r.reach_id = ? AND r.repository_name = ?
+             """,
+             [reach_id, model_name]).fetchdf()
+     return df
+
+ def get_outlets_by_station(station_id: str, station_origin: str):
+     """
+     Return all outlet rows for outlets that include the given station_id from the given station_origin.
+     """
+     with connect(DB_PATH) as con:
+
+         df = con.execute(
+             """
+             SELECT r.*
+             FROM station_reach_pairs r
+             WHERE r.station_id = ? AND r.station_origin = ?
+             """,
+             [station_id, station_origin]).fetchdf()
+     return df
+
+ # constructors:
+ def build_outlet_db(db_path: str = None):
+     if db_path is None:
+         db_path = DB_PATH
+     init_db(db_path,reset=True)
+     with connect(db_path) as con:
+         for index, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
+             repo_name = group['repository_name'].iloc[0]
+             add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
+
+             opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
+
+             for opnid in opnids:
+                 if opnid < 0:
+                     exclude = 1
+                 else:
+                     exclude = 0
+                 add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
+
+             for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
+                 add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
+
+
+ def create_outlet_schema(con, model_name : str):
+     for index, group in enumerate(outlets(model_name)):
+         repo_name = group['repository_name'].iloc[0]
+         add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
+
+         opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
+
+         for opnid in opnids:
+             if opnid < 0:
+                 exclude = 1
+             else:
+                 exclude = 0
+             add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
+
+         for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
+             add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
+
+
+ def add_outlet(con,
+                outlet_id: str,
+                repository_name: str,
+                outlet_name = None,
+                notes = None):
+     """
+     Insert an outlet. repository_name is required.
+     """
+     con.execute(
+         "INSERT INTO outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
+         [outlet_id, repository_name, outlet_name, notes]
+     )
+
+ def add_station(con,
+                 outlet_id: str,
+                 station_id: str,
+                 station_origin: str,
+                 true_opnid: str,
+                 repository_name: str,
+                 comments = None):
+     """
+     Insert a station membership for an outlet.
+     Constraints:
+     - UNIQUE (station_id, station_origin): unique per origin across all outlets.
+     - true_opnid and repository_name are required per schema.
+     """
+     con.execute(
+         """INSERT INTO outlet_stations
+            (outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
+            VALUES (?, ?, ?, ?, ?, ?)""",
+         [outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
+     )
+
+ def add_reach(con,
+               outlet_id: str,
+               reach_id: str,
+               repository_name: str,
+               exclude: int = 0):
+     """
+     Insert a reach membership for an outlet.
+     - repository_name is required; together with reach_id it identifies the reach.
+     - exclude = 1 to mark a reach as excluded from association views.
+     """
+     con.execute(
+         """INSERT INTO outlet_reaches (outlet_id, reach_id, repository_name, exclude)
+            VALUES (?, ?, ?, ?)""",
+         [outlet_id, reach_id, repository_name, int(exclude)]
+     )
+
+
+ OUTLETS_SCHEMA = """-- schema.sql
+ -- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
+ -- Compatible with DuckDB and SQLite.
+
+ -- Table 1: outlets
+ -- Represents a logical grouping that ties stations and reaches together.
+ CREATE TABLE IF NOT EXISTS outlets (
+     outlet_id TEXT PRIMARY KEY,
+     repository_name TEXT NOT NULL,
+     outlet_name TEXT,
+     notes TEXT -- optional: general notes about the outlet grouping
+ );
+
+ -- Table 2: outlet_stations
+ -- One-to-many: outlet -> stations
+ CREATE TABLE IF NOT EXISTS outlet_stations (
+     outlet_id TEXT NOT NULL,
+     station_id TEXT NOT NULL,
+     station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
+     repository_name TEXT NOT NULL, -- repository model the station is physically located in
+     true_opnid TEXT NOT NULL, -- the specific reach the station physically sits on
+     comments TEXT, -- per-station comments, issues, etc.
+     CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
+     FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
+ );
+
+ -- Table 3: outlet_reaches
+ -- One-to-many: outlet -> reaches
+ -- A reach can appear in multiple outlets, enabling many-to-many overall.
+ CREATE TABLE IF NOT EXISTS outlet_reaches (
+     outlet_id TEXT NOT NULL,
+     reach_id TEXT NOT NULL, -- model reach identifier (aka opnid)
+     repository_name TEXT NOT NULL, -- where the mapping comes from
+     exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
+     FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
+ );
+
+ -- Useful views:
+
+ -- View: station_reach_pairs
+ -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
+ CREATE VIEW IF NOT EXISTS station_reach_pairs AS
+ SELECT
+     s.outlet_id,
+     s.station_id,
+     s.station_origin,
+     r.reach_id,
+     r.exclude,
+     r.repository_name
+ FROM outlet_stations s
+ JOIN outlet_reaches r
+     ON s.outlet_id = r.outlet_id;
+
+ -- Example indexes (SQLite will accept CREATE INDEX; DuckDB treats them as metadata but it’s okay to define):
+ CREATE INDEX IF NOT EXISTS idx_outlet_stations_outlet ON outlet_stations(outlet_id);
+ CREATE INDEX IF NOT EXISTS idx_outlet_reaches_outlet ON outlet_reaches(outlet_id);
+ CREATE INDEX IF NOT EXISTS idx_station_reach_pairs_station ON outlet_stations(station_id);"""
+
+
+ #row = modl_db.MODL_DB.iloc[0]
+
+ #info = etlWISKI.info(row['station_id'])
+
+ #modl_db.MODL_DB.query('source == "equis"')
+
+ # outlet_dict = {'stations': {'wiski': ['E66050001'],
+ #                'equis': ['S002-118']},
+ #                'reaches': {'Clearwater': [650]}
+
+
+
+
+ # station_ids = ['S002-118']
+ # #station_ids = ['E66050001']
+ # reach_ids = [650]
+ # flow_station_ids = ['E66050001']
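The new modl_db module flattens the bundled station geopackages into MODL_DB at import time and can persist the outlet/station/reach relationships to DuckDB, where the station_reach_pairs view exposes the derived station-to-reach pairs. A sketch of the build-and-query round trip; the model name is a placeholder taken from the commented examples above:

from pyhcal import modl_db

modl_db.build_outlet_db()                           # rebuilds data/outlets.duckdb from MODL_DB
pairs = modl_db.get_outlets_by_model('Clearwater')  # placeholder repository/model name
print(pairs[['outlet_id', 'station_id', 'station_origin', 'reach_id', 'exclude']])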
@@ -6,7 +6,7 @@ Created on Wed Nov 27 09:16:30 2024
  """

  import pandas as pd
- from pyhcal.modl_db import MODL_DB
+ from pyhcal import modl_db
  from pathlib import Path
  import shutil

@@ -20,7 +20,7 @@ class Repository():
          'USGS HUC-4':'string',
          'USGS HUC-2':'string'})

-     MODL_DB = MODL_DB
+



@@ -38,7 +38,7 @@ class Repository():

          self.REPOSITORY_PATH = repository_path
          huc_directory = self.HUC_DIRECTORY.loc[self.HUC_DIRECTORY['Repository_HUC8 Name'] == model_name]
-         self.modl_db = self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
+         self.modl_db = modl_db.get_model_db(model_name) #self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
          #self.modl_db = pd.concat([self.MODL_DB.loc[self.MODL_DB['repository_name'].str.startswith(huc8_id,na=False)] for huc8_id in huc8_ids])
          self.model_name = model_name
          self.huc8_ids = list(huc_directory['USGS HUC-8'])
@@ -48,6 +48,9 @@ class Repository():
          self.uci_file = self.repo_folder.joinpath('HSPF','.'.join([self.model_name,'uci']))
          self.wdm_files = [item for item in self.repo_folder.joinpath('HSPF').iterdir() if (item.name.endswith('.wdm')) | (item.name.endswith('.WDM'))]
          self.shapefiles = {item.name.split('.')[0].split('_')[-1]:item for item in self.repo_folder.joinpath('GIS').iterdir() if (item.name.endswith('.shp')) | (item.name.endswith('.SHP'))}
+         self.wiski_stations = modl_db.wiski_stations(model_name)
+         self.equis_stations = modl_db.equis_stations(model_name)
+

      def copy(self,copy_path):
          copy_path = Path(copy_path)
@@ -73,7 +76,7 @@ class Repository():
          files = [file for file in shapefile.parent.iterdir() if file.stem == shapefile.stem]
          [shutil.copyfile(file,Path(copy_path).joinpath(Path(file).name)) for file in files]

-
+

  def build_folders(trg_path):

@@ -8,6 +8,7 @@ from mpcaHydro.data_manager import dataManager
  from hspf.wdmReader import readWDM
  from hspf.uci import UCI
  from pyhcal.repository import Repository
+ from pyhcal import modl_db

  import numpy as np
  import pandas as pd
@@ -38,6 +39,8 @@ class Builder():
          self.new_uci = None
          self.uci = None
          self.dm = None
+         self.calibration_reaches = modl_db.wplmn_station_opnids(model_name)
+

      def valid_models():
          return Repository.valid_models()
@@ -65,44 +68,33 @@ class Builder():
          self.download_station_data()
          self.uci.build_targets().to_csv(self.project_path.joinpath('targets.csv'))

-         # if not self.project_path.joinpath('model',self.project_name + '_0-0.hbn').exists():
-         #     self.run_model()
-
-     #TODO move to UCI class

      def format_uci(self,calibration_reaches = None):
-
-
+         if calibration_reaches is None:
+             calibration_reaches = self.calibration_reaches
+
          setup_files(self.uci,self.project_name,run = 0)
          setup_geninfo(self.uci)
-
-         if calibration_reaches is None:
-             calibration_reaches = self.repository.modl_db.loc[self.repository.modl_db['wplmn_flag'] == 1]['opnids'].str.split(',').to_list()
-             calibration_reaches = [abs(int(j)) for i in calibration_reaches for j in i]
-             #calibration_reaches = self.uci.network.station_order(calibration_reaches)[-1][0]
-
-
-         self.uci.initialize(name = self.project_name + '_0')
-         # for reaches in self._MODL_DB['opnids'].str.split('+').to_list():
-         #     [calibration_reaches.append(int(reach)) for reach in reaches if ~pd.isna(reach)]
-
+         self.uci.initialize(name = self.project_name + '_0')
          setup_binaryinfo(self.uci,reach_ids = calibration_reaches)
          setup_qualid(self.uci)
          self.uci.write(self.new_uci)

-     # Download observation data
-     # Sources/Databases WISKI and EQUIS (DELTA databases?)
-     #TODO: use a single WISKI etl script for csg and wplmn data
-
-
-     def download_station_data(self):#:,station_id,source_id):
-         stations = self.repository.modl_db.drop_duplicates(subset = ['source','station_id'])
-         if not stations.empty:
-             for index, row in stations.iterrows():
-                 assert(row['source'].lower() in ['wiski','equis','swd'])
-                 station_id = row['station_id']
-                 self.dm.download_station_data(station_id,row['source'].lower())
-
+
+     def download_station_data(self,start_year,end_year):#:,station_id,source_id):
+         #%% Old approach. Store as individual processed station files then load to warehouse
+         if len(self.repository.equis_stations) > 0:
+             if self.dm.credentials_exist():
+                 station_origin = 'equis'
+             else: station_origin = 'swd'
+
+             for station_id in self.repository.equis_stations:
+                 self.dm.download_station_data(station_id,station_origin, True)
+
+         if len(self.repository.wiski_stations) > 0:
+             for station_id in self.repository.wiski_stations:
+                 self.dm.download_station_data(station_id,'wiski', True)
+

      def convert_wdms(self):
          copy_path = Path(self.project_path.joinpath('model'))
@@ -111,11 +103,31 @@ class Builder():
              readWDM(wdm_file,
                      copy_path.joinpath(wdm_file.name.replace('.wdm','.hdf5').replace('.WDM','hdf5')))

-     def run_model(self):
+     def run_model(self, wait_for_completion=True):
+         #run_model(self.new_uci, wait_for_completion)
          # Run the uci file
          winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
          subprocess.run([winHSPF,self.new_uci]) #, stdout=subprocess.PIPE, creationflags=0x08000000)

+ def run_model(uci_file, wait_for_completion=True):
+     winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
+
+     # Arguments for the subprocess
+     args = [winHSPF, uci_file.as_posix()]
+
+     if wait_for_completion:
+         # Use subprocess.run to wait for the process to complete (original behavior)
+         subprocess.run(args)
+     else:
+         # Use subprocess.Popen to run the process in the background without waiting
+         # On Windows, use creationflags to prevent a console window from appearing
+         if sys.platform.startswith('win'):
+             creationflags = subprocess.CREATE_NO_WINDOW
+             subprocess.Popen(args, creationflags=creationflags)
+         else:
+             # For other platforms (like Linux/macOS), Popen without special flags works fine
+             subprocess.Popen(args)



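The new module-level run_model helper distinguishes a blocking run (subprocess.run) from a fire-and-forget launch (subprocess.Popen, with CREATE_NO_WINDOW on Windows). A compact, HSPF-independent illustration of the same pattern; the executable and arguments are placeholders:

import subprocess
import sys

def launch(args, wait=True):
    if wait:
        # Blocking: returns a CompletedProcess once the child exits.
        return subprocess.run(args)
    # Non-blocking: returns a Popen handle immediately; use .poll()/.wait() later.
    flags = subprocess.CREATE_NO_WINDOW if sys.platform.startswith('win') else 0
    return subprocess.Popen(args, creationflags=flags)

proc = launch(['WinHspfLt.exe', 'model.uci'], wait=False)  # placeholder command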
@@ -134,11 +146,12 @@ def setup_files(uci,name,run,n = 5):
      table = uci.table('FILES',drop_comments = False)
      for index, row in table.iterrows():
          filename = Path(row['FILENAME'])
-         if filename.suffix in ['.wdm','.ech','.out']:
+         if filename.suffix in ['.wdm','.ech','.out','.mut']:
              table.loc[index,'FILENAME'] = filename.name
-         if filename.suffix in ['.hbn']:
+         elif filename.suffix in ['.hbn']:
              table.loc[index,'FILENAME'] = filename.name
-         if filename.suffix in ['.plt']:
+         #if filename.suffix in ['.plt']:
+         else:
              table.drop(index,inplace = True)

      # Get new binary number and create new BINO rows
@@ -1,97 +0,0 @@
- # -*- coding: utf-8 -*-
- """
- Created on Thu May 1 09:51:51 2025
-
- @author: mfratki
- """
- #import sqlite3
- from pathlib import Path
- import geopandas as gpd
- import pandas as pd
- #from hspf_tools.calibrator import etlWISKI, etlSWD
-
-
- #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
-
-
- stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg')).dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
- stations_wiski['source'] = 'wiski'
- stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg')).dropna(subset='opnids')[['id_code','true_opnid','opnids','comments','modeled','repository_name']]
- stations_equis['source'] = 'equis'
- stations_equis['wplmn_flag'] = 0
- stations_equis = stations_equis.rename(columns = {'id_code':'station_id'})
-
-
- MODL_DB = pd.concat([stations_wiski,stations_equis])
-
- database = """
- -- Stations/Locations table
- CREATE TABLE IF NOT EXISTS Station (
-     stationPK INTEGER PRIMARY KEY AUTOINCREMENT,
-     reachPK INTEGER REFERENCES Reach(reachPK),
-     stationID TEXT NOT NULL,
-     stationName TEXT,
-     stationOrigin TEXT NOT NULL,
-     latitude REAL,
-     longitude REAL,
-     stationType TEXT,
-     UNIQUE(stationID, stationOrigin)
- );
-
- -- Station Associations table
- CREATE TABLE IF NOT EXISTS StationAssociations (
-     stationPK INTEGER REFERENCES Station(stationPK),
-     associationPK INTEGER REFERENCES Station(stationPK)
- );
-
- -- Station Aliases table
- CREATE TABLE IF NOT EXISTS StationAliases (
-     stationPK INTEGER NOT NULL,
-     aliasPK INTEGER NOT NULL,
-     FOREIGN KEY (stationPK) REFERENCES Station(stationPK),
-     FOREIGN KEY (aliasPK) REFERENCES Station(stationPK)
- );
-
- CREATE TABLE Reach (
-     reachPK INTEGER PRIMARY KEY,
-     modelName TEXT NOT NULL,
-     reachID INTEGER NOT NULL,
-     drainageArea FLOAT
- );
-
- CREATE TABLE Outlet (
-     outletPK INTEGER PRIMARY KEY,
-     outletName TEXT
- );
-
- -- Outlet-Station Associations table
- CREATE TABLE IF NOT EXISTS StationAssociations (
-     outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
-     stationPK INTEGER NOT NULL REFERENCES Station(reachPK)
- );
-
- -- Outlet-Reach Associations table
- CREATE TABLE IF NOT EXISTS StationAssociations (
-     outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
-     reachPK INTEGER NOT NULL REFERENCES Station(reachPK)
-     exclude INTEGER NOT NULL
- );"""
-
-
- #row = modl_db.MODL_DB.iloc[0]
-
- #info = etlWISKI.info(row['station_id'])
-
- #modl_db.MODL_DB.query('source == "equis"')
-
- # outlet_dict = {'stations': {'wiski': ['E66050001'],
- #                'equis': ['S002-118']},
- #                'reaches': {'Clearwater': [650]}
-
-
-
-
- # station_ids = ['S002-118']
- # #station_ids = ['E66050001']
- # reach_ids = [650]
- # flow_station_ids = ['E66050001']