pyhcal 1.1.1__tar.gz → 1.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyhcal
3
- Version: 1.1.1
3
+ Version: 1.1.2
4
4
  Summary: Python package for calibrating MPCA HSPF models
5
5
  Project-URL: Homepage, https://github.com/mfratkin1/pyhcal
6
6
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
  [project]
6
6
  name = "pyhcal"
7
7
  urls = { "Homepage" = "https://github.com/mfratkin1/pyhcal" } # ? Add this!
8
- version = "1.1.1"
8
+ version = "1.1.2"
9
9
  dependencies = [
10
10
  "hspf",
11
11
  "mpcaHydro",
@@ -38,6 +38,7 @@ def validate_project_folder(project_folder):
38
38
  assert project_path.joinpath('data').exists(), 'Data folder does not exist'
39
39
  assert project_path.joinpath('output').exists(), 'Output folder does not exist'
40
40
  assert project_path.joinpath('targets.csv').exists(), 'targets.csv file does not exist in project folder'
41
+ assert project_path.joinpath('gis').exists(), 'GIS folder does not exist'
41
42
  return True
42
43
 
43
44
  class calibrator:
@@ -56,10 +57,13 @@ class calibrator:
56
57
 
57
58
  self.targets = None
58
59
  if self.project_path.joinpath('targets.csv').exists():
59
- self.targets = pd.read_csv(self.project_path.joinpath('targets.csv'))
60
-
60
+ self._load_targets()
61
+
61
62
  self.MODL_DB = pd.read_csv(self.project_path.joinpath('_'.join([self.project_name ,'MODL_DB.csv'])))
62
63
 
64
+ if 'repo_name' in self.MODL_DB.columns:
65
+ self.model_name =self.MODL_DB['repo_name'].to_list()[0]
66
+
63
67
  # Alot of effort to try and include the subwatershed gdf if it exists. TODO: refactor
64
68
  self.subwatershed_gdf_filepath = self.gis_path.joinpath('_'.join([self.project_name ,'Subwatersheds.shp']))
65
69
  if self.subwatershed_gdf_filepath.exists():
@@ -98,6 +102,9 @@ class calibrator:
98
102
  else:
99
103
  self.subwatershed_gdf = None
100
104
 
105
+ def _load_targets(self):
106
+ self.targets = pd.read_csv(self.project_path.joinpath('targets.csv'))
107
+
101
108
  def _update_mapper(self):
102
109
  if self.subwatershed_gdf is not None:
103
110
  self.mapper = Mapper(self.project_name,self.uci,self.subwatershed_gdf,hbn = self.model.hbns)
@@ -145,14 +152,24 @@ class calibrator:
145
152
  #winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
146
153
  subprocess.run([self.model.winHSPF,uci_file]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
147
154
 
148
-
155
+ def get_outlets(self):
156
+ df = self.dm.get_outlets(self.model_name)
157
+ outlets = {}
158
+ for outlet_id in df['outlet_id'].unique():
159
+ outlets[int(outlet_id)] = {}
160
+ df_outlet = df.loc[df['outlet_id'] == outlet_id]
161
+ outlets[int(outlet_id)]['station_ids'] = list(set(df_outlet['station_id']))
162
+ outlets[int(outlet_id)]['reach_ids'] = list(set(df_outlet['reach_id']))
163
+ outlets[int(outlet_id)]['model_name'] = df_outlet['repository_name'].iloc[0]
164
+ return outlets
165
+
149
166
  def get_simulated_output(self,reach_ids,constituent,time_step = 'YE'):
150
167
  sim = self.model.hbns.get_reach_constituent(constituent,reach_ids,time_step)
151
168
  sim.name = 'simulated'
152
169
  return sim
153
170
 
154
171
  def get_observed_data(self,station_ids,constituent,time_step = 'YE',baseflow_percentage = None):
155
- obs = self.dm.get_station_data(station_ids,constituent,agg_period = time_step)['observed'].sort_index(level = 'index')
172
+ obs = self.dm.get_observation_data(station_ids,constituent,agg_period = time_step)['observed'].sort_index(level = 'index')
156
173
  obs.name = 'observed'
157
174
  return obs
158
175
 
@@ -560,110 +577,3 @@ def threshold(adjustment,threshold,max_change):
560
577
  #Note that in uci.update_table() there is further screening to account for adjustments below the model precision
561
578
  return adjustment
562
579
 
563
-
564
- #class hydrologyCalibrator(calibrator):
565
-
566
- #class nutrientCalibrator(calibrator):
567
-
568
- class sedimentCalibrator(calibrator):
569
-
570
- def update_kser(self,method,opnid = None):
571
- #TODO account for the additional comment column
572
- assert method in ['load','landcover','sftl']
573
-
574
- table = self.uci.table('PERLND','SED-PARM3',0,False)
575
-
576
-
577
- if method == 'load':
578
- adjustment = self.compare(0,aggregate = True).loc['Mean']['ratio']
579
- elif method == 'landcover':
580
- adjustment = self.landcover(0)['target']
581
- table = self.uci.table('PERLND','SED-PARM3',0)
582
- if opnid == None:
583
- opnid = table.index
584
- adjustment = np.array(adjustment.loc[opnid])[:,None]
585
- elif method == 'sftl':
586
- adjustment = self.sftl()
587
-
588
- self.uci.replace_table('PERLND','SED-PARM3',0)
589
-
590
- def update_erosivity(self,param = 'M',opnid = None,update_alg = '*'):
591
- adjustment = self.scour()
592
- table = self.uci.table('RCHRES','SILT-CLAY-PM',0)
593
- if opnid == None:
594
- opnid = table.index
595
- adjustment = np.array(adjustment.loc[opnid])[:,None]
596
- self.uci.update_table(adjustment,'RCHRES','SILT-CLAY-PM',table_id = 0,opnid = opnid,columns = [param],update_alg = update_alg)
597
-
598
- adjustment = self.scour()
599
- adjustment = np.array(adjustment.loc[opnid])[:,None]
600
- self.uci.update_table(adjustment,'RCHRES','SILT-CLAY-PM',table_id = 1,opnid = opnid,columns = [param],update_alg = update_alg)
601
-
602
-
603
- def fit_param(self,param,m_factor,N = 2,opnid = None,run = None):
604
- bounds = {'M':[.000000001,.01,2,5], #maxlow,low,high,maxhigh
605
- 'TAUCD':[.001,.01,.3,1],
606
- 'TAUCS':[.01,.05,.5,3]}
607
- if run == None:
608
- run = self.run
609
-
610
- data = self.load_data('scour',N=10000)
611
- data = data.loc[:,range(run-N+1,run+1),:]
612
-
613
- if opnid == None:
614
- opnid = data.reset_index(level=[1]).index.unique() # assumes multiindex
615
-
616
- for index in opnid:
617
- if any(data.loc[index]['LKFG'] == 0):
618
- x = data.loc[index]['depscour']
619
- y = data.loc[index][param]
620
- linear_model=np.polyfit(x,y,1)
621
- linear_model_fn=np.poly1d(linear_model)
622
- m = linear_model_fn(-data.loc[index]['nonpoint'].iloc[1]*.25)
623
- if m < bounds[param][0]:
624
- m = bounds[param][0]
625
- if m > bounds[param][3]:
626
- m = bounds[param][3]
627
- self.update_table('RCHRES','SILT-CLAY-PM',0,m,'set',opnid = index,columns = [param]) #mod.update_table(operation,table_name,table_id,adjustment,operator,opnids,columns)
628
- self.update_table('RCHRES','SILT-CLAY-PM',1,m*m_factor,'set',opnid = index,columns = [param]) #mod.update_table(operation,table_name,table_id,adjustment,operator,opnids,columns)
629
-
630
- def erosivity(self,m_factor,param = 'M',opnid = None,run = None,iterations = 1):
631
-
632
- if run == None:
633
- run = self.run
634
-
635
- # run model updating erosivity for N iterations
636
- for iteration in range(iterations):
637
- self.update_erosivity(param = param,opnid = opnid)
638
- self.run_model() # creates the run+1 uci file and runs it using WinHspfLT
639
- run = run + 1
640
- self.load_model(run)
641
- self.save_data()
642
-
643
-
644
- self.fit_param(param,m_factor,iterations+1,opnid,run)
645
- self.run_model() # creates the run+1 uci file and runs it using WinHspfLT
646
-
647
- run = run + 1
648
- self.load_model(run)
649
- self.save_data()
650
-
651
- def scour(hbn,uci):
652
- # Erosivity adjustment only
653
- scour = reports.scour_report(hbn,uci)
654
- #TODO: add check for this
655
- # Assume all nonpoint values are greater than 0...
656
- # if depscour is greater than 0
657
- target = scour['nonpoint']*.25 # Assuming nonpoint load is set
658
- adjustment = np.abs(scour['depscour'])/target
659
- adjustment[(adjustment < 1.05) & (adjustment > .95)] = 1 # Don't change reaches where the depscour is close to the target
660
- adjustment[adjustment > 1.05] = .95 # Since depscour is negative we have to swap this. I think if I do target/depscour this line would be less confusing
661
- adjustment[adjustment < .95] = 1.05
662
- adjustment[scour['depscour'] > 0] = 2 # Double any values where the depscour is positive
663
- adjustment[scour['LKFG'] == 1] = 1 # Ignore lake flags
664
- adjustment[np.isnan(adjustment)] = 1
665
-
666
- return adjustment
667
-
668
-
669
-
@@ -101,5 +101,13 @@ class Mapper():
101
101
  self.subwatershed_gdf.join(subwatersheds).plot(column = output_name,ax = ax,cmap='viridis',legend=True)
102
102
  plt.title(output_name)
103
103
 
104
+ def map_table(self,df, mapping_col):
105
+ '''Maps a dataframe column to the subwatershed geodataframe based on subwatershed IDs.
106
+ Assumes the dataframe index contains the subwatershed IDs.'''
107
+ fig, ax = plt.subplots()
108
+ #[table.plot(column = parameter,ax = ax) for table in tables]
109
+ self.subwatershed_gdf.join(df).plot(column = mapping_col,ax = ax,cmap='viridis',legend=True)
110
+ plt.title(mapping_col)
104
111
  #return self.subwatershed_gdf.join(subwatersheds)
105
112
 
113
+
@@ -6,7 +6,7 @@ Created on Wed Nov 27 09:16:30 2024
6
6
  """
7
7
 
8
8
  import pandas as pd
9
- from pyhcal import modl_db
9
+ from mpcaHydro import outlets
10
10
  from pathlib import Path
11
11
  import shutil
12
12
 
@@ -38,7 +38,7 @@ class Repository():
38
38
 
39
39
  self.REPOSITORY_PATH = repository_path
40
40
  huc_directory = self.HUC_DIRECTORY.loc[self.HUC_DIRECTORY['Repository_HUC8 Name'] == model_name]
41
- self.modl_db = modl_db.get_model_db(model_name) #self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
41
+ self.modl_db = outlets.get_model_db(model_name) #self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
42
42
  #self.modl_db = pd.concat([self.MODL_DB.loc[self.MODL_DB['repository_name'].str.startswith(huc8_id,na=False)] for huc8_id in huc8_ids])
43
43
  self.model_name = model_name
44
44
  self.huc8_ids = list(huc_directory['USGS HUC-8'])
@@ -48,8 +48,8 @@ class Repository():
48
48
  self.uci_file = self.repo_folder.joinpath('HSPF','.'.join([self.model_name,'uci']))
49
49
  self.wdm_files = [item for item in self.repo_folder.joinpath('HSPF').iterdir() if (item.name.endswith('.wdm')) | (item.name.endswith('.WDM'))]
50
50
  self.shapefiles = {item.name.split('.')[0].split('_')[-1]:item for item in self.repo_folder.joinpath('GIS').iterdir() if (item.name.endswith('.shp')) | (item.name.endswith('.SHP'))}
51
- self.wiski_stations = modl_db.wiski_stations(model_name)
52
- self.equis_stations = modl_db.equis_stations(model_name)
51
+ self.wiski_stations = outlets.wiski_stations(model_name)
52
+ self.equis_stations = outlets.equis_stations(model_name)
53
53
 
54
54
 
55
55
  def copy(self,copy_path):
@@ -8,7 +8,7 @@ from mpcaHydro.data_manager import dataManager
8
8
  from hspf.wdmReader import readWDM
9
9
  from hspf.uci import UCI
10
10
  from pyhcal.repository import Repository
11
- from pyhcal import modl_db
11
+ from mpcaHydro import outlets
12
12
 
13
13
  import numpy as np
14
14
  import pandas as pd
@@ -16,8 +16,8 @@ from pathlib import Path
16
16
  import subprocess
17
17
 
18
18
 
19
- def create_calibration_project(model_name,project_location,download_station_data = True,run_model = True,convert_wdms = True):
20
- project = Builder(model_name)
19
+ def create_calibration_project(model_name,project_location, download_station_data = True,run_model = True,convert_wdms = True,oracle_username = None, oracle_password = None):
20
+ project = Builder(model_name,oracle_username = oracle_username, oracle_password = oracle_password)
21
21
  project.copy(project_location,model_name)
22
22
  project.load_uci()
23
23
  project.format_uci()
@@ -25,6 +25,7 @@ def create_calibration_project(model_name,project_location,download_station_data
25
25
  if convert_wdms: project.convert_wdms()
26
26
  if download_station_data: project.download_station_data()
27
27
  if run_model: project.run_model()
28
+ return project
28
29
 
29
30
 
30
31
 
@@ -32,32 +33,34 @@ def create_calibration_project(model_name,project_location,download_station_data
32
33
 
33
34
  class Builder():
34
35
 
35
- def __init__(self,model_name):
36
+ def __init__(self,model_name,oracle_username = None, oracle_password = None):
36
37
  self.repository = Repository(model_name)
38
+ self.model_name = model_name
37
39
  self.project_path = None
38
40
  self.project_name = None
39
41
  self.new_uci = None
40
42
  self.uci = None
41
43
  self.dm = None
42
- self.calibration_reaches = modl_db.wplmn_station_opnids(model_name)
43
-
44
+ self.calibration_reaches = outlets.wplmn_station_opnids(model_name)
45
+ self.oracle_username = oracle_username
46
+ self.oracle_password = oracle_password
44
47
 
45
48
  def valid_models():
46
49
  return Repository.valid_models()
47
50
 
48
- def set_project_path(self,project_path):
49
- self.project_path = Path(project_path)
50
- self.project_name = Path(project_path).name
51
+ def set_project_path(self,project_location,project_name):
52
+ self.project_path = Path(project_location).joinpath(project_name)
53
+ self.project_name = project_name
54
+ self.dm = dataManager(self.project_path.joinpath('data'),oracle_username = self.oracle_username, oracle_password = self.oracle_password)
55
+ self.dm._build_warehouse()
51
56
  #self.new_uci = self.project_path.joinpath('model','_'.join([self.project_name,'0.uci']))
52
57
  #self.uci = UCI(self.project_path.joinpath('model','.'.join([self.project_name,'uci'])))
53
58
 
54
59
  def copy(self,project_location,project_name):
55
- self.project_path = Path(project_location).joinpath(project_name)
56
- self.project_name = project_name
60
+ self.set_project_path(project_location,project_name)
57
61
  self.repository.copy(self.project_path)
58
- self.dm = dataManager(self.project_path.joinpath('data'))
62
+
59
63
 
60
-
61
64
  def load_uci(self):
62
65
  self.new_uci = self.project_path.joinpath('model','_'.join([self.project_name,'0.uci']))
63
66
  self.uci = UCI(self.project_path.joinpath('model','.'.join([self.project_name,'uci'])))
@@ -80,21 +83,27 @@ class Builder():
80
83
  setup_qualid(self.uci)
81
84
  self.uci.write(self.new_uci)
82
85
 
83
-
84
- def download_station_data(self,start_year,end_year):#:,station_id,source_id):
85
- #%% Old approach. Store as indvidual processed station files then load to warehouse
86
- if len(equis_stations) > 0:
87
- if self.dm.credentials_exist():
88
- station_origin = 'equis'
89
- else: station_origin = 'swd'
90
-
91
- for station_id in equis_stations:
92
- self.dm.download_station_data(station_id,station_origin, True)
86
+ def download_wiski_data(self,station_ids):
87
+ if len(station_ids) > 0:
88
+ self.dm._download_wiski_data(station_ids)
89
+ else:
90
+ print("No Wiski stations have been manually matched to modeled reaches.")
93
91
 
94
- if len(wiski_stations) > 0:
95
- for station_id in wiski_stations:
96
- self.dm.download_station_data(station_id,'wiski', True)
97
-
92
+ def download_equis_data(self,station_ids):
93
+ if len(station_ids) > 0:
94
+ if self.dm.credentials_exist():
95
+ self.dm.connect_to_oracle()
96
+ self.dm._download_equis_data(station_ids)
97
+ else:
98
+ print("Oracle credentials not provided. Cannot download Equis data.")
99
+ else:
100
+ print("No Equis stations have been manually matched to modeled reaches.")
101
+
102
+ def download_station_data(self):
103
+ equis_stations = self.dm.outlets.mapped_equis_stations(self.model_name)
104
+ wiski_stations = self.dm.outlets.mapped_wiski_stations(self.model_name)
105
+ self.download_equis_data(equis_stations)
106
+ self.download_wiski_data(wiski_stations)
98
107
 
99
108
  def convert_wdms(self):
100
109
  copy_path = Path(self.project_path.joinpath('model'))
@@ -584,3 +593,5 @@ def setup_qualid(uci):
584
593
 
585
594
 
586
595
 
596
+
597
+ # %%