hspf 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary files changed (6)
@@ -0,0 +1 @@
+WDM WDM1 C:\Program Files (x86)\HSPEXP+\WinHSPFLt\hspfmsg.wdm
hspf/build_warehouse.py CHANGED
@@ -281,12 +281,49 @@ for key,ts_names in outputs.items():
     dfs.append(df)
 output_df = pd.concat(dfs).reset_index(drop=True)
 
+dfs = []
+for key,data in hbn.hbns[0].data_frames.items():
+    keys = key.split('_')
+    operation = keys[0]
+    activity = keys[1]
+    opnid = int(keys[2])
+    t_code = keys[3]
+    data.reset_index(inplace=True)
+    data.rename(columns={'index': 'datetime'}, inplace=True)
+    data = data.melt(id_vars = ['datetime'],var_name = 'ts_name', value_name = 'value')
+    data['operation'] = operation
+    data['activity'] = activity
+    data['opnid'] = opnid
+    data['t_code'] = t_code
+    data['model_name'] = model_name
+    dfs.append(data)
+output_df = pd.concat(dfs).reset_index(drop=True)
+
+
+
+# Write to Parquet with DuckDB, including "t_code" as a partition
+output_path = "model_outputs"
+
+con = duckdb.connect(database=':memory:')  # Temporary in-memory database
+con.execute(f"""
+    COPY output_df
+    TO '{output_path}'
+    (FORMAT 'parquet', PARTITION_BY ('model_name','operation', 'opnid'))
+""")
+
+print(f"Data written to {output_path}")
 
-ts_name = 'PERO'
-op_type = 'PERLND'
-t_code = 4
 
 
+['PERO',
+ 'SURO',
+ 'IFWO',
+ 'AGWO']
+
+for constituent in ['Q','TSS','TP','N','OP','BOD','TKN']:
+    t_cons = helpers.get_tcons(constituent,'RCHRES','lb')
+    df = hbn.get_rechres_data(constituent, units='lb', freq='daily').reset_index()
+
 
 pero = hbn.get_multiple_timeseries(op_type,t_code,ts_name).reset_index().rename(columns={'index': 'datetime'})
 pero = pero.melt(id_vars = ['datetime'],var_name = 'operation_id', value_name = 'value')
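The loop added above reshapes each wide HBN frame (one column per output time series, timestamps on the index) into a long table before concatenation. A minimal, self-contained sketch of that reshape, with sample values invented for illustration:

```python
import pandas as pd

# Wide frame as read from an HBN file: one column per time series,
# timestamps on the index. Values here are invented.
wide = pd.DataFrame(
    {"PERO": [0.10, 0.20], "SURO": [0.05, 0.07]},
    index=pd.to_datetime(["1996-01-01", "1996-01-02"]),
)

# Same steps as the loop above: promote the index to a 'datetime' column,
# then melt every remaining column into ('ts_name', 'value') pairs.
long = (
    wide.reset_index()
        .rename(columns={"index": "datetime"})
        .melt(id_vars=["datetime"], var_name="ts_name", value_name="value")
)
print(long)
#     datetime ts_name  value
# 0 1996-01-01    PERO   0.10
# 1 1996-01-02    PERO   0.20
# 2 1996-01-01    SURO   0.05
# 3 1996-01-02    SURO   0.07
```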
@@ -295,15 +332,58 @@ pero['t_code'] = t_code
 pero['model_name'] = model_name
 
 
+db_path = 'c:/Users/mfratki/Documents/ucis.duckdb'
 with duckdb.connect(db_path) as con:
     warehouse.insert_model_run(con, model_name, run_id)
 
+db_path = 'c:/Users/mfratki/Documents/ucis.duckdb'
+with duckdb.connect(db_path) as conn:
+    conn.execute("CREATE SCHEMA if not exists reports")
+    conn.execute("CREATE TABLE if not exists reports.catchment_loading AS SELECT * FROM df")
+    conn.close()
+
 
+# Average annual loading by catchment
+db_path = 'c:/Users/mfratki/Documents/ucis.duckdb'
+with duckdb.connect(db_path) as conn:
+    query = f"""
+    SELECT
+        model_name,
+        operation AS operation_type,
+        opnid AS operation_id,
+        t_code,
+        ts_name AS constituent,
+        AVG(value) * 365.25 AS annual_loading
+    FROM reports.catchment_loading
+    WHERE t_code = 'PERLND' AND constituent IN ('Q','TP','TSS','N','OP','BOD','TKN')
+    GROUP BY model_name, TVOLNO, constituent
+    """
+    annual_loadings = conn.execute(query).fetchdf()
+    conn.close()
+
+hbn.hbns[0].data_frames.keys()
+
+
+
+import duckdb
+import pandas as pd
 
 
+# Convert to DataFrame
+df = pd.DataFrame(data)
+df['datetime'] = pd.to_datetime(df['datetime'])  # Ensure datetime column is formatted properly
 
+# Write to Parquet with DuckDB, including "t_code" as a partition
+output_path = "model_outputs"
 
+con = duckdb.connect(database=':memory:')  # Temporary in-memory database
+con.execute(f"""
+    COPY df
+    TO '{output_path}'
+    (FORMAT 'parquet', PARTITION_BY ('operation_type', 'operation_id', 't_code'))
+""")
 
+print(f"Data written to {output_path}")
 
 
 
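The Parquet exports added in both hunks lean on two DuckDB behaviors: the Python client resolves a bare DataFrame name inside SQL against in-scope Python variables (a replacement scan), and `COPY ... (FORMAT 'parquet', PARTITION_BY ...)` writes a Hive-style directory tree. A self-contained sketch of the same pattern, with a toy stand-in for the script's `df`:

```python
import duckdb
import pandas as pd

# Toy stand-in for the script's DataFrame; column names mirror the script,
# values are invented.
df = pd.DataFrame({
    "datetime": pd.to_datetime(["1996-01-01", "1996-01-02"]),
    "operation_type": ["PERLND", "RCHRES"],
    "operation_id": [101, 1],
    "t_code": [4, 4],
    "value": [0.42, 1.70],
})

con = duckdb.connect(database=":memory:")
# DuckDB resolves the bare name `df` via a replacement scan, so no
# explicit con.register() call is needed (same as in the script).
con.execute("""
    COPY df
    TO 'model_outputs'
    (FORMAT 'parquet', PARTITION_BY ('operation_type', 'operation_id', 't_code'))
""")
# Writes model_outputs/operation_type=PERLND/operation_id=101/t_code=4/*.parquet, etc.
```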
hspf/hbn.py CHANGED
@@ -6,7 +6,7 @@ nutrients relevant for our current calibration methods. (See calibration_helpers
 
 @author: mfratki
 """
-from . import helpers
+from hspf import helpers
 import pandas as pd
 import math
 from struct import unpack
@@ -14,6 +14,7 @@ from numpy import fromfile
 from pandas import DataFrame
 from datetime import datetime, timedelta #, timezone
 from collections import defaultdict
+from collections.abc import MutableMapping
 #from pathlib import Path
 
 
@@ -140,8 +141,15 @@ def get_simulated_flow(hbn,time_step,reach_ids,unit = None):
     flows.attrs['unit'] = unit
     return flows
 
-def get_simulated_temperature(hbn,units,time_step,reach_ids):
-    raise NotImplementedError()
+def get_simulated_temperature(hbn,time_step,reach_ids):
+    assert len(reach_ids) == 1, "Temperature can only be retreived for one reach at a time."
+
+
+    wt = hbn.get_multiple_timeseries('RCHRES',time_step,'TW', reach_ids)
+    wt = wt.sum(axis=1)
+    wt.attrs['unit'] = 'degf'
+
+    return wt
 
 
 def get_simulated_reach_constituent(hbn,constituent,time_step,reach_ids,unit = None):
@@ -152,11 +160,11 @@ def get_simulated_reach_constituent(hbn,constituent,time_step,reach_ids,unit = None):
     if unit is None:
         unit = UNIT_DEFAULTS[constituent]
     else:
-        assert(unit in ['mg/l','lb','cfs','degF'])
+        assert(unit in ['mg/l','lb'])
 
     t_cons = helpers.get_tcons(constituent,'RCHRES','lb')
 
-    # Correct instances when a flow needs to be subtracted (rare)
+    # Correct instances when a reach output needs to be subtracted (rare)
     df = pd.concat([hbn.get_multiple_timeseries('RCHRES',time_step,t_con,[abs(reach_id) for reach_id in reach_ids])*sign for t_con in t_cons],axis=1).sum(axis=1)
 
     if constituent == 'TSS':
@@ -182,11 +190,30 @@ class hbnInterface:
     def _clear_cache(self):
         [hbn._clear_cache() for hbn in self.hbns]
 
+
+
     def get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
-        return pd.concat([hbn.get_time_series(t_opn, t_cons, t_code, opnid, activity) for hbn in self.hbns],axis = 1)
+        df = pd.concat([hbn._get_time_series(t_opn, t_cons, t_code, opnid, activity) for hbn in self.hbns],axis = 1)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_cons} {t_code} {opnid} {activity}")
+
+        if long_format:
+            df = df.reset_index().melt(id_vars = ['index'],var_name = 'OPNID',value_name = t_con)
+            df.rename(columns = {'index':'datetime'},inplace = True)
+            df['OPERATION'] = t_opn
+        return df
 
-    def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None,axis = 1):
-        return pd.concat([hbn.get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity) for hbn in self.hbns],axis = 1)
+    def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None,axis = 1,long_format = False):
+        df = pd.concat([hbn._get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity) for hbn in self.hbns],axis = 1)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_con} {t_code} {opnids} {activity}")
+
+        if long_format:
+            df = df.reset_index().melt(id_vars = ['index'],var_name = 'OPNID',value_name = 'value')
+            df.rename(columns = {'index':'datetime'},inplace = True)
+            df['TIMESERIES'] = t_con
+            df['OPERATION'] = t_opn
+        return df
 
     def get_perlnd_constituent(self,constituent,perlnd_ids = None,time_step = 5):
         return get_simulated_perlnd_constituent(self,constituent,time_step)
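The new `long_format` flag on `get_multiple_timeseries` converts the usual wide result (one column per OPNID) into the tidy layout consumed by build_warehouse.py. A sketch of the reshape that branch performs, on an invented two-operation frame (the `t_con`/`t_opn` values here are hypothetical):

```python
import pandas as pd

# Wide result as returned without long_format: one column per OPNID,
# timestamps on the index. Numbers are invented.
df = pd.DataFrame(
    {101: [0.10, 0.20], 102: [0.05, 0.07]},
    index=pd.to_datetime(["1996-01-01", "1996-01-02"]),
)

# Equivalent of the long_format branch above.
df = df.reset_index().melt(id_vars=["index"], var_name="OPNID", value_name="value")
df.rename(columns={"index": "datetime"}, inplace=True)
df["TIMESERIES"] = "PERO"    # t_con (hypothetical)
df["OPERATION"] = "PERLND"   # t_opn (hypothetical)
```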
@@ -210,13 +237,33 @@ class hbnInterface:
         # for dic in dics:
         #     for key, vals in dic.items():
         #         [dd[key].append(val) for val in vals]
-        dd = defaultdict(set)
+        # dd = defaultdict(set)
         dics = [hbn.output_names() for hbn in self.hbns]
+        return merge_dicts(dics)
+        # for dic in dics:
+        #     for operation, vals in dic.items():
+        #         for activity,v in vals.items():
+        #             [dd[operation][activity].add(t) for t in v]
+        # return dd
+
+    def _timeseries(self):
+        mapn = self._mapn()
+        timeseries = []
+        for key, vals in mapn.items():
+            _key = list(key)
+            for val in vals:
+                timeseries.append(_key + [val])
+        return timeseries
+
+
+    def _mapn(self):
+        dd = defaultdict(set)
+        dics = [hbn.mapn for hbn in self.hbns]
         for dic in dics:
             for key, vals in dic.items():
                 [dd[key].add(val) for val in vals]
-        return dd
-
+        return dd
+
     def get_perlnd_data(self,constituent,t_code = 'yearly'):
         t_cons = helpers.get_tcons(constituent,'PERLND')
 
@@ -229,14 +276,13 @@ class hbnInterface:
         return df
 
 
-    def get_rchres_data(self,constituent,reach_ids,units = 'mg/l',t_code = 'daily'):
+    def get_rchres_output(self,constituent,units = 'mg/l',t_code = 5):
         '''
         Convience function for accessing the hbn time series associated with our current
         calibration method. Assumes you are summing across all dataframes.
         '''
-
-        df = pd.concat([self.get_reach_constituent(constituent,[reach_id],t_code,units) for reach_id in reach_ids], axis = 1)
-        df.columns = reach_ids
+        t_cons = helpers.get_tcons(constituent,'RCHRES',units)
+        df = sum([self.get_multiple_timeseries('RCHRES',t_code,t_con) for t_con in t_cons])
         df.attrs['unit'] = units
         df.attrs['constituent'] = constituent
         return df
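`get_rchres_output` now sums one wide frame per `t_con` instead of concatenating per-reach columns. Python's built-in `sum` adds the DataFrames elementwise (it starts from 0, and `0 + DataFrame` broadcasts), so the result keeps one column per RCHRES id. A small sketch with invented values, using the 'N' constituent's two t_cons from helpers.py:

```python
import pandas as pd

# One frame per t_con ('NO3OUTTOT' and 'NO2OUTTOT' for constituent 'N'),
# each with one column per RCHRES opnid; values are invented.
no3 = pd.DataFrame({1: [1.0, 2.0], 2: [0.5, 0.5]})
no2 = pd.DataFrame({1: [0.1, 0.1], 2: [0.2, 0.3]})

total = sum([no3, no2])  # elementwise sum; indexes and columns must align
```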
@@ -392,19 +438,27 @@ class hbnClass:
     def infer_opnids(self,t_opn, t_cons,activity):
         result = [k[-2] for k,v in self.mapn.items() if (t_cons in v) & (k[0] == t_opn) & (k[-1] == activity)]
         if len(result) == 0:
-            return print('No Constituent-OPNID relationship found')
+            result = [-1]
+            # return print('No Constituent-OPNID relationship found')
         return result
 
 
     def infer_activity(self,t_opn, t_cons):
         result = [k[-1] for k,v in self.mapn.items() if (t_cons in v) & (k[0] == t_opn)]
         if len(result) == 0:
-            return print('No Constituent-Activity relationship found')
-        assert(len(set(result)) == 1)
-        return result[0]
-
+            result = ''
+        else:  # return print('No Constituent-Activity relationship found')
+            assert(len(set(result)) == 1)
+            result = result[0]
+        return result
 
     def get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
+        df = self._get_time_series(t_opn, t_cons, t_code, opnid, activity)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_cons} {t_code} {opnid} {activity}")
+        return df
+
+    def _get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
         """
         get a single time series based on:
         1. t_opn: RCHRES, IMPLND, PERLND
@@ -413,13 +467,15 @@ class hbnClass:
         4. t_activity: HYDR, IQUAL, etc
         5. time_unit: yearly, monthly, full (default is 'full' simulation duration)
         """
+
+
         if isinstance(t_code,str):
             t_code = self.tcodes[t_code]
 
         if activity is None:
             activity = self.infer_activity(t_opn,t_cons)
-            if activity is None:
-                return None
+
+
         summaryindx = f'{t_opn}_{activity}_{opnid:03d}_{t_code}'
         if summaryindx in self.summaryindx:
             df = self.data_frames[summaryindx][t_cons].copy()
@@ -431,25 +487,31 @@ class hbnClass:
             #df.index = df.index.shift(-1,TCODES2FREQ[t_code])
             df = df[df.index >= '1996-01-01']
         else:
-            df = None
+            df = pd.DataFrame()
 
         return df
+
     def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None):
+        df = self._get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_con} {t_code} {opnids} {activity}")
+        return df
+
+    def _get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None):
         # a single constituent but multiple opnids
+
+
         if isinstance(t_code,str):
             t_code = self.tcodes[t_code]
 
         if activity is None:
-            activity = self.infer_activity(t_opn,t_con)
-            if activity is None:
-                return None
-
+            activity = self.infer_activity(t_opn,t_con)
+
         if opnids is None:
             opnids = self.infer_opnids(t_opn,t_con,activity)
-            if opnids is None:
-                return None
+
 
-        df = None
+        df = pd.DataFrame()
         frames = []
         mapd_list = list(self.mapd.keys())
         for opnid in opnids:
@@ -468,9 +530,76 @@ class hbnClass:
             dic[activity] = set([item for sublist in t_cons for item in sublist])
         return dic
 
+
+    def output_names(self):
+
+        activities = []
+        operations = []
+        for k, v in self.mapn.items():
+            operations.append(k[0])
+            activities.append(k[-1])
+
+        operations = set(operations)
+        activities = set(activities)
+        #activities = set([k[-1] for k,v in self.mapn.items()])
+
+        dic = {}
+        for operation in operations:
+            acitivities = set([k[-1] for k,v in self.mapn.items() if k[0] == operation])
+            dic[operation] = {}
+            for activity in acitivities:
+                t_cons = [v for k,v in self.mapn.items() if (k[0] == operation) & (k[-1] == activity)]
+                dic[operation][activity] = set([item for sublist in t_cons for item in sublist])
+        # for activity in activities:
+        #     t_cons = [v for k,v in self.mapn.items() if k[-1] == activity]
+        #     dic[activity] = set([item for sublist in t_cons for item in sublist])
+        return dic
+
+    def get_timeseries(self):
+        mapn = self.mapn
+        timeseries = []
+        for key, vals in mapn.items():
+            _key = list(key)
+            for val in vals:
+                timeseries.append(_key + [val])
+        return timeseries
+
     @staticmethod
     def get_perlands(summary_indxs):
         perlands = [int(summary_indx.split('_')[-2]) for summary_indx in summary_indxs]
         return perlands
 
-
+
+def merge_dicts(dicts):
+    """
+    Merge a list of dictionaries into a single dictionary, combining sets
+    at the leaf level and properly merging nested dictionaries.
+
+    Args:
+        dicts (list): A list of dictionaries to merge.
+
+    Returns:
+        dict: The merged dictionary.
+    """
+    def recursive_merge(d1, d2):
+        for key, value in d2.items():
+            if key in d1:
+                # If the value is a dictionary, recurse
+                if isinstance(d1[key], MutableMapping) and isinstance(value, MutableMapping):
+                    recursive_merge(d1[key], value)
+                # If the value is a set, merge the sets
+                elif isinstance(d1[key], set) and isinstance(value, set):
+                    d1[key].update(value)
+                else:
+                    raise ValueError(f"Incompatible types for key '{key}': {type(d1[key])} vs {type(value)}")
+            else:
+                # If the key does not exist in d1, copy it
+                d1[key] = value
+
+    # Start with an empty dictionary
+    merged_dict = {}
+
+    for d in dicts:
+        recursive_merge(merged_dict, d)
+
+    return merged_dict
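A quick usage sketch of the new `merge_dicts` helper (operation/activity names chosen arbitrarily), showing how leaf-level sets are unioned while nested dictionaries merge recursively — this is how `hbnInterface.output_names` now folds the per-file dictionaries together:

```python
a = {"PERLND": {"PWATER": {"PERO", "SURO"}}}
b = {"PERLND": {"PWATER": {"AGWO"}, "SEDMNT": {"SOSED"}},
     "RCHRES": {"HYDR": {"ROVOL"}}}

merged = merge_dicts([a, b])
# {'PERLND': {'PWATER': {'PERO', 'SURO', 'AGWO'}, 'SEDMNT': {'SOSED'}},
#  'RCHRES': {'HYDR': {'ROVOL'}}}
```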
hspf/helpers.py CHANGED
@@ -48,9 +48,10 @@ def get_tcons(nutrient_name,operation,units = 'mg/l'):
             'N'   :['NO3OUTTOT','NO2OUTTOT'], # N
             'OP'  :['PO4OUTDIS'], # Ortho
             'TP'  :['PTOTOUT'],
-            'BOD' :['BODOUTTOT']},
+            'BOD' :['BODOUTTOT'],},
         'cfs': {'Q': ['ROVOL']},
-        'acrft' : {'Q': ['ROVOL']}}
+        'acrft' : {'Q': ['ROVOL']},
+        'degf' : {'WT': ['TW']}}
 
         t_cons = MAP[units]
     elif operation == 'PERLND':
hspf/hspfModel.py CHANGED
@@ -7,17 +7,19 @@ Created on Thu Oct 13 09:26:05 2022
 from pathlib import Path
 import os.path
 import subprocess
+import concurrent.futures
 
-from .uci import UCI
-from . import hbn
-from .reports import Reports
-from .wdm import wdmInterface
-from . import wdmReader
+from hspf.uci import UCI
+from hspf import hbn
+from hspf.reports import Reports
+from hspf.wdm import wdmInterface
+from hspf import wdmReader
 
 
 
 
 
+winHSPF = str(Path(__file__).resolve().parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
 
 
 # Only for accessing information regarding a specific uci_file
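The new module-level `winHSPF` constant locates the bundled WinHspfLt.exe relative to the installed package. An equivalent spelling with pathlib joins (same resulting string on Windows, where this executable runs) would be:

```python
from pathlib import Path

# Equivalent to the string concatenation above; Path inserts the separators.
winHSPF = str(Path(__file__).resolve().parent / "bin" / "WinHSPFLt" / "WinHspfLt.exe")
```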
@@ -51,6 +53,15 @@ class hspfModel():
         # Compositions
         self.reports = Reports(self.uci,self.hbns,self.wdms)
 
+    def _reinitialize(self,uci_file:str,run_model:bool = False):
+        self.uci = UCI(uci_file)
+        self.validate_uci(run_model = run_model)
+        self.hbns = hbn.hbnInterface(self.hbn_paths)
+        try:
+            self.wdms = wdmInterface(self.wdm_paths)
+        except:
+            self.wdms = None
+        self.reports = Reports(self.uci,self.hbns,self.wdms)
 
     def validate_wdms(self):
         # Ensure wdm files exist and the folders for the other file types exist relative
@@ -92,15 +103,16 @@ class hspfModel():
         else:
             self.run_model()
 
-    def run_model(self,new_uci_file = None):
+    def run_model(self,new_uci_file = None,):
 
         if new_uci_file is None:
             new_uci_file = self.uci_file
 
         # new_uci_file = self.model_path.joinpath(uci_name)
         # self.uci.write(new_uci_file)
-        subprocess.run([self.winHSPF,self.uci_file.as_posix()]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
-        self.load_uci(new_uci_file,run_model = False)
+
+        subprocess.run([winHSPF,self.uci_file.as_posix()]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
+        self._reinitialize(new_uci_file,run_model = False)
 
     def load_hbn(self,hbn_name):
         self.hbns[hbn_name] = hbn.hbnClass(self.uci_file.parent.joinpath(hbn_name).as_posix())
@@ -177,8 +189,35 @@ class hspfModel():
 
 
 
+def run_uci(uci_file:str, ):
+    """
+    convenience function to run a single model uci file.
+    """
+    print(f"Starting model: {uci_file}")
+    subprocess.run([winHSPF, uci_file])
+    print(f"Completed model: {uci_file}")
 
 
+def run_batch_files(file_list, max_concurrent=4):
+    """
+    Takes a list of .uci file paths and runs them N at a time.
+    """
+    # Create a pool of workers (threads)
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_concurrent) as executor:
+        # Submit all jobs to the pool
+        future_to_file = {
+            executor.submit(run_uci, uci_file): uci_file
+            for uci_file in file_list
+        }
+
+        # Monitor completion (optional, but good for error catching)
+        for future in concurrent.futures.as_completed(future_to_file):
+            uci_file = future_to_file[future]
+            try:
+                future.result()  # This will raise exceptions if run_uci failed
+            except Exception as exc:
+                print(f"File {uci_file} generated an exception: {exc}")
+
 
 # class runManager():
 #     def __init__()
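The two new module-level helpers let a batch of models run concurrently; threads (rather than processes) suffice here because each worker just blocks on the WinHspfLt.exe subprocess, which releases the GIL. A hypothetical invocation, with invented paths:

```python
from hspf.hspfModel import run_batch_files  # import path assumed from this module

uci_files = [
    "c:/models/ucis/base.uci",        # hypothetical .uci paths
    "c:/models/ucis/scenario_a.uci",
    "c:/models/ucis/scenario_b.uci",
]

# Run at most two HSPF models at a time; exceptions from any run are
# caught and reported by run_batch_files itself.
run_batch_files(uci_files, max_concurrent=2)
```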
hspf/parser/graph.py CHANGED
@@ -635,7 +635,8 @@ class reachNetwork():
         areas = areas.groupby(['source_type','source_type_id','source_name'])['area'].sum()[['PERLND','IMPLND']]
 
         if group:
-            areas = pd.concat([areas[operation].groupby('source_name').sum() for operation in ['PERLND','IMPLND']])
+            areas = areas.groupby(['source_type','source_name']).sum()
+            #areas = pd.concat([areas[operation].groupby('source_name').sum() for operation in ['PERLND','IMPLND']])
 
         #areas = pd.concat([areas[operation].groupby(self.uci.opnid_dict[operation].loc[areas[operation].index,'LSID'].values).sum() for operation in ['PERLND','IMPLND']])
         return areas