pyhcal 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyhcal/metrics.py ADDED
@@ -0,0 +1,485 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Fri Apr 5 09:44:14 2024
+ 
+ @author: mfratki
+ """
+ import numpy as np
+ import pandas as pd
+ try:
+     import baseflow as bf
+ except ImportError:
+     bf = None  # optional dependency; hydro_sep() falls back to a rolling minimum
+ 
+ '''
+ Aggregation conventions by unit:
+ 
+ monthly
+     cfs  - mean, median, std
+     in   - sum
+     mg/l - mean, median, std
+     lb   - sum
+ 
+ annual
+ '''
+ 
+ # @dataclass
+ # class Timeseries:
+ 
+ # @dataclass
+ # class HydroData:
+ #     sim
+ #     obs
+ #     units
+ 
+ # class Metrics():
+ #     data
+ 
+ def aggregate(df, units):
+     '''Aggregate paired simulated/observed series by calendar month.'''
+     assert units in ['lb', 'mg/l', 'in', 'cfs', 'degC']
+     # Concentrations, flow rates, and temperatures are averaged;
+     # masses and depths are accumulated.
+     if units in ['mg/l', 'cfs', 'degC']:
+         agg_func = 'mean'
+     else:
+         agg_func = 'sum'
+ 
+     # Pre-fill all 12 months with NaN so months missing from the record still appear.
+     df_agg = pd.DataFrame(np.ones((12, 3))*np.nan, index=range(1, 13),
+                           columns=['simulated', 'observed', 'ratio'])
+     df_agg.index.name = 'month'
+     df = df.groupby(df.index.month).agg(agg_func)[['simulated', 'observed']]
+     df['ratio'] = df['observed']/df['simulated']
+     df_agg.loc[df.index, df.columns] = df.values
+ 
+     df_agg.loc['Mean'] = df_agg.agg('mean')
+     df_agg['ratio'] = df_agg['observed']/df_agg['simulated']
+     return df_agg
+ 
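
A minimal usage sketch (editorial, not part of the released files), assuming synthetic daily data; aggregate() only needs a datetime-indexed frame with 'simulated' and 'observed' columns:

    import numpy as np
    import pandas as pd
    from pyhcal.metrics import aggregate

    idx = pd.date_range('2020-01-01', '2021-12-31', freq='D')
    rng = np.random.default_rng(0)  # synthetic flows, purely illustrative
    df = pd.DataFrame({'simulated': rng.gamma(2.0, 50.0, len(idx)),
                       'observed': rng.gamma(2.0, 55.0, len(idx))}, index=idx)
    print(aggregate(df, 'cfs'))  # twelve monthly means plus an overall 'Mean' row
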
+ def NSE(df_daily, agg_func='mean'):
+     # Nash-Sutcliffe efficiency on daily values, Garrick's absolute-deviation
+     # variant, and NSE on values grouped by calendar month.
+     df_monthly = df_daily.groupby(df_daily.index.month).agg(agg_func)
+ 
+     NSE_daily = round(1 - np.sum((df_daily['observed'] - df_daily['simulated'])**2)
+                       / np.sum((df_daily['observed'] - df_daily['observed'].mean())**2), 3)
+     NSE_garrick = round(1 - np.sum(np.absolute(df_daily['observed'] - df_daily['simulated']))
+                         / np.sum(np.absolute(df_daily['observed'] - df_daily['observed'].mean())), 3)
+     NSE_monthly = round(1 - np.sum((df_monthly['observed'] - df_monthly['simulated'])**2)
+                         / np.sum((df_monthly['observed'] - df_monthly['observed'].mean())**2), 3)
+     metric = pd.DataFrame(data=[NSE_daily, NSE_garrick, NSE_monthly]).transpose()
+     metric.columns = ['NSE', 'Garrick', 'Monthly']
+     return metric
+ 
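
NSE here is the standard 1 - sum((obs - sim)^2) / sum((obs - mean(obs))^2), with 'Garrick' swapping squared deviations for absolute ones; a perfect simulation scores 1.0 and simply predicting the observed mean scores 0.0. A quick editorial check:

    idx = pd.date_range('2020-01-01', periods=365, freq='D')
    df = pd.DataFrame({'observed': np.arange(365.0)}, index=idx)
    df['simulated'] = df['observed']   # perfect match
    print(NSE(df))                     # all three scores come out 1.0
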
+ 
+ #%% Hydrology
+ def hydro_stats(df_daily, drg_area):
+     '''
+     Parameters
+     ----------
+     df_daily : pd.DataFrame
+         Daily flow timeseries in cfs, with a datetime index and columns
+         labeled 'observed' and 'simulated'.
+     drg_area : float
+         Drainage area in acres.
+ 
+     Returns
+     -------
+     df : pd.DataFrame
+         Various metrics used to evaluate model hydrology simulations.
+         Outputs are in inches.
+     '''
+     df_daily = hydro_sep(drg_area*0.0015625, df_daily)  # acres -> square miles (1/640)
+     df_daily_inches = df_daily*60*60*24*12/(drg_area*43560)  # cfs -> basin-average inches per day
+     dfs = [total(df_daily_inches, 'in'),
+            annual(df_daily_inches, 'in'),
+            monthly(df_daily_inches, 'in'),
+            season(df_daily_inches, 'in'),
+            low_50(df_daily_inches),
+            high_10(df_daily_inches),
+            storm(df_daily_inches),
+            storm_summer(df_daily_inches),
+            baseflow(df_daily_inches)]
+ 
+     dfs[0]['interval'] = 'Total'
+     dfs[1]['interval'] = 'Annual'
+     dfs[2].rename(columns={'month': 'interval'}, inplace=True)
+     dfs[3].rename(columns={'season': 'interval'}, inplace=True)
+     dfs[4]['interval'] = 'Low 50'
+     dfs[5]['interval'] = 'High 10'
+     dfs[6]['interval'] = 'Storm'
+     dfs[7]['interval'] = 'Summer Storm'
+     dfs[8]['interval'] = 'Baseflow'
+ 
+     df = pd.concat(dfs)
+     df.set_index('interval', inplace=True)
+     # The NSE rows carry only a score (stored in 'observed') and a calibration goal.
+     nse = NSE(df_daily)
+     df.loc['Daily NSE', :] = pd.NA
+     df.loc['Daily Garrick', :] = pd.NA
+     df.loc['Monthly NSE', :] = pd.NA
+     df.loc['Daily NSE', 'observed'] = nse['NSE'].iloc[0]
+     df.loc['Daily Garrick', 'observed'] = nse['Garrick'].iloc[0]
+     df.loc['Monthly NSE', 'observed'] = nse['Monthly'].iloc[0]
+     df.loc['Daily NSE', 'goal'] = .7
+     df.loc['Daily Garrick', 'goal'] = .55
+     df.loc['Monthly NSE', 'goal'] = .8
+     return df
+ 
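
An editorial end-to-end sketch with placeholder numbers (not a real watershed); hydro_stats() expects daily cfs and a drainage area in acres, and if the optional baseflow package is unavailable it falls back to the rolling-minimum separation below:

    idx = pd.date_range('2015-01-01', '2019-12-31', freq='D')
    rng = np.random.default_rng(1)
    flows = pd.DataFrame({'observed': rng.gamma(2.0, 40.0, len(idx)),
                          'simulated': rng.gamma(2.0, 42.0, len(idx))}, index=idx)
    report = hydro_stats(flows, drg_area=64000)   # 64,000 ac is roughly 100 mi^2
    print(report[['observed', 'simulated', 'per_error', 'goal']])
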
+ 
+ def hydro_sep_baseflow(df_daily, drng_area=None, method='Boughton'):
+     # Baseflow separation via the optional `baseflow` package. The method
+     # (e.g. 'Boughton') could instead be chosen by KGE ranking:
+     #   dfs, df_kge = bf.single(df_daily['observed'], area=drng_area)
+     #   method = df_kge.idxmax()
+     df_daily['observed_baseflow'] = bf.single(df_daily['observed'], area=drng_area,
+                                               method=method, return_kge=False)[0][method]
+     df_daily['simulated_baseflow'] = bf.single(df_daily['simulated'], area=drng_area,
+                                                method=method, return_kge=False)[0][method]
+     df_daily['observed_runoff'] = df_daily['observed'] - df_daily['observed_baseflow']
+     df_daily['simulated_runoff'] = df_daily['simulated'] - df_daily['simulated_baseflow']
+     return df_daily
+ 
+ 
+ def hydro_sep(drg_area, df_daily):
+     # drg_area is in square miles here (hydro_stats converts from acres).
+     try:
+         df_daily = hydro_sep_baseflow(df_daily, drg_area)
+     except Exception:
+         # Fallback: fixed-interval local-minimum separation. The interval
+         # 2N* = 2 * A**0.2 (A in mi^2) is rounded to an odd number of days
+         # and clamped to [3, 11], as in USGS HYSEP-style methods.
+         twonstar = 2*drg_area**0.2
+         twonstar = round(twonstar, 0)
+         if twonstar % 2 == 0:
+             twonstar = twonstar - 1
+         twonstar = np.median([3, 11, twonstar])  # clamp between 3 and 11
+         ndays = int((twonstar - 1)/2)
+ 
+         df_daily['observed_baseflow'] = df_daily['observed'].rolling(
+             2*ndays + 1, center=True, min_periods=ndays + 1).apply(np.nanmin)
+         df_daily['observed_runoff'] = df_daily['observed'] - df_daily['observed_baseflow']
+         df_daily['simulated_baseflow'] = df_daily['simulated'].rolling(
+             2*ndays + 1, center=True, min_periods=ndays + 1).apply(np.nanmin)
+         df_daily['simulated_runoff'] = df_daily['simulated'] - df_daily['simulated_baseflow']
+     return df_daily
+ 
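
Worked numbers for the fallback window (editorial): for a hypothetical 100 mi^2 basin, 2 * 100**0.2 is about 5.02, which rounds to the odd interval 5, so ndays = 2 and the local minimum is taken over a centered 5-day window:

    A = 100.0                                        # drainage area in mi^2 (hypothetical)
    twonstar = round(2 * A**0.2, 0)                  # 5.02 -> 5.0, already odd
    twonstar = float(np.median([3, 11, twonstar]))   # clamp to [3, 11]; stays 5.0
    ndays = int((twonstar - 1) / 2)                  # 2 -> rolling window of 2*2+1 = 5 days
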
+ # def aggregate(df, period=None, agg_func='mean'):
+ #     if period is None:
+ #         df = df.agg(agg_func)
+ #     elif period == 'Y':
+ #         grouper = df.index.year
+ #     elif period == 'M':
+ #         grouper = df.index.month
+ #     elif period == 'D':
+ #         grouper = df.index.dayofyear
+ 
+ 
+ def annual(df_daily, units):
+     assert units in ['cfs', 'in', 'lb', 'mg/l', 'degC']
+     if units in ['in', 'lb']:
+         # Average annual total, normalized by the (possibly partial) years of record.
+         n_years = df_daily.groupby(df_daily.index.year).count()/365.25
+         df_daily = ((df_daily.groupby(df_daily.index.year).sum())/n_years).mean()
+     else:
+         df_daily = (df_daily.groupby(df_daily.index.year).mean()).mean()
+ 
+     observed = df_daily['observed']
+     simulated = df_daily['simulated']
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [10]}
+     return pd.DataFrame(metric)
+ 
+ def total(df_daily, units):
+     # Period-of-record total (for in/lb) or mean (for cfs, mg/l, degC).
+     if units in ['in', 'lb']:
+         agg_func = 'sum'
+     else:
+         agg_func = 'mean'
+ 
+     df_daily = df_daily.agg(agg_func)
+     observed = df_daily['observed']
+     simulated = df_daily['simulated']
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [10]}
+     return pd.DataFrame(metric)
+ 
+ def monthly(df_daily, units):
+     assert units in ['cfs', 'in', 'lb', 'mg/l', 'degC']
+     # count/30 approximates the number of months of record in each calendar
+     # month, so the sums below become average monthly totals.
+     n_months = df_daily.groupby(df_daily.index.month).count()/30
+     if units in ['in', 'lb']:
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).sum()/n_months
+     else:
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).mean()
+ 
+     # Monthly average flow volumes; months absent from the record get NA rows.
+     month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+     metrics = []
+     for month in range(1, 13):
+         if month in df_monthly_grouped.index:
+             observed = df_monthly_grouped.loc[month, 'observed']
+             simulated = df_monthly_grouped.loc[month, 'simulated']
+             error = round((simulated - observed)/observed*100, 2)
+             metric = {'month': month_names[month - 1],
+                       'observed': [observed],
+                       'simulated': [simulated],
+                       'per_error': [error],
+                       'abs_error': [simulated - observed],
+                       'goal': [30]}
+         else:
+             metric = {'month': month_names[month - 1],
+                       'observed': [pd.NA],
+                       'simulated': [pd.NA],
+                       'per_error': [pd.NA],
+                       'abs_error': [pd.NA],
+                       'goal': [pd.NA]}
+ 
+         metrics.append(pd.DataFrame(metric))
+     return pd.concat(metrics).reset_index(drop=True)
+ 
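
A short editorial sketch of the NA handling for a partial record:

    idx = pd.date_range('2020-06-01', '2020-08-31', freq='D')   # summer-only record
    df = pd.DataFrame({'simulated': 1.0, 'observed': 1.1}, index=idx)
    print(monthly(df, 'in'))   # Jun-Aug populated; the other nine months are NA rows
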
+ def exceedence(df):
+     # Weibull plotting position: exceedance probability = rank/(n+1)*100,
+     # ranking in descending order so the largest value exceeds least often.
+     df['simulated_rank'] = df['simulated'].rank(method='average', ascending=False)
+     df['simulated_exceed'] = df['simulated_rank'] / (len(df) + 1) * 100
+     df['observed_rank'] = df['observed'].rank(method='average', ascending=False)
+     df['observed_exceed'] = df['observed_rank'] / (len(df) + 1) * 100
+ 
+     if 'simulated_flow' in df.columns:
+         df['simulated_flow_rank'] = df['simulated_flow'].rank(method='average', ascending=False)
+         df['simulated_flow_exceed'] = df['simulated_flow_rank'] / (len(df) + 1) * 100
+         df['observed_flow_rank'] = df['observed_flow'].rank(method='average', ascending=False)
+         df['observed_flow_exceed'] = df['observed_flow_rank'] / (len(df) + 1) * 100
+     return df  # columns are also added in place
+ 
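
The Weibull plotting position gives rank/(n+1)*100, so with three values the exceedance probabilities land at 25/50/75 percent; an editorial check:

    df = pd.DataFrame({'observed': [5.0, 1.0, 3.0],
                       'simulated': [4.0, 2.0, 3.0]})
    df = exceedence(df)
    print(df['observed_exceed'].tolist())   # [25.0, 75.0, 50.0]
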
+ def low_50(df_daily):
+     # Average annual volume of the lowest 50% of daily flows.
+     num_years = len(df_daily.index)/365.25
+     num_bottom50 = int(len(df_daily.index)/2)
+     observed = df_daily.sort_values('observed', ascending=False).tail(num_bottom50).sum()/num_years
+     simulated = df_daily.sort_values('simulated', ascending=False).tail(num_bottom50).sum()/num_years
+     error = round((simulated['simulated'] - observed['observed'])/observed['observed']*100, 2)
+     observed = float(round(observed['observed'], 2))
+     simulated = float(round(simulated['simulated'], 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [10]}
+     return pd.DataFrame(metric)
+ 
+ def high_10(df_daily):
+     # Average annual volume of the highest 10% of daily flows.
+     num_years = len(df_daily.index)/365.25
+     num_top10 = int(len(df_daily.index)/10)
+     observed = df_daily.sort_values('observed', ascending=False).head(num_top10).sum()/num_years
+     simulated = df_daily.sort_values('simulated', ascending=False).head(num_top10).sum()/num_years
+     error = round((simulated['simulated'] - observed['observed'])/observed['observed']*100, 2)
+     observed = float(round(observed['observed'], 2))
+     simulated = float(round(simulated['simulated'], 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [15]}
+     return pd.DataFrame(metric)
+ 
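
An editorial check of high_10(): with simulated flows uniformly 10% above observed, the top-decile volumes should differ by exactly 10%:

    idx = pd.date_range('2020-01-01', periods=1000, freq='D')
    df = pd.DataFrame({'observed': np.arange(1000.0),
                       'simulated': np.arange(1000.0)*1.1}, index=idx)
    print(high_10(df)['per_error'].iloc[0])   # 10.0
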
+ def season(df_daily, units):
+     assert units in ['cfs', 'in', 'lb', 'mg/l', 'degC']
+ 
+     n_months = df_daily.groupby(df_daily.index.month).count()/30
+     if units in ['in', 'lb']:
+         agg_func = 'sum'
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).sum()/n_months
+     else:
+         agg_func = 'mean'
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).mean()
+ 
+     metrics = []
+     # Three-month seasons keyed by their starting month: summer (Jul-Sep),
+     # fall (Oct-Dec), winter (Jan-Mar), spring (Apr-Jun).
+     for season_start in [7, 10, 1, 4]:
+         if season_start == 7:
+             season = 'summer'
+         elif season_start == 10:
+             season = 'fall'
+         elif season_start == 1:
+             season = 'winter'
+         elif season_start == 4:
+             season = 'spring'
+         observed = df_monthly_grouped['observed'].loc[season_start:season_start + 2].agg(agg_func)
+         simulated = df_monthly_grouped['simulated'].loc[season_start:season_start + 2].agg(agg_func)
+         error = round((simulated - observed)/observed*100, 2)
+         observed = float(round(observed, 2))
+         simulated = float(round(simulated, 2))
+         metrics.append({'season': season,
+                         'observed': observed,
+                         'simulated': simulated,
+                         'per_error': error,
+                         'abs_error': simulated - observed,
+                         'goal': 30})
+     return pd.DataFrame(metrics).reset_index(drop=True)
+ 
+ def storm(df_daily, agg_func='mean'):
+     # Average annual storm (direct runoff) volume; requires the *_runoff
+     # columns added by hydro_sep().
+     num_years = len(df_daily.index)/365.25
+     df_daily = df_daily.sum()/num_years
+     observed = df_daily['observed_runoff']
+     simulated = df_daily['simulated_runoff']
+ 
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [20]}
+     return pd.DataFrame(metric)
+ 
+ def storm_summer(df_daily, agg_func='mean'):
+     # Average annual summer (Jul-Sep) storm volume.
+     n_months = df_daily.groupby(df_daily.index.month).count()/30
+     df_monthly_grouped = df_daily.groupby(df_daily.index.month).sum()/n_months
+ 
+     observed = df_monthly_grouped['observed_runoff'].loc[7:9].sum()
+     simulated = df_monthly_grouped['simulated_runoff'].loc[7:9].sum()
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [50]}
+     return pd.DataFrame(metric)
+ 
+ def baseflow(df_daily, agg_func='mean'):
+     # Average annual baseflow volume; requires the *_baseflow columns
+     # added by hydro_sep().
+     num_years = len(df_daily.index)/365.25
+     observed = df_daily['observed_baseflow'].sum()/num_years
+     simulated = df_daily['simulated_baseflow'].sum()/num_years
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [20]}
+     return pd.DataFrame(metric)
+ 
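
Note that storm(), storm_summer(), and baseflow() all read the *_runoff and *_baseflow columns, so hydro_sep() must run first; hydro_stats() does this internally. A standalone editorial sketch:

    rng = np.random.default_rng(2)
    idx = pd.date_range('2020-01-01', periods=730, freq='D')
    df = pd.DataFrame({'observed': rng.gamma(2.0, 10.0, 730),
                       'simulated': rng.gamma(2.0, 10.0, 730)}, index=idx)
    df = hydro_sep(100, df)   # hypothetical 100 mi^2 basin
    print(storm(df)[['observed', 'simulated', 'per_error']])
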
+ 
+ #%% Sediment
+ 
+ # def sed_stats(df, units):
+ #     if units == 'mg/l':
+ #         agg_func = 'mean'
+ #     else:
+ #         agg_func = 'sum'
+ 
+ #     df_agg = pd.DataFrame(np.ones((12, 3))*np.nan, index=range(1, 13),
+ #                           columns=['model', 'flux', 'ratio'])
+ #     df_agg.index.name = 'month'
+ #     df = df.groupby(df.index.month).agg(agg_func)[['Simflow', 'Obsflow']]
+ #     df.columns = ['model', 'flux']
+ #     df['ratio'] = df['flux']/df['model']
+ #     df_agg.loc[df.index, df.columns] = df.values
+ 
+ #     df_agg.loc['Mean'] = df_agg.agg('mean')
+ #     df_agg['ratio'] = df_agg['flux']/df_agg['model']
+ #     return df_agg
+ 
+ def stats(df_daily, units):
+     # Stack total/annual/monthly/seasonal metrics into one table
+     # indexed by interval.
+     dfs = [total(df_daily, units),
+            annual(df_daily, units),
+            monthly(df_daily, units),
+            season(df_daily, units)]
+ 
+     dfs[0]['interval'] = 'Total'
+     dfs[1]['interval'] = 'Annual'
+     dfs[2].rename(columns={'month': 'interval'}, inplace=True)
+     dfs[3].rename(columns={'season': 'interval'}, inplace=True)
+     df = pd.concat(dfs)
+     df.set_index('interval', inplace=True)
+     return df
+ 
+ #%% Nutrients
+ def nutrient_stats(df_daily, units):
+     # Identical pipeline to stats(); kept as a separate entry point
+     # for nutrient series.
+     dfs = [total(df_daily, units),
+            annual(df_daily, units),
+            monthly(df_daily, units),
+            season(df_daily, units)]
+ 
+     dfs[0]['interval'] = 'Total'
+     dfs[1]['interval'] = 'Annual'
+     dfs[2].rename(columns={'month': 'interval'}, inplace=True)
+     dfs[3].rename(columns={'season': 'interval'}, inplace=True)
+     df = pd.concat(dfs)
+     df.set_index('interval', inplace=True)
+     return df
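
stats() and nutrient_stats() share one pipeline: the total, annual, monthly, and seasonal tables are stacked into a single frame indexed by 'interval'. An editorial sketch for a daily load series in lb:

    idx = pd.date_range('2018-01-01', '2020-12-31', freq='D')
    rng = np.random.default_rng(3)
    loads = pd.DataFrame({'observed': rng.gamma(2.0, 5.0, len(idx)),
                          'simulated': rng.gamma(2.0, 5.5, len(idx))}, index=idx)
    table = nutrient_stats(loads, 'lb')   # rows: Total, Annual, Jan..Dec, four seasons
    print(table.loc[['Total', 'Annual', 'summer']])
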
pyhcal/modl_db.py ADDED
@@ -0,0 +1,97 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Thu May 1 09:51:51 2025
+ 
+ @author: mfratki
+ """
+ #import sqlite3
+ from pathlib import Path
+ import geopandas as gpd
+ import pandas as pd
+ #from hspf_tools.calibrator import etlWISKI, etlSWD
+ 
+ #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
+ 
+ # Station metadata layers bundled with the package; Path joins (rather than
+ # hard-coded backslashes) keep this portable across operating systems.
+ stations_wiski = gpd.read_file(
+     str(Path(__file__).resolve().parent/'data'/'stations_wiski.gpkg')
+ ).dropna(subset='opnids')[['station_id', 'true_opnid', 'opnids', 'comments',
+                            'modeled', 'repository_name', 'wplmn_flag']]
+ stations_wiski['source'] = 'wiski'
+ stations_equis = gpd.read_file(
+     str(Path(__file__).resolve().parent/'data'/'stations_EQUIS.gpkg')
+ ).dropna(subset='opnids')[['id_code', 'true_opnid', 'opnids', 'comments',
+                            'modeled', 'repository_name']]
+ stations_equis['source'] = 'equis'
+ stations_equis['wplmn_flag'] = 0
+ stations_equis = stations_equis.rename(columns={'id_code': 'station_id'})
+ 
+ MODL_DB = pd.concat([stations_wiski, stations_equis])
+ 
+ database = """
+ -- Stations/Locations table
+ CREATE TABLE IF NOT EXISTS Station (
+     stationPK INTEGER PRIMARY KEY AUTOINCREMENT,
+     reachPK INTEGER REFERENCES Reach(reachPK),
+     stationID TEXT NOT NULL,
+     stationName TEXT,
+     stationOrigin TEXT NOT NULL,
+     latitude REAL,
+     longitude REAL,
+     stationType TEXT,
+     UNIQUE(stationID, stationOrigin)
+ );
+ 
+ -- Station Associations table
+ CREATE TABLE IF NOT EXISTS StationAssociations (
+     stationPK INTEGER REFERENCES Station(stationPK),
+     associationPK INTEGER REFERENCES Station(stationPK)
+ );
+ 
+ -- Station Aliases table
+ CREATE TABLE IF NOT EXISTS StationAliases (
+     stationPK INTEGER NOT NULL,
+     aliasPK INTEGER NOT NULL,
+     FOREIGN KEY (stationPK) REFERENCES Station(stationPK),
+     FOREIGN KEY (aliasPK) REFERENCES Station(stationPK)
+ );
+ 
+ CREATE TABLE Reach (
+     reachPK INTEGER PRIMARY KEY,
+     modelName TEXT NOT NULL,
+     reachID INTEGER NOT NULL,
+     drainageArea FLOAT
+ );
+ 
+ CREATE TABLE Outlet (
+     outletPK INTEGER PRIMARY KEY,
+     outletName TEXT
+ );
+ 
+ -- Outlet-Station Associations table
+ CREATE TABLE IF NOT EXISTS OutletStationAssociations (
+     outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
+     stationPK INTEGER NOT NULL REFERENCES Station(stationPK)
+ );
+ 
+ -- Outlet-Reach Associations table
+ CREATE TABLE IF NOT EXISTS OutletReachAssociations (
+     outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
+     reachPK INTEGER NOT NULL REFERENCES Reach(reachPK),
+     exclude INTEGER NOT NULL
+ );"""
+ 
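
Nothing in this module executes the schema; a sketch of how it could be applied with the standard library (importing the module also loads the bundled station layers):

    import sqlite3
    from pyhcal.modl_db import database

    con = sqlite3.connect(':memory:')   # or a file path
    con.executescript(database)
    con.execute('INSERT INTO Reach (reachPK, modelName, reachID) VALUES (?, ?, ?)',
                (1, 'Clearwater', 650))  # values borrowed from the comments below
    con.commit()
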
+ #row = modl_db.MODL_DB.iloc[0]
+ #info = etlWISKI.info(row['station_id'])
+ #modl_db.MODL_DB.query('source == "equis"')
+ 
+ # outlet_dict = {'stations': {'wiski': ['E66050001'],
+ #                             'equis': ['S002-118']},
+ #                'reaches': {'Clearwater': [650]}}
+ 
+ # station_ids = ['S002-118']
+ # #station_ids = ['E66050001']
+ # reach_ids = [650]
+ # flow_station_ids = ['E66050001']
pyhcal/repository.py ADDED
@@ -0,0 +1,98 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Wed Nov 27 09:16:30 2024
+ 
+ @author: mfratki
+ """
+ 
+ import pandas as pd
+ from pyhcal.modl_db import MODL_DB
+ from pathlib import Path
+ import shutil
+ 
+ # Could point to the GitHub repository if it becomes public?
+ DEFAULT_REPOSITORY_PATH = Path(r'X:\Databases2\Water_Quality\Watershed_Modeling\MPCA_HSPF_Model_Repository')
+ 
+ class Repository():
+ 
+     HUC_DIRECTORY = pd.read_csv(
+         str(Path(__file__).resolve().parent/'data'/'HUC_Names.csv'),
+         dtype={'USGS HUC-8': 'string',
+                'USGS HUC-6': 'string',
+                'USGS HUC-4': 'string',
+                'USGS HUC-2': 'string'})
+ 
+     MODL_DB = MODL_DB
+ 
+     @classmethod
+     def valid_models(cls):
+         return sorted(set(cls.HUC_DIRECTORY['Repository_HUC8 Name']
+                           .dropna().replace('NO MODEL', pd.NA).dropna().to_list()))
+ 
+     def __init__(self, model_name, repository_path=DEFAULT_REPOSITORY_PATH):
+         if model_name not in self.valid_models():
+             raise ValueError('Please provide a valid model name (see .valid_models())')
+ 
+         self.REPOSITORY_PATH = repository_path
+         huc_directory = self.HUC_DIRECTORY.loc[self.HUC_DIRECTORY['Repository_HUC8 Name'] == model_name]
+         self.modl_db = self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
+         self.model_name = model_name
+         self.huc8_ids = list(huc_directory['USGS HUC-8'])
+         self.huc6_name = huc_directory['Repository_HUC6 Name'].iloc[0]
+         self.huc6_id = huc_directory['USGS HUC-6'].iloc[0]
+         # Model folders are laid out as <HUC6 name>_<HUC6 id>/<model name>_...
+         self.repo_folder = [item for item in
+                             self.REPOSITORY_PATH.joinpath('_'.join([self.huc6_name, self.huc6_id])).iterdir()
+                             if item.name.split('_')[0] == self.model_name][0]
+         self.uci_file = self.repo_folder.joinpath('HSPF', '.'.join([self.model_name, 'uci']))
+         self.wdm_files = [item for item in self.repo_folder.joinpath('HSPF').iterdir()
+                           if item.name.lower().endswith('.wdm')]
+         self.shapefiles = {item.name.split('.')[0].split('_')[-1]: item
+                            for item in self.repo_folder.joinpath('GIS').iterdir()
+                            if item.name.lower().endswith('.shp')}
+ 
+     def copy(self, copy_path):
+         # Copy the UCI, WDMs, station table, and GIS layers into a new
+         # working-folder structure (see build_folders below).
+         copy_path = Path(copy_path)
+         build_folders(copy_path)
+         shutil.copyfile(self.uci_file, copy_path.joinpath('model', '.'.join([self.model_name, 'uci'])))
+         for wdm_file in self.wdm_files:
+             shutil.copyfile(wdm_file, copy_path.joinpath('model', Path(wdm_file).name))
+         self.modl_db.to_csv(copy_path.joinpath('_'.join([self.model_name, 'MODL_DB.csv'])))
+         self.copy_shapefiles(copy_path.joinpath('gis'))
+ 
+     def copy_wdm(self, copy_path):
+         for wdm_file in self.wdm_files:
+             shutil.copyfile(wdm_file, Path(copy_path).joinpath(Path(wdm_file).name))
+ 
+     def copy_uci(self, copy_path):
+         shutil.copyfile(self.uci_file, Path(copy_path).joinpath('.'.join([self.model_name, 'uci'])))
+ 
+     def copy_modl_db(self, copy_path):
+         self.modl_db.to_csv(Path(copy_path).joinpath('_'.join([self.model_name, 'MODL_DB.csv'])))
+ 
+     def copy_shapefiles(self, copy_path):
+         # Copy every sidecar file (.shp, .dbf, .prj, ...) sharing the stem.
+         for shapefile in self.shapefiles.values():
+             files = [file for file in shapefile.parent.iterdir() if file.stem == shapefile.stem]
+             for file in files:
+                 shutil.copyfile(file, Path(copy_path).joinpath(Path(file).name))
+ 
+ def build_folders(trg_path):
+     # Create the standard working-folder skeleton if it does not exist.
+     sub_folders = ['model',
+                    'output',
+                    'inputs',
+                    'figures',
+                    'gis',
+                    'data']
+ 
+     trg_path = Path(trg_path)
+     if not trg_path.is_dir():
+         trg_path.mkdir(parents=True)
+ 
+     for path in sub_folders:
+         if not trg_path.joinpath(path).is_dir():
+             trg_path.joinpath(path).mkdir()
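
An editorial usage sketch; this requires access to the MPCA repository share (or a repository_path pointing at a local mirror) plus the bundled station layers, and 'Clearwater' is a hypothetical model name:

    from pyhcal.repository import Repository

    print(Repository.valid_models()[:5])    # names accepted by the constructor
    repo = Repository('Clearwater')
    repo.copy('C:/temp/clearwater_run')     # builds model/output/inputs/figures/gis/data
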