pyhcal 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyhcal/metrics.py ADDED
@@ -0,0 +1,485 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Fri Apr 5 09:44:14 2024
+ 
+ @author: mfratki
+ """
+ import numpy as np
+ import pandas as pd
+ try:
+     import baseflow as bf
+ except ImportError:
+     bf = None  # optional dependency; hydro_sep() falls back to a rolling minimum
+ 
+ '''
+ Aggregation conventions by unit:
+ 
+ monthly
+     cfs  - mean, median, std
+     in   - sum
+     mg/l - mean, median, std
+     lb   - sum
+ 
+ annual
+ '''
+ 
+ # @dataclass
+ # class Timeseries:
+ 
+ # @dataclass
+ # class HydroData:
+ #     sim
+ #     obs
+ #     units
+ 
+ # class Metrics():
+ #     data
+ 
+ def aggregate(df, units):
+     '''Aggregate paired simulated/observed series by calendar month.'''
+     assert units in ['lb', 'mg/l', 'in', 'cfs', 'degC']
+     # Concentrations, flow rates, and temperatures are averaged;
+     # masses and depths are accumulated.
+     if units in ['mg/l', 'cfs', 'degC']:
+         agg_func = 'mean'
+     else:
+         agg_func = 'sum'
+ 
+     # Pre-fill all 12 months with NaN so months missing from the record still appear.
+     df_agg = pd.DataFrame(np.ones((12, 3))*np.nan, index=range(1, 13),
+                           columns=['simulated', 'observed', 'ratio'])
+     df_agg.index.name = 'month'
+     df = df.groupby(df.index.month).agg(agg_func)[['simulated', 'observed']]
+     df['ratio'] = df['observed']/df['simulated']
+     df_agg.loc[df.index, df.columns] = df.values
+ 
+     df_agg.loc['Mean'] = df_agg.agg('mean')
+     df_agg['ratio'] = df_agg['observed']/df_agg['simulated']
+     return df_agg
+ 
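
A minimal usage sketch (editorial, not part of the released files), assuming synthetic daily data; aggregate() only needs a datetime-indexed frame with 'simulated' and 'observed' columns:

    import numpy as np
    import pandas as pd
    from pyhcal.metrics import aggregate

    idx = pd.date_range('2020-01-01', '2021-12-31', freq='D')
    rng = np.random.default_rng(0)  # synthetic flows, purely illustrative
    df = pd.DataFrame({'simulated': rng.gamma(2.0, 50.0, len(idx)),
                       'observed': rng.gamma(2.0, 55.0, len(idx))}, index=idx)
    print(aggregate(df, 'cfs'))  # twelve monthly means plus an overall 'Mean' row
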
+ def NSE(df_daily, agg_func='mean'):
+     # Nash-Sutcliffe efficiency on daily values, Garrick's absolute-deviation
+     # variant, and NSE on values grouped by calendar month.
+     df_monthly = df_daily.groupby(df_daily.index.month).agg(agg_func)
+ 
+     NSE_daily = round(1 - np.sum((df_daily['observed'] - df_daily['simulated'])**2)
+                       / np.sum((df_daily['observed'] - df_daily['observed'].mean())**2), 3)
+     NSE_garrick = round(1 - np.sum(np.absolute(df_daily['observed'] - df_daily['simulated']))
+                         / np.sum(np.absolute(df_daily['observed'] - df_daily['observed'].mean())), 3)
+     NSE_monthly = round(1 - np.sum((df_monthly['observed'] - df_monthly['simulated'])**2)
+                         / np.sum((df_monthly['observed'] - df_monthly['observed'].mean())**2), 3)
+     metric = pd.DataFrame(data=[NSE_daily, NSE_garrick, NSE_monthly]).transpose()
+     metric.columns = ['NSE', 'Garrick', 'Monthly']
+     return metric
+ 
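
NSE here is the standard 1 - sum((obs - sim)^2) / sum((obs - mean(obs))^2), with 'Garrick' swapping squared deviations for absolute ones; a perfect simulation scores 1.0 and simply predicting the observed mean scores 0.0. A quick editorial check:

    idx = pd.date_range('2020-01-01', periods=365, freq='D')
    df = pd.DataFrame({'observed': np.arange(365.0)}, index=idx)
    df['simulated'] = df['observed']   # perfect match
    print(NSE(df))                     # all three scores come out 1.0
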
+ 
+ #%% Hydrology
+ def hydro_stats(df_daily, drg_area):
+     '''
+     Parameters
+     ----------
+     df_daily : pd.DataFrame
+         Daily flow timeseries in cfs, with a datetime index and columns
+         labeled 'observed' and 'simulated'.
+     drg_area : float
+         Drainage area in acres.
+ 
+     Returns
+     -------
+     df : pd.DataFrame
+         Various metrics used to evaluate model hydrology simulations.
+         Outputs are in inches.
+     '''
+     df_daily = hydro_sep(drg_area*0.0015625, df_daily)  # acres -> square miles (1/640)
+     df_daily_inches = df_daily*60*60*24*12/(drg_area*43560)  # cfs -> basin-average inches per day
+     dfs = [total(df_daily_inches, 'in'),
+            annual(df_daily_inches, 'in'),
+            monthly(df_daily_inches, 'in'),
+            season(df_daily_inches, 'in'),
+            low_50(df_daily_inches),
+            high_10(df_daily_inches),
+            storm(df_daily_inches),
+            storm_summer(df_daily_inches),
+            baseflow(df_daily_inches)]
+ 
+     dfs[0]['interval'] = 'Total'
+     dfs[1]['interval'] = 'Annual'
+     dfs[2].rename(columns={'month': 'interval'}, inplace=True)
+     dfs[3].rename(columns={'season': 'interval'}, inplace=True)
+     dfs[4]['interval'] = 'Low 50'
+     dfs[5]['interval'] = 'High 10'
+     dfs[6]['interval'] = 'Storm'
+     dfs[7]['interval'] = 'Summer Storm'
+     dfs[8]['interval'] = 'Baseflow'
+ 
+     df = pd.concat(dfs)
+     df.set_index('interval', inplace=True)
+     # The NSE rows carry only a score (stored in 'observed') and a calibration goal.
+     nse = NSE(df_daily)
+     df.loc['Daily NSE', :] = pd.NA
+     df.loc['Daily Garrick', :] = pd.NA
+     df.loc['Monthly NSE', :] = pd.NA
+     df.loc['Daily NSE', 'observed'] = nse['NSE'].iloc[0]
+     df.loc['Daily Garrick', 'observed'] = nse['Garrick'].iloc[0]
+     df.loc['Monthly NSE', 'observed'] = nse['Monthly'].iloc[0]
+     df.loc['Daily NSE', 'goal'] = .7
+     df.loc['Daily Garrick', 'goal'] = .55
+     df.loc['Monthly NSE', 'goal'] = .8
+     return df
+ 
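
An editorial end-to-end sketch with placeholder numbers (not a real watershed); hydro_stats() expects daily cfs and a drainage area in acres, and if the optional baseflow package is unavailable it falls back to the rolling-minimum separation below:

    idx = pd.date_range('2015-01-01', '2019-12-31', freq='D')
    rng = np.random.default_rng(1)
    flows = pd.DataFrame({'observed': rng.gamma(2.0, 40.0, len(idx)),
                          'simulated': rng.gamma(2.0, 42.0, len(idx))}, index=idx)
    report = hydro_stats(flows, drg_area=64000)   # 64,000 ac is roughly 100 mi^2
    print(report[['observed', 'simulated', 'per_error', 'goal']])
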
+ 
+ def hydro_sep_baseflow(df_daily, drng_area=None, method='Boughton'):
+     # Baseflow separation via the optional `baseflow` package. The method
+     # (e.g. 'Boughton') could instead be chosen by KGE ranking:
+     #   dfs, df_kge = bf.single(df_daily['observed'], area=drng_area)
+     #   method = df_kge.idxmax()
+     df_daily['observed_baseflow'] = bf.single(df_daily['observed'], area=drng_area,
+                                               method=method, return_kge=False)[0][method]
+     df_daily['simulated_baseflow'] = bf.single(df_daily['simulated'], area=drng_area,
+                                                method=method, return_kge=False)[0][method]
+     df_daily['observed_runoff'] = df_daily['observed'] - df_daily['observed_baseflow']
+     df_daily['simulated_runoff'] = df_daily['simulated'] - df_daily['simulated_baseflow']
+     return df_daily
+ 
+ 
+ def hydro_sep(drg_area, df_daily):
+     # drg_area is in square miles here (hydro_stats converts from acres).
+     try:
+         df_daily = hydro_sep_baseflow(df_daily, drg_area)
+     except Exception:
+         # Fallback: fixed-interval local-minimum separation. The interval
+         # 2N* = 2 * A**0.2 (A in mi^2) is rounded to an odd number of days
+         # and clamped to [3, 11], as in USGS HYSEP-style methods.
+         twonstar = 2*drg_area**0.2
+         twonstar = round(twonstar, 0)
+         if twonstar % 2 == 0:
+             twonstar = twonstar - 1
+         twonstar = np.median([3, 11, twonstar])  # clamp between 3 and 11
+         ndays = int((twonstar - 1)/2)
+ 
+         df_daily['observed_baseflow'] = df_daily['observed'].rolling(
+             2*ndays + 1, center=True, min_periods=ndays + 1).apply(np.nanmin)
+         df_daily['observed_runoff'] = df_daily['observed'] - df_daily['observed_baseflow']
+         df_daily['simulated_baseflow'] = df_daily['simulated'].rolling(
+             2*ndays + 1, center=True, min_periods=ndays + 1).apply(np.nanmin)
+         df_daily['simulated_runoff'] = df_daily['simulated'] - df_daily['simulated_baseflow']
+     return df_daily
+ 
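
Worked numbers for the fallback window (editorial): for a hypothetical 100 mi^2 basin, 2 * 100**0.2 is about 5.02, which rounds to the odd interval 5, so ndays = 2 and the local minimum is taken over a centered 5-day window:

    A = 100.0                                        # drainage area in mi^2 (hypothetical)
    twonstar = round(2 * A**0.2, 0)                  # 5.02 -> 5.0, already odd
    twonstar = float(np.median([3, 11, twonstar]))   # clamp to [3, 11]; stays 5.0
    ndays = int((twonstar - 1) / 2)                  # 2 -> rolling window of 2*2+1 = 5 days
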
+ # def aggregate(df, period=None, agg_func='mean'):
+ #     if period is None:
+ #         df = df.agg(agg_func)
+ #     elif period == 'Y':
+ #         grouper = df.index.year
+ #     elif period == 'M':
+ #         grouper = df.index.month
+ #     elif period == 'D':
+ #         grouper = df.index.dayofyear
+ 
+ 
+ def annual(df_daily, units):
+     assert units in ['cfs', 'in', 'lb', 'mg/l', 'degC']
+     if units in ['in', 'lb']:
+         # Average annual total, normalized by the (possibly partial) years of record.
+         n_years = df_daily.groupby(df_daily.index.year).count()/365.25
+         df_daily = ((df_daily.groupby(df_daily.index.year).sum())/n_years).mean()
+     else:
+         df_daily = (df_daily.groupby(df_daily.index.year).mean()).mean()
+ 
+     observed = df_daily['observed']
+     simulated = df_daily['simulated']
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [10]}
+     return pd.DataFrame(metric)
+ 
+ def total(df_daily, units):
+     # Period-of-record total (for in/lb) or mean (for cfs, mg/l, degC).
+     if units in ['in', 'lb']:
+         agg_func = 'sum'
+     else:
+         agg_func = 'mean'
+ 
+     df_daily = df_daily.agg(agg_func)
+     observed = df_daily['observed']
+     simulated = df_daily['simulated']
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [10]}
+     return pd.DataFrame(metric)
+ 
+ def monthly(df_daily, units):
+     assert units in ['cfs', 'in', 'lb', 'mg/l', 'degC']
+     # count/30 approximates the number of months of record in each calendar
+     # month, so the sums below become average monthly totals.
+     n_months = df_daily.groupby(df_daily.index.month).count()/30
+     if units in ['in', 'lb']:
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).sum()/n_months
+     else:
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).mean()
+ 
+     # Monthly average flow volumes; months absent from the record get NA rows.
+     month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+     metrics = []
+     for month in range(1, 13):
+         if month in df_monthly_grouped.index:
+             observed = df_monthly_grouped.loc[month, 'observed']
+             simulated = df_monthly_grouped.loc[month, 'simulated']
+             error = round((simulated - observed)/observed*100, 2)
+             metric = {'month': month_names[month - 1],
+                       'observed': [observed],
+                       'simulated': [simulated],
+                       'per_error': [error],
+                       'abs_error': [simulated - observed],
+                       'goal': [30]}
+         else:
+             metric = {'month': month_names[month - 1],
+                       'observed': [pd.NA],
+                       'simulated': [pd.NA],
+                       'per_error': [pd.NA],
+                       'abs_error': [pd.NA],
+                       'goal': [pd.NA]}
+ 
+         metrics.append(pd.DataFrame(metric))
+     return pd.concat(metrics).reset_index(drop=True)
+ 
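
A short editorial sketch of the NA handling for a partial record:

    idx = pd.date_range('2020-06-01', '2020-08-31', freq='D')   # summer-only record
    df = pd.DataFrame({'simulated': 1.0, 'observed': 1.1}, index=idx)
    print(monthly(df, 'in'))   # Jun-Aug populated; the other nine months are NA rows
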
+ def exceedence(df):
+     # Weibull plotting position: exceedance probability = rank/(n+1)*100,
+     # ranking in descending order so the largest value exceeds least often.
+     df['simulated_rank'] = df['simulated'].rank(method='average', ascending=False)
+     df['simulated_exceed'] = df['simulated_rank'] / (len(df) + 1) * 100
+     df['observed_rank'] = df['observed'].rank(method='average', ascending=False)
+     df['observed_exceed'] = df['observed_rank'] / (len(df) + 1) * 100
+ 
+     if 'simulated_flow' in df.columns:
+         df['simulated_flow_rank'] = df['simulated_flow'].rank(method='average', ascending=False)
+         df['simulated_flow_exceed'] = df['simulated_flow_rank'] / (len(df) + 1) * 100
+         df['observed_flow_rank'] = df['observed_flow'].rank(method='average', ascending=False)
+         df['observed_flow_exceed'] = df['observed_flow_rank'] / (len(df) + 1) * 100
+     return df  # columns are also added in place
+ 
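
The Weibull plotting position gives rank/(n+1)*100, so with three values the exceedance probabilities land at 25/50/75 percent; an editorial check:

    df = pd.DataFrame({'observed': [5.0, 1.0, 3.0],
                       'simulated': [4.0, 2.0, 3.0]})
    df = exceedence(df)
    print(df['observed_exceed'].tolist())   # [25.0, 75.0, 50.0]
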
+ def low_50(df_daily):
+     # Average annual volume of the lowest 50% of daily flows.
+     num_years = len(df_daily.index)/365.25
+     num_bottom50 = int(len(df_daily.index)/2)
+     observed = df_daily.sort_values('observed', ascending=False).tail(num_bottom50).sum()/num_years
+     simulated = df_daily.sort_values('simulated', ascending=False).tail(num_bottom50).sum()/num_years
+     error = round((simulated['simulated'] - observed['observed'])/observed['observed']*100, 2)
+     observed = float(round(observed['observed'], 2))
+     simulated = float(round(simulated['simulated'], 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [10]}
+     return pd.DataFrame(metric)
+ 
+ def high_10(df_daily):
+     # Average annual volume of the highest 10% of daily flows.
+     num_years = len(df_daily.index)/365.25
+     num_top10 = int(len(df_daily.index)/10)
+     observed = df_daily.sort_values('observed', ascending=False).head(num_top10).sum()/num_years
+     simulated = df_daily.sort_values('simulated', ascending=False).head(num_top10).sum()/num_years
+     error = round((simulated['simulated'] - observed['observed'])/observed['observed']*100, 2)
+     observed = float(round(observed['observed'], 2))
+     simulated = float(round(simulated['simulated'], 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [15]}
+     return pd.DataFrame(metric)
+ 
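
An editorial check of high_10(): with simulated flows uniformly 10% above observed, the top-decile volumes should differ by exactly 10%:

    idx = pd.date_range('2020-01-01', periods=1000, freq='D')
    df = pd.DataFrame({'observed': np.arange(1000.0),
                       'simulated': np.arange(1000.0)*1.1}, index=idx)
    print(high_10(df)['per_error'].iloc[0])   # 10.0
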
+ def season(df_daily, units):
+     assert units in ['cfs', 'in', 'lb', 'mg/l', 'degC']
+ 
+     n_months = df_daily.groupby(df_daily.index.month).count()/30
+     if units in ['in', 'lb']:
+         agg_func = 'sum'
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).sum()/n_months
+     else:
+         agg_func = 'mean'
+         df_monthly_grouped = df_daily.groupby(df_daily.index.month).mean()
+ 
+     metrics = []
+     # Three-month seasons keyed by their starting month: summer (Jul-Sep),
+     # fall (Oct-Dec), winter (Jan-Mar), spring (Apr-Jun).
+     for season_start in [7, 10, 1, 4]:
+         if season_start == 7:
+             season = 'summer'
+         elif season_start == 10:
+             season = 'fall'
+         elif season_start == 1:
+             season = 'winter'
+         elif season_start == 4:
+             season = 'spring'
+         observed = df_monthly_grouped['observed'].loc[season_start:season_start + 2].agg(agg_func)
+         simulated = df_monthly_grouped['simulated'].loc[season_start:season_start + 2].agg(agg_func)
+         error = round((simulated - observed)/observed*100, 2)
+         observed = float(round(observed, 2))
+         simulated = float(round(simulated, 2))
+         metrics.append({'season': season,
+                         'observed': observed,
+                         'simulated': simulated,
+                         'per_error': error,
+                         'abs_error': simulated - observed,
+                         'goal': 30})
+     return pd.DataFrame(metrics).reset_index(drop=True)
+ 
+ def storm(df_daily, agg_func='mean'):
+     # Average annual storm (direct runoff) volume; requires the *_runoff
+     # columns added by hydro_sep().
+     num_years = len(df_daily.index)/365.25
+     df_daily = df_daily.sum()/num_years
+     observed = df_daily['observed_runoff']
+     simulated = df_daily['simulated_runoff']
+ 
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [20]}
+     return pd.DataFrame(metric)
+ 
+ def storm_summer(df_daily, agg_func='mean'):
+     # Average annual summer (Jul-Sep) storm volume.
+     n_months = df_daily.groupby(df_daily.index.month).count()/30
+     df_monthly_grouped = df_daily.groupby(df_daily.index.month).sum()/n_months
+ 
+     observed = df_monthly_grouped['observed_runoff'].loc[7:9].sum()
+     simulated = df_monthly_grouped['simulated_runoff'].loc[7:9].sum()
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [50]}
+     return pd.DataFrame(metric)
+ 
+ def baseflow(df_daily, agg_func='mean'):
+     # Average annual baseflow volume; requires the *_baseflow columns
+     # added by hydro_sep().
+     num_years = len(df_daily.index)/365.25
+     observed = df_daily['observed_baseflow'].sum()/num_years
+     simulated = df_daily['simulated_baseflow'].sum()/num_years
+     error = round((simulated - observed)/observed*100, 2)
+     observed = float(round(observed, 2))
+     simulated = float(round(simulated, 2))
+     metric = {'observed': [observed],
+               'simulated': [simulated],
+               'per_error': [error],
+               'abs_error': [simulated - observed],
+               'goal': [20]}
+     return pd.DataFrame(metric)
+ 
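
Note that storm(), storm_summer(), and baseflow() all read the *_runoff and *_baseflow columns, so hydro_sep() must run first; hydro_stats() does this internally. A standalone editorial sketch:

    rng = np.random.default_rng(2)
    idx = pd.date_range('2020-01-01', periods=730, freq='D')
    df = pd.DataFrame({'observed': rng.gamma(2.0, 10.0, 730),
                       'simulated': rng.gamma(2.0, 10.0, 730)}, index=idx)
    df = hydro_sep(100, df)   # hypothetical 100 mi^2 basin
    print(storm(df)[['observed', 'simulated', 'per_error']])
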
+ 
+ #%% Sediment
+ 
+ # def sed_stats(df, units):
+ #     if units == 'mg/l':
+ #         agg_func = 'mean'
+ #     else:
+ #         agg_func = 'sum'
+ 
+ #     df_agg = pd.DataFrame(np.ones((12, 3))*np.nan, index=range(1, 13),
+ #                           columns=['model', 'flux', 'ratio'])
+ #     df_agg.index.name = 'month'
+ #     df = df.groupby(df.index.month).agg(agg_func)[['Simflow', 'Obsflow']]
+ #     df.columns = ['model', 'flux']
+ #     df['ratio'] = df['flux']/df['model']
+ #     df_agg.loc[df.index, df.columns] = df.values
+ 
+ #     df_agg.loc['Mean'] = df_agg.agg('mean')
+ #     df_agg['ratio'] = df_agg['flux']/df_agg['model']
+ #     return df_agg
+ 
+ def stats(df_daily, units):
+     # Stack total/annual/monthly/seasonal metrics into one table
+     # indexed by interval.
+     dfs = [total(df_daily, units),
+            annual(df_daily, units),
+            monthly(df_daily, units),
+            season(df_daily, units)]
+ 
+     dfs[0]['interval'] = 'Total'
+     dfs[1]['interval'] = 'Annual'
+     dfs[2].rename(columns={'month': 'interval'}, inplace=True)
+     dfs[3].rename(columns={'season': 'interval'}, inplace=True)
+     df = pd.concat(dfs)
+     df.set_index('interval', inplace=True)
+     return df
+ 
+ #%% Nutrients
+ def nutrient_stats(df_daily, units):
+     # Identical pipeline to stats(); kept as a separate entry point
+     # for nutrient series.
+     dfs = [total(df_daily, units),
+            annual(df_daily, units),
+            monthly(df_daily, units),
+            season(df_daily, units)]
+ 
+     dfs[0]['interval'] = 'Total'
+     dfs[1]['interval'] = 'Annual'
+     dfs[2].rename(columns={'month': 'interval'}, inplace=True)
+     dfs[3].rename(columns={'season': 'interval'}, inplace=True)
+     df = pd.concat(dfs)
+     df.set_index('interval', inplace=True)
+     return df
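
stats() and nutrient_stats() share one pipeline: the total, annual, monthly, and seasonal tables are stacked into a single frame indexed by 'interval'. An editorial sketch for a daily load series in lb:

    idx = pd.date_range('2018-01-01', '2020-12-31', freq='D')
    rng = np.random.default_rng(3)
    loads = pd.DataFrame({'observed': rng.gamma(2.0, 5.0, len(idx)),
                          'simulated': rng.gamma(2.0, 5.5, len(idx))}, index=idx)
    table = nutrient_stats(loads, 'lb')   # rows: Total, Annual, Jan..Dec, four seasons
    print(table.loc[['Total', 'Annual', 'summer']])
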
pyhcal/modl_db.py ADDED
@@ -0,0 +1,97 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Thu May 1 09:51:51 2025
+ 
+ @author: mfratki
+ """
+ #import sqlite3
+ from pathlib import Path
+ import geopandas as gpd
+ import pandas as pd
+ #from hspf_tools.calibrator import etlWISKI, etlSWD
+ 
+ #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
+ 
+ # Station metadata layers bundled with the package; Path joins (rather than
+ # hard-coded backslashes) keep this portable across operating systems.
+ stations_wiski = gpd.read_file(
+     str(Path(__file__).resolve().parent/'data'/'stations_wiski.gpkg')
+ ).dropna(subset='opnids')[['station_id', 'true_opnid', 'opnids', 'comments',
+                            'modeled', 'repository_name', 'wplmn_flag']]
+ stations_wiski['source'] = 'wiski'
+ stations_equis = gpd.read_file(
+     str(Path(__file__).resolve().parent/'data'/'stations_EQUIS.gpkg')
+ ).dropna(subset='opnids')[['id_code', 'true_opnid', 'opnids', 'comments',
+                            'modeled', 'repository_name']]
+ stations_equis['source'] = 'equis'
+ stations_equis['wplmn_flag'] = 0
+ stations_equis = stations_equis.rename(columns={'id_code': 'station_id'})
+ 
+ MODL_DB = pd.concat([stations_wiski, stations_equis])
+ 
+ database = """
+ -- Stations/Locations table
+ CREATE TABLE IF NOT EXISTS Station (
+     stationPK INTEGER PRIMARY KEY AUTOINCREMENT,
+     reachPK INTEGER REFERENCES Reach(reachPK),
+     stationID TEXT NOT NULL,
+     stationName TEXT,
+     stationOrigin TEXT NOT NULL,
+     latitude REAL,
+     longitude REAL,
+     stationType TEXT,
+     UNIQUE(stationID, stationOrigin)
+ );
+ 
+ -- Station Associations table
+ CREATE TABLE IF NOT EXISTS StationAssociations (
+     stationPK INTEGER REFERENCES Station(stationPK),
+     associationPK INTEGER REFERENCES Station(stationPK)
+ );
+ 
+ -- Station Aliases table
+ CREATE TABLE IF NOT EXISTS StationAliases (
+     stationPK INTEGER NOT NULL,
+     aliasPK INTEGER NOT NULL,
+     FOREIGN KEY (stationPK) REFERENCES Station(stationPK),
+     FOREIGN KEY (aliasPK) REFERENCES Station(stationPK)
+ );
+ 
+ CREATE TABLE Reach (
+     reachPK INTEGER PRIMARY KEY,
+     modelName TEXT NOT NULL,
+     reachID INTEGER NOT NULL,
+     drainageArea FLOAT
+ );
+ 
+ CREATE TABLE Outlet (
+     outletPK INTEGER PRIMARY KEY,
+     outletName TEXT
+ );
+ 
+ -- Outlet-Station Associations table
+ CREATE TABLE IF NOT EXISTS OutletStationAssociations (
+     outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
+     stationPK INTEGER NOT NULL REFERENCES Station(stationPK)
+ );
+ 
+ -- Outlet-Reach Associations table
+ CREATE TABLE IF NOT EXISTS OutletReachAssociations (
+     outletPK INTEGER NOT NULL REFERENCES Outlet(outletPK),
+     reachPK INTEGER NOT NULL REFERENCES Reach(reachPK),
+     exclude INTEGER NOT NULL
+ );"""
+ 
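
Nothing in this module executes the schema; a sketch of how it could be applied with the standard library (importing the module also loads the bundled station layers):

    import sqlite3
    from pyhcal.modl_db import database

    con = sqlite3.connect(':memory:')   # or a file path
    con.executescript(database)
    con.execute('INSERT INTO Reach (reachPK, modelName, reachID) VALUES (?, ?, ?)',
                (1, 'Clearwater', 650))  # values borrowed from the comments below
    con.commit()
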
+ #row = modl_db.MODL_DB.iloc[0]
+ #info = etlWISKI.info(row['station_id'])
+ #modl_db.MODL_DB.query('source == "equis"')
+ 
+ # outlet_dict = {'stations': {'wiski': ['E66050001'],
+ #                             'equis': ['S002-118']},
+ #                'reaches': {'Clearwater': [650]}}
+ 
+ # station_ids = ['S002-118']
+ # #station_ids = ['E66050001']
+ # reach_ids = [650]
+ # flow_station_ids = ['E66050001']
pyhcal/repository.py ADDED
@@ -0,0 +1,98 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Wed Nov 27 09:16:30 2024
+ 
+ @author: mfratki
+ """
+ 
+ import pandas as pd
+ from pyhcal.modl_db import MODL_DB
+ from pathlib import Path
+ import shutil
+ 
+ # Could point to the GitHub repository if it becomes public?
+ DEFAULT_REPOSITORY_PATH = Path(r'X:\Databases2\Water_Quality\Watershed_Modeling\MPCA_HSPF_Model_Repository')
+ 
+ class Repository():
+ 
+     HUC_DIRECTORY = pd.read_csv(
+         str(Path(__file__).resolve().parent/'data'/'HUC_Names.csv'),
+         dtype={'USGS HUC-8': 'string',
+                'USGS HUC-6': 'string',
+                'USGS HUC-4': 'string',
+                'USGS HUC-2': 'string'})
+ 
+     MODL_DB = MODL_DB
+ 
+     @classmethod
+     def valid_models(cls):
+         return sorted(set(cls.HUC_DIRECTORY['Repository_HUC8 Name']
+                           .dropna().replace('NO MODEL', pd.NA).dropna().to_list()))
+ 
+     def __init__(self, model_name, repository_path=DEFAULT_REPOSITORY_PATH):
+         if model_name not in self.valid_models():
+             raise ValueError('Please provide a valid model name (see .valid_models())')
+ 
+         self.REPOSITORY_PATH = repository_path
+         huc_directory = self.HUC_DIRECTORY.loc[self.HUC_DIRECTORY['Repository_HUC8 Name'] == model_name]
+         self.modl_db = self.MODL_DB.loc[self.MODL_DB['repository_name'] == model_name]
+         self.model_name = model_name
+         self.huc8_ids = list(huc_directory['USGS HUC-8'])
+         self.huc6_name = huc_directory['Repository_HUC6 Name'].iloc[0]
+         self.huc6_id = huc_directory['USGS HUC-6'].iloc[0]
+         # Model folders are laid out as <HUC6 name>_<HUC6 id>/<model name>_...
+         self.repo_folder = [item for item in
+                             self.REPOSITORY_PATH.joinpath('_'.join([self.huc6_name, self.huc6_id])).iterdir()
+                             if item.name.split('_')[0] == self.model_name][0]
+         self.uci_file = self.repo_folder.joinpath('HSPF', '.'.join([self.model_name, 'uci']))
+         self.wdm_files = [item for item in self.repo_folder.joinpath('HSPF').iterdir()
+                           if item.name.lower().endswith('.wdm')]
+         self.shapefiles = {item.name.split('.')[0].split('_')[-1]: item
+                            for item in self.repo_folder.joinpath('GIS').iterdir()
+                            if item.name.lower().endswith('.shp')}
+ 
+     def copy(self, copy_path):
+         # Copy the UCI, WDMs, station table, and GIS layers into a new
+         # working-folder structure (see build_folders below).
+         copy_path = Path(copy_path)
+         build_folders(copy_path)
+         shutil.copyfile(self.uci_file, copy_path.joinpath('model', '.'.join([self.model_name, 'uci'])))
+         for wdm_file in self.wdm_files:
+             shutil.copyfile(wdm_file, copy_path.joinpath('model', Path(wdm_file).name))
+         self.modl_db.to_csv(copy_path.joinpath('_'.join([self.model_name, 'MODL_DB.csv'])))
+         self.copy_shapefiles(copy_path.joinpath('gis'))
+ 
+     def copy_wdm(self, copy_path):
+         for wdm_file in self.wdm_files:
+             shutil.copyfile(wdm_file, Path(copy_path).joinpath(Path(wdm_file).name))
+ 
+     def copy_uci(self, copy_path):
+         shutil.copyfile(self.uci_file, Path(copy_path).joinpath('.'.join([self.model_name, 'uci'])))
+ 
+     def copy_modl_db(self, copy_path):
+         self.modl_db.to_csv(Path(copy_path).joinpath('_'.join([self.model_name, 'MODL_DB.csv'])))
+ 
+     def copy_shapefiles(self, copy_path):
+         # Copy every sidecar file (.shp, .dbf, .prj, ...) sharing the stem.
+         for shapefile in self.shapefiles.values():
+             files = [file for file in shapefile.parent.iterdir() if file.stem == shapefile.stem]
+             for file in files:
+                 shutil.copyfile(file, Path(copy_path).joinpath(Path(file).name))
+ 
+ def build_folders(trg_path):
+     # Create the standard working-folder skeleton if it does not exist.
+     sub_folders = ['model',
+                    'output',
+                    'inputs',
+                    'figures',
+                    'gis',
+                    'data']
+ 
+     trg_path = Path(trg_path)
+     if not trg_path.is_dir():
+         trg_path.mkdir(parents=True)
+ 
+     for path in sub_folders:
+         if not trg_path.joinpath(path).is_dir():
+             trg_path.joinpath(path).mkdir()
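
An editorial usage sketch; this requires access to the MPCA repository share (or a repository_path pointing at a local mirror) plus the bundled station layers, and 'Clearwater' is a hypothetical model name:

    from pyhcal.repository import Repository

    print(Repository.valid_models()[:5])    # names accepted by the constructor
    repo = Repository('Clearwater')
    repo.copy('C:/temp/clearwater_run')     # builds model/output/inputs/figures/gis/data
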