pyhcal 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyhcal/figures.py ADDED
@@ -0,0 +1,1024 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed May 8 09:40:51 2024
4
+
5
+ @author: cfreema
6
+ """
7
+
8
+ #%% Add libraries
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import matplotlib.pyplot as plt
13
+ from datetime import datetime
14
+ from scipy.stats import pearsonr
15
+ from pathlib import Path
16
+
17
+
18
+ #%% Minnesota Color Scheme and Font
19
+
20
# Minnesota Brand Colors
# Every value is a '#RRGGBB' hex string so it can be passed directly to
# matplotlib colour arguments.
COLORS = {
    'Minnesota Blue': '#003865',
    'Minnesota Green': '#78BE21',
    'White': '#FFFFFF',
    'Black': '#000000',  # leading '#' was missing, which made this an invalid hex colour
    'Accent Teal': '#008EAA',
    'Accent Green': '#0D5257',
    'Accent Orange': '#8D3F2B',
    'Accent Purple': '#5D295F',
    'Extended Accent Blue Gray': '#A4BCC2',
    'Extended Accent Cream': '#F5E1A4',
    'Extended Accent Sky Blue': '#9BCBEB',
    'Extended Accent Gold': '#FFC845',
    'Dark Gray': '#53565A',
    'Medium Gray': '#97999B',
    'Light Gray': '#D9D9D6',
    'Orange': '#E57200',
}

# Minnesota Substitute Typography
# Unpacked (**CFONT) into matplotlib text calls to select the font family.
CFONT = {'fontname': 'Calibri'}
40
+
41
+ #%% Monthly Bar Plot
42
def monthly_bar(df, station_id, constituent, units, save_path=None, yaxis=None, ylimit=None):
    '''
    Returns a grouped bar plot of observed and simulated values aggregated by
    calendar month (one pair of bars for each month number present in df).

    Values are averaged per month when the units are 'cfs' or 'mg/l' and summed
    per month when the units are 'lb'.

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns 'observed' and 'simulated'.
        -The index must expose a .month attribute (e.g. a pandas DatetimeIndex);
         rows are grouped by df.index.month.
        -If df.attrs contains a 'unit' entry, it takes precedence over the
         units argument.

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used for the plot title and the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a one-element list.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
         (e.g. TSS, N+N, PO4, TP, TKN).

    units: Python string
        -One of 'mg/l', 'lb' or 'cfs' (case-insensitive).
        -Only used when df.attrs has no 'unit' entry.
        -Shown on the y-axis label (without parentheses).

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder is entered, the file name will default to
         'constituent_monthly_bar_stationID(s).png'
        -Use forward slashes (/) rather than backslashes.

    yaxis: Python string
        -Default is None and will create a plot with a linear y-axis.
        -yaxis = 'log' (any capitalisation) will create a plot with a log y-axis.

    ylimit: integer
        -Upper limit of the y-axis (e.g. ylimit = 400).
        -Default is None and will use the dataset to set the y-axis limits.

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module

    Raises
    ------
    ValueError
        If the resolved units are not one of 'mg/l', 'lb' or 'cfs'.
    '''
    # Prefer the unit recorded on the dataframe, but fall back to the argument
    # instead of failing with a KeyError when the attribute is absent.
    units = df.attrs.get('unit', units)
    unit_key = str(units).lower()
    if unit_key not in ('mg/l', 'lb', 'cfs'):
        # Explicit exception: an assert would be stripped when running with -O.
        raise ValueError(f"units must be one of 'mg/l', 'lb' or 'cfs'; got {units!r}")

    if isinstance(station_id, str):
        # ', '.join on a bare string would split it into single characters.
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Rates and concentrations are averaged; loads are totalled.
    by_month = df[['observed', 'simulated']].groupby(df.index.month)
    monthly = by_month.sum() if unit_key == 'lb' else by_month.mean()

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)
    monthly.plot.bar(ax=ax, color={'simulated': COLORS.get('Orange'),
                                   'observed': COLORS.get('Minnesota Blue')})

    ax.set_title(f'Simulated and Observed {constituent} \n Station(s): ' + stations,
                 **CFONT, fontsize=12, weight='bold')
    ax.set_xlabel('Month', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Build the legend once. A second bare ax.legend() call would replace it
    # and drop the custom labels and font size.
    handles, _ = ax.get_legend_handles_labels()
    ax.legend(handles, ['Observed', 'Simulated'],
              prop={'family': CFONT['fontname'], 'size': 9})

    # Set the font and font size for the tick labels
    plt.xticks(**CFONT, fontsize=9)
    plt.yticks(**CFONT, fontsize=9)

    # Axis settings depending on user input
    if str(yaxis).lower() == 'log':
        ax.set_yscale('log')
        ax.grid(which='major', axis='y', linewidth=0.3)
        ax.grid(which='minor', axis='y', linewidth=0.3)
    else:
        # Draw gridlines behind the bars and anchor the y-axis at 0
        ax.set_axisbelow(True)
        ax.grid(which='major', axis='y', linewidth=0.3)
        ax.set_ylim(bottom=0)

    if ylimit is not None:
        ax.set_ylim(top=ylimit)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        target = Path(save_path)
        if target.is_dir():
            target = target / f'{constituent}_monthly_bar_{stations}.png'
        fig.savefig(target, bbox_inches='tight')

    return fig, ax
186
+
187
+ #%% Timeseries
188
def timeseries(df, station_id, constituent, units, save_path=None, yaxis=None, ylimit=None):
    '''
    Returns a timeseries plot of simulated and observed concentrations over the
    index of the input data (df). Simulated values are drawn as a line and
    observed values as unfilled star markers.

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns 'simulated' and 'observed'; the index is used
         as the x-axis (dates).
        -The date range must already be filtered to the range you wish to plot.
        -Simulated concentrations should be paired with observed concentrations
         where grab samples exist.
        -If you wish to plot concentrations from more than one station, there
         should still only be one 'observed' column, i.e., no stations
         overlapping in dates.
        -In pre-processing, the time of the observed samples may need to be
         adjusted to the nearest top of the hour in order to merge the observed
         data with the simulated data.

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used for the plot title and the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a one-element list.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
         (e.g. TSS, N+N, PO4, TP, TKN).

    units: Python string
        -Input units as you'd like it to appear on the figure axis (without parentheses),
         e.g. mg/L, ppm, ug/L, ppb.
        -Make sure the units you enter match the units of the data in your input df.

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder is entered, the file name will default to
         'constituent_timeseries_stationID(s).png'
        -Use forward slashes (/) rather than backslashes.

    yaxis: Python string
        -Default is None and will create a plot with a linear y-axis.
        -yaxis = 'log' (any capitalisation) will create a plot with a log y-axis.
        -Use log when you have a dataset with a high range of concentrations (i.e. TSS).

    ylimit: integer
        -Upper limit of the y-axis (e.g. ylimit = 400).
        -Default is None and will use the dataset to set the y-axis limits.

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module
    '''
    if isinstance(station_id, str):
        # ', '.join on a bare string would split it into single characters.
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)
    # Simulated concentrations (line)
    sim, = ax.plot(df['simulated'], label='sim', color=COLORS.get('Orange'), linewidth=0.9)
    # Observed concentrations (points)
    obs, = ax.plot(df['observed'], '*', mfc='none', label='obs',
                   mec=COLORS.get('Minnesota Blue'))

    ax.set_title(f'Simulated and Observed {constituent} Concentrations \n Station(s): ' + stations,
                 **CFONT, fontsize=12, weight='bold')
    ax.set_xlabel('Date', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Build the legend once. A second bare ax.legend() call would replace it
    # and drop the requested handle order and font size.
    ax.legend(handles=[sim, obs], prop={'family': CFONT['fontname'], 'size': 9})

    # Set the font and font size for the tick labels
    plt.xticks(**CFONT, fontsize=9)
    plt.yticks(**CFONT, fontsize=9)

    # Axis settings depending on user input
    if str(yaxis).lower() == 'log':
        ax.set_yscale('log')
        ax.grid(which='major', axis='y', linewidth=0.3)
        ax.grid(which='minor', axis='y', linewidth=0.3)
    else:
        # Add y axis gridlines and set the y axis minimum to 0
        ax.grid(which='major', axis='y', linewidth=0.3)
        ax.set_ylim(bottom=0)

    if ylimit is not None:
        ax.set_ylim(top=ylimit)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        target = Path(save_path)
        if target.is_dir():
            target = target / (constituent + '_timeseries_' + stations + '.png')
        fig.savefig(target, bbox_inches='tight')

    return fig, ax
306
+
307
+ #%% Simulated versus Observed Concentrations Plot (Scatter): scatter
308
+
309
def scatter(df, station_id, constituent, units, save_path=None):
    '''
    Returns a scatter plot of simulated (y) versus observed (x) concentrations,
    annotated with the Pearson correlation coefficient r.
    Points falling on a 1:1 line indicate perfect agreement between simulated
    and observed concentrations.

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns 'observed' and 'simulated'.
        -Rows where either value is missing are ignored, both for the plot and
         for the correlation coefficient.
        -If you wish to plot concentrations from more than one station, there
         should still only be one 'observed' column, i.e., no stations
         overlapping in dates.
        -In pre-processing, the time of the observed samples may need to be
         adjusted to the nearest top of the hour in order to merge the observed
         data with the simulated data.

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used for the plot title and the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a one-element list.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
         (e.g. TSS, N+N, PO4, TP, TKN).

    units: Python string
        -Input units as you'd like it to appear on the figure axis (without parentheses),
         e.g. mg/L, ppm, ug/L, ppb.
        -Make sure the units you enter match the units of the data in your input df.

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder is entered, the file name will default to
         'constituent_scatter_stationID(s).png'
        -Use forward slashes (/) rather than backslashes.

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module
    '''
    if isinstance(station_id, str):
        # ', '.join on a bare string would split it into single characters.
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Only complete (observed, simulated) pairs can contribute to r. Missing
    # observations would otherwise turn the coefficient into NaN.
    paired = df[['observed', 'simulated']].dropna()
    if len(paired) >= 2:
        corr, _ = pearsonr(paired['observed'], paired['simulated'])
    else:
        # pearsonr needs at least two points; report NaN rather than raising.
        corr = float('nan')

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)
    # Plot observed versus simulated concentrations (points)
    ax.plot(paired['observed'], paired['simulated'], 'o', alpha=0.3,
            color=COLORS.get('Minnesota Green'))
    # Add the correlation coefficient to the plot (axes-relative coordinates)
    ax.text(0.1, 0.9, f'r = {corr:.2f}', transform=ax.transAxes)

    ax.set_title(f'Simulated versus Observed {constituent} Concentrations \n Station(s): ' + stations,
                 **CFONT, fontsize=12, weight='bold')
    ax.set_xlabel(f'Observed {constituent} ({units})', **CFONT, fontsize=10)
    ax.set_ylabel(f'Simulated {constituent} ({units})', **CFONT, fontsize=10)

    # Set the font and font size for the tick labels
    plt.xticks(**CFONT, fontsize=9)
    plt.yticks(**CFONT, fontsize=9)

    # Add y axis gridlines, make the axes square, then set both minimums to 0
    ax.grid(which='major', axis='y', linewidth=0.3)
    ax.axis('square')
    ax.set_ylim(bottom=0)
    ax.set_xlim(left=0)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        target = Path(save_path)
        if target.is_dir():
            target = target / (constituent + '_scatter_' + stations + '.png')
        fig.savefig(target, bbox_inches='tight')

    return fig, ax
396
+
397
+
398
+ #test_df = create_test_data()
399
+
400
+ #fig, ax = scatter(test_df,station_id, 'TSS', 'mg/L')
401
+
402
+
403
+ #%% Concentration vs Flow (Rating Curve): rating
404
+
405
def rating(df, station_id, constituent, units, save_path=None):
    '''
    Returns a concentration versus flow rating curve (log-log axes) for both
    simulated and observed datasets.

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns 'simulated', 'simulated_flow', 'observed'
         and 'observed_flow'.
        -The x-axis is labelled in cubic feet per second (cfs), so flow is
         assumed to be in cfs.
        -The simulated concentrations & flow must be paired with observed
         concentrations & flow.
        -If you wish to plot data from more than one station, there should
         still only be one 'observed' column and one 'observed_flow' column.
        -In pre-processing, the time of the observed samples and flow may need
         to be adjusted to the nearest 15-minutes or nearest top of the hour in
         order to merge the observed data with the simulated data.

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used for the plot title and the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a one-element list.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
         (e.g. TSS, N+N, PO4, TP, TKN).

    units: Python string
        -Input constituent units as you'd like it to appear on the figure axis
         (without parentheses), e.g. mg/L, ppm, ug/L, ppb.
        -Make sure the units you enter match the units of the data in your input df.

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder is entered, the file name will default to
         'constituent_rating_stationID(s).png'
        -Use forward slashes (/) rather than backslashes.

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module
    '''
    if isinstance(station_id, str):
        # ', '.join on a bare string would split it into single characters.
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)
    # Observed concentration versus flow (points)
    obs, = ax.plot(df['observed_flow'], df['observed'], '*', label='obs',
                   color=COLORS.get('Minnesota Blue'))
    # Simulated concentration versus flow (points)
    sim, = ax.plot(df['simulated_flow'], df['simulated'], 'v', mfc='none', label='sim',
                   mec=COLORS.get('Orange'))

    ax.set_title(f'Simulated and Observed {constituent} Concentration vs Flow \n Station(s): ' + stations,
                 **CFONT, fontsize=12, weight='bold')
    ax.set_xlabel('Flow (cfs)', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Build the legend once. A second bare ax.legend() call would replace it
    # and drop the requested handle order and font size.
    ax.legend(handles=[obs, sim], prop={'family': CFONT['fontname'], 'size': 9})

    # Set the font and font size for the tick labels
    plt.xticks(**CFONT, fontsize=9)
    plt.yticks(**CFONT, fontsize=9)

    # Convert x and y axis to log scale
    ax.set_yscale('log')
    ax.set_xscale('log')
    # Add axis gridlines
    ax.grid(which='major', axis='both', linewidth=0.3)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        target = Path(save_path)
        if target.is_dir():
            target = target / (constituent + '_rating_' + stations + '.png')
        fig.savefig(target, bbox_inches='tight')

    return fig, ax
496
+
497
+
498
+ #test_df = create_test_data()
499
+
500
+ #fig, ax = rating(test_df,station_id, 'PO4', 'ug/L')
501
+
502
+ #%% Load Duration Curve: LDC
503
+
504
+ # # Create random data for input into LDC function
505
+ # def create_test_data():
506
+ # start_date = datetime(2020,5,20)
507
+ # end_date = datetime(2023,9,30)
508
+ # date_range = ((end_date - start_date).days + 1)
509
+
510
+ # random_days = np.random.randint(date_range, size=45)
511
+ # obs_dates = pd.to_datetime(start_date) + pd.to_timedelta(random_days, unit='days')
512
+
513
+ # test_df_obs = pd.DataFrame({'date' : obs_dates,
514
+ # 'obs load': np.random.randint(9000, size = 45).astype(float),
515
+ # 'obs flow': np.random.randint(1000, size = 45).astype(float)})
516
+
517
+ # test_df_obs['obs rank'] = test_df_obs['obs flow'].rank(method = 'first', ascending = False)
518
+ # test_df_obs['obs exceed %'] = test_df_obs['obs rank'] / (len(test_df_obs) + 1) * 100
519
+
520
+ # sim_dates = pd.date_range(start='1/1/2020',end='12/31/2023')
521
+
522
+ # test_df_sim = pd.DataFrame({'date' : sim_dates,
523
+ # 'sim load': np.random.randint(9000, size = 1461).astype(float),
524
+ # 'sim flow': np.random.randint(1000, size = 1461).astype(float)})
525
+
526
+ # test_df_sim['sim rank'] = test_df_sim['sim flow'].rank(method = 'first', ascending = False)
527
+ # test_df_sim['sim exceed %'] = test_df_sim['sim rank'] / (len(test_df_sim) + 1) * 100
528
+
529
+ # test_df = test_df_sim.merge(test_df_obs, on = 'date', how = 'left')
530
+
531
+ # return test_df
532
+
533
+
534
def LDC(df, station_id, constituent, units, time_step, save_path=None):
    '''
    Returns a load duration curve - constituent load versus flow exceedance
    percentage - for both simulated and observed datasets.

    Flow exceedance is computed inside this function as
    rank / (n + 1) * 100, where flows are ranked in descending order (ties
    share their average rank) and n is the number of non-missing flow values.
    The input dataframe is not modified.

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns 'simulated', 'simulated_flow', 'observed'
         and 'observed_flow'.
        -When units is 'mg/l', 'simulated' and 'observed' are concentrations and
         are converted to loads (lb per time step) using the paired flow, which
         is assumed to be in cubic feet per second (cfs).
        -When units is 'lb', 'simulated' and 'observed' are plotted as given.
        -If you wish to use observational data from more than one station, there
         should still only be one 'observed' and one 'observed_flow' column.

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used for the plot title and the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a one-element list.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
         (e.g. TSS, N+N, PO4, TP, TKN).

    units: Python string
        -Units of the 'simulated' and 'observed' columns: 'mg/l' or 'lb'
         (case-insensitive).
        -The y-axis is labelled 'lb' after a mg/l conversion.

    time_step: Python string
        -Length of one timestep of df; only used for the mg/l to lb conversion.
        -Options: 'hourly' or 'h', 'daily' or 'D', 'monthly' or 'M' (30 days),
         'yearly' or 'Y' (365 days).

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder is entered, the file name will default to
         'constituent_LDC_stationID(s).png'
        -Use forward slashes (/) rather than backslashes.

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module

    Raises
    ------
    ValueError
        If units is not 'mg/l' or 'lb'.
    KeyError
        If units is 'mg/l' and time_step is not a recognised option.
    '''
    unit_key = str(units).lower()
    if unit_key not in ('mg/l', 'lb'):
        # Explicit exception: an assert would be stripped when running with -O.
        raise ValueError(f"units must be 'mg/l' or 'lb'; got {units!r}")

    if isinstance(station_id, str):
        # ', '.join on a bare string would split it into single characters.
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Seconds per timestep: converts a flow rate (cfs) to a volume (cubic feet).
    seconds_per_step = {'hourly': 3600,
                        'daily': 86400,
                        'monthly': 2592000,
                        'yearly': 31536000,
                        'h': 3600,
                        'D': 86400,
                        'M': 2592000,
                        'Y': 31536000}

    # Work on a copy so the caller's dataframe is not altered. Converting in
    # place would double-convert the concentrations on a second call.
    data = df.copy()

    if unit_key == 'mg/l':
        try:
            seconds = seconds_per_step[time_step]
        except KeyError:
            raise KeyError(f'time_step must be one of {sorted(seconds_per_step)}; '
                           f'got {time_step!r}') from None
        # mg/L -> mg/ft3 (0.0353147 ft3 per L) -> mg per step -> lb (2.20462e-6 lb per mg)
        for conc, flow in (('simulated', 'simulated_flow'), ('observed', 'observed_flow')):
            data[conc] = 2.20462e-6 * data[conc] / 0.0353147 * data[flow] * seconds
        units = 'lb'

    # rank() skips missing flows, so the plotting position must use the number
    # of ranked values rather than the number of rows.
    for flow in ('simulated_flow', 'observed_flow'):
        ranks = data[flow].rank(method='average', ascending=False)
        data[flow + ' rank'] = ranks
        data[flow + ' exceed %'] = ranks / (data[flow].count() + 1) * 100

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)
    # Simulated load versus flow exceedance (points)
    sim, = ax.plot(data['simulated_flow exceed %'], data['simulated'], 'v', mfc='none',
                   label='sim', mec=COLORS.get('Orange'))
    # Observed load versus flow exceedance (points)
    obs, = ax.plot(data['observed_flow exceed %'], data['observed'], '*', label='obs',
                   color=COLORS.get('Minnesota Blue'))

    ax.set_title(f'{constituent} Load Duration Curve \n Station(s): ' + stations,
                 **CFONT, fontsize=12, weight='bold')
    ax.set_xlabel('Flow Exceedance (%)', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Build the legend once. A second bare ax.legend() call would replace it
    # and drop the requested handle order and font size.
    ax.legend(handles=[obs, sim], prop={'family': CFONT['fontname'], 'size': 9})

    # Set the font and font size for the tick labels
    plt.xticks(**CFONT, fontsize=9)
    plt.yticks(**CFONT, fontsize=9)

    # Convert y axis to log scale
    ax.set_yscale('log')
    # Add axis gridlines
    ax.grid(which='major', axis='both', linewidth=0.3)
    ax.grid(which='minor', axis='y', linewidth=0.3)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        target = Path(save_path)
        if target.is_dir():
            target = target / (constituent + '_LDC_' + stations + '.png')
        fig.savefig(target, bbox_inches='tight')

    return fig, ax
643
+
644
+ # #test_df = create_test_data()
645
+
646
+ # #fig, ax = LDC(test_df,station_id, 'OP', 'lbs/day')
647
+
648
+ #%% Timeseries Plot: contTimeseries
649
+
650
def contTimeseries(df, station_id, constituent, units, save_path=None):
    '''
    Returns a timeseries plot of simulated and observed continuous data (both
    drawn as lines) over the index of the input data (df).

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns 'observed' and 'simulated'; the index is used
         as the x-axis (dates).
        -The date range must already be filtered to the date range and timestep
         you wish to plot in the figure.
        -Each simulated data point should have a corresponding observed data point.
        -For TSS, N+N, PO4, TP, and TKN, the simulated data should be from HSPF
         and the observed data should be from FLUX (WPLMN).
        -The suggested units for each of the constituents are as follows:
            -flow: cubic feet per second (cfs)
            -temp: degrees Fahrenheit (ºF)
            -DO: milligrams per liter (mg/L)
            -TSS, N+N, PO4, TP, TKN: kilograms per day (kg/d)
        -If you wish to plot observed data from more than one station, there
         should still only be one 'observed' column, i.e., no observed data
         overlapping in dates.

    station_id : Python list
        -Include one or more station ID numbers (i.e., ['H49009001', 'H25019002']).
        -This argument is only used for the plot title and the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a one-element list.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
         (e.g. Flow, Temp, DO, TSS, N+N, PO4, TP, TKN).

    units: Python string
        -Input units as you'd like it to appear on the figure axis (without parentheses),
         e.g. cfs, ºF, ºC, mg/L, ppm, kg/d.
        -Make sure the units you enter match the units of the data in your input df.

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder is entered, the file name will default to
         'constituent_contTimeseries_stationID(s).png'
        -Use forward slashes (/) rather than backslashes.

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module
    '''
    if isinstance(station_id, str):
        # ', '.join on a bare string would split it into single characters.
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)
    # Observed series (drawn first, slightly thicker, so it stays visible underneath)
    obs, = ax.plot(df['observed'], label='observed', color=COLORS.get('Minnesota Blue'),
                   linewidth=0.9)
    # Simulated series
    sim, = ax.plot(df['simulated'], label='simulated', color=COLORS.get('Orange'),
                   linewidth=0.5)

    ax.set_title(f'Simulated and Observed {constituent} \n Station(s): ' + stations,
                 **CFONT, fontsize=12, weight='bold')
    ax.set_xlabel('Date', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Build the legend once. A second bare ax.legend() call would replace it
    # and drop the requested font size.
    ax.legend(handles=[obs, sim], prop={'family': CFONT['fontname'], 'size': 9})

    # Set the font and font size for the tick labels
    plt.xticks(**CFONT, fontsize=9)
    plt.yticks(**CFONT, fontsize=9)

    # Add y axis gridlines and set the y axis minimum to 0
    ax.grid(which='major', axis='y', linewidth=0.3)
    ax.set_ylim(bottom=0)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        target = Path(save_path)
        if target.is_dir():
            target = target / (constituent + '_contTimeseries_' + stations + '.png')
        fig.savefig(target, bbox_inches='tight')

    return fig, ax
755
+
756
+ #test_df = create_test_data()
757
+
758
+ #fig, ax = contTimeseries(test_df,station_id, 'Nitrate + Nitrite', 'kg/d')
759
+
760
+ #%% Duration Curve/Exceedance Plot: FDCexceed
761
+
762
+ # Create random data for input into FDCexceed function
763
def create_test_data(n_points=80, seed=None):
    '''
    Build a random dataframe of paired simulated/observed values for trying out
    the plotting functions in this module (e.g., FDCexceed).

    Parameters
    ----------
    n_points : Python integer
        -Number of rows to generate. Default is 80.

    seed : Python integer or None
        -Seed passed to numpy.random.default_rng.
        -Default is None, which produces different data on every call.

    Returns
    -------
    test_df : Pandas Dataframe
        -Columns, in order: date, observed, observed rank, observed exceed %,
         simulated, simulated rank, simulated exceed %.
        -Dates are random whole-hour timestamps between 2020-05-20 08:00 and
         2023-09-30 17:00 (inclusive); they are neither sorted nor guaranteed unique.
        -Values are random whole numbers from 0 to 499 stored as floats.
        -Ranks use method='first' (highest value is ranked #1) and the exceedance
         percentage is rank / (n_points + 1) * 100.

    '''
    start_date = datetime(2020, 5, 20, 8, 0, 0)
    end_date = datetime(2023, 9, 30, 17, 0, 0)
    # Number of whole-hour offsets that keep a timestamp inside [start_date, end_date].
    n_hours = int((end_date - start_date).total_seconds() // 3600) + 1

    rng = np.random.default_rng(seed)
    hour_offsets = rng.integers(n_hours, size=n_points)
    dates = pd.to_datetime(start_date) + pd.to_timedelta(hour_offsets, unit='h')

    # Build a single frame instead of merging two frames on 'date': random dates can
    # collide, and a merge on duplicated dates would multiply the rows.
    test_df = pd.DataFrame({'date': dates})
    for name in ('observed', 'simulated'):
        rank_col = f'{name} rank'
        test_df[name] = rng.integers(500, size=n_points).astype(float)
        test_df[rank_col] = test_df[name].rank(method='first', ascending=False)
        test_df[f'{name} exceed %'] = test_df[rank_col] / (n_points + 1) * 100

    return test_df
786
+
787
def FDCexceed(df, station_id, constituent, units, save_path=None):
    r'''
    Returns a flow duration curve or exceedance plot for simulated and observed data.

    Parameters
    ----------
    df : Pandas Dataframe
        -Dataframe with 2 columns: simulated, observed
        -In data preprocessing, the simulated data should be paired with observed data.
        -If you wish to use observed data from more than one station, there should still only be one
         'observed' column, i.e., no overlapping observed data.
        -The input dataframe is not modified. The exceedance percentage is calculated inside
         this function by ranking each column (highest is ranked #1, ties share the average rank):
             P = m / (n + 1) * 100, where:
                 P = exceedance percentage
                 m = the ranking of all data for the period of record
                 n = the total number of rows in df

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used to include the station ID(s) in the plot title
         and in the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a list with one station.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
        -'Flow' or 'flow' produces the title 'Flow Duration Curves'; anything else
         produces '<constituent> Exceedance Plot'.
        -Options include:
            Flow
            TSS (total suspended solids)
            N+N (nitrate plus nitrite nitrogen)
            PO4 (orthophosphate phosphorus)
            TP (total phosphorus)
            TKN (total Kjeldahl nitrogen)

    units: Python string
        -Input units as you'd like it to appear on the figure axis (without parentheses)
        -Make sure the units you enter match the units of the data in your input df
        -Options include:
            cfs
            mg/L (milligrams per liter)
            ppm (parts per million)
            ug/L (micrograms per liter)
            ppb (parts per billion)

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder path is entered, the file name will default to
         'constituent_FDCexceed_stationID(s).png'
        -Make sure to use forward slashes (/) instead of backslashes (\).
        -File path example: 'C:/Users/cfreema/Desktop/delete/figure_6.png'
        -Folder path example: 'C:/Users/cfreema/Desktop/delete/'

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module

    '''
    # Rank a copy so the helper columns are not added to the caller's dataframe.
    ranked = _exceedence(df.copy())

    # A bare string would otherwise be joined character by character.
    if isinstance(station_id, str):
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)

    # Each curve is sorted by its own values so the line is drawn from the
    # highest value (lowest exceedance %) to the lowest value.
    by_observed = ranked.sort_values(by='observed', ascending=False)
    obs, = ax.plot(by_observed['observed exceed %'], by_observed['observed'],
                   label='observed', color=COLORS.get('Minnesota Blue'))
    by_simulated = ranked.sort_values(by='simulated', ascending=False)
    sim, = ax.plot(by_simulated['simulated exceed %'], by_simulated['simulated'],
                   label='simulated', color=COLORS.get('Orange'))

    # Add plot title
    if constituent in ('Flow', 'flow'):
        ax.set_title('Flow Duration Curves \n Station(s): ' + stations, **CFONT, fontsize=12, weight='bold')
    else:
        ax.set_title(f'{constituent} Exceedance Plot \n Station(s): ' + stations, **CFONT, fontsize=12, weight='bold')
    # Add axis labels
    ax.set_xlabel('Exceedance Percentage', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Create the legend once; a second legend() call would replace it and drop the font size.
    legend = ax.legend(handles=[obs, sim], fontsize=9)
    plt.setp(legend.get_texts(), family='Calibri')

    # Set the font and font size for the tick labels
    plt.setp(ax.get_xticklabels(), **CFONT, fontsize=9)
    plt.setp(ax.get_yticklabels(), **CFONT, fontsize=9)
    # Add axis gridlines
    ax.grid(which='major', axis='both', linewidth=0.3)
    ax.grid(which='minor', axis='y', linewidth=0.3)
    # Convert y axis to log scale
    ax.set_yscale('log')
    # Set the x axis minimum to 0
    ax.set_xlim(left=0)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        save_path = Path(save_path)
        if save_path.is_dir():
            filepath = save_path.joinpath(constituent + '_FDCexceed_' + stations + '.png')
        else:
            filepath = save_path
        fig.savefig(filepath, bbox_inches='tight')

    return fig, ax
897
+
898
+
899
+
900
+ #test_df = create_test_data()
901
+
902
+ #fig, ax = FDCexceed(test_df,station_id, 'flow', 'cfs')
903
+
904
+ #%% Flow Duration Curve or Exceedance Plot with Points: FDCexceedPoints
905
+
906
def FDCexceedPoints(df, station_id, constituent, units, save_path=None):
    r'''
    Returns a duration curve (flow) or exceedance plot (sediment and nutrients)
    for observed data with points representing paired (not ranked) simulated data.

    Parameters
    ----------
    df : Pandas Dataframe
        -Dataframe with 2 columns: simulated, observed
        -In data preprocessing, the simulated data should be paired with observed data.
        -If you wish to use observed data from more than one station, there should still only be one
         'observed' column, i.e., no overlapping observed data.
        -The input dataframe is not modified. The exceedance percentage is calculated inside
         this function by ranking the observed data (highest is ranked #1, ties are ranked
         in the order they appear):
             P = m / (n + 1) * 100, where:
                 P = exceedance percentage
                 m = the ranking of all data for the period of record
                 n = the total number of rows in df
        -Each simulated value is plotted at the exceedance percentage of its paired
         observed value.

    station_id : Python list
        -Include one or more station ID numbers (i.e., H49009001).
        -This argument is only used to include the station ID(s) in the plot title
         and in the default file name.
        -Example: ['H49009001'] or ['W49012002', 'W49012003']
        -A single string is treated as a list with one station.

    constituent: Python string
        -Input constituent as you'd like it to appear on the figure title and axis
        -'Flow' or 'flow' produces the title 'Flow Duration Curve'; anything else
         produces '<constituent> Exceedance Plot'.
        -Options include:
            Flow
            TSS (total suspended solids)
            N+N (nitrate plus nitrite nitrogen)
            PO4 (orthophosphate phosphorus)
            TP (total phosphorus)
            TKN (total Kjeldahl nitrogen)

    units: Python string
        -Input units as you'd like it to appear on the figure axis (without parentheses)
        -Make sure the units you enter match the units of the data in your input df
        -Options include:
            cfs
            mg/L (milligrams per liter)
            ppm (parts per million)
            ug/L (micrograms per liter)
            ppb (parts per billion)

    save_path: Python string
        -Default is None and will not save the figure anywhere.
        -Enter the folder path OR the file path (it is recommended to save the file as a .png).
        -If an existing folder path is entered, the file name will default to
         'constituent_FDCexceedPoints_stationID(s).png'
        -Make sure to use forward slashes (/) instead of backslashes (\).
        -File path example: 'C:/Users/cfreema/Desktop/delete/figure_6.png'
        -Folder path example: 'C:/Users/cfreema/Desktop/delete/'

    Returns
    -------
    fig : Figure object of matplotlib.figure module
    ax : AxesSubplot object of matplotlib.axes._subplots module

    '''
    # Rank a copy so the helper columns are not added to the caller's dataframe.
    ranked = df.copy()
    ranked['observed rank'] = ranked['observed'].rank(method='first', ascending=False)
    ranked['observed exceed %'] = ranked['observed rank'] / (len(ranked) + 1) * 100

    # A bare string would otherwise be joined character by character.
    if isinstance(station_id, str):
        station_id = [station_id]
    stations = ', '.join(station_id)

    # Create a figure containing a single axes
    fig, ax = plt.subplots(figsize=(7, 4), dpi=600)

    # Sort by observed value so the observed curve is drawn from the highest value
    # (lowest exceedance %) to the lowest; simulated points share the observed x values.
    ranked = ranked.sort_values(by='observed', ascending=False)
    sim, = ax.plot(ranked['observed exceed %'], ranked['simulated'], '.',
                   label='simulated', color=COLORS.get('Orange'))
    obs, = ax.plot(ranked['observed exceed %'], ranked['observed'],
                   label='observed', color=COLORS.get('Minnesota Blue'))

    # Add plot title
    if constituent in ('Flow', 'flow'):
        ax.set_title('Flow Duration Curve \n Station(s): ' + stations, **CFONT, fontsize=12, weight='bold')
    else:
        ax.set_title(f'{constituent} Exceedance Plot \n Station(s): ' + stations, **CFONT, fontsize=12, weight='bold')
    # Add axis labels
    ax.set_xlabel('Exceedance Percentage', **CFONT, fontsize=10)
    ax.set_ylabel(f'{constituent} ({units})', **CFONT, fontsize=10)

    # Create the legend once; a second legend() call would replace it, dropping both
    # the font size and the observed-then-simulated order.
    legend = ax.legend(handles=[obs, sim], fontsize=9)
    plt.setp(legend.get_texts(), family='Calibri')

    # Set the font and font size for the tick labels
    plt.setp(ax.get_xticklabels(), **CFONT, fontsize=9)
    plt.setp(ax.get_yticklabels(), **CFONT, fontsize=9)
    # Add axis gridlines
    ax.grid(which='major', axis='both', linewidth=0.3)
    ax.grid(which='minor', axis='y', linewidth=0.3)
    # Convert y axis to log scale
    ax.set_yscale('log')
    # Set the x axis minimum to 0
    ax.set_xlim(left=0)

    # Save figure, if given a folder or file path in argument save_path
    if save_path is not None:
        save_path = Path(save_path)
        if save_path.is_dir():
            filepath = save_path.joinpath(constituent + '_FDCexceedPoints_' + stations + '.png')
        else:
            filepath = save_path
        fig.savefig(filepath, bbox_inches='tight')

    return fig, ax
1015
+
1016
+
1017
def _exceedence(df):
    '''
    Add rank and exceedance-percentage columns for the simulated and observed data.

    Parameters
    ----------
    df : Pandas Dataframe
        -Must contain the columns: simulated, observed.
        -The dataframe is modified in place.

    Returns
    -------
    df : Pandas Dataframe
        -The same dataframe object with four added (or overwritten) columns:
         simulated rank, simulated exceed %, observed rank, observed exceed %.
        -Ranks are descending (highest value is ranked #1) and ties share the average rank.
        -Exceedance percentage is rank / (number of rows + 1) * 100.

    '''
    n_plus_one = len(df) + 1
    column_sets = (
        ('simulated', 'simulated rank', 'simulated exceed %'),
        ('observed', 'observed rank', 'observed exceed %'),
    )
    for value_col, rank_col, percent_col in column_sets:
        df[rank_col] = df[value_col].rank(method='average', ascending=False)
        df[percent_col] = df[rank_col] / n_plus_one * 100

    return df