loone-data-prep 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in its public registry. It is provided for informational purposes only.
- loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +2 -1
- loone_data_prep/LOONE_DATA_PREP.py +115 -2
- loone_data_prep/flow_data/S65E_total.py +71 -6
- loone_data_prep/flow_data/forecast_bias_correction.py +193 -8
- loone_data_prep/flow_data/get_inflows.py +130 -41
- loone_data_prep/flow_data/get_outflows.py +110 -26
- loone_data_prep/flow_data/hydro.py +121 -27
- loone_data_prep/utils.py +339 -62
- loone_data_prep/water_level_data/get_all.py +208 -11
- loone_data_prep/water_level_data/hydro.py +71 -3
- loone_data_prep/water_quality_data/get_inflows.py +88 -3
- loone_data_prep/water_quality_data/get_lake_wq.py +85 -3
- loone_data_prep/water_quality_data/wq.py +44 -0
- loone_data_prep/weather_data/get_all.py +126 -3
- loone_data_prep/weather_data/weather.py +185 -27
- {loone_data_prep-0.1.6.dist-info → loone_data_prep-0.1.8.dist-info}/METADATA +2 -1
- loone_data_prep-0.1.8.dist-info/RECORD +27 -0
- {loone_data_prep-0.1.6.dist-info → loone_data_prep-0.1.8.dist-info}/WHEEL +1 -1
- loone_data_prep-0.1.6.dist-info/RECORD +0 -27
- {loone_data_prep-0.1.6.dist-info → loone_data_prep-0.1.8.dist-info}/LICENSE +0 -0
- {loone_data_prep-0.1.6.dist-info → loone_data_prep-0.1.8.dist-info}/top_level.txt +0 -0
loone_data_prep/utils.py
CHANGED
@@ -11,7 +11,10 @@ from retry import retry
 from scipy.optimize import fsolve
 from scipy import interpolate
 from rpy2.robjects import r
-from rpy2.robjects.vectors import StrVector as rpy2StrVector, DataFrame as rpy2DataFrame
+from rpy2.robjects.vectors import (
+    StrVector as rpy2StrVector,
+    DataFrame as rpy2DataFrame,
+)
 from rpy2.rinterface_lib.embedded import RRuntimeError
 
 
@@ -44,7 +47,15 @@ INTERP_DICT = {
     },
     "PHOSPHATE, ORTHO AS P": {
         "units": "mg/L",
-        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_ids": [
+            "L001",
+            "L004",
+            "L005",
+            "L006",
+            "L007",
+            "L008",
+            "LZ40",
+        ],
     },
     "NITRATE+NITRITE-N": {
         "units": "mg/L",
@@ -146,9 +157,26 @@ INTERP_DICT = {
             "LZ40",
         ],
     },
-    "DISSOLVED OXYGEN": {
-
-
+    "DISSOLVED OXYGEN": {
+        "units": "mg/L",
+        "station_ids": [
+            "L001",
+            "L004",
+            "L005",
+            "L006",
+            "L007",
+            "L008",
+            "LZ40",
+        ],
+    },
+    "RADP": {
+        "units": "MICROMOLE/m^2/s",
+        "station_ids": ["L001", "L005", "L006", "LZ40"],
+    },
+    "RADT": {
+        "units": "kW/m^2",
+        "station_ids": ["L001", "L005", "L006", "LZ40"],
+    },
 }
 DEFAULT_PREDICTION_STATIONS_IDS = [
     "S65E_S",
@@ -263,14 +291,18 @@ def data_interpolations(
     Data_In = Data_In.set_index(["date"])
     Data_In.index = pd.to_datetime(Data_In.index, unit="ns")
     Data_df = Data_In.resample("D").mean()
-    Data_df = Data_df.dropna(subset=["%s_%s_%s" % (station, parameter, units)])
+    Data_df = Data_df.dropna(
+        subset=["%s_%s_%s" % (station, parameter, units)]
+    )
     Data_df = Data_df.reset_index()
     Data_df["Yr_M"] = pd.to_datetime(Data_df["date"]).dt.to_period("M")
     start_date = Data_df["date"].iloc[0]
     end_date = Data_df["date"].iloc[-1]
     date_rng = pd.date_range(start=start_date, end=end_date, freq="M")
     Monthly_df = pd.DataFrame(date_rng, columns=["date"])
-    Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period("M")
+    Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period(
+        "M"
+    )
     New_date = []
     New_data = []
     Days = []
@@ -282,13 +314,27 @@ def data_interpolations(
         if i in Data_df.index:
             if type(Data_df.loc[i]["date"]) == pd.Timestamp:
                 New_date.append(Data_df.loc[i]["date"])
-                New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)])
+                New_data.append(
+                    Data_df.loc[i][
+                        "%s_%s_%s" % (station, parameter, units)
+                    ]
+                )
             else:
                 for j in range(len(Data_df.loc[i]["date"])):
                     New_date.append(Data_df.loc[i]["date"][j])
-                    New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)][j])
+                    New_data.append(
+                        Data_df.loc[i][
+                            "%s_%s_%s" % (station, parameter, units)
+                        ][j]
+                    )
         elif i not in Data_df.index:
-            New_date.append(datetime.datetime(Monthly_df.loc[i]["date"].year, Monthly_df.loc[i]["date"].month, 1))
+            New_date.append(
+                datetime.datetime(
+                    Monthly_df.loc[i]["date"].year,
+                    Monthly_df.loc[i]["date"].month,
+                    1,
+                )
+            )
             New_data.append(np.NaN)
 
     New_date = pd.to_datetime(New_date, format="%Y-%m-%d")
@@ -302,7 +348,9 @@ def data_interpolations(
         Days_cum.append(
             Days_cum[i - 1]
             + Days[i]
-            + monthrange(New_date[i - 1].year, New_date[i - 1].month)[1]
+            + monthrange(New_date[i - 1].year, New_date[i - 1].month)[
+                1
+            ]
             - Days[i - 1]
         )
     Final_df = pd.DataFrame()
@@ -316,7 +364,9 @@ def data_interpolations(
     Final_df["date"] = pd.to_datetime(Final_df["date"], format="%Y-%m-%d")
     start_date = Final_df["date"].iloc[0]
     end_date = Final_df["date"].iloc[-1]
-    date_rng_TSS_1 = pd.date_range(start=start_date, end=end_date, freq="D")
+    date_rng_TSS_1 = pd.date_range(
+        start=start_date, end=end_date, freq="D"
+    )
     # Create a data frame with a date column
     Data_df = pd.DataFrame(date_rng_TSS_1, columns=["date"])
     Data_len = len(Data_df.index)
@@ -328,7 +378,9 @@ def data_interpolations(
     for i in range(1, Data_len):
         Cum_days[i] = Cum_days[i - 1] + 1
         # Data_daily[i] = interpolate.interp1d(Final_df['Days'], Final_df['TSS'] , kind = 'linear')(Cum_days[i])
-        Data_daily[i] = np.interp(Cum_days[i], Final_df["Days_cum"], Final_df["Data"])
+        Data_daily[i] = np.interp(
+            Cum_days[i], Final_df["Days_cum"], Final_df["Data"]
+        )
     Data_df["Data"] = Data_daily
     Data_df.to_csv(f"{workspace}/{name}_Interpolated.csv", index=False)
 
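Note on the hunk above: `np.interp(x, xp, fp)` performs piecewise-linear interpolation of the points `(xp, fp)` at position `x`, so each cumulative day receives a value between its two bracketing monthly samples. A self-contained illustration (the numbers are made up):

    import numpy as np

    # Value halfway between the samples at 2.0 (20.0) and 3.0 (30.0) -> 25.0
    print(np.interp(2.5, [1.0, 2.0, 3.0], [10.0, 20.0, 30.0]))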
@@ -341,11 +393,17 @@ def interpolate_all(workspace: str, d: dict = INTERP_DICT) -> None:
         d (dict, optional): Dict with parameter key, units, and station IDs. Defaults to INTERP_DICT.
     """
     for param, values in d.items():
-        print(f"Interpolating parameter: {param} for station IDs: {values['station_ids']}.")
-        data_interpolations(workspace, param, values["units"], values["station_ids"])
+        print(
+            f"Interpolating parameter: {param} for station IDs: {values['station_ids']}."
+        )
+        data_interpolations(
+            workspace, param, values["units"], values["station_ids"]
+        )
 
 
-def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str = "nu.csv"):
+def kinematic_viscosity(
+    workspace: str, in_file_name: str, out_file_name: str = "nu.csv"
+):
     # Read Mean H2O_T in LO
     LO_Temp = pd.read_csv(os.path.join(workspace, in_file_name))
     LO_T = LO_Temp["Water_T"]
@@ -354,13 +412,23 @@ def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str = "nu.csv"):
 
     class nu_Func:
         def nu(T):
-            nu20 = 1.0034 / 1e6  # m2/s (kinematic viscosity of water at T = 20 C)
+            nu20 = (
+                1.0034 / 1e6
+            )  # m2/s (kinematic viscosity of water at T = 20 C)
 
             def func(x):
                 # return[log(x[0]/nu20)-((20-T)/(T+96))*(1.2364-1.37E-3*(20-T)+5.7E-6*(20-T)**2)]
                 return [
                     (x[0] / nu20)
-                    - 10 ** (((20 - T) / (T + 96)) * (1.2364 - 1.37e-3 * (20 - T) + 5.7e-6 * (20 - T) ** 2))
+                    - 10
+                    ** (
+                        ((20 - T) / (T + 96))
+                        * (
+                            1.2364
+                            - 1.37e-3 * (20 - T)
+                            + 5.7e-6 * (20 - T) ** 2
+                        )
+                    )
                 ]
 
             sol = fsolve(func, [9.70238995692062e-07])
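For reference, the equation the `fsolve` call above solves is readable directly from `func`: a standard temperature correction for the kinematic viscosity of water relative to its value at 20 °C,

    \[ \frac{\nu(T)}{\nu_{20}} = 10^{\frac{20 - T}{T + 96}\left(1.2364 \,-\, 1.37\times10^{-3}(20 - T) \,+\, 5.7\times10^{-6}(20 - T)^{2}\right)}, \qquad \nu_{20} = 1.0034\times10^{-6}\ \mathrm{m^2/s}. \]

Since the right-hand side is explicit in \(T\), the root is simply \(\nu = \nu_{20}\cdot 10^{(\cdot)}\); evaluating that expression directly would give the same result without `fsolve`.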
@@ -407,7 +475,11 @@ def wind_induced_waves(
             (
                 0.283
                 * np.tanh(0.53 * (g * d / WS**2) ** 0.75)
-                * np.tanh(0.00565 * (g * F / WS**2) ** 0.5 / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8)))
+                * np.tanh(
+                    0.00565
+                    * (g * F / WS**2) ** 0.5
+                    / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8))
+                )
             )
             * WS**2
             / g
@@ -419,7 +491,11 @@ def wind_induced_waves(
             (
                 7.54
                 * np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
-                * np.tanh(0.0379 * (g * F / WS**2) ** 0.5 / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8)))
+                * np.tanh(
+                    0.0379
+                    * (g * F / WS**2) ** 0.5
+                    / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
+                )
             )
             * WS
             / g
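The expressions in the last two hunks appear to be SMB-type (Sverdrup-Munk-Bretschneider) fetch-limited wave hindcasting relations for wave height and period in water of depth \(d\), fetch \(F\), and wind speed \(U\) (`WS`); transcribed from the code they read

    \[ H = \frac{0.283\,U^{2}}{g}\,\tanh\!\big[0.53\,(gd/U^{2})^{3/4}\big]\,\tanh\!\left[\frac{0.00565\,(gF/U^{2})^{1/2}}{\tanh\big[0.53\,(gd/U^{2})^{3/8}\big]}\right], \]

    \[ T = \frac{7.54\,U}{g}\,\tanh\!\big[0.833\,(gd/U^{2})^{3/8}\big]\,\tanh\!\left[\frac{0.0379\,(gF/U^{2})^{1/2}}{\tanh\big[0.833\,(gd/U^{2})^{3/8}\big]}\right]. \]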
@@ -428,7 +504,10 @@ def wind_induced_waves(
 
         def L(g, d, T):
             def func(x):
-                return [(g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0]) - x[0]]
+                return [
+                    (g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0])
+                    - x[0]
+                ]
 
             sol = fsolve(func, [1])
             L = sol[0]
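The nested `L` solver targets the finite-depth linear-wave dispersion relation, in which the wavelength appears on both sides and so must be found iteratively:

    \[ L = \frac{g T^{2}}{2\pi}\,\tanh\!\left(\frac{2\pi d}{L}\right). \]

One caveat when reading this hunk: under Python operator precedence, `g * T**2 / 2 * np.pi` evaluates to \(g T^{2}\pi/2\); expressing \(g T^{2}/(2\pi)\) would require `g * T**2 / (2 * np.pi)`.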
@@ -443,12 +522,18 @@ def wind_induced_waves(
         W_T[i] = Wind_Func.T(g, LO_Wd[i], F, LO_WS["WS_mps"].iloc[i])
         W_L[i] = Wind_Func.L(g, LO_Wd[i], W_T[i])
         W_ShearStress[i] = (
-            W_H[i] * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5) / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
+            W_H[i]
+            * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5)
+            / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
         )
 
     Wind_ShearStress = pd.DataFrame(LO_WS["date"], columns=["date"])
-    Wind_ShearStress["ShearStress"] = W_ShearStress * 10  # Convert N/m2 to Dyne/cm2
-    Wind_ShearStress.to_csv(os.path.join(output_dir, wind_shear_stress_out), index=False)
+    Wind_ShearStress["ShearStress"] = (
+        W_ShearStress * 10
+    )  # Convert N/m2 to Dyne/cm2
+    Wind_ShearStress.to_csv(
+        os.path.join(output_dir, wind_shear_stress_out), index=False
+    )
 
     # # Monthly
     # Wind_ShearStress['Date'] = pd.to_datetime(Wind_ShearStress['Date'])
@@ -484,8 +569,12 @@ def wind_induced_waves(
         Current_Stress[i] = Current_bottom_shear_stress(ru, Wind_Stress[i])
 
     Current_ShearStress_df = pd.DataFrame(LO_WS["date"], columns=["date"])
-    Current_ShearStress_df["Current_Stress"] = Current_Stress * 10  # Convert N/m2 to Dyne/cm2
-    Current_ShearStress_df["Wind_Stress"] = Wind_Stress * 10  # Convert N/m2 to Dyne/cm2
+    Current_ShearStress_df["Current_Stress"] = (
+        Current_Stress * 10
+    )  # Convert N/m2 to Dyne/cm2
+    Current_ShearStress_df["Wind_Stress"] = (
+        Wind_Stress * 10
+    )  # Convert N/m2 to Dyne/cm2
     Current_ShearStress_df["Wind_Speed_m/s"] = LO_WS["WS_mps"]
 
     def Current_bottom_shear_stress_2(u, k, nu, ks, z, ru):
@@ -500,7 +589,10 @@ def wind_induced_waves(
         sol2 = fsolve(func2, [1])
 
         def func3(u_str3):
-            return [u_str3[0] - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))]
+            return [
+                u_str3[0]
+                - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))
+            ]
 
         sol3 = fsolve(func3, [1])
         if sol1[0] * ks / nu <= 5:
@@ -514,7 +606,9 @@ def wind_induced_waves(
 
     def Current_bottom_shear_stress_3(u, k, nu, ks, z, ru):
         def func1(u_str1):
-            return [u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))]
+            return [
+                u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))
+            ]
 
         sol1 = fsolve(func1, [1])
 
@@ -524,7 +618,12 @@ def wind_induced_waves(
         sol2 = fsolve(func2, [1])
 
         def func3(u_str3):
-            return [u_str3[0] - u * k * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))]
+            return [
+                u_str3[0]
+                - u
+                * k
+                * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))
+            ]
 
         sol3 = fsolve(func3, [1])
         if sol1[0] * ks / nu <= 5:
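The `func1`/`func3` root-finds in the surrounding hunks look like inversions of a law-of-the-wall velocity profile for the friction velocity \(u_*\), with `k` the von Kármán constant \(\kappa\), `z` the height, `nu` the kinematic viscosity, and `ks` the roughness height:

    \[ u = \frac{u_*}{\kappa}\,\ln\frac{z}{z_0}, \qquad z_0 = 0.11\,\frac{\nu}{u_*}\ \text{(smooth)} \quad\text{or}\quad z_0 = 0.11\,\frac{\nu}{u_*} + 0.0333\,k_s\ \text{(transitional/rough)}, \]

and the branch test `sol1[0] * ks / nu <= 5` matches the usual roughness-Reynolds-number criterion \(u_* k_s/\nu \le 5\) for a hydraulically smooth bed.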
@@ -541,22 +640,34 @@ def wind_induced_waves(
     ks = 5.27e-4  # m
     current_stress_3 = np.zeros(n, dtype=object)
     for i in range(n):
-        current_stress_3[i] = Current_bottom_shear_stress_3(0.05, 0.41, nu, ks, LO_Wd[i], ru)
-    Current_ShearStress_df["Current_Stress_3"] = current_stress_3 * 10  # Convert N/m2 to Dyne/cm2
-    Current_ShearStress_df.to_csv(os.path.join(output_dir, current_shear_stress_out), index=False)
+        current_stress_3[i] = Current_bottom_shear_stress_3(
+            0.05, 0.41, nu, ks, LO_Wd[i], ru
+        )
+    Current_ShearStress_df["Current_Stress_3"] = (
+        current_stress_3 * 10
+    )  # Convert N/m2 to Dyne/cm2
+    Current_ShearStress_df.to_csv(
+        os.path.join(output_dir, current_shear_stress_out), index=False
+    )
 
 
-def stg2sto(stg_sto_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
+def stg2sto(
+    stg_sto_data_path: str, v: pd.Series, i: int
+) -> interpolate.interp1d:
     stgsto_data = pd.read_csv(stg_sto_data_path)
     # NOTE: We Can use cubic interpolation instead of linear
     x = stgsto_data["Stage"]
     y = stgsto_data["Storage"]
     if i == 0:
         # return storage given stage
-        return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            x, y, fill_value="extrapolate", kind="linear"
+        )(v)
     else:
         # return stage given storage
-        return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            y, x, fill_value="extrapolate", kind="linear"
+        )(v)
 
 
 def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
@@ -569,10 +680,14 @@ def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
     y = stgar_data["Surf_Area"]
     if i == 0:
         # return surface area given stage
-        return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            x, y, fill_value="extrapolate", kind="linear"
+        )(v)
     else:
         # return stage given surface area
-        return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            y, x, fill_value="extrapolate", kind="linear"
+        )(v)
 
 
 @retry(Exception, tries=3, delay=15, backoff=2)
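`stg2sto` and `stg2ar` are thin wrappers around `scipy.interpolate.interp1d` with linear interpolation and extrapolation enabled; the integer flag selects the lookup direction. A minimal usage sketch (the CSV name and stage values are illustrative only; the file is assumed to have `Stage` and `Storage` columns):

    import pandas as pd
    from loone_data_prep.utils import stg2sto

    stages = pd.Series([10.5, 11.0, 12.3])
    storages = stg2sto("StgSto_data.csv", stages, 0)  # i == 0: stage -> storage
    stages_again = stg2sto("StgSto_data.csv", pd.Series(storages), 1)  # i == 1: storage -> stage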
@@ -580,20 +695,27 @@ def get_pi(workspace: str) -> None:
     # Weekly data is downloaded from:
     # https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv
     # State:Florida Division:4.South Central
-    df = pd.read_csv("https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv")
+    df = pd.read_csv(
+        "https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv"
+    )
     df.to_csv(os.path.join(workspace, "PI.csv"))
 
 
 def nutrient_prediction(
-    input_dir: str, output_dir: str, station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS, constants: dict = DEFAULT_EXPFUNC_CONSTANTS,
+    input_dir: str,
+    output_dir: str,
+    station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS,
+    constants: dict = DEFAULT_EXPFUNC_CONSTANTS,
 ) -> None:
     for station in station_ids:
         print(f"Predicting nutrient loads for station: {station}.")
         # Construct paths for flow file
-        flow_file_path =
+        flow_file_path = ""
         flow_file_path_exists = True
         try:
-            flow_file_path = glob(os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv"))[0]
+            flow_file_path = glob(
+                os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv")
+            )[0]
         except Exception as e:
             flow_file_path_exists = False
 
@@ -603,7 +725,9 @@ def nutrient_prediction(
             flow = pd.read_csv(flow_file_path)
         else:
             # If it doesn't exist, skip to the next iteration of the loop
-            print(f"Skipping nutrient prediction for station: {station}. Flow file does not exist.")
+            print(
+                f"Skipping nutrient prediction for station: {station}. Flow file does not exist."
+            )
             continue
 
         # Create structures to hold resulting data
@@ -615,6 +739,7 @@ def nutrient_prediction(
             if "ensemble" not in column_name:
                 continue
             import warnings
+
             warnings.filterwarnings("error")
 
             try:
@@ -623,16 +748,22 @@ def nutrient_prediction(
 
                 # Calculate the logarithm of the flow data
 
-                Q_Log = np.log(flow_column + 1e-8)  # Add a small number to prevent log(0) errors
+                Q_Log = np.log(
+                    flow_column + 1e-8
+                )  # Add a small number to prevent log(0) errors
 
                 # Calculate the predicted TP loads using the logarithm of the flow data
-                TP_Loads_Predicted_Log = constants[station]["a"] * Q_Log ** constants[station]["b"]
+                TP_Loads_Predicted_Log = (
+                    constants[station]["a"] * Q_Log ** constants[station]["b"]
+                )
 
                 # Calculate the predicted TP loads using the exponential of the predicted TP loads logarithm
                 predicted_column = np.exp(TP_Loads_Predicted_Log)
 
                 # Store prediction data in a pandas DataFrame (So we can concat all ensemble data into one dataframe)
-                predicted_column = pd.DataFrame(predicted_column.tolist(), index=flow["date"].copy())
+                predicted_column = pd.DataFrame(
+                    predicted_column.tolist(), index=flow["date"].copy()
+                )
                 predicted_column.columns = [column_name]
 
                 prediction_columns.append(predicted_column)
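Putting the pieces of this hunk together, each ensemble member's phosphorus load is predicted with a log-space power law using station-specific constants \(a\) and \(b\) from `DEFAULT_EXPFUNC_CONSTANTS`:

    \[ \ln \widehat{L}_{TP} = a\,\big(\ln(Q + 10^{-8})\big)^{b} \quad\Longrightarrow\quad \widehat{L}_{TP} = e^{\,a\,(\ln(Q + 10^{-8}))^{b}}, \]

where \(Q\) is the GEOGLOWS ensemble discharge and the \(10^{-8}\) offset only guards against \(\ln 0\).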
@@ -642,30 +773,174 @@ def nutrient_prediction(
 
         # Concat individual ensemble columns together into one pandas DataFrame
         out_dataframe = pd.concat(objs=prediction_columns, axis="columns")
-
-        column_mean = out_dataframe.mean(axis=
-        column_percentile_25 = out_dataframe.quantile(q=0.25, axis=
-        column_percentile_75 = out_dataframe.quantile(q=0.75, axis=
-        column_median = out_dataframe.median(axis=
-        column_std = out_dataframe.std(axis=
-
-        out_dataframe[
-        out_dataframe[
-        out_dataframe[
-        out_dataframe[
-        out_dataframe[
+
+        column_mean = out_dataframe.mean(axis="columns")
+        column_percentile_25 = out_dataframe.quantile(q=0.25, axis="columns")
+        column_percentile_75 = out_dataframe.quantile(q=0.75, axis="columns")
+        column_median = out_dataframe.median(axis="columns")
+        column_std = out_dataframe.std(axis="columns")
+
+        out_dataframe["mean"] = column_mean
+        out_dataframe["percentile_25"] = column_percentile_25
+        out_dataframe["percentile_75"] = column_percentile_75
+        out_dataframe["median"] = column_median
+        out_dataframe["standard_deviation"] = column_std
 
         # Save the predicted TP loads to a CSV file
-        out_dataframe.to_csv(os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv"))
-
+        out_dataframe.to_csv(
+            os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv")
+        )
+
         # Save the predicted TP loads to a CSV file (in input_dir)
         # Output is needed in input_dir by GEOGLOWS_LOONE_DATA_PREP.py and in output_dir for graph visualization in the app
-        out_dataframe.to_csv(os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv"))
+        out_dataframe.to_csv(
+            os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv")
+        )
+
+
+def photo_period(
+    workspace: str,
+    phi: float = 26.982052,
+    doy: np.ndarray = np.arange(1, 365),
+    verbose: bool = False,
+):
+    """Generate PhotoPeriod.csv file for the given latitude and days of the year.
+
+    Args:
+        workspace (str): A path to the directory where the file will be generated.
+        phi (float, optional): Latitude of the location. Defaults to 26.982052.
+        doy (np.ndarray, optional): An array holding the days of the year that you want the photo period for. Defaults to np.arange(1,365).
+        verbose (bool, optional): Print results of each computation. Defaults to False.
+    """
+    phi = np.radians(phi)  # Convert to radians
+    light_intensity = 2.206 * 10**-3
+
+    C = np.sin(np.radians(23.44))  # sin of the obliquity of 23.44 degrees.
+    B = -4.76 - 1.03 * np.log(
+        light_intensity
+    )  # Eq. [5]. Angle of the sun below the horizon. Civil twilight is -4.76 degrees.
+
+    # Calculations
+    alpha = np.radians(90 + B)  # Eq. [6]. Value at sunrise and sunset.
+    M = 0.9856 * doy - 3.251  # Eq. [4].
+    lmd = (
+        M
+        + 1.916 * np.sin(np.radians(M))
+        + 0.020 * np.sin(np.radians(2 * M))
+        + 282.565
+    )  # Eq. [3]. Lambda
+    delta = np.arcsin(C * np.sin(np.radians(lmd)))  # Eq. [2].
+
+    # Defining sec(x) = 1/cos(x)
+    P = (
+        2
+        / 15
+        * np.degrees(
+            np.arccos(
+                np.cos(alpha) * (1 / np.cos(phi)) * (1 / np.cos(delta))
+                - np.tan(phi) * np.tan(delta)
+            )
+        )
+    )  # Eq. [1].
+
+    # Print results in order for each computation to match example in paper
+    if verbose:
+        print("Input latitude =", np.degrees(phi))
+        print("[Eq 5] B =", B)
+        print("[Eq 6] alpha =", np.degrees(alpha))
+        print("[Eq 4] M =", M[0])
+        print("[Eq 3] Lambda =", lmd[0])
+        print("[Eq 2] delta=", np.degrees(delta[0]))
+        print("[Eq 1] Daylength =", P[0])
+
+    photo_period_df = pd.DataFrame()
+    photo_period_df["Day"] = doy
+    photo_period_df["Data"] = P
+
+    photo_period_df.to_csv(
+        os.path.join(workspace, "PhotoPeriod.csv"), index=False
+    )
+
+
+def find_last_date_in_csv(workspace: str, file_name: str) -> str:
+    """
+    Gets the most recent date from the last line of a .csv file.
+    Assumes the file is formatted as a .csv file, encoded in UTF-8,
+    and the rows in the file are sorted by date in ascending order.
+
+    Args:
+        workspace (str): The directory where the file is located.
+        file_name (str): The name of the file.
+
+    Returns:
+        str: The most recent date as a string in YYYY-MM-DD format, or None if the file does not exist or the date cannot be found.
+    """
+
+    # Helper Functions
+    def is_valid_date(date_string):
+        try:
+            datetime.datetime.strptime(date_string, "%Y-%m-%d")
+            return True
+        except ValueError:
+            return False
+
+    # Check that file exists
+    file_path = os.path.join(workspace, file_name)
+    if not os.path.exists(file_path):
+        return None
+
+    # Attempt to extract the date of the last line in the file
+    try:
+        with open(file_path, "rb") as file:
+            # Go to the end of the file
+            file.seek(-2, os.SEEK_END)
+
+            # Loop backwards until you find the first newline character
+            while file.read(1) != b"\n":
+                file.seek(-2, os.SEEK_CUR)
+
+            # Read the last line
+            last_line = file.readline().decode()
+
+            # Extract the date from the last line
+            date = None
+
+            for value in last_line.split(","):
+                if is_valid_date(value):
+                    date = value
+                    break
+
+            # Return date
+            return date
+    except OSError as e:
+        print(f"Error reading file {file_name}: {e}")
+        return None
+
+
+def dbhydro_data_is_latest(date_latest: str):
+    """
+    Checks whether the given date is the most recent date possible to get data from dbhydro.
+    Can be used to check whether dbhydro data is up-to-date.
+
+    Args:
+        date_latest (str): The date of the most recent data of the dbhydro data you have
+
+    Returns:
+        bool: True if the date_latest is the most recent date possible to get data from dbhydro, False otherwise
+    """
+    date_latest_object = datetime.datetime.strptime(
+        date_latest, "%Y-%m-%d"
+    ).date()
+    return date_latest_object == (
+        datetime.datetime.now().date() - datetime.timedelta(days=1)
+    )
 
 
 if __name__ == "__main__":
     if sys.argv[1] == "get_dbkeys":
-        get_dbkeys(sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:])
+        get_dbkeys(
+            sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:]
+        )
     elif sys.argv[1] == "data_interp":
         interp_args = [x for x in sys.argv[2:]]
         interp_args[0] = interp_args[0].rstrip("/")
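Two reading aids for the large addition above. First, `photo_period` evaluates the daylength model whose equation numbers its comments cite; the final step (Eq. [1]) computes, in hours (with the arccosine taken in degrees),

    \[ P = \frac{2}{15}\,\arccos\!\big[\cos\alpha\,\sec\varphi\,\sec\delta \,-\, \tan\varphi\,\tan\delta\big], \]

where \(\varphi\) is the latitude, \(\delta\) the solar declination from Eqs. [2]-[4], and \(\alpha = 90^\circ + B\) the twilight-adjusted sunrise/sunset angle. Second, the two new date helpers pair naturally; a sketch of a possible freshness check (the workspace path and file name are illustrative only):

    from loone_data_prep.utils import find_last_date_in_csv, dbhydro_data_is_latest

    last = find_last_date_in_csv("/path/to/workspace", "LO_Stage.csv")
    if last is not None and not dbhydro_data_is_latest(last):
        print(f"Data ends at {last}; a DBHYDRO refresh may be needed.")

Note that `dbhydro_data_is_latest` treats "latest" strictly as yesterday's date, so it returns False even when the file's last row is today.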
@@ -677,7 +952,9 @@ if __name__ == "__main__":
     elif sys.argv[1] == "kinematic_viscosity":
         kinematic_viscosity(sys.argv[2].rstrip("/"), *sys.argv[3:])
     elif sys.argv[1] == "wind_induced_waves":
-        wind_induced_waves(sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:])
+        wind_induced_waves(
+            sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:]
+        )
     elif sys.argv[1] == "get_pi":
         get_pi(sys.argv[2].rstrip("/"))
     elif sys.argv[1] == "nutrient_prediction":