loone-data-prep 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loone_data_prep/utils.py CHANGED
@@ -11,7 +11,10 @@ from retry import retry
 from scipy.optimize import fsolve
 from scipy import interpolate
 from rpy2.robjects import r
-from rpy2.robjects.vectors import StrVector as rpy2StrVector, DataFrame as rpy2DataFrame
+from rpy2.robjects.vectors import (
+    StrVector as rpy2StrVector,
+    DataFrame as rpy2DataFrame,
+)
 from rpy2.rinterface_lib.embedded import RRuntimeError
 
 
@@ -44,7 +47,15 @@ INTERP_DICT = {
     },
     "PHOSPHATE, ORTHO AS P": {
         "units": "mg/L",
-        "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"],
+        "station_ids": [
+            "L001",
+            "L004",
+            "L005",
+            "L006",
+            "L007",
+            "L008",
+            "LZ40",
+        ],
     },
     "NITRATE+NITRITE-N": {
         "units": "mg/L",
@@ -146,9 +157,26 @@ INTERP_DICT = {
             "LZ40",
         ],
     },
-    "DISSOLVED OXYGEN": {"units": "mg/L", "station_ids": ["L001", "L004", "L005", "L006", "L007", "L008", "LZ40"]},
-    "RADP": {"units": "MICROMOLE/m^2/s", "station_ids": ["L001", "L005", "L006", "LZ40"]},
-    "RADT": {"units": "kW/m^2", "station_ids": ["L001", "L005", "L006", "LZ40"]},
+    "DISSOLVED OXYGEN": {
+        "units": "mg/L",
+        "station_ids": [
+            "L001",
+            "L004",
+            "L005",
+            "L006",
+            "L007",
+            "L008",
+            "LZ40",
+        ],
+    },
+    "RADP": {
+        "units": "MICROMOLE/m^2/s",
+        "station_ids": ["L001", "L005", "L006", "LZ40"],
+    },
+    "RADT": {
+        "units": "kW/m^2",
+        "station_ids": ["L001", "L005", "L006", "LZ40"],
+    },
 }
 DEFAULT_PREDICTION_STATIONS_IDS = [
     "S65E_S",
@@ -263,14 +291,18 @@ def data_interpolations(
         Data_In = Data_In.set_index(["date"])
         Data_In.index = pd.to_datetime(Data_In.index, unit="ns")
         Data_df = Data_In.resample("D").mean()
-        Data_df = Data_df.dropna(subset=["%s_%s_%s" % (station, parameter, units)])
+        Data_df = Data_df.dropna(
+            subset=["%s_%s_%s" % (station, parameter, units)]
+        )
         Data_df = Data_df.reset_index()
         Data_df["Yr_M"] = pd.to_datetime(Data_df["date"]).dt.to_period("M")
         start_date = Data_df["date"].iloc[0]
         end_date = Data_df["date"].iloc[-1]
         date_rng = pd.date_range(start=start_date, end=end_date, freq="M")
         Monthly_df = pd.DataFrame(date_rng, columns=["date"])
-        Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period("M")
+        Monthly_df["Yr_M"] = pd.to_datetime(Monthly_df["date"]).dt.to_period(
+            "M"
+        )
         New_date = []
         New_data = []
         Days = []
@@ -282,13 +314,27 @@ def data_interpolations(
             if i in Data_df.index:
                 if type(Data_df.loc[i]["date"]) == pd.Timestamp:
                     New_date.append(Data_df.loc[i]["date"])
-                    New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)])
+                    New_data.append(
+                        Data_df.loc[i][
+                            "%s_%s_%s" % (station, parameter, units)
+                        ]
+                    )
                 else:
                     for j in range(len(Data_df.loc[i]["date"])):
                         New_date.append(Data_df.loc[i]["date"][j])
-                        New_data.append(Data_df.loc[i]["%s_%s_%s" % (station, parameter, units)][j])
+                        New_data.append(
+                            Data_df.loc[i][
+                                "%s_%s_%s" % (station, parameter, units)
+                            ][j]
+                        )
             elif i not in Data_df.index:
-                New_date.append(datetime.datetime(Monthly_df.loc[i]["date"].year, Monthly_df.loc[i]["date"].month, 1))
+                New_date.append(
+                    datetime.datetime(
+                        Monthly_df.loc[i]["date"].year,
+                        Monthly_df.loc[i]["date"].month,
+                        1,
+                    )
+                )
                 New_data.append(np.NaN)
 
         New_date = pd.to_datetime(New_date, format="%Y-%m-%d")
@@ -302,7 +348,9 @@ def data_interpolations(
             Days_cum.append(
                 Days_cum[i - 1]
                 + Days[i]
-                + monthrange(New_date[i - 1].year, New_date[i - 1].month)[1]
+                + monthrange(New_date[i - 1].year, New_date[i - 1].month)[
+                    1
+                ]
                 - Days[i - 1]
             )
         Final_df = pd.DataFrame()
@@ -316,7 +364,9 @@ def data_interpolations(
         Final_df["date"] = pd.to_datetime(Final_df["date"], format="%Y-%m-%d")
         start_date = Final_df["date"].iloc[0]
         end_date = Final_df["date"].iloc[-1]
-        date_rng_TSS_1 = pd.date_range(start=start_date, end=end_date, freq="D")
+        date_rng_TSS_1 = pd.date_range(
+            start=start_date, end=end_date, freq="D"
+        )
         # Create a data frame with a date column
         Data_df = pd.DataFrame(date_rng_TSS_1, columns=["date"])
         Data_len = len(Data_df.index)
@@ -328,7 +378,9 @@ def data_interpolations(
         for i in range(1, Data_len):
             Cum_days[i] = Cum_days[i - 1] + 1
             # Data_daily[i] = interpolate.interp1d(Final_df['Days'], Final_df['TSS'] , kind = 'linear')(Cum_days[i])
-            Data_daily[i] = np.interp(Cum_days[i], Final_df["Days_cum"], Final_df["Data"])
+            Data_daily[i] = np.interp(
+                Cum_days[i], Final_df["Days_cum"], Final_df["Data"]
+            )
         Data_df["Data"] = Data_daily
         Data_df.to_csv(f"{workspace}/{name}_Interpolated.csv", index=False)
 
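Note: the interpolation core anchors sparse observations on a cumulative-day axis (Days_cum) and fills every day in between with np.interp. A self-contained sketch of the same idea, with illustrative sample values:

    import numpy as np

    # Observations anchored at cumulative day counts, as in Final_df above.
    days_cum = np.array([0.0, 31.0, 59.0, 90.0])  # days with observations
    data = np.array([1.2, 1.5, 1.4, 1.8])         # observed values
    daily_x = np.arange(0, 91)                    # one point per day
    daily = np.interp(daily_x, days_cum, data)    # linear fill between anchors
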
@@ -341,11 +393,17 @@ def interpolate_all(workspace: str, d: dict = INTERP_DICT) -> None:
         d (dict, optional): Dict with parameter key, units, and station IDs. Defaults to INTERP_DICT.
     """
     for param, values in d.items():
-        print(f"Interpolating parameter: {param} for station IDs: {values['station_ids']}.")
-        data_interpolations(workspace, param, values["units"], values["station_ids"])
+        print(
+            f"Interpolating parameter: {param} for station IDs: {values['station_ids']}."
+        )
+        data_interpolations(
+            workspace, param, values["units"], values["station_ids"]
+        )
 
 
-def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str = "nu.csv"):
+def kinematic_viscosity(
+    workspace: str, in_file_name: str, out_file_name: str = "nu.csv"
+):
     # Read Mean H2O_T in LO
     LO_Temp = pd.read_csv(os.path.join(workspace, in_file_name))
     LO_T = LO_Temp["Water_T"]
@@ -354,13 +412,23 @@ def kinematic_viscosity(workspace: str, in_file_name: str, out_file_name: str =
 
     class nu_Func:
         def nu(T):
-            nu20 = 1.0034 / 1e6  # m2/s (kinematic viscosity of water at T = 20 C)
+            nu20 = (
+                1.0034 / 1e6
+            )  # m2/s (kinematic viscosity of water at T = 20 C)
 
             def func(x):
                 # return[log(x[0]/nu20)-((20-T)/(T+96))*(1.2364-1.37E-3*(20-T)+5.7E-6*(20-T)**2)]
                 return [
                     (x[0] / nu20)
-                    - 10 ** (((20 - T) / (T + 96)) * (1.2364 - 1.37e-3 * (20 - T) + 5.7e-6 * (20 - T) ** 2))
+                    - 10
+                    ** (
+                        ((20 - T) / (T + 96))
+                        * (
+                            1.2364
+                            - 1.37e-3 * (20 - T)
+                            + 5.7e-6 * (20 - T) ** 2
+                        )
+                    )
                 ]
 
             sol = fsolve(func, [9.70238995692062e-07])
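
Note: func encodes the empirical relation nu(T) = nu20 * 10**(((20 - T) / (T + 96)) * (1.2364 - 1.37e-3 * (20 - T) + 5.7e-6 * (20 - T) ** 2)). Since the relation is explicit in nu, it can also be evaluated in closed form; a minimal sketch (not the package's code path, but it should agree with the fsolve result):

    def kinematic_viscosity_direct(T):
        # Closed-form evaluation of the same correlation solved by fsolve above.
        nu20 = 1.0034e-6  # m^2/s at 20 C
        exponent = ((20 - T) / (T + 96)) * (
            1.2364 - 1.37e-3 * (20 - T) + 5.7e-6 * (20 - T) ** 2
        )
        return nu20 * 10**exponent

    print(kinematic_viscosity_direct(25.0))  # ~8.9e-07 m^2/s
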
@@ -407,7 +475,11 @@ def wind_induced_waves(
             (
                 0.283
                 * np.tanh(0.53 * (g * d / WS**2) ** 0.75)
-                * np.tanh(0.00565 * (g * F / WS**2) ** 0.5 / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8)))
+                * np.tanh(
+                    0.00565
+                    * (g * F / WS**2) ** 0.5
+                    / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8))
+                )
             )
             * WS**2
             / g
@@ -419,7 +491,11 @@ def wind_induced_waves(
             (
                 7.54
                 * np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
-                * np.tanh(0.0379 * (g * F / WS**2) ** 0.5 / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8)))
+                * np.tanh(
+                    0.0379
+                    * (g * F / WS**2) ** 0.5
+                    / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
+                )
             )
             * WS
             / g
@@ -428,7 +504,10 @@ def wind_induced_waves(
 
         def L(g, d, T):
            def func(x):
-                return [(g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0]) - x[0]]
+                return [
+                    (g * T**2 / 2 * np.pi) * np.tanh(2 * np.pi * d / x[0])
+                    - x[0]
+                ]
 
            sol = fsolve(func, [1])
            L = sol[0]
@@ -443,12 +522,18 @@ def wind_induced_waves(
         W_T[i] = Wind_Func.T(g, LO_Wd[i], F, LO_WS["WS_mps"].iloc[i])
         W_L[i] = Wind_Func.L(g, LO_Wd[i], W_T[i])
         W_ShearStress[i] = (
-            W_H[i] * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5) / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
+            W_H[i]
+            * (ru * (nu * (2 * np.pi / W_T[i]) ** 3) ** 0.5)
+            / (2 * np.sinh(2 * np.pi * LO_Wd[i] / W_L[i]))
         )
 
     Wind_ShearStress = pd.DataFrame(LO_WS["date"], columns=["date"])
-    Wind_ShearStress["ShearStress"] = W_ShearStress * 10  # Convert N/m2 to Dyne/cm2
-    Wind_ShearStress.to_csv(os.path.join(output_dir, wind_shear_stress_out), index=False)
+    Wind_ShearStress["ShearStress"] = (
+        W_ShearStress * 10
+    )  # Convert N/m2 to Dyne/cm2
+    Wind_ShearStress.to_csv(
+        os.path.join(output_dir, wind_shear_stress_out), index=False
+    )
 
     # # Monthly
     # Wind_ShearStress['Date'] = pd.to_datetime(Wind_ShearStress['Date'])
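
Note: Wind_Func implements SMB-type (Sverdrup-Munk-Bretschneider) shallow-water wave relations: significant wave height H and period T from wind speed WS, depth d, and fetch F; wavelength L from the linear dispersion relation L = (g*T**2 / (2*pi)) * tanh(2*pi*d/L) (the package writes `g * T**2 / 2 * np.pi`, which Python parses as `(g * T**2 / 2) * np.pi`, so the sketch below writes the denominator explicitly); and the wave-induced bottom shear stress used above. A self-contained sketch under assumed inputs (d, F, WS, ru, nu are illustrative, not package values):

    import numpy as np
    from scipy.optimize import fsolve

    g, d, F, WS = 9.81, 3.0, 57000.0, 6.0  # gravity, depth (m), fetch (m), wind (m/s)
    ru, nu = 1000.0, 1.0e-6                # density (kg/m^3), viscosity (m^2/s)

    H = (
        0.283
        * np.tanh(0.53 * (g * d / WS**2) ** 0.75)
        * np.tanh(
            0.00565 * (g * F / WS**2) ** 0.5
            / np.tanh(0.53 * (g * d / WS**2) ** (3 / 8))
        )
    ) * WS**2 / g
    T = (
        7.54
        * np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
        * np.tanh(
            0.0379 * (g * F / WS**2) ** 0.5
            / np.tanh(0.833 * (g * d / WS**2) ** (3 / 8))
        )
    ) * WS / g

    # Textbook dispersion relation, solved for wavelength L.
    L = fsolve(
        lambda x: [(g * T**2 / (2 * np.pi)) * np.tanh(2 * np.pi * d / x[0]) - x[0]],
        [1.0],
    )[0]

    # Wave-induced bottom shear stress (N/m^2), as in W_ShearStress above.
    tau = H * (ru * (nu * (2 * np.pi / T) ** 3) ** 0.5) / (2 * np.sinh(2 * np.pi * d / L))
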
@@ -484,8 +569,12 @@ def wind_induced_waves(
         Current_Stress[i] = Current_bottom_shear_stress(ru, Wind_Stress[i])
 
     Current_ShearStress_df = pd.DataFrame(LO_WS["date"], columns=["date"])
-    Current_ShearStress_df["Current_Stress"] = Current_Stress * 10  # Convert N/m2 to Dyne/cm2
-    Current_ShearStress_df["Wind_Stress"] = Wind_Stress * 10  # Convert N/m2 to Dyne/cm2
+    Current_ShearStress_df["Current_Stress"] = (
+        Current_Stress * 10
+    )  # Convert N/m2 to Dyne/cm2
+    Current_ShearStress_df["Wind_Stress"] = (
+        Wind_Stress * 10
+    )  # Convert N/m2 to Dyne/cm2
     Current_ShearStress_df["Wind_Speed_m/s"] = LO_WS["WS_mps"]
 
     def Current_bottom_shear_stress_2(u, k, nu, ks, z, ru):
  def Current_bottom_shear_stress_2(u, k, nu, ks, z, ru):
@@ -500,7 +589,10 @@ def wind_induced_waves(
500
589
  sol2 = fsolve(func2, [1])
501
590
 
502
591
  def func3(u_str3):
503
- return [u_str3[0] - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))]
592
+ return [
593
+ u_str3[0]
594
+ - u * k * np.exp(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks))
595
+ ]
504
596
 
505
597
  sol3 = fsolve(func3, [1])
506
598
  if sol1[0] * ks / nu <= 5:
@@ -514,7 +606,9 @@ def wind_induced_waves(
 
     def Current_bottom_shear_stress_3(u, k, nu, ks, z, ru):
         def func1(u_str1):
-            return [u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))]
+            return [
+                u_str1[0] - u * k * (1 / np.log(z / (0.11 * nu / u_str1[0])))
+            ]
 
         sol1 = fsolve(func1, [1])
 
@@ -524,7 +618,12 @@ def wind_induced_waves(
         sol2 = fsolve(func2, [1])
 
         def func3(u_str3):
-            return [u_str3[0] - u * k * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))]
+            return [
+                u_str3[0]
+                - u
+                * k
+                * (1 / np.log(z / ((0.11 * nu / u_str3[0]) + 0.0333 * ks)))
+            ]
 
         sol3 = fsolve(func3, [1])
         if sol1[0] * ks / nu <= 5:
@@ -541,22 +640,34 @@ def wind_induced_waves(
     ks = 5.27e-4  # m
     current_stress_3 = np.zeros(n, dtype=object)
     for i in range(n):
-        current_stress_3[i] = Current_bottom_shear_stress_3(0.05, 0.41, nu, ks, LO_Wd[i], ru)
-    Current_ShearStress_df["Current_Stress_3"] = current_stress_3 * 10  # Convert N/m2 to Dyne/cm2
-    Current_ShearStress_df.to_csv(os.path.join(output_dir, current_shear_stress_out), index=False)
+        current_stress_3[i] = Current_bottom_shear_stress_3(
+            0.05, 0.41, nu, ks, LO_Wd[i], ru
+        )
+    Current_ShearStress_df["Current_Stress_3"] = (
+        current_stress_3 * 10
+    )  # Convert N/m2 to Dyne/cm2
+    Current_ShearStress_df.to_csv(
+        os.path.join(output_dir, current_shear_stress_out), index=False
+    )
 
 
-def stg2sto(stg_sto_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
+def stg2sto(
+    stg_sto_data_path: str, v: pd.Series, i: int
+) -> interpolate.interp1d:
     stgsto_data = pd.read_csv(stg_sto_data_path)
     # NOTE: We Can use cubic interpolation instead of linear
     x = stgsto_data["Stage"]
     y = stgsto_data["Storage"]
     if i == 0:
         # return storage given stage
-        return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            x, y, fill_value="extrapolate", kind="linear"
+        )(v)
     else:
         # return stage given storage
-        return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            y, x, fill_value="extrapolate", kind="linear"
+        )(v)
 
 
 def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
@@ -569,10 +680,14 @@ def stg2ar(stgar_data_path: str, v: pd.Series, i: int) -> interpolate.interp1d:
     y = stgar_data["Surf_Area"]
     if i == 0:
         # return surface area given stage
-        return interpolate.interp1d(x, y, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            x, y, fill_value="extrapolate", kind="linear"
+        )(v)
     else:
         # return stage given surface area
-        return interpolate.interp1d(y, x, fill_value="extrapolate", kind="linear")(v)
+        return interpolate.interp1d(
+            y, x, fill_value="extrapolate", kind="linear"
+        )(v)
 
 
 @retry(Exception, tries=3, delay=15, backoff=2)
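
Note: stg2sto and stg2ar are the same lookup in both directions: a 1-D linear interpolation over a rating table, inverted by swapping x and y. A minimal sketch with an inline table (values illustrative, not the Lake Okeechobee curves):

    import pandas as pd
    from scipy import interpolate

    # Illustrative stage-storage table; the real one is read from stg_sto_data_path.
    table = pd.DataFrame(
        {"Stage": [9.0, 10.0, 11.0, 12.0],
         "Storage": [1.0e6, 1.6e6, 2.3e6, 3.1e6]}
    )
    stage_to_storage = interpolate.interp1d(
        table["Stage"], table["Storage"], kind="linear", fill_value="extrapolate"
    )
    storage_to_stage = interpolate.interp1d(
        table["Storage"], table["Stage"], kind="linear", fill_value="extrapolate"
    )
    print(stage_to_storage(10.5), storage_to_stage(2.0e6))
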
@@ -580,20 +695,27 @@ def get_pi(workspace: str) -> None:
     # Weekly data is downloaded from:
     # https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv
     # State:Florida Division:4.South Central
-    df = pd.read_csv("https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv")
+    df = pd.read_csv(
+        "https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/pdi-0804.csv"
+    )
     df.to_csv(os.path.join(workspace, "PI.csv"))
 
 
 def nutrient_prediction(
-    input_dir: str, output_dir: str, station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS, constants: dict = DEFAULT_EXPFUNC_CONSTANTS
+    input_dir: str,
+    output_dir: str,
+    station_ids: dict = DEFAULT_PREDICTION_STATIONS_IDS,
+    constants: dict = DEFAULT_EXPFUNC_CONSTANTS,
 ) -> None:
     for station in station_ids:
         print(f"Predicting nutrient loads for station: {station}.")
         # Construct paths for flow file
-        flow_file_path = ''
+        flow_file_path = ""
         flow_file_path_exists = True
         try:
-            flow_file_path = glob(os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv"))[0]
+            flow_file_path = glob(
+                os.path.join(input_dir, f"{station}*_FLOW_cmd_geoglows.csv")
+            )[0]
         except Exception as e:
             flow_file_path_exists = False
 
@@ -603,7 +725,9 @@ def nutrient_prediction(
             flow = pd.read_csv(flow_file_path)
         else:
             # If it doesn't exist, skip to the next iteration of the loop
-            print(f'Skipping nutrient prediction for station: {station}. Flow file does not exist.')
+            print(
+                f"Skipping nutrient prediction for station: {station}. Flow file does not exist."
+            )
             continue
 
         # Create structures to hold resulting data
@@ -615,6 +739,7 @@ def nutrient_prediction(
             if "ensemble" not in column_name:
                 continue
             import warnings
+
             warnings.filterwarnings("error")
 
             try:
@@ -623,16 +748,22 @@ def nutrient_prediction(
 
                 # Calculate the logarithm of the flow data
-                Q_Log = np.log(flow_column + 1e-8)  # Add a small number to prevent log(0) errors
+                Q_Log = np.log(
+                    flow_column + 1e-8
+                )  # Add a small number to prevent log(0) errors
 
                 # Calculate the predicted TP loads using the logarithm of the flow data
-                TP_Loads_Predicted_Log = constants[station]["a"] * Q_Log ** constants[station]["b"]
+                TP_Loads_Predicted_Log = (
+                    constants[station]["a"] * Q_Log ** constants[station]["b"]
+                )
 
                 # Calculate the predicted TP loads using the exponential of the predicted TP loads logarithm
                 predicted_column = np.exp(TP_Loads_Predicted_Log)
 
                 # Store prediction data in a pandas DataFrame (So we can concat all ensemble data into one dataframe)
-                predicted_column = pd.DataFrame(predicted_column.tolist(), index=flow["date"].copy())
+                predicted_column = pd.DataFrame(
+                    predicted_column.tolist(), index=flow["date"].copy()
+                )
                 predicted_column.columns = [column_name]
 
                 prediction_columns.append(predicted_column)
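
Note: despite the _Log suffix, what is computed per ensemble member is load = exp(a * ln(Q + 1e-8) ** b) with station-specific constants a and b. Flows below 1 cmd make ln(Q) negative, and a negative base with a fractional exponent is invalid, which is presumably why warnings are promoted to errors and failing ensembles are caught. A condensed sketch of the per-column transform (constants illustrative, not the package's DEFAULT_EXPFUNC_CONSTANTS):

    import numpy as np
    import pandas as pd

    a, b = 0.5, 1.2                               # illustrative station constants
    flow_column = pd.Series([10.0, 120.0, 45.0])  # one ensemble member's flow (cmd)
    Q_log = np.log(flow_column + 1e-8)            # guard against log(0)
    predicted_loads = np.exp(a * Q_log**b)        # predicted TP loads
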
@@ -642,30 +773,174 @@ def nutrient_prediction(
 
         # Concat individual ensemble columns together into one pandas DataFrame
         out_dataframe = pd.concat(objs=prediction_columns, axis="columns")
-
-        column_mean = out_dataframe.mean(axis='columns')
-        column_percentile_25 = out_dataframe.quantile(q=0.25, axis='columns')
-        column_percentile_75 = out_dataframe.quantile(q=0.75, axis='columns')
-        column_median = out_dataframe.median(axis='columns')
-        column_std = out_dataframe.std(axis='columns')
-
-        out_dataframe['mean'] = column_mean
-        out_dataframe['percentile_25'] = column_percentile_25
-        out_dataframe['percentile_75'] = column_percentile_75
-        out_dataframe['median'] = column_median
-        out_dataframe['standard_deviation'] = column_std
+
+        column_mean = out_dataframe.mean(axis="columns")
+        column_percentile_25 = out_dataframe.quantile(q=0.25, axis="columns")
+        column_percentile_75 = out_dataframe.quantile(q=0.75, axis="columns")
+        column_median = out_dataframe.median(axis="columns")
+        column_std = out_dataframe.std(axis="columns")
+
+        out_dataframe["mean"] = column_mean
+        out_dataframe["percentile_25"] = column_percentile_25
+        out_dataframe["percentile_75"] = column_percentile_75
+        out_dataframe["median"] = column_median
+        out_dataframe["standard_deviation"] = column_std
 
         # Save the predicted TP loads to a CSV file
-        out_dataframe.to_csv(os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv"))
-
+        out_dataframe.to_csv(
+            os.path.join(output_dir, f"{station}_PHOSPHATE_predicted.csv")
+        )
+
         # Save the predicted TP loads to a CSV file (in input_dir)
         # Output is needed in input_dir by GEOGLOWS_LOONE_DATA_PREP.py and in output_dir for graph visualization in the app
-        out_dataframe.to_csv(os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv"))
+        out_dataframe.to_csv(
+            os.path.join(input_dir, f"{station}_PHOSPHATE_predicted.csv")
+        )
+
+
+def photo_period(
+    workspace: str,
+    phi: float = 26.982052,
+    doy: np.ndarray = np.arange(1, 365),
+    verbose: bool = False,
+):
+    """Generate PhotoPeriod.csv file for the given latitude and days of the year.
+
+    Args:
+        workspace (str): A path to the directory where the file will be generated.
+        phi (float, optional): Latitude of the location. Defaults to 26.982052.
+        doy (np.ndarray, optional): An array holding the days of the year that you want the photo period for. Defaults to np.arange(1,365).
+        verbose (bool, optional): Print results of each computation. Defaults to False.
+    """
+    phi = np.radians(phi)  # Convert to radians
+    light_intensity = 2.206 * 10**-3
+
+    C = np.sin(np.radians(23.44))  # sin of the obliquity of 23.44 degrees.
+    B = -4.76 - 1.03 * np.log(
+        light_intensity
+    )  # Eq. [5]. Angle of the sun below the horizon. Civil twilight is -4.76 degrees.
+
+    # Calculations
+    alpha = np.radians(90 + B)  # Eq. [6]. Value at sunrise and sunset.
+    M = 0.9856 * doy - 3.251  # Eq. [4].
+    lmd = (
+        M
+        + 1.916 * np.sin(np.radians(M))
+        + 0.020 * np.sin(np.radians(2 * M))
+        + 282.565
+    )  # Eq. [3]. Lambda
+    delta = np.arcsin(C * np.sin(np.radians(lmd)))  # Eq. [2].
+
+    # Defining sec(x) = 1/cos(x)
+    P = (
+        2
+        / 15
+        * np.degrees(
+            np.arccos(
+                np.cos(alpha) * (1 / np.cos(phi)) * (1 / np.cos(delta))
+                - np.tan(phi) * np.tan(delta)
+            )
+        )
+    )  # Eq. [1].
+
+    # Print results in order for each computation to match example in paper
+    if verbose:
+        print("Input latitude =", np.degrees(phi))
+        print("[Eq 5] B =", B)
+        print("[Eq 6] alpha =", np.degrees(alpha))
+        print("[Eq 4] M =", M[0])
+        print("[Eq 3] Lambda =", lmd[0])
+        print("[Eq 2] delta=", np.degrees(delta[0]))
+        print("[Eq 1] Daylength =", P[0])
+
+    photo_period_df = pd.DataFrame()
+    photo_period_df["Day"] = doy
+    photo_period_df["Data"] = P
+
+    photo_period_df.to_csv(
+        os.path.join(workspace, "PhotoPeriod.csv"), index=False
+    )
+
+
+def find_last_date_in_csv(workspace: str, file_name: str) -> str:
+    """
+    Gets the most recent date from the last line of a .csv file.
+    Assumes the file is formatted as a .csv file, encoded in UTF-8,
+    and the rows in the file are sorted by date in ascending order.
+
+    Args:
+        workspace (str): The directory where the file is located.
+        file_name (str): The name of the file.
+
+    Returns:
+        str: The most recent date as a string in YYYY-MM-DD format, or None if the file does not exist or the date cannot be found.
+    """
+
+    # Helper Functions
+    def is_valid_date(date_string):
+        try:
+            datetime.datetime.strptime(date_string, "%Y-%m-%d")
+            return True
+        except ValueError:
+            return False
+
+    # Check that file exists
+    file_path = os.path.join(workspace, file_name)
+    if not os.path.exists(file_path):
+        return None
+
+    # Attempt to extract the date of the last line in the file
+    try:
+        with open(file_path, "rb") as file:
+            # Go to the end of the file
+            file.seek(-2, os.SEEK_END)
+
+            # Loop backwards until you find the first newline character
+            while file.read(1) != b"\n":
+                file.seek(-2, os.SEEK_CUR)
+
+            # Read the last line
+            last_line = file.readline().decode()
+
+            # Extract the date from the last line
+            date = None
+
+            for value in last_line.split(","):
+                if is_valid_date(value):
+                    date = value
+                    break
+
+            # Return date
+            return date
+    except OSError as e:
+        print(f"Error reading file {file_name}: {e}")
+        return None
+
+
+def dbhydro_data_is_latest(date_latest: str):
+    """
+    Checks whether the given date is the most recent date possible to get data from dbhydro.
+    Can be used to check whether dbhydro data is up-to-date.
+
+    Args:
+        date_latest (str): The date of the most recent data of the dbhydro data you have
+
+    Returns:
+        bool: True if the date_latest is the most recent date possible to get data from dbhydro, False otherwise
+    """
+    date_latest_object = datetime.datetime.strptime(
+        date_latest, "%Y-%m-%d"
+    ).date()
+    return date_latest_object == (
+        datetime.datetime.now().date() - datetime.timedelta(days=1)
+    )
 
 
 if __name__ == "__main__":
     if sys.argv[1] == "get_dbkeys":
-        get_dbkeys(sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:])
+        get_dbkeys(
+            sys.argv[2].strip("[]").replace(" ", "").split(","), *sys.argv[3:]
+        )
     elif sys.argv[1] == "data_interp":
         interp_args = [x for x in sys.argv[2:]]
         interp_args[0] = interp_args[0].rstrip("/")
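
Note: photo_period is the largest addition in this release; it implements a standard astronomical daylength model (the [Eq n] comments track the numbered equations of the paper referenced in the code). A compact sketch for a single day at the package's default latitude:

    import numpy as np

    phi = np.radians(26.982052)          # package default latitude
    doy = 172                            # day of year (~June solstice)
    B = -4.76 - 1.03 * np.log(2.206e-3)  # Eq. [5]: twilight angle (deg)
    alpha = np.radians(90 + B)           # Eq. [6]
    M = 0.9856 * doy - 3.251             # Eq. [4]
    lmd = (M + 1.916 * np.sin(np.radians(M))
           + 0.020 * np.sin(np.radians(2 * M)) + 282.565)  # Eq. [3]
    delta = np.arcsin(np.sin(np.radians(23.44)) * np.sin(np.radians(lmd)))  # Eq. [2]
    P = 2 / 15 * np.degrees(np.arccos(
        np.cos(alpha) / (np.cos(phi) * np.cos(delta))
        - np.tan(phi) * np.tan(delta)
    ))                                   # Eq. [1]: daylength (hours)
    print(P)  # ~14.0 hours in late June at ~27 N
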
@@ -677,7 +952,9 @@ if __name__ == "__main__":
     elif sys.argv[1] == "kinematic_viscosity":
         kinematic_viscosity(sys.argv[2].rstrip("/"), *sys.argv[3:])
     elif sys.argv[1] == "wind_induced_waves":
-        wind_induced_waves(sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:])
+        wind_induced_waves(
+            sys.argv[2].rstrip("/"), sys.argv[3].rstrip("/"), *sys.argv[4:]
+        )
     elif sys.argv[1] == "get_pi":
         get_pi(sys.argv[2].rstrip("/"))
     elif sys.argv[1] == "nutrient_prediction":