loone-data-prep 1.2.4__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/PKG-INFO +2 -8
  2. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/README.md +0 -6
  3. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
  4. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/LOONE_DATA_PREP.py +0 -1
  5. loone_data_prep-1.3.1/loone_data_prep/dbhydro_insights.py +195 -0
  6. loone_data_prep-1.3.1/loone_data_prep/flow_data/S65E_total.py +89 -0
  7. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
  8. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_forecast_flows.py +19 -105
  9. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_inflows.py +18 -8
  10. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_outflows.py +16 -7
  11. loone_data_prep-1.3.1/loone_data_prep/flow_data/hydro.py +126 -0
  12. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
  13. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
  14. loone_data_prep-1.3.1/loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
  15. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/utils.py +262 -32
  16. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_level_data/get_all.py +52 -44
  17. loone_data_prep-1.3.1/loone_data_prep/water_level_data/hydro.py +95 -0
  18. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_quality_data/get_inflows.py +69 -27
  19. loone_data_prep-1.3.1/loone_data_prep/water_quality_data/get_lake_wq.py +226 -0
  20. loone_data_prep-1.3.1/loone_data_prep/water_quality_data/wq.py +155 -0
  21. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/weather_data/get_all.py +5 -3
  22. loone_data_prep-1.3.1/loone_data_prep/weather_data/weather.py +217 -0
  23. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/PKG-INFO +2 -8
  24. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/SOURCES.txt +2 -2
  25. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/requires.txt +1 -1
  26. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/pyproject.toml +3 -3
  27. loone_data_prep-1.2.4/loone_data_prep/flow_data/S65E_total.py +0 -89
  28. loone_data_prep-1.2.4/loone_data_prep/flow_data/hydro.py +0 -155
  29. loone_data_prep-1.2.4/loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
  30. loone_data_prep-1.2.4/loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
  31. loone_data_prep-1.2.4/loone_data_prep/water_level_data/hydro.py +0 -114
  32. loone_data_prep-1.2.4/loone_data_prep/water_quality_data/get_lake_wq.py +0 -129
  33. loone_data_prep-1.2.4/loone_data_prep/water_quality_data/wq.py +0 -129
  34. loone_data_prep-1.2.4/loone_data_prep/weather_data/weather.py +0 -280
  35. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/LICENSE +0 -0
  36. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/__init__.py +0 -0
  37. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/data_analyses_fns.py +0 -0
  38. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/__init__.py +0 -0
  39. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/Chla_merged.py +0 -0
  40. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/forecast_stages.py +0 -0
  41. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/loone_q_predict.py +0 -0
  42. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/loone_wq_predict.py +0 -0
  43. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/predict_PI.py +0 -0
  44. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/trib_cond.py +0 -0
  45. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/herbie_utils.py +0 -0
  46. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_level_data/__init__.py +0 -0
  47. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_quality_data/__init__.py +0 -0
  48. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/weather_data/__init__.py +0 -0
  49. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/dependency_links.txt +0 -0
  50. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/top_level.txt +0 -0
  51. {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loone_data_prep
- Version: 1.2.4
+ Version: 1.3.1
  Summary: Prepare data to run the LOONE model.
  Author-email: Osama Tarabih <osamatarabih@usf.edu>
  Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
@@ -18,7 +18,6 @@ License: BSD-3-Clause License

  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: rpy2
  Requires-Dist: retry
  Requires-Dist: numpy<2
  Requires-Dist: pandas
@@ -30,6 +29,7 @@ Requires-Dist: requests_cache
  Requires-Dist: retry-requests
  Requires-Dist: eccodes==2.41.0
  Requires-Dist: xarray==2025.4.0
+ Requires-Dist: dbhydro-py
  Dynamic: license-file

  LOONE_DATA_PREP
@@ -40,11 +40,6 @@ Prepare data for the LOONE water quality model.
  Line to the LOONE model: [https://pypi.org/project/loone](https://pypi.org/project/loone)
  Link to LOONE model repository: [https://github.com/Aquaveo/LOONE](https://github.com/Aquaveo/LOONE)

- ## Prerequisites:
-
- * R ([https://www.r-project.org/](https://www.r-project.org/))
- * R packages: dbhydroR, rio, dplyr
-
  ## Installation:

  ```bash
@@ -103,7 +98,6 @@ dbkeys = get_dbkeys(
      stat="MEAN",
      recorder="CR10",
      freq="DA",
-     detail_level="dbkey"
  )

  # Get water level data
@@ -6,11 +6,6 @@ Prepare data for the LOONE water quality model.
  Line to the LOONE model: [https://pypi.org/project/loone](https://pypi.org/project/loone)
  Link to LOONE model repository: [https://github.com/Aquaveo/LOONE](https://github.com/Aquaveo/LOONE)

- ## Prerequisites:
-
- * R ([https://www.r-project.org/](https://www.r-project.org/))
- * R packages: dbhydroR, rio, dplyr
-
  ## Installation:

  ```bash
@@ -69,7 +64,6 @@ dbkeys = get_dbkeys(
      stat="MEAN",
      recorder="CR10",
      freq="DA",
-     detail_level="dbkey"
  )

  # Get water level data
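Both README copies drop the `detail_level="dbkey"` argument from the `get_dbkeys` example. A minimal sketch of what the updated call plausibly looks like in 1.3.1 — only `stat`, `recorder`, `freq`, and the removal of `detail_level` are confirmed by this diff; the import path and the other keyword arguments are assumed to carry over from 1.2.4, and the station/parameter values are placeholders:

```python
# Hedged sketch; station_ids and param values below are hypothetical examples.
from loone_data_prep.utils import get_dbkeys  # import path assumed unchanged from 1.2.4

dbkeys = get_dbkeys(
    station_ids=["S65E", "S71"],  # hypothetical stations
    category="SW",                # assumed, as in the 1.2.4 call sites
    param="FLOW",                 # hypothetical parameter
    stat="MEAN",
    recorder="CR10",
    freq="DA",
)
```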
@@ -15,7 +15,7 @@ from loone_data_prep.utils import stg2sto, stg2ar
  import datetime

  START_DATE = datetime.datetime.now()
- END_DATE = START_DATE + datetime.timedelta(days=15)
+ END_DATE = START_DATE + datetime.timedelta(days=14)

  M3_Yr = 2008
  M3_M = 1
@@ -373,8 +373,8 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      C44RO_df['C44RO_cmd'] = C44RO
      C43RO_df['C43RO'] = C43RO_df['C43RO_cmd']/(0.0283168466 * 86400)
      C44RO_df['C44RO'] = C44RO_df['C44RO_cmd']/(0.0283168466 * 86400)
-     C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv', index=False)
-     C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv', index=False)
+     C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv')
+     C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv')
      C43RO_df.index = pd.to_datetime(C43RO_df["date"])
      C43RO_df = C43RO_df.drop(columns="date")

@@ -384,13 +384,13 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      C43Mon = C43RO_df.resample('ME').mean()
      C44Mon = C44RO_df.resample('ME').mean()

-     C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv', index=False)
-     C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv', index=False)
+     C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv')
+     C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv')
      Basin_RO = pd.DataFrame(C44Mon.index, columns=['date'])
      # Basin_RO['SLTRIB'] = SLTRIBMon['SLTRIB_cfs'].values * 1.9835 # cfs to acft
      Basin_RO['C44RO'] = C44Mon['C44RO'].values * 86400
      Basin_RO['C43RO'] = C43Mon['C43RO'].values * 86400
-     Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv', index=False)
+     Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv')

      # # Get monthly C43RO and C44RO from historical run
      # shutil.copyfile(os.path.join(historical_files_src, "C43RO_Monthly.csv"), os.path.join(output_dir, 'C43RO_Monthly.csv'))
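These `to_csv` calls now rely on pandas' default `index=True`, so the DataFrame index is written as an extra leading column in the ensemble CSVs. A small, self-contained illustration of the behavioural difference (the frame below is made up):

```python
# Hedged illustration only; column names and values are placeholders.
import pandas as pd

df = pd.DataFrame({"date": ["2024-01-01", "2024-01-02"], "C43RO_cmd": [100.0, 120.0]})

df.to_csv("C43RO_demo.csv", index=False)  # 1.2.4 behaviour: columns are date, C43RO_cmd
df.to_csv("C43RO_demo.csv")               # 1.3.1 behaviour: an unnamed index column (0, 1, ...) is written first
```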
@@ -461,16 +461,47 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None: # , hi
      LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")

      # # RFVol acft
-     # # Create File (RF_Volume)
-     # RFVol = pd.DataFrame(RF_data["date"], columns=["date"])
-     # RFVol["RFVol_acft"] = (RF_data["average_rainfall"].values / 12) * LO_Stg_Sto_SA_df["SA_acres"].values
-     # RFVol.to_csv(f"{output_dir}/RFVol_LORS_20082023.csv", index=False)
-
-     # # ETVol acft
-     # # Create File (ETVol)
-     # ETVol = pd.DataFrame(ET_data["date"], columns=["date"])
-     # ETVol["ETVol_acft"] = (ET_data["average_ETPI"].values / 12) * LO_Stg_Sto_SA_df["SA_acres"].values
-     # ETVol.to_csv(f"{output_dir}/ETVol_LORS_20082023.csv", index=False)
+     RF_data = pd.read_csv(f'{input_dir}/LAKE_RAINFALL_DATA_FORECAST.csv')
+     # RF_data_copy = RF_data.copy()
+     # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+     RF_data['date'] = pd.to_datetime(RF_data['date'])
+     # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+     # LO_Stg_Sto_SA_df_copy.index.name = None
+
+
+     # merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+     #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+     #                         on='date', how='inner')
+     #I am just using the most recent SA_acres value for all forecast dates since we do not have forecasted surface area
+     RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
+     RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+
+     date_reference = RFVol['date'].iloc[0]
+     date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+     df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
+     RFVol = pd.concat([df_insert, RFVol])
+     RFVol.to_csv(f'{output_dir}/RFVol_Forecast.csv', index=False)
+
+     # ETVol acft
+     # Create File (ETVol)
+     # Merge the DataFrames on date to ensure matching rows
+     ET_data = pd.read_csv(f'{input_dir}/LOONE_AVERAGE_ETPI_DATA_FORECAST.csv')
+     # ET_data_copy = ET_data.copy()
+     # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+     ET_data['date'] = pd.to_datetime(ET_data['date'])
+     # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+     # merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+     #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+     #                         on='date', how='inner')
+
+     ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
+     ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+     date_reference = ETVol['date'].iloc[0]
+     date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+     df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
+     ETVol = pd.concat([df_insert, ETVol])
+     ETVol.to_csv(f'{output_dir}/ETVol_forecast.csv', index=False)
+

      # # WCA Stages
      # # Create File (WCA_Stages_Inputs)
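The new forecast branch converts daily average rainfall and ET/PI from inches to a volume in acre-feet by dividing by 12 (inches to feet) and multiplying by the most recent lake surface area in acres, then prepends two zero-volume days ahead of the first forecast date. A quick worked example of the unit conversion, with made-up numbers:

```python
# Hedged, self-contained illustration of the inches -> acre-feet conversion used above;
# the rainfall and surface-area values are hypothetical.
average_rainfall_in = 0.6        # inches of rain on one forecast day
latest_sa_acres = 450_000.0      # most recent SA_acres value

rf_vol_acft = (average_rainfall_in / 12) * latest_sa_acres
print(rf_vol_acft)               # 0.05 ft * 450,000 acres = 22,500 acre-feet
```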
@@ -351,7 +351,6 @@ def main(input_dir: str, output_dir: str) -> None:
      S65E.index = pd.to_datetime(S65E.index, unit='ns')
      S65E_Weekly = S65E.resample('W-FRI').mean()
      # PI
-     # TODO
      # This is prepared manually
      # Weekly data is downloaded from https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/time-series/0804
      # State:Florida Division:4.South Central
@@ -0,0 +1,195 @@
+ """
+ Utilities for interacting with the DBHYDRO Insights database services.
+
+ This module provides functions for fetching data from endpoints used
+ by the South Florida Water Management District's DBHYDRO Insights app.
+ """
+
+ from datetime import datetime
+ import requests
+ from typing import Literal, Tuple
+
+
+ def get_dbhydro_station_metadata(station_id: str) -> dict | None:
+     """
+     Fetches metadata for a specific station from the DBHYDRO_SiteStation service.
+
+     Args:
+         station_id (str): The ID of the station for which to fetch metadata. Examples: 'FISHP', 'L OKEE', etc.
+
+     Returns:
+         dict: A dictionary containing the metadata of the station, or None if the request fails.
+     """
+     # Build the request URL with the provided station ID
+     request_url = 'https://geoweb.sfwmd.gov/agsext2/rest/services/MonitoringLocations/DBHYDRO_SiteStation/MapServer/4/query'
+
+     params = {
+         'f': 'json',
+         'outFields': '*',
+         'spatialRel': 'esriSpatialRelIntersects',
+         'where': f"(STATION = '{station_id}')"
+     }
+
+     # Send the GET request to the specified URL with the parameters
+     try:
+         response = requests.get(request_url, params=params)
+     except requests.exceptions.RequestException:
+         return None
+
+     # Successful Request
+     if response.status_code == 200:
+         # Parse the JSON response
+         json = response.json()
+
+         # No data given back for given station ID
+         if not json['features']:
+             return None
+
+         # Data given back, return the JSON response
+         return json
+
+     # Failure
+     return None
+
+
+ def get_dbhydro_continuous_timeseries_metadata(
+     station_ids: list[str],
+     categories: list[str] | None = ['ALL'],
+     parameters: list[str] | None = ['ALL'],
+     statistics: list[str] | None = ['ALL'],
+     recorders: list[str] | None = ['ALL'],
+     frequencies: list[str] | None = ['ALL']
+ ) -> dict | None:
+     """Fetches metadata for continuous time series data from the DBHYDRO Insights service.
+
+     Args:
+         station_ids (list[str]): List of station IDs to query.
+         categories (list[str] | None): List of categories to filter by. Defaults to ['ALL'].
+         parameters (list[str] | None): List of parameters to filter by. Defaults to ['ALL'].
+         statistics (list[str] | None): List of statistics to filter by. Defaults to ['ALL'].
+         recorders (list[str] | None): List of recorders to filter by. Defaults to ['ALL'].
+         frequencies (list[str] | None): List of frequencies to filter by. Defaults to ['ALL'].
+
+     Returns:
+         dict | None: The JSON response from the API if successful, otherwise None.
+
+     Raises:
+         Exception: If the request fails.
+     """
+     # Build the request URL
+     request_url = 'https://insightsdata.api.sfwmd.gov/v1/insights-data/cont/ts'
+
+     # Build the locations list
+     locations = []
+
+     for station_id in station_ids:
+         # Build the location dictionary for this station_id
+         location = {
+             'name': station_id,
+             'type': 'STATION',
+         }
+
+         # Add location to the locations list
+         locations.append(location)
+
+     # Build the data payload
+     data = {
+         'query': {
+             'locations': locations,
+             'parameters': parameters,
+             'category': categories,
+             'statistic': statistics,
+             'recorder': recorders,
+             'frequency': frequencies,
+             'dbkeys': ['ALL'],
+         }
+     }
+
+     # Send the POST request to the specified URL with the parameters
+     response = requests.post(request_url, json=data)
+
+     # Successful Request
+     if response.status_code == 200:
+         # Parse the JSON response
+         json = response.json()
+
+         # No data given back for given station ID
+         if not json['results']:
+             return None
+
+         # Data given back, return the JSON response
+         return json
+
+     # Failure
+     raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
+
+
+ def get_dbhydro_water_quality_metadata(stations: list[Tuple[str,Literal['SITE', 'STATION']]], test_numbers: list[int]) -> dict | None:
+     """Fetches metadata for water quality data from the DBHYDRO Insights service.
+
+     Args:
+         stations (list[Tuple[str, Literal['SITE', 'STATION']]]): List of tuples containing station names and station types ('SITE' or 'STATION') to get water quality metadata for.
+         test_numbers (list[int]): List of test numbers to get data for. Test numbers map to parameters. Example: 25 maps to 'PHOSPHATE, TOTAL AS P'.
+
+     Returns:
+         dict | None: The JSON response from the API if successful, otherwise None.
+
+     Raises:
+         Exception: If the request fails.
+     """
+     # Build the request URL
+     request_url = 'https://insightsdata.api.sfwmd.gov/v1/insights-data/chem/ts'
+
+     # Build the locations list
+     locations = []
+
+     for station in stations:
+         # Build the location dictionary for this station/site
+         location = {
+             'name': station[0],
+             'type': station[1],
+         }
+
+         # Add location to the locations list
+         locations.append(location)
+
+     # Build the query parameters
+     query_parameters = {
+         'offset': 0,
+         'limit': 1000,
+         'sort': 'project,location,parameterDesc,matrix,method',
+         'startDate': '19000101',
+         'endDate': datetime.now().strftime("%Y%m%d"),
+         'period': '',
+     }
+
+     # Build the data payload
+     payload = {
+         'query': {
+             'locations': locations,
+             'matrices': ['ALL'],
+             'methods': ['ALL'],
+             'paramGroups': ['ALL'],
+             'parameters': [str(num) for num in test_numbers],
+             'projects': ['ALL'],
+             'sampleTypes': ['ALL'],
+         }
+     }
+
+     # Send the POST request to the specified URL with the parameters
+     response = requests.post(request_url, params=query_parameters, json=payload)
+
+     # Successful Request
+     if response.status_code == 200:
+         # Parse the JSON response
+         json = response.json()
+
+         # No data given back for given station ID
+         if not json['results']:
+             return None
+
+         # Data given back, return the JSON response
+         return json
+
+     # Failure
+     raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
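A brief usage sketch for the new `dbhydro_insights` module. The function names and signatures come from the code above; the example station IDs and test number are taken from its docstrings, and the key access on the returned JSON is an assumption about the usual ArcGIS/Insights response layout:

```python
# Hedged usage sketch; 'FISHP' and 'L OKEE' are the docstring examples, and test
# number 25 ('PHOSPHATE, TOTAL AS P') comes from the water-quality docstring.
from loone_data_prep.dbhydro_insights import (
    get_dbhydro_station_metadata,
    get_dbhydro_continuous_timeseries_metadata,
    get_dbhydro_water_quality_metadata,
)

station_meta = get_dbhydro_station_metadata('FISHP')
if station_meta is not None:
    # ArcGIS-style responses normally carry attributes under 'features' (assumption)
    print(station_meta['features'][0].get('attributes'))

ts_meta = get_dbhydro_continuous_timeseries_metadata(['FISHP', 'L OKEE'])
wq_meta = get_dbhydro_water_quality_metadata([('L OKEE', 'STATION')], test_numbers=[25])
```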
@@ -0,0 +1,89 @@
+ import sys
+ from retry import retry
+ import pandas as pd
+ from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
+
+
+ @retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+ def get(
+     workspace,
+     date_min: str = "1972-01-01",
+     date_max: str = "2023-06-30"
+ ) -> None:
+     """Retrieve total flow data for S65E structure (S65E_S + S65EX1_S) and save to CSV.
+
+     Args:
+         workspace (str): Path to workspace where data will be downloaded.
+         date_min (str): Minimum date for data retrieval in 'YYYY-MM-DD' format.
+         date_max (str): Maximum date for data retrieval in 'YYYY-MM-DD' format.
+     """
+     # Get a DbHydroApi instance
+     api = get_dbhydro_api()
+
+     # S65E_S
+     s65e_s = api.get_daily_data(['91656'], 'id', date_min, date_max, 'NGVD29', False)
+
+     if not s65e_s.has_data():
+         return
+
+     df_s65e_s = s65e_s.to_dataframe(True)
+     df_s65e_s = df_replace_missing_with_nan(df_s65e_s)  # Replace flagged 0 values and -99999.0 with NaN
+     df_s65e_s.reset_index(inplace=True)  # Reset index so datetime is a column
+     df_s65e_s['value'] = df_s65e_s['value'] * (0.0283168466 * 86400)  # Convert flow from cfs to cmd
+     df_s65e_s = df_s65e_s[['datetime', 'value']].copy()  # Grab only the columns we need
+     df_s65e_s.rename(columns={'datetime': 'date', 'value': f'S65E_S_FLOW_cfs'}, inplace=True)  # Rename columns to expected names
+
+     # S65EX1_S
+     s65ex1_s = api.get_daily_data(['AL760'], 'id', date_min, date_max, 'NGVD29', False)
+
+     if not s65ex1_s.has_data():
+         return
+
+     df_s65ex1_s = s65ex1_s.to_dataframe(True)
+     df_s65ex1_s = df_replace_missing_with_nan(df_s65ex1_s)  # Replace flagged 0 values and -99999.0 with NaN
+     df_s65ex1_s.reset_index(inplace=True)  # Reset index so datetime is a column
+     df_s65ex1_s['value'] = df_s65ex1_s['value'] * (0.0283168466 * 86400)  # Convert flow from cfs to cmd
+     df_s65ex1_s = df_s65ex1_s[['datetime', 'value']].copy()  # Grab only the columns we need
+     df_s65ex1_s.rename(columns={'datetime': 'date', 'value': f'S65EX1_S_FLOW_cfs'}, inplace=True)  # Rename columns to expected names
+
+     # Combine the data from both stations into a single dataframe
+     df = pd.merge(df_s65e_s, df_s65ex1_s, on='date', how='outer', suffixes=('_S65E_S', '_S65EX1_S'))
+
+     # Reformat the data to the expected layout
+     df = _reformat_s65e_total_df(df)
+
+     # Write the data to a file
+     df.to_csv(f"{workspace}/S65E_total.csv")
+
+
+ def _reformat_s65e_total_file(workspace: str):
+     # Read in the data
+     df = pd.read_csv(f"{workspace}/S65E_total.csv")
+
+     # Reformat the data
+     df = _reformat_s65e_total_df(df)
+
+     # Write the updated data back to the file
+     df.to_csv(f"{workspace}/S65E_total.csv")
+
+
+ def _reformat_s65e_total_df(df: pd.DataFrame) -> pd.DataFrame:
+     # Convert date column to datetime
+     df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+
+     # Sort the data by date
+     df.sort_values('date', inplace=True)
+
+     # Renumber the index
+     df.reset_index(drop=True, inplace=True)
+
+     # Drop rows that are missing all their values
+     df.dropna(how='all', inplace=True)
+
+     # Return the reformatted dataframe
+     return df
+
+
+ if __name__ == "__main__":
+     workspace = sys.argv[1].rstrip("/")
+     get(workspace)
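A short usage sketch for the re-added `S65E_total` helper; the workspace path is a placeholder, and the module assumes a working DBHYDRO API client via `get_dbhydro_api()`:

```python
# Hedged usage sketch; '/data/loone_workspace' is a placeholder path.
from loone_data_prep.flow_data import S65E_total

# Writes <workspace>/S65E_total.csv combining daily S65E_S and S65EX1_S flows
S65E_total.get("/data/loone_workspace", date_min="2000-01-01", date_max="2023-06-30")
```

The module can also be run directly, mirroring the `__main__` block above: `python -m loone_data_prep.flow_data.S65E_total /data/loone_workspace`.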
@@ -38,7 +38,7 @@ def get_bias_corrected_data(
      # Prepare the observed data by filling NaN values with the 10yr average
      prepared_od = prep_observed_data(observed_data)
      historical_data = geoglows.data.retro_daily(reach_id)
-     # Get the historical simulation data for the given reach ID - TODO: Do we for sure want to cache the historical data?
+     # Get the historical simulation data for the given reach ID
      # I am reading the observed data that we queried earlier instead of caching it
      # historical_data = None

@@ -1,11 +1,8 @@
  import os
  import sys
  import pandas as pd
- import rpy2.robjects as ro
- from rpy2.robjects import pandas2ri
  import geoglows
  import datetime
- from loone_data_prep.utils import get_dbkeys
  from loone_data_prep.flow_data.forecast_bias_correction import (
      get_bias_corrected_data,
  )
@@ -63,51 +60,6 @@ FORECAST_DATE = (datetime.datetime.now()).strftime("%Y%m%d")
  GEOGLOWS_ENDPOINT = "https://geoglows.ecmwf.int/api/"


- def get_stations_latitude_longitude(station_ids: list[str]):
-     """Gets the latitudes and longitudes of the given stations.
-
-     Args:
-         station_ids (list[str]): The ids of the stations to get the
-             latitudes/longitudes of
-
-     Returns:
-         (dict[str, tuple[numpy.float64, numpy.float64]]): A dictionary of
-             format dict<station_id:(latitude,longitude)>
-
-         If a station's latitude/longitude fails to download then its station_id
-         won't be a key in the returned dictionary.
-     """
-     # The dict that holds the data that gets returned
-     station_data = {}
-
-     # Get the station/dbkey data
-     r_dataframe = get_dbkeys(
-         station_ids=station_ids,
-         category="SW",
-         param="",
-         stat="",
-         recorder="",
-         detail_level="full",
-     )
-
-     # Convert the r dataframe to a pandas dataframe
-     with (ro.default_converter + pandas2ri.converter).context():
-         pd_dataframe = ro.conversion.get_conversion().rpy2py(r_dataframe)
-
-     # Filter out extra rows for each station from the dataframe
-     pd_dataframe.drop_duplicates(subset="Station", keep="first", inplace=True)
-
-     # Get latitude/longitude of each station
-     for index in pd_dataframe.index:
-         station = pd_dataframe["Station"][index]
-         latitude = pd_dataframe["Latitude"][index]
-         longitude = pd_dataframe["Longitude"][index]
-
-         station_data[station] = latitude, longitude
-
-     return station_data
-
-

  def get_reach_id(latitude: float, longitude: float):
      """Gets the reach id for the given latitude/longitude.
@@ -273,70 +225,32 @@ def _format_stats_DataFrame(dataframe: pd.core.frame.DataFrame):
      dataframe.index = dataframe.index.normalize()

      # Convert m^3/s data to m^3/h
-     dataframe = dataframe.transform(lambda x: x * SECONDS_IN_HOUR)
+     dataframe = dataframe * SECONDS_IN_HOUR

      # Make negative values 0
      dataframe.clip(0, inplace=True)

-     # Max Column (Max)
-     column_max = dataframe[["flow_max"]].copy()
-     column_max = column_max.groupby([column_max.index]).max()
-
-     # 75th Percentile Column (Average)
-     column_75percentile = dataframe[["flow_75p"]].copy()
-     column_75percentile = column_75percentile.groupby(
-         [column_75percentile.index]
-     ).mean()
-
-     # Average Column (Weighted Average)
-     column_average = dataframe[["flow_avg"]].copy()
-     column_average.transform(lambda x: x / 8)
-     column_average = column_average.groupby([column_average.index]).sum()
-
-     # 25th Percentile Column (Average)
-     column_25percentile = dataframe[["flow_25p"]].copy()
-     column_25percentile = column_25percentile.groupby(
-         [column_25percentile.index]
-     ).mean()
-
-     # Min Column (Min)
-     column_min = dataframe[["flow_min"]].copy()
-     column_min = column_min.groupby([column_min.index]).min()
-
-     # Convert values in each column from m^3/h to m^3/d
-     column_max = column_max.transform(lambda x: x * HOURS_IN_DAY)
-     column_75percentile = column_75percentile.transform(
-         lambda x: x * HOURS_IN_DAY
-     )
-     column_average = column_average.transform(lambda x: x * HOURS_IN_DAY)
-     column_25percentile = column_25percentile.transform(
-         lambda x: x * HOURS_IN_DAY
+     grouped = dataframe.groupby(dataframe.index).mean()
+     # Convert from m^3/h → m^3/d
+     grouped = grouped * HOURS_IN_DAY
+
+     # Rename columns
+     grouped = grouped.rename(
+         columns={
+             "flow_max": "flow_max_m^3/d",
+             "flow_75p": "flow_75%_m^3/d",
+             "flow_avg": "flow_avg_m^3/d",
+             "flow_med": "flow_med_m^3/d",
+             "flow_25p": "flow_25%_m^3/d",
+             "flow_min": "flow_min_m^3/d",
+         }
      )
-     column_min = column_min.transform(lambda x: x * HOURS_IN_DAY)
-
-     # Append modified columns into one pandas DataFrame
-     dataframe_result = pd.DataFrame()
-     dataframe_result.index = dataframe.groupby([dataframe.index]).mean().index
-     dataframe_result["flow_max_m^3/d"] = column_max["flow_max"].tolist()
-     dataframe_result["flow_75%_m^3/d"] = column_75percentile[
-         "flow_75p"
-     ].tolist()
-     dataframe_result["flow_avg_m^3/d"] = column_average[
-         "flow_avg"
-     ].tolist()
-     dataframe_result["flow_25%_m^3/d"] = column_25percentile[
-         "flow_25p"
-     ].tolist()
-     dataframe_result["flow_min_m^3/d"] = column_min["flow_min"].tolist()

-     # Format datetimes to just dates
-     dataframe_result.index = dataframe_result.index.strftime("%Y-%m-%d")
+     # Format index as date string and rename
+     grouped.index = grouped.index.strftime("%Y-%m-%d")
+     grouped.index.name = "date"

-     # Rename index from datetimes to date
-     dataframe_result.rename_axis("date", inplace=True)
-
-     # Return resulting DataFrame
-     return dataframe_result
+     return grouped


  def main(
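The refactored `_format_stats_DataFrame` collapses the per-column groupby blocks of 1.2.4 into a single daily mean over the 3-hourly forecast statistics, converts to m³/d, and renames the columns. Note this also changes the aggregation: 1.2.4 took the daily max of `flow_max`, the daily min of `flow_min`, and a weighted sum of `flow_avg`, whereas 1.3.1 takes the daily mean of every column. A minimal, self-contained sketch of the new aggregation pattern, with made-up values (the real function runs on the GEOGLOWS forecast stats frame):

```python
# Hedged sketch; column set and numbers below are illustrative only.
import pandas as pd

SECONDS_IN_HOUR = 3600
HOURS_IN_DAY = 24

idx = pd.to_datetime(["2024-06-01 00:00", "2024-06-01 03:00", "2024-06-02 00:00"])
df = pd.DataFrame({"flow_avg": [10.0, 14.0, 8.0]}, index=idx)   # m^3/s, 3-hourly values

df = df * SECONDS_IN_HOUR                                        # m^3/s -> m^3/h
df = df.clip(lower=0)                                            # floor negative flows at 0
daily = df.groupby(df.index.normalize()).mean() * HOURS_IN_DAY   # daily mean -> m^3/d
daily = daily.rename(columns={"flow_avg": "flow_avg_m^3/d"})
daily.index = daily.index.strftime("%Y-%m-%d")
daily.index.name = "date"
print(daily)
```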