loone-data-prep 1.2.4__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/PKG-INFO +2 -8
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/README.md +0 -6
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py +47 -16
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/LOONE_DATA_PREP.py +0 -1
- loone_data_prep-1.3.1/loone_data_prep/dbhydro_insights.py +195 -0
- loone_data_prep-1.3.1/loone_data_prep/flow_data/S65E_total.py +89 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/forecast_bias_correction.py +1 -1
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_forecast_flows.py +19 -105
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_inflows.py +18 -8
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_outflows.py +16 -7
- loone_data_prep-1.3.1/loone_data_prep/flow_data/hydro.py +126 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/get_Chla_predicted.py +1 -1
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/get_NO_Loads_predicted.py +1 -1
- loone_data_prep-1.3.1/loone_data_prep/forecast_scripts/new_combined_weather_forecast.py +220 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/utils.py +262 -32
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_level_data/get_all.py +52 -44
- loone_data_prep-1.3.1/loone_data_prep/water_level_data/hydro.py +95 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_quality_data/get_inflows.py +69 -27
- loone_data_prep-1.3.1/loone_data_prep/water_quality_data/get_lake_wq.py +226 -0
- loone_data_prep-1.3.1/loone_data_prep/water_quality_data/wq.py +155 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/weather_data/get_all.py +5 -3
- loone_data_prep-1.3.1/loone_data_prep/weather_data/weather.py +217 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/PKG-INFO +2 -8
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/SOURCES.txt +2 -2
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/requires.txt +1 -1
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/pyproject.toml +3 -3
- loone_data_prep-1.2.4/loone_data_prep/flow_data/S65E_total.py +0 -89
- loone_data_prep-1.2.4/loone_data_prep/flow_data/hydro.py +0 -155
- loone_data_prep-1.2.4/loone_data_prep/forecast_scripts/create_forecast_LOWs.py +0 -170
- loone_data_prep-1.2.4/loone_data_prep/forecast_scripts/weather_forecast.py +0 -199
- loone_data_prep-1.2.4/loone_data_prep/water_level_data/hydro.py +0 -114
- loone_data_prep-1.2.4/loone_data_prep/water_quality_data/get_lake_wq.py +0 -129
- loone_data_prep-1.2.4/loone_data_prep/water_quality_data/wq.py +0 -129
- loone_data_prep-1.2.4/loone_data_prep/weather_data/weather.py +0 -280
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/LICENSE +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/__init__.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/data_analyses_fns.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/__init__.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/Chla_merged.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/forecast_stages.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/loone_q_predict.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/loone_wq_predict.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/predict_PI.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/forecast_scripts/trib_cond.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/herbie_utils.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_level_data/__init__.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/water_quality_data/__init__.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/weather_data/__init__.py +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/dependency_links.txt +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep.egg-info/top_level.txt +0 -0
- {loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/setup.cfg +0 -0
{loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: loone_data_prep
-Version: 1.2.4
+Version: 1.3.1
 Summary: Prepare data to run the LOONE model.
 Author-email: Osama Tarabih <osamatarabih@usf.edu>
 Maintainer-email: Michael Souffront <msouffront@aquaveo.com>, James Dolinar <jdolinar@aquaveo.com>
@@ -18,7 +18,6 @@ License: BSD-3-Clause License
 
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: rpy2
 Requires-Dist: retry
 Requires-Dist: numpy<2
 Requires-Dist: pandas
@@ -30,6 +29,7 @@ Requires-Dist: requests_cache
 Requires-Dist: retry-requests
 Requires-Dist: eccodes==2.41.0
 Requires-Dist: xarray==2025.4.0
+Requires-Dist: dbhydro-py
 Dynamic: license-file
 
 LOONE_DATA_PREP
@@ -40,11 +40,6 @@ Prepare data for the LOONE water quality model.
 Line to the LOONE model: [https://pypi.org/project/loone](https://pypi.org/project/loone)
 Link to LOONE model repository: [https://github.com/Aquaveo/LOONE](https://github.com/Aquaveo/LOONE)
 
-## Prerequisites:
-
-* R ([https://www.r-project.org/](https://www.r-project.org/))
-* R packages: dbhydroR, rio, dplyr
-
 ## Installation:
 
 ```bash
@@ -103,7 +98,6 @@ dbkeys = get_dbkeys(
     stat="MEAN",
     recorder="CR10",
     freq="DA",
-    detail_level="dbkey"
 )
 
 # Get water level data
{loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/README.md
RENAMED
@@ -6,11 +6,6 @@ Prepare data for the LOONE water quality model.
 Line to the LOONE model: [https://pypi.org/project/loone](https://pypi.org/project/loone)
 Link to LOONE model repository: [https://github.com/Aquaveo/LOONE](https://github.com/Aquaveo/LOONE)
 
-## Prerequisites:
-
-* R ([https://www.r-project.org/](https://www.r-project.org/))
-* R packages: dbhydroR, rio, dplyr
-
 ## Installation:
 
 ```bash
@@ -69,7 +64,6 @@ dbkeys = get_dbkeys(
     stat="MEAN",
     recorder="CR10",
     freq="DA",
-    detail_level="dbkey"
 )
 
 # Get water level data
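With `detail_level` gone from the example above, `get_dbkeys` is called with only the remaining filter arguments. A minimal sketch of the updated call, assuming the other keyword arguments are unchanged in 1.3.1 (utils.py changed substantially in this release); only `stat`, `recorder`, and `freq` are visible in this hunk, so the station IDs and `param` value below are illustrative placeholders, and `category="SW"` mirrors the call removed from `get_forecast_flows.py` later in this diff:

```python
from loone_data_prep.utils import get_dbkeys

# Illustrative call against the 1.3.1 README example: detail_level is no longer passed.
dbkeys = get_dbkeys(
    station_ids=["L001", "L005"],  # placeholder station IDs
    category="SW",                 # surface-water category, as in the removed helper
    param="STG",                   # placeholder parameter name
    stat="MEAN",
    recorder="CR10",
    freq="DA",
)
```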
{loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/GEOGLOWS_LOONE_DATA_PREP.py
RENAMED
@@ -15,7 +15,7 @@ from loone_data_prep.utils import stg2sto, stg2ar
 import datetime
 
 START_DATE = datetime.datetime.now()
-END_DATE = START_DATE + datetime.timedelta(days=
+END_DATE = START_DATE + datetime.timedelta(days=14)
 
 M3_Yr = 2008
 M3_M = 1
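The corrected constant pins the forecast horizon to 14 days from the moment the module is imported. A trivial sketch of the resulting window:

```python
import datetime

# Mirrors the module-level constants above: a 14-day forecast window anchored at import time.
START_DATE = datetime.datetime.now()
END_DATE = START_DATE + datetime.timedelta(days=14)
print(START_DATE.strftime("%Y-%m-%d"), "->", END_DATE.strftime("%Y-%m-%d"))  # spans 14 days
```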
@@ -373,8 +373,8 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None:  # , hi
     C44RO_df['C44RO_cmd'] = C44RO
     C43RO_df['C43RO'] = C43RO_df['C43RO_cmd']/(0.0283168466 * 86400)
     C44RO_df['C44RO'] = C44RO_df['C44RO_cmd']/(0.0283168466 * 86400)
-    C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv'
-    C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv'
+    C43RO_df.to_csv(f'{output_dir}/C43RO_{ensemble_number}.csv')
+    C44RO_df.to_csv(f'{output_dir}/C44RO_{ensemble_number}.csv')
     C43RO_df.index = pd.to_datetime(C43RO_df["date"])
     C43RO_df = C43RO_df.drop(columns="date")
 
@@ -384,13 +384,13 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None:  # , hi
     C43Mon = C43RO_df.resample('ME').mean()
     C44Mon = C44RO_df.resample('ME').mean()
 
-    C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv'
-    C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv'
+    C43Mon.to_csv(f'{output_dir}/C43RO_Monthly_{ensemble_number}.csv')
+    C44Mon.to_csv(f'{output_dir}/C44RO_Monthly_{ensemble_number}.csv')
     Basin_RO = pd.DataFrame(C44Mon.index, columns=['date'])
     # Basin_RO['SLTRIB'] = SLTRIBMon['SLTRIB_cfs'].values * 1.9835  # cfs to acft
     Basin_RO['C44RO'] = C44Mon['C44RO'].values * 86400
     Basin_RO['C43RO'] = C43Mon['C43RO'].values * 86400
-    Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv'
+    Basin_RO.to_csv(f'{output_dir}/Basin_RO_inputs_{ensemble_number}.csv')
 
     # # Get monthly C43RO and C44RO from historical run
     # shutil.copyfile(os.path.join(historical_files_src, "C43RO_Monthly.csv"), os.path.join(output_dir, 'C43RO_Monthly.csv'))
@@ -461,16 +461,47 @@ def main(input_dir: str, output_dir: str, ensemble_number: str) -> None:  # , hi
     LOWS.to_csv(f"{output_dir}/LOWS_predicted.csv")
 
     # # RFVol acft
-
-    #
-    #
-
-
-    #
-
-
-    #
-    #
+    RF_data = pd.read_csv(f'{input_dir}/LAKE_RAINFALL_DATA_FORECAST.csv')
+    # RF_data_copy = RF_data.copy()
+    # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+    RF_data['date'] = pd.to_datetime(RF_data['date'])
+    # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+    # LO_Stg_Sto_SA_df_copy.index.name = None
+
+
+    # merged_rf_sa = pd.merge(RF_data_copy[['date', 'average_rainfall']],
+    #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+    #                         on='date', how='inner')
+    # I am just using the most recent SA_acres value for all forecast dates since we do not have forecasted surface area
+    RFVol = pd.DataFrame(RF_data['date'], columns=['date'])
+    RFVol['RFVol_acft'] = (RF_data['average_rainfall'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+
+    date_reference = RFVol['date'].iloc[0]
+    date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+    df_insert = pd.DataFrame(data={'date': date_inserts, 'RFVol_acft': [0.0, 0.0]})
+    RFVol = pd.concat([df_insert, RFVol])
+    RFVol.to_csv(f'{output_dir}/RFVol_Forecast.csv', index=False)
+
+    # ETVol acft
+    # Create File (ETVol)
+    # Merge the DataFrames on date to ensure matching rows
+    ET_data = pd.read_csv(f'{input_dir}/LOONE_AVERAGE_ETPI_DATA_FORECAST.csv')
+    # ET_data_copy = ET_data.copy()
+    # LO_Stg_Sto_SA_df_copy = LO_Stg_Sto_SA_df.copy()
+    ET_data['date'] = pd.to_datetime(ET_data['date'])
+    # LO_Stg_Sto_SA_df_copy['date'] = pd.to_datetime(LO_Stg_Sto_SA_df_copy['date'])
+    # merged_et_sa = pd.merge(ET_data_copy[['date', 'average_ETPI']],
+    #                         LO_Stg_Sto_SA_df_copy[['date', 'SA_acres']],
+    #                         on='date', how='inner')
+
+    ETVol = pd.DataFrame(ET_data['date'], columns=['date'])
+    ETVol['ETVol_acft'] = (ET_data['average_ETPI'].values/12) * LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]
+    date_reference = ETVol['date'].iloc[0]
+    date_inserts = [date_reference - datetime.timedelta(days=2), date_reference - datetime.timedelta(days=1)]
+    df_insert = pd.DataFrame(data={'date': date_inserts, 'ETVol_acft': [0.0, 0.0]})
+    ETVol = pd.concat([df_insert, ETVol])
+    ETVol.to_csv(f'{output_dir}/ETVol_forecast.csv', index=False)
+
 
     # # WCA Stages
     # # Create File (WCA_Stages_Inputs)
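The new RFVol/ETVol blocks convert forecast depths to volumes: `average_rainfall` and `average_ETPI` are depths in inches, so dividing by 12 gives feet, and multiplying by the lake surface area in acres gives acre-feet. The most recent observed `SA_acres` value stands in for every forecast date, and two zero-volume days are prepended ahead of the first forecast date. A self-contained sketch of the same arithmetic on made-up numbers (the surface-area value is illustrative):

```python
import datetime
import pandas as pd

sa_acres = 450_000.0  # illustrative; the real code reuses LO_Stg_Sto_SA_df["SA_acres"].iloc[-1]

rf = pd.DataFrame({
    "date": pd.date_range("2025-01-03", periods=3, freq="D"),
    "average_rainfall": [0.10, 0.00, 0.25],  # inches per day
})

# inches / 12 -> feet; feet * acres -> acre-feet
rfvol = pd.DataFrame({"date": rf["date"]})
rfvol["RFVol_acft"] = (rf["average_rainfall"] / 12) * sa_acres

# Prepend two zero-volume days before the first forecast date, as the new code does.
first = rfvol["date"].iloc[0]
pad = pd.DataFrame({
    "date": [first - datetime.timedelta(days=2), first - datetime.timedelta(days=1)],
    "RFVol_acft": [0.0, 0.0],
})
rfvol = pd.concat([pad, rfvol], ignore_index=True)
print(rfvol)
```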
{loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/LOONE_DATA_PREP.py
RENAMED
@@ -351,7 +351,6 @@ def main(input_dir: str, output_dir: str) -> None:
     S65E.index = pd.to_datetime(S65E.index, unit='ns')
     S65E_Weekly = S65E.resample('W-FRI').mean()
     # PI
-    # TODO
     # This is prepared manually
     # Weekly data is downloaded from https://www.ncei.noaa.gov/access/monitoring/weekly-palmers/time-series/0804
     # State:Florida Division:4.South Central
loone_data_prep-1.3.1/loone_data_prep/dbhydro_insights.py
@@ -0,0 +1,195 @@
+"""
+Utilities for interacting with the DBHYDRO Insights database services.
+
+This module provides functions for fetching data from endpoints used
+by the South Florida Water Management District's DBHYDRO Insights app.
+"""
+
+from datetime import datetime
+import requests
+from typing import Literal, Tuple
+
+
+def get_dbhydro_station_metadata(station_id: str) -> dict | None:
+    """
+    Fetches metadata for a specific station from the DBHYDRO_SiteStation service.
+
+    Args:
+        station_id (str): The ID of the station for which to fetch metadata. Examples: 'FISHP', 'L OKEE', etc.
+
+    Returns:
+        dict: A dictionary containing the metadata of the station, or None if the request fails.
+    """
+    # Build the request URL with the provided station ID
+    request_url = 'https://geoweb.sfwmd.gov/agsext2/rest/services/MonitoringLocations/DBHYDRO_SiteStation/MapServer/4/query'
+
+    params = {
+        'f': 'json',
+        'outFields': '*',
+        'spatialRel': 'esriSpatialRelIntersects',
+        'where': f"(STATION = '{station_id}')"
+    }
+
+    # Send the GET request to the specified URL with the parameters
+    try:
+        response = requests.get(request_url, params=params)
+    except requests.exceptions.RequestException:
+        return None
+
+    # Successful Request
+    if response.status_code == 200:
+        # Parse the JSON response
+        json = response.json()
+
+        # No data given back for given station ID
+        if not json['features']:
+            return None
+
+        # Data given back, return the JSON response
+        return json
+
+    # Failure
+    return None
+
+
+def get_dbhydro_continuous_timeseries_metadata(
+    station_ids: list[str],
+    categories: list[str] | None = ['ALL'],
+    parameters: list[str] | None = ['ALL'],
+    statistics: list[str] | None = ['ALL'],
+    recorders: list[str] | None = ['ALL'],
+    frequencies: list[str] | None = ['ALL']
+) -> dict | None:
+    """Fetches metadata for continuous time series data from the DBHYDRO Insights service.
+
+    Args:
+        station_ids (list[str]): List of station IDs to query.
+        categories (list[str] | None): List of categories to filter by. Defaults to ['ALL'].
+        parameters (list[str] | None): List of parameters to filter by. Defaults to ['ALL'].
+        statistics (list[str] | None): List of statistics to filter by. Defaults to ['ALL'].
+        recorders (list[str] | None): List of recorders to filter by. Defaults to ['ALL'].
+        frequencies (list[str] | None): List of frequencies to filter by. Defaults to ['ALL'].
+
+    Returns:
+        dict | None: The JSON response from the API if successful, otherwise None.
+
+    Raises:
+        Exception: If the request fails.
+    """
+    # Build the request URL
+    request_url = 'https://insightsdata.api.sfwmd.gov/v1/insights-data/cont/ts'
+
+    # Build the locations list
+    locations = []
+
+    for station_id in station_ids:
+        # Build the location dictionary for this station_id
+        location = {
+            'name': station_id,
+            'type': 'STATION',
+        }
+
+        # Add location to the locations list
+        locations.append(location)
+
+    # Build the data payload
+    data = {
+        'query': {
+            'locations': locations,
+            'parameters': parameters,
+            'category': categories,
+            'statistic': statistics,
+            'recorder': recorders,
+            'frequency': frequencies,
+            'dbkeys': ['ALL'],
+        }
+    }
+
+    # Send the POST request to the specified URL with the parameters
+    response = requests.post(request_url, json=data)
+
+    # Successful Request
+    if response.status_code == 200:
+        # Parse the JSON response
+        json = response.json()
+
+        # No data given back for given station ID
+        if not json['results']:
+            return None
+
+        # Data given back, return the JSON response
+        return json
+
+    # Failure
+    raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
+
+
+def get_dbhydro_water_quality_metadata(stations: list[Tuple[str,Literal['SITE', 'STATION']]], test_numbers: list[int]) -> dict | None:
+    """Fetches metadata for water quality data from the DBHYDRO Insights service.
+
+    Args:
+        stations (list[Tuple[str, Literal['SITE', 'STATION']]]): List of tuples containing station names and station types ('SITE' or 'STATION') to get water quality metadata for.
+        test_numbers (list[int]): List of test numbers to get data for. Test numbers map to parameters. Example: 25 maps to 'PHOSPHATE, TOTAL AS P'.
+
+    Returns:
+        dict | None: The JSON response from the API if successful, otherwise None.
+
+    Raises:
+        Exception: If the request fails.
+    """
+    # Build the request URL
+    request_url = 'https://insightsdata.api.sfwmd.gov/v1/insights-data/chem/ts'
+
+    # Build the locations list
+    locations = []
+
+    for station in stations:
+        # Build the location dictionary for this station/site
+        location = {
+            'name': station[0],
+            'type': station[1],
+        }
+
+        # Add location to the locations list
+        locations.append(location)
+
+    # Build the query parameters
+    query_parameters = {
+        'offset': 0,
+        'limit': 1000,
+        'sort': 'project,location,parameterDesc,matrix,method',
+        'startDate': '19000101',
+        'endDate': datetime.now().strftime("%Y%m%d"),
+        'period': '',
+    }
+
+    # Build the data payload
+    payload = {
+        'query': {
+            'locations': locations,
+            'matrices': ['ALL'],
+            'methods': ['ALL'],
+            'paramGroups': ['ALL'],
+            'parameters': [str(num) for num in test_numbers],
+            'projects': ['ALL'],
+            'sampleTypes': ['ALL'],
+        }
+    }
+
+    # Send the POST request to the specified URL with the parameters
+    response = requests.post(request_url, params=query_parameters, json=payload)
+
+    # Successful Request
+    if response.status_code == 200:
+        # Parse the JSON response
+        json = response.json()
+
+        # No data given back for given station ID
+        if not json['results']:
+            return None
+
+        # Data given back, return the JSON response
+        return json
+
+    # Failure
+    raise Exception(f"Request failed with status code {response.status_code}: {response.text}")
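For orientation, a hedged usage sketch of the new module based only on the signatures and docstrings above; the station ID 'FISHP' and test number 25 are the docstrings' own examples, and all three calls hit live SFWMD endpoints:

```python
from loone_data_prep import dbhydro_insights

# Station metadata from the DBHYDRO_SiteStation map service; returns None on
# request failure or when no features match the station ID.
site_meta = dbhydro_insights.get_dbhydro_station_metadata("FISHP")

# Continuous time-series metadata; the list filters default to ['ALL'] and a
# non-200 response raises.
ts_meta = dbhydro_insights.get_dbhydro_continuous_timeseries_metadata(["FISHP"])

# Water quality metadata; test number 25 maps to 'PHOSPHATE, TOTAL AS P'
# per the docstring above.
wq_meta = dbhydro_insights.get_dbhydro_water_quality_metadata([("FISHP", "STATION")], [25])
```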
loone_data_prep-1.3.1/loone_data_prep/flow_data/S65E_total.py
@@ -0,0 +1,89 @@
+import sys
+from retry import retry
+import pandas as pd
+from loone_data_prep.utils import df_replace_missing_with_nan, get_dbhydro_api
+
+
+@retry(Exception, tries=5, delay=15, max_delay=60, backoff=2)
+def get(
+    workspace,
+    date_min: str = "1972-01-01",
+    date_max: str = "2023-06-30"
+) -> None:
+    """Retrieve total flow data for S65E structure (S65E_S + S65EX1_S) and save to CSV.
+
+    Args:
+        workspace (str): Path to workspace where data will be downloaded.
+        date_min (str): Minimum date for data retrieval in 'YYYY-MM-DD' format.
+        date_max (str): Maximum date for data retrieval in 'YYYY-MM-DD' format.
+    """
+    # Get a DbHydroApi instance
+    api = get_dbhydro_api()
+
+    # S65E_S
+    s65e_s = api.get_daily_data(['91656'], 'id', date_min, date_max, 'NGVD29', False)
+
+    if not s65e_s.has_data():
+        return
+
+    df_s65e_s = s65e_s.to_dataframe(True)
+    df_s65e_s = df_replace_missing_with_nan(df_s65e_s)  # Replace flagged 0 values and -99999.0 with NaN
+    df_s65e_s.reset_index(inplace=True)  # Reset index so datetime is a column
+    df_s65e_s['value'] = df_s65e_s['value'] * (0.0283168466 * 86400)  # Convert flow from cfs to cmd
+    df_s65e_s = df_s65e_s[['datetime', 'value']].copy()  # Grab only the columns we need
+    df_s65e_s.rename(columns={'datetime': 'date', 'value': f'S65E_S_FLOW_cfs'}, inplace=True)  # Rename columns to expected names
+
+    # S65EX1_S
+    s65ex1_s = api.get_daily_data(['AL760'], 'id', date_min, date_max, 'NGVD29', False)
+
+    if not s65ex1_s.has_data():
+        return
+
+    df_s65ex1_s = s65ex1_s.to_dataframe(True)
+    df_s65ex1_s = df_replace_missing_with_nan(df_s65ex1_s)  # Replace flagged 0 values and -99999.0 with NaN
+    df_s65ex1_s.reset_index(inplace=True)  # Reset index so datetime is a column
+    df_s65ex1_s['value'] = df_s65ex1_s['value'] * (0.0283168466 * 86400)  # Convert flow from cfs to cmd
+    df_s65ex1_s = df_s65ex1_s[['datetime', 'value']].copy()  # Grab only the columns we need
+    df_s65ex1_s.rename(columns={'datetime': 'date', 'value': f'S65EX1_S_FLOW_cfs'}, inplace=True)  # Rename columns to expected names
+
+    # Combine the data from both stations into a single dataframe
+    df = pd.merge(df_s65e_s, df_s65ex1_s, on='date', how='outer', suffixes=('_S65E_S', '_S65EX1_S'))
+
+    # Reformat the data to the expected layout
+    df = _reformat_s65e_total_df(df)
+
+    # Write the data to a file
+    df.to_csv(f"{workspace}/S65E_total.csv")
+
+
+def _reformat_s65e_total_file(workspace: str):
+    # Read in the data
+    df = pd.read_csv(f"{workspace}/S65E_total.csv")
+
+    # Reformat the data
+    df = _reformat_s65e_total_df(df)
+
+    # Write the updated data back to the file
+    df.to_csv(f"{workspace}/S65E_total.csv")
+
+
+def _reformat_s65e_total_df(df: pd.DataFrame) -> pd.DataFrame:
+    # Convert date column to datetime
+    df['date'] = pd.to_datetime(df['date'], format='%d-%b-%Y')
+
+    # Sort the data by date
+    df.sort_values('date', inplace=True)
+
+    # Renumber the index
+    df.reset_index(drop=True, inplace=True)
+
+    # Drop rows that are missing all their values
+    df.dropna(how='all', inplace=True)
+
+    # Return the reformatted dataframe
+    return df
+
+
+if __name__ == "__main__":
+    workspace = sys.argv[1].rstrip("/")
+    get(workspace)
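The new S65E_total.py replaces the rpy2-era version (deleted further down in this release) with one built on the dbhydro-py backed `get_dbhydro_api()` helper. A hedged usage sketch with an illustrative workspace path; note that the values are converted from cfs to cubic metres per day (0.0283168466 m³/ft³ × 86400 s/day ≈ 2446.58) even though the output column names keep the `_cfs` suffix:

```python
from loone_data_prep.flow_data import S65E_total

# Pull daily S65E_S + S65EX1_S flows and write <workspace>/S65E_total.csv.
# The workspace path below is illustrative.
S65E_total.get("/tmp/loone_workspace", date_min="1972-01-01", date_max="2023-06-30")

# The conversion factor applied to each 'value' column above: cfs -> m^3/day.
CFS_TO_CMD = 0.0283168466 * 86400  # ≈ 2446.58
```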
{loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/forecast_bias_correction.py
RENAMED
@@ -38,7 +38,7 @@ def get_bias_corrected_data(
     # Prepare the observed data by filling NaN values with the 10yr average
     prepared_od = prep_observed_data(observed_data)
     historical_data = geoglows.data.retro_daily(reach_id)
-    # Get the historical simulation data for the given reach ID
+    # Get the historical simulation data for the given reach ID
     # I am reading the observed data that we queried earlier instead of caching it
     # historical_data = None
 
{loone_data_prep-1.2.4 → loone_data_prep-1.3.1}/loone_data_prep/flow_data/get_forecast_flows.py
RENAMED
@@ -1,11 +1,8 @@
 import os
 import sys
 import pandas as pd
-import rpy2.robjects as ro
-from rpy2.robjects import pandas2ri
 import geoglows
 import datetime
-from loone_data_prep.utils import get_dbkeys
 from loone_data_prep.flow_data.forecast_bias_correction import (
     get_bias_corrected_data,
 )
@@ -63,51 +60,6 @@ FORECAST_DATE = (datetime.datetime.now()).strftime("%Y%m%d")
 GEOGLOWS_ENDPOINT = "https://geoglows.ecmwf.int/api/"
 
 
-def get_stations_latitude_longitude(station_ids: list[str]):
-    """Gets the latitudes and longitudes of the given stations.
-
-    Args:
-        station_ids (list[str]): The ids of the stations to get the
-            latitudes/longitudes of
-
-    Returns:
-        (dict[str, tuple[numpy.float64, numpy.float64]]): A dictionary of
-            format dict<station_id:(latitude,longitude)>
-
-        If a station's latitude/longitude fails to download then its station_id
-        won't be a key in the returned dictionary.
-    """
-    # The dict that holds the data that gets returned
-    station_data = {}
-
-    # Get the station/dbkey data
-    r_dataframe = get_dbkeys(
-        station_ids=station_ids,
-        category="SW",
-        param="",
-        stat="",
-        recorder="",
-        detail_level="full",
-    )
-
-    # Convert the r dataframe to a pandas dataframe
-    with (ro.default_converter + pandas2ri.converter).context():
-        pd_dataframe = ro.conversion.get_conversion().rpy2py(r_dataframe)
-
-    # Filter out extra rows for each station from the dataframe
-    pd_dataframe.drop_duplicates(subset="Station", keep="first", inplace=True)
-
-    # Get latitude/longitude of each station
-    for index in pd_dataframe.index:
-        station = pd_dataframe["Station"][index]
-        latitude = pd_dataframe["Latitude"][index]
-        longitude = pd_dataframe["Longitude"][index]
-
-        station_data[station] = latitude, longitude
-
-    return station_data
-
-
 def get_reach_id(latitude: float, longitude: float):
     """Gets the reach id for the given latitude/longitude.
 
@@ -273,70 +225,32 @@ def _format_stats_DataFrame(dataframe: pd.core.frame.DataFrame):
     dataframe.index = dataframe.index.normalize()
 
     # Convert m^3/s data to m^3/h
-    dataframe = dataframe
+    dataframe = dataframe * SECONDS_IN_HOUR
 
     # Make negative values 0
     dataframe.clip(0, inplace=True)
 
-
-
-
-
-    #
-
-
-
-
-
-
-
-
-
-
-    # 25th Percentile Column (Average)
-    column_25percentile = dataframe[["flow_25p"]].copy()
-    column_25percentile = column_25percentile.groupby(
-        [column_25percentile.index]
-    ).mean()
-
-    # Min Column (Min)
-    column_min = dataframe[["flow_min"]].copy()
-    column_min = column_min.groupby([column_min.index]).min()
-
-    # Convert values in each column from m^3/h to m^3/d
-    column_max = column_max.transform(lambda x: x * HOURS_IN_DAY)
-    column_75percentile = column_75percentile.transform(
-        lambda x: x * HOURS_IN_DAY
-    )
-    column_average = column_average.transform(lambda x: x * HOURS_IN_DAY)
-    column_25percentile = column_25percentile.transform(
-        lambda x: x * HOURS_IN_DAY
+    grouped = dataframe.groupby(dataframe.index).mean()
+    # Convert from m^3/h → m^3/d
+    grouped = grouped * HOURS_IN_DAY
+
+    # Rename columns
+    grouped = grouped.rename(
+        columns={
+            "flow_max": "flow_max_m^3/d",
+            "flow_75p": "flow_75%_m^3/d",
+            "flow_avg": "flow_avg_m^3/d",
+            "flow_med": "flow_med_m^3/d",
+            "flow_25p": "flow_25%_m^3/d",
+            "flow_min": "flow_min_m^3/d",
+        }
     )
-    column_min = column_min.transform(lambda x: x * HOURS_IN_DAY)
-
-    # Append modified columns into one pandas DataFrame
-    dataframe_result = pd.DataFrame()
-    dataframe_result.index = dataframe.groupby([dataframe.index]).mean().index
-    dataframe_result["flow_max_m^3/d"] = column_max["flow_max"].tolist()
-    dataframe_result["flow_75%_m^3/d"] = column_75percentile[
-        "flow_75p"
-    ].tolist()
-    dataframe_result["flow_avg_m^3/d"] = column_average[
-        "flow_avg"
-    ].tolist()
-    dataframe_result["flow_25%_m^3/d"] = column_25percentile[
-        "flow_25p"
-    ].tolist()
-    dataframe_result["flow_min_m^3/d"] = column_min["flow_min"].tolist()
 
-    # Format
-
+    # Format index as date string and rename
+    grouped.index = grouped.index.strftime("%Y-%m-%d")
+    grouped.index.name = "date"
 
-
-    dataframe_result.rename_axis("date", inplace=True)
-
-    # Return resulting DataFrame
-    return dataframe_result
+    return grouped
 
 
 def main(