ngiab-data-preprocess 4.2.1__tar.gz → 4.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/.gitignore +2 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/PKG-INFO +15 -13
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/README.md +13 -9
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/create_realization.py +42 -50
- ngiab_data_preprocess-4.3.0/modules/data_processing/dask_utils.py +92 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/dataset_utils.py +127 -44
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/datasets.py +18 -29
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/file_paths.py +7 -7
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/forcings.py +102 -102
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/gpkg_utils.py +18 -18
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/graph_utils.py +4 -4
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/s3fs_utils.py +1 -1
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/subset.py +1 -2
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/source_validation.py +57 -32
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/__main__.py +3 -2
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/css/main.css +33 -10
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/css/toggle.css +8 -5
- ngiab_data_preprocess-4.3.0/modules/map_app/static/js/main.js +280 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/templates/index.html +31 -9
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/views.py +8 -8
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_cli/__main__.py +31 -28
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_cli/arguments.py +0 -1
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_cli/forcing_cli.py +10 -19
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +15 -13
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +2 -4
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_preprocess.egg-info/requires.txt +1 -3
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/pyproject.toml +49 -6
- ngiab_data_preprocess-4.3.0/tests/test_nan_impute.py +200 -0
- ngiab_data_preprocess-4.2.1/map.html +0 -98
- ngiab_data_preprocess-4.2.1/modules/map_app/static/js/main.js +0 -138
- ngiab_data_preprocess-4.2.1/modules/map_app/static/resources/dark-style.json +0 -11068
- ngiab_data_preprocess-4.2.1/modules/map_app/static/resources/light-style.json +0 -11068
- ngiab_data_preprocess-4.2.1/output/.gitkeep +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/.github/workflows/build_only.yml +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/.github/workflows/publish.yml +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/LICENSE +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/cfe-nowpm-realization-template.json +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/cfe-template.ini +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/em-catchment-template.yml +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/em-config.yml +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/em-realization-template.json +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/forcing_template.nc +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/ngen-routing-template.yaml +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/noah-owp-modular-init.namelist.input +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/template.sql +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_sources/triggers.sql +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/__init__.py +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/css/console.css +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/js/console.js +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/js/data_processing.js +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/resources/loading.gif +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/map_app/static/resources/screenshot.jpg +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_cli/custom_logging.py +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
- {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/setup.cfg +0 -0
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.2.1
+Version: 4.3.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -15,7 +15,7 @@ Requires-Dist: pyogrio>=0.7.2
 Requires-Dist: pyproj>=3.6.1
 Requires-Dist: Flask==3.0.2
 Requires-Dist: geopandas>=1.0.0
-Requires-Dist: requests==2.32.
+Requires-Dist: requests==2.32.4
 Requires-Dist: igraph==0.11.4
 Requires-Dist: s3fs==2024.3.1
 Requires-Dist: xarray==2024.2.0
@@ -23,8 +23,6 @@ Requires-Dist: zarr==2.17.1
 Requires-Dist: netCDF4>=1.6.5
 Requires-Dist: dask==2024.4.1
 Requires-Dist: dask[distributed]==2024.4.1
-Requires-Dist: black==24.3.0
-Requires-Dist: isort==5.13.2
 Requires-Dist: h5netcdf==1.3.0
 Requires-Dist: exactextract==0.2.0
 Requires-Dist: numpy>=1.26.4
@@ -49,15 +47,19 @@ This repository contains tools for preparing data to run a [next gen](https://gi
 ## Table of Contents
 
 1. [What does this tool do?](#what-does-this-tool-do)
-2. [
-
-
-
-
+2. [What does it not do?](#what-does-it-not-do)
+   - [Evaluation](#evaluation)
+   - [Visualisation](#visualisation)
+3. [Requirements](#requirements)
+4. [Installation and Running](#installation-and-running)
+   - [Running without install](#running-without-install)
+5. [For legacy pip installation](#for-legacy-pip-installation)
+6. [Development Installation](#development-installation)
+7. [Usage](#usage)
+8. [CLI Documentation](#cli-documentation)
    - [Arguments](#arguments)
+   - [Usage Notes](#usage-notes)
    - [Examples](#examples)
-   - [File Formats](#file-formats)
-   - [Output](#output)
 
 ## What does this tool do?
@@ -231,12 +233,12 @@ Once all the steps are finished, you can run NGIAB on the folder shown underneat
 
 3. Create realization using a lat/lon pair and output to a named folder:
    ```bash
-   python -m ngiab_data_cli -i
+   python -m ngiab_data_cli -i 33.22,-87.54 -l -r --start 2022-01-01 --end 2022-02-28 -o custom_output
    ```
 
 4. Perform all operations using a lat/lon pair:
    ```bash
-   python -m ngiab_data_cli -i
+   python -m ngiab_data_cli -i 33.22,-87.54 -l -s -f -r --start 2022-01-01 --end 2022-02-28
    ```
 
 5. Subset hydrofabric using gage ID:
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/README.md

@@ -7,15 +7,19 @@ This repository contains tools for preparing data to run a [next gen](https://gi
 ## Table of Contents
 
 1. [What does this tool do?](#what-does-this-tool-do)
-2. [
-
-
-
-
+2. [What does it not do?](#what-does-it-not-do)
+   - [Evaluation](#evaluation)
+   - [Visualisation](#visualisation)
+3. [Requirements](#requirements)
+4. [Installation and Running](#installation-and-running)
+   - [Running without install](#running-without-install)
+5. [For legacy pip installation](#for-legacy-pip-installation)
+6. [Development Installation](#development-installation)
+7. [Usage](#usage)
+8. [CLI Documentation](#cli-documentation)
    - [Arguments](#arguments)
+   - [Usage Notes](#usage-notes)
    - [Examples](#examples)
-   - [File Formats](#file-formats)
-   - [Output](#output)
 
 ## What does this tool do?
@@ -189,12 +193,12 @@ Once all the steps are finished, you can run NGIAB on the folder shown underneat
 
 3. Create realization using a lat/lon pair and output to a named folder:
    ```bash
-   python -m ngiab_data_cli -i
+   python -m ngiab_data_cli -i 33.22,-87.54 -l -r --start 2022-01-01 --end 2022-02-28 -o custom_output
    ```
 
 4. Perform all operations using a lat/lon pair:
    ```bash
-   python -m ngiab_data_cli -i
+   python -m ngiab_data_cli -i 33.22,-87.54 -l -s -f -r --start 2022-01-01 --end 2022-02-28
    ```
 
 5. Subset hydrofabric using gage ID:
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.3.0}/modules/data_processing/create_realization.py

@@ -1,30 +1,32 @@
 import json
+import logging
 import multiprocessing
+import shutil
 import sqlite3
 from datetime import datetime
 from pathlib import Path
-import
-import requests
+from typing import Dict, Optional
 
 import pandas
+import requests
 import s3fs
 import xarray as xr
-import
-from collections import defaultdict
-from dask.distributed import Client, LocalCluster
+from data_processing.dask_utils import temp_cluster
 from data_processing.file_paths import file_paths
 from data_processing.gpkg_utils import (
     GeoPackage,
+    get_cat_to_nex_flowpairs,
     get_cat_to_nhd_feature_id,
     get_table_crs_short,
-    get_cat_to_nex_flowpairs,
 )
-from tqdm.rich import tqdm
 from pyproj import Transformer
+from tqdm.rich import tqdm
 
 logger = logging.getLogger(__name__)
 
-def get_approximate_gw_storage(paths: file_paths, start_date: datetime):
+
+@temp_cluster
+def get_approximate_gw_storage(paths: file_paths, start_date: datetime) -> Dict[str, int]:
     # get the gw levels from the NWM output on a given start date
     # this kind of works in place of warmstates for now
     year = start_date.strftime("%Y")
@@ -34,17 +36,10 @@ def get_approximate_gw_storage(paths: file_paths, start_date: datetime):
     fs = s3fs.S3FileSystem(anon=True)
     nc_url = f"s3://noaa-nwm-retrospective-3-0-pds/CONUS/netcdf/GWOUT/{year}/{formatted_dt}.GWOUT_DOMAIN1"
 
-    # make sure there's a dask cluster running
-    try:
-        client = Client.current()
-    except ValueError:
-        cluster = LocalCluster()
-        client = Client(cluster)
-
     with fs.open(nc_url) as file_obj:
-        ds = xr.open_dataset(file_obj)
+        ds = xr.open_dataset(file_obj)  # type: ignore
 
-    water_levels = dict()
+    water_levels: Dict[str, int] = dict()
     for cat, feature in tqdm(cat_to_feature.items()):
         # this value is in CM, we need meters to match max_gw_depth
         # xarray says it's in mm, with 0.1 scale factor. calling .values doesn't apply the scale
@@ -78,7 +73,9 @@ def make_cfe_config(
         slope=row["mean.slope_1km"],
         smcmax=row["mean.smcmax_soil_layers_stag=2"],
         smcwlt=row["mean.smcwlt_soil_layers_stag=2"],
-        max_gw_storage=row["mean.Zmax"]/1000
+        max_gw_storage=row["mean.Zmax"] / 1000
+        if row["mean.Zmax"] is not None
+        else "0.011[m]",  # mean.Zmax is in mm!
         gw_Coeff=row["mean.Coeff"] if row["mean.Coeff"] is not None else "0.0018[m h-1]",
         gw_Expon=row["mode.Expon"],
         gw_storage="{:.5}".format(gw_storage_ratio),
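The `max_gw_storage` change above guards against a missing `mean.Zmax` attribute: the raw value is in millimetres, so it is divided by 1000 to get the metres CFE expects, and the literal default `"0.011[m]"` is used when the attribute is `None`. A minimal sketch of that logic in isolation (the helper name and sample values are illustrative, not part of the package):

```python
from typing import Optional, Union


def max_gw_storage(zmax_mm: Optional[float]) -> Union[float, str]:
    # mean.Zmax is stored in mm; CFE's max_gw_storage is expressed in metres
    return zmax_mm / 1000 if zmax_mm is not None else "0.011[m]"


assert max_gw_storage(11.0) == 0.011       # 11 mm -> 0.011 m
assert max_gw_storage(None) == "0.011[m]"  # missing attribute -> CFE default
```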
@@ -92,7 +89,6 @@ def make_cfe_config(
 def make_noahowp_config(
     base_dir: Path, divide_conf_df: pandas.DataFrame, start_time: datetime, end_time: datetime
 ) -> None:
-
     divide_conf_df.set_index("divide_id", inplace=True)
     start_datetime = start_time.strftime("%Y%m%d%H%M")
     end_datetime = end_time.strftime("%Y%m%d%H%M")
@@ -110,15 +106,15 @@ def make_noahowp_config(
                 end_datetime=end_datetime,
                 lat=divide_conf_df.loc[divide, "latitude"],
                 lon=divide_conf_df.loc[divide, "longitude"],
-                terrain_slope=
-                azimuth=
-                ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),
-                IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),
+                terrain_slope=divide_conf_df.loc[divide, "mean.slope_1km"],
+                azimuth=divide_conf_df.loc[divide, "circ_mean.aspect"],
+                ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),  # type: ignore
+                IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),  # type: ignore
             )
         )
 
 
-def get_model_attributes_modspatialite(hydrofabric: Path):
+def get_model_attributes_modspatialite(hydrofabric: Path) -> pandas.DataFrame:
     # modspatialite is faster than pyproj but can't be added as a pip dependency
     # This incantation took a while
     with GeoPackage(hydrofabric) as conn:
@@ -149,7 +145,7 @@ def get_model_attributes_modspatialite(hydrofabric: Path):
     return divide_conf_df
 
 
-def get_model_attributes_pyproj(hydrofabric: Path):
+def get_model_attributes_pyproj(hydrofabric: Path) -> pandas.DataFrame:
     # if modspatialite is not available, use pyproj
     with sqlite3.connect(hydrofabric) as conn:
         sql = """
@@ -182,7 +178,8 @@ def get_model_attributes_pyproj(hydrofabric: Path):
 
     return divide_conf_df
 
-def get_model_attributes(hydrofabric: Path):
+
+def get_model_attributes(hydrofabric: Path) -> pandas.DataFrame:
     try:
         with GeoPackage(hydrofabric) as conn:
             conf_df = pandas.read_sql_query(
@@ -205,30 +202,31 @@ def get_model_attributes(hydrofabric: Path):
             )
     except sqlite3.OperationalError:
         with sqlite3.connect(hydrofabric) as conn:
-            conf_df = pandas.read_sql_query(
+            conf_df = pandas.read_sql_query(
+                "SELECT* FROM 'divide-attributes';",
+                conn,
+            )
     source_crs = get_table_crs_short(hydrofabric, "divides")
     transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
-    lon, lat = transformer.transform(
-        conf_df["centroid_x"].values, conf_df["centroid_y"].values
-    )
+    lon, lat = transformer.transform(conf_df["centroid_x"].values, conf_df["centroid_y"].values)
     conf_df["longitude"] = lon
     conf_df["latitude"] = lat
 
     conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
     return conf_df
 
+
 def make_em_config(
     hydrofabric: Path,
     output_dir: Path,
     template_path: Path = file_paths.template_em_config,
 ):
-
     # test if modspatialite is available
     try:
         divide_conf_df = get_model_attributes_modspatialite(hydrofabric)
     except Exception as e:
         logger.warning(f"mod_spatialite not available, using pyproj instead: {e}")
-        logger.warning(
+        logger.warning("Install mod_spatialite for improved performance")
         divide_conf_df = get_model_attributes_pyproj(hydrofabric)
 
     cat_config_dir = output_dir / "cat_config" / "empirical_model"
@@ -255,8 +253,7 @@ def make_em_config(
 
 def configure_troute(
     cat_id: str, config_dir: Path, start_time: datetime, end_time: datetime
-) ->
-
+) -> None:
     with open(file_paths.template_troute_config, "r") as file:
         troute_template = file.read()
     time_step_size = 300
@@ -269,7 +266,7 @@ def configure_troute(
         geo_file_path=f"./config/{cat_id}_subset.gpkg",
         start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
         nts=nts,
-        max_loop_size=nts,
+        max_loop_size=nts,
     )
 
     with open(config_dir / "troute.yaml", "w") as file:
@@ -301,9 +298,7 @@ def create_em_realization(cat_id: str, start_time: datetime, end_time: datetime)
         f.write(em_config)
 
     configure_troute(cat_id, paths.config_dir, start_time, end_time)
-    make_ngen_realization_json(
-        paths.config_dir, template_path, start_time, end_time
-    )
+    make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)
     make_em_config(paths.geopackage_path, paths.config_dir)
     # create some partitions for parallelization
     paths.setup_run_folders()
@@ -315,7 +310,7 @@ def create_realization(
     start_time: datetime,
     end_time: datetime,
     use_nwm_gw: bool = False,
-    gage_id: str = None,
+    gage_id: Optional[str] = None,
 ):
     paths = file_paths(cat_id)
 
@@ -324,15 +319,14 @@ def create_realization(
     if gage_id is not None:
         # try and download s3:communityhydrofabric/hydrofabrics/community/gage_parameters/gage_id
         # if it doesn't exist, use the default
-
-
-
+        url = f"https://communityhydrofabric.s3.us-east-1.amazonaws.com/hydrofabrics/community/gage_parameters/{gage_id}.json"
+        response = requests.get(url)
+        if response.status_code == 200:
             new_template = requests.get(url).json()
-            template_path = paths.config_dir / "
+            template_path = paths.config_dir / "downloaded_params.json"
             with open(template_path, "w") as f:
                 json.dump(new_template, f)
-
-            logger.warning(f"Failed to download gage parameters")
+            logger.info(f"downloaded calibrated parameters for {gage_id}")
 
     conf_df = get_model_attributes(paths.geopackage_path)
 
@@ -347,21 +341,19 @@ def create_realization(
 
     configure_troute(cat_id, paths.config_dir, start_time, end_time)
 
-    make_ngen_realization_json(
-        paths.config_dir, template_path, start_time, end_time
-    )
+    make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)
 
     # create some partitions for parallelization
     paths.setup_run_folders()
    create_partitions(paths)
 
 
-def create_partitions(paths:
+def create_partitions(paths: file_paths, num_partitions: Optional[int] = None) -> None:
     if num_partitions is None:
         num_partitions = multiprocessing.cpu_count()
 
     cat_to_nex_pairs = get_cat_to_nex_flowpairs(hydrofabric=paths.geopackage_path)
-    nexus = defaultdict(list)
+    # nexus = defaultdict(list)
 
     # for cat, nex in cat_to_nex_pairs:
     #     nexus[nex].append(cat)
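The rewritten gage branch in `create_realization` fills in what was previously stubbed out: when a `gage_id` is supplied it probes the community hydrofabric bucket for calibrated parameters and, on a 200 response, saves them as `downloaded_params.json` and points the realization template at that file. A standalone sketch of the flow, with the URL pattern and filename taken from the diff (the gage ID here is illustrative):

```python
import json
from pathlib import Path

import requests

gage_id = "01646500"  # illustrative gage ID
url = (
    "https://communityhydrofabric.s3.us-east-1.amazonaws.com/"
    f"hydrofabrics/community/gage_parameters/{gage_id}.json"
)

response = requests.get(url)
if response.status_code == 200:
    # calibrated parameters exist for this gage: persist them and use the
    # downloaded file as the realization template
    template_path = Path("downloaded_params.json")
    with open(template_path, "w") as f:
        json.dump(response.json(), f)
else:
    # no calibrated parameters published for this gage; the default template applies
    pass
```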
ngiab_data_preprocess-4.3.0/modules/data_processing/dask_utils.py

@@ -0,0 +1,92 @@
+import logging
+
+from dask.distributed import Client
+
+logger = logging.getLogger(__name__)
+
+
+def shutdown_cluster():
+    try:
+        client = Client.current()
+        client.shutdown()
+    except ValueError:
+        logger.debug("No cluster found to shutdown")
+
+
+def no_cluster(func):
+    """
+    Decorator that ensures the wrapped function runs with no active Dask cluster.
+
+    This decorator attempts to shut down any existing Dask cluster before
+    executing the wrapped function. If no cluster is found, it logs a debug message
+    and continues execution.
+
+    Parameters:
+        func: The function to be executed without a Dask cluster
+
+    Returns:
+        wrapper: The wrapped function that will be executed without a Dask cluster
+    """
+
+    def wrapper(*args, **kwargs):
+        shutdown_cluster()
+        result = func(*args, **kwargs)
+        return result
+
+    return wrapper
+
+
+def use_cluster(func):
+    """
+    Decorator that ensures the wrapped function has access to a Dask cluster.
+
+    If a Dask cluster is already running, it uses the existing one.
+    If no cluster is available, it creates a new one before executing the function.
+    The cluster remains active after the function completes.
+
+    Parameters:
+        func: The function to be executed with a Dask cluster
+
+    Returns:
+        wrapper: The wrapped function with access to a Dask cluster
+    """
+
+    def wrapper(*args, **kwargs):
+        try:
+            client = Client.current()
+        except ValueError:
+            client = Client()
+        result = func(*args, **kwargs)
+        return result
+
+    return wrapper
+
+
+def temp_cluster(func):
+    """
+    Decorator that provides a temporary Dask cluster for the wrapped function.
+
+    If a Dask cluster is already running, it uses the existing one and leaves it running.
+    If no cluster exists, it creates a temporary one and shuts it down after
+    the function completes.
+
+    Parameters:
+        func: The function to be executed with a Dask cluster
+
+    Returns:
+        wrapper: The wrapped function with access to a Dask cluster
+    """
+
+    def wrapper(*args, **kwargs):
+        cluster_was_running = True
+        try:
+            client = Client.current()
+        except ValueError:
+            cluster_was_running = False
+            client = Client()
+        result = func(*args, **kwargs)
+        if not cluster_was_running:
+            client.shutdown()
+        return result
+
+    return wrapper