ngiab-data-preprocess 4.4.0__tar.gz → 4.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/PKG-INFO +1 -1
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/create_realization.py +56 -186
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/file_paths.py +4 -4
- ngiab_data_preprocess-4.5.1/modules/data_sources/lstm-catchment-template.yml +17 -0
- ngiab_data_preprocess-4.4.0/modules/data_sources/em-realization-template.json → ngiab_data_preprocess-4.5.1/modules/data_sources/lstm-realization-template.json +5 -8
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_cli/__main__.py +7 -9
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_cli/arguments.py +2 -3
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +1 -1
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +2 -3
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/pyproject.toml +1 -1
- ngiab_data_preprocess-4.4.0/modules/data_sources/em-catchment-template.yml +0 -10
- ngiab_data_preprocess-4.4.0/modules/data_sources/em-config.yml +0 -60
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/.github/workflows/build_only.yml +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/.github/workflows/publish.yml +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/.gitignore +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/LICENSE +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/README.md +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/ciroh-bgsafe.png +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/dask_utils.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/dataset_utils.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/datasets.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/forcings.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/gpkg_utils.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/graph_utils.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/s3fs_utils.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/subset.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/cfe-nowpm-realization-template.json +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/cfe-template.ini +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/forcing_template.nc +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/ngen-routing-template.yaml +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/noah-owp-modular-init.namelist.input +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/source_validation.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/template.sql +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_sources/triggers.sql +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/__init__.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/__main__.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/css/console.css +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/css/main.css +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/css/toggle.css +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/js/console.js +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/js/data_processing.js +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/js/main.js +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/resources/loading.gif +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/static/resources/screenshot.jpg +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/templates/index.html +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/map_app/views.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_cli/custom_logging.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_cli/forcing_cli.py +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/requires.txt +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/setup.cfg +0 -0
- {ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/tests/test_nan_impute.py +0 -0
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.4.0
+Version: 4.5.1
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/create_realization.py
RENAMED
@@ -1,23 +1,22 @@
 import json
 import logging
 import multiprocessing
+import os
 import shutil
 import sqlite3
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Optional
-import psutil
-import os

+import numpy as np
 import pandas
+import psutil
 import requests
 import s3fs
 import xarray as xr
 from data_processing.dask_utils import temp_cluster
 from data_processing.file_paths import file_paths
 from data_processing.gpkg_utils import (
-    GeoPackage,
-    get_cat_to_nex_flowpairs,
     get_cat_to_nhd_feature_id,
     get_table_crs_short,
 )
@@ -91,7 +90,6 @@ def make_cfe_config(
 def make_noahowp_config(
     base_dir: Path, divide_conf_df: pandas.DataFrame, start_time: datetime, end_time: datetime
 ) -> None:
-    divide_conf_df.set_index("divide_id", inplace=True)
     start_datetime = start_time.strftime("%Y%m%d%H%M")
     end_datetime = end_time.strftime("%Y%m%d%H%M")
     with open(file_paths.template_noahowp_config, "r") as file:
@@ -100,155 +98,78 @@ def make_noahowp_config(
     cat_config_dir = base_dir / "cat_config" / "NOAH-OWP-M"
     cat_config_dir.mkdir(parents=True, exist_ok=True)

-    for
-        with open(cat_config_dir / f"{
+    for _, row in divide_conf_df.iterrows():
+        with open(cat_config_dir / f"{row['divide_id']}.input", "w") as file:
             file.write(
                 template.format(
                     start_datetime=start_datetime,
                     end_datetime=end_datetime,
-                    lat=
-                    lon=
-                    terrain_slope=
-                    azimuth=
-                    ISLTYP=int(
-                    IVGTYP=int(
+                    lat=row["latitude"],
+                    lon=row["longitude"],
+                    terrain_slope=row["mean.slope_1km"],
+                    azimuth=row["circ_mean.aspect"],
+                    ISLTYP=int(row["mode.ISLTYP"]),  # type: ignore
+                    IVGTYP=int(row["mode.IVGTYP"]),  # type: ignore
                 )
             )


-def
-    # modspatialite is faster than pyproj but can't be added as a pip dependency
-    # This incantation took a while
-    with GeoPackage(hydrofabric) as conn:
-        sql = """WITH source_crs AS (
-            SELECT organization || ':' || organization_coordsys_id AS crs_string
-            FROM gpkg_spatial_ref_sys
-            WHERE srs_id = (
-                SELECT srs_id
-                FROM gpkg_geometry_columns
-                WHERE table_name = 'divides'
-            )
-        )
-        SELECT
-            d.divide_id,
-            d.areasqkm,
-            da."mean.slope",
-            da."mean.slope_1km",
-            da."mean.elevation",
-            ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
-                (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
-            ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
-                (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude
-        FROM divides AS d
-        JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
-        """
-        divide_conf_df = pandas.read_sql_query(sql, conn)
-        divide_conf_df.set_index("divide_id", inplace=True)
-    return divide_conf_df
-
-
-def get_model_attributes_pyproj(hydrofabric: Path) -> pandas.DataFrame:
-    # if modspatialite is not available, use pyproj
+def get_model_attributes(hydrofabric: Path) -> pandas.DataFrame:
     with sqlite3.connect(hydrofabric) as conn:
-
-
-
-
-
-
-
-
-
-
-        JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
-        """
-        divide_conf_df = pandas.read_sql_query(sql, conn)
-
+        conf_df = pandas.read_sql_query(
+            """
+            SELECT
+                d.areasqkm,
+                da.*
+            FROM divides AS d
+            JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+            """,
+            conn,
+        )
     source_crs = get_table_crs_short(hydrofabric, "divides")
-
     transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
-
-
-
-    )
-
-    divide_conf_df["longitude"] = lon
-    divide_conf_df["latitude"] = lat
-
-    divide_conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
-    divide_conf_df.set_index("divide_id", inplace=True)
-
-    return divide_conf_df
-
-
-def get_model_attributes(hydrofabric: Path) -> pandas.DataFrame:
-    try:
-        with GeoPackage(hydrofabric) as conn:
-            conf_df = pandas.read_sql_query(
-                """WITH source_crs AS (
-                SELECT organization || ':' || organization_coordsys_id AS crs_string
-                FROM gpkg_spatial_ref_sys
-                WHERE srs_id = (
-                    SELECT srs_id
-                    FROM gpkg_geometry_columns
-                    WHERE table_name = 'divides'
-                )
-                )
-                SELECT
-                *,
-                ST_X(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
-                    (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
-                ST_Y(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
-                    (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude FROM 'divide-attributes';""",
-                conn,
-            )
-    except sqlite3.OperationalError:
-        with sqlite3.connect(hydrofabric) as conn:
-            conf_df = pandas.read_sql_query(
-                "SELECT* FROM 'divide-attributes';",
-                conn,
-            )
-    source_crs = get_table_crs_short(hydrofabric, "divides")
-    transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
-    lon, lat = transformer.transform(conf_df["centroid_x"].values, conf_df["centroid_y"].values)
-    conf_df["longitude"] = lon
-    conf_df["latitude"] = lat
-
-    conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
+    lon, lat = transformer.transform(conf_df["centroid_x"].values, conf_df["centroid_y"].values)
+    conf_df["longitude"] = lon
+    conf_df["latitude"] = lat
     return conf_df


-def
+def make_lstm_config(
     hydrofabric: Path,
     output_dir: Path,
-    template_path: Path = file_paths.
+    template_path: Path = file_paths.template_lstm_config,
 ):
     # test if modspatialite is available
-
-
-
-
-        logger.warning("Install mod_spatialite for improved performance")
-        divide_conf_df = get_model_attributes_pyproj(hydrofabric)
-
-    cat_config_dir = output_dir / "cat_config" / "empirical_model"
+
+    divide_conf_df = get_model_attributes(hydrofabric)
+
+    cat_config_dir = output_dir / "cat_config" / "lstm"
     if cat_config_dir.exists():
         shutil.rmtree(cat_config_dir)
     cat_config_dir.mkdir(parents=True, exist_ok=True)

+    # convert the mean.slope from degrees 0-90 where 90 is flat and 0 is vertical to m/km
+    # flip 0 and 90 degree values
+    divide_conf_df["flipped_mean_slope"] = abs(divide_conf_df["mean.slope"] - 90)
+    # Convert degrees to meters per kmmeter
+    divide_conf_df["mean_slope_mpkm"] = (
+        np.tan(np.radians(divide_conf_df["flipped_mean_slope"])) * 1000
+    )
+
     with open(template_path, "r") as file:
         template = file.read()

-    for
+    for _, row in divide_conf_df.iterrows():
+        divide = row["divide_id"]
         with open(cat_config_dir / f"{divide}.yml", "w") as file:
             file.write(
                 template.format(
-                    area_sqkm=
+                    area_sqkm=row["areasqkm"],
                     divide_id=divide,
-                    lat=
-                    lon=
-                    slope_mean=
-                    elevation_mean=
+                    lat=row["latitude"],
+                    lon=row["longitude"],
+                    slope_mean=row["mean_slope_mpkm"],
+                    elevation_mean=row["mean.elevation"] / 100,  # convert cm in hf to m
                 )
             )

@@ -259,14 +180,16 @@ def configure_troute(
     with open(file_paths.template_troute_config, "r") as file:
         troute_template = file.read()
     time_step_size = 300
-    gpkg_file_path=f"{config_dir}/{cat_id}_subset.gpkg"
+    gpkg_file_path = f"{config_dir}/{cat_id}_subset.gpkg"
     nts = (end_time - start_time).total_seconds() / time_step_size
     with sqlite3.connect(gpkg_file_path) as conn:
         ncats_df = pandas.read_sql_query("SELECT COUNT(id) FROM 'divides';", conn)
-        ncats = ncats_df[
+        ncats = ncats_df["COUNT(id)"][0]

-    est_bytes_required = nts * ncats * 45
-    local_ram_available =
+    est_bytes_required = nts * ncats * 45  # extremely rough calculation based on about 3 tests :)
+    local_ram_available = (
+        0.8 * psutil.virtual_memory().available
+    )  # buffer to not accidentally explode machine

     if est_bytes_required > local_ram_available:
         max_loop_size = nts // (est_bytes_required // local_ram_available)
@@ -289,7 +212,7 @@ def configure_troute(
         start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
         nts=nts,
         max_loop_size=max_loop_size,
-        binary_nexus_file_folder_comment=binary_nexus_file_folder_comment
+        binary_nexus_file_folder_comment=binary_nexus_file_folder_comment,
     )

     with open(config_dir / "troute.yaml", "w") as file:
@@ -310,22 +233,14 @@ def make_ngen_realization_json(
         json.dump(realization, file, indent=4)


-def
+def create_lstm_realization(cat_id: str, start_time: datetime, end_time: datetime):
     paths = file_paths(cat_id)
-    template_path = file_paths.
-    em_config = file_paths.template_em_model_config
-    # move em_config to paths.config_dir
-    with open(em_config, "r") as f:
-        em_config = f.read()
-    with open(paths.config_dir / "em-config.yml", "w") as f:
-        f.write(em_config)
-
+    template_path = file_paths.template_lstm_realization_config
     configure_troute(cat_id, paths.config_dir, start_time, end_time)
     make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)
-
+    make_lstm_config(paths.geopackage_path, paths.config_dir)
     # create some partitions for parallelization
     paths.setup_run_folders()
-    create_partitions(paths)


 def create_realization(
@@ -368,48 +283,3 @@ def create_realization(

     # create some partitions for parallelization
     paths.setup_run_folders()
-    create_partitions(paths)
-
-
-def create_partitions(paths: file_paths, num_partitions: Optional[int] = None) -> None:
-    if num_partitions is None:
-        num_partitions = multiprocessing.cpu_count()
-
-    cat_to_nex_pairs = get_cat_to_nex_flowpairs(hydrofabric=paths.geopackage_path)
-    # nexus = defaultdict(list)
-
-    # for cat, nex in cat_to_nex_pairs:
-    #     nexus[nex].append(cat)
-
-    num_partitions = min(num_partitions, len(cat_to_nex_pairs))
-    # partition_size = ceil(len(nexus) / num_partitions)
-    # num_nexus = len(nexus)
-    # nexus = list(nexus.items())
-    # partitions = []
-    # for i in range(0, num_nexus, partition_size):
-    #     part = {}
-    #     part["id"] = i // partition_size
-    #     part["cat-ids"] = []
-    #     part["nex-ids"] = []
-    #     part["remote-connections"] = []
-    #     for j in range(i, i + partition_size):
-    #         if j < num_nexus:
-    #             part["cat-ids"].extend(nexus[j][1])
-    #             part["nex-ids"].append(nexus[j][0])
-    #     partitions.append(part)
-
-    # with open(paths.subset_dir / f"partitions_{num_partitions}.json", "w") as f:
-    #     f.write(json.dumps({"partitions": partitions}, indent=4))
-
-    # write this to a metadata file to save on repeated file io to recalculate
-    with open(paths.metadata_dir / "num_partitions", "w") as f:
-        f.write(str(num_partitions))
-
-
-if __name__ == "__main__":
-    cat_id = "cat-1643991"
-    start_time = datetime(2010, 1, 1, 0, 0, 0)
-    end_time = datetime(2010, 1, 2, 0, 0, 0)
-    # output_interval = 3600
-    # nts = 2592
-    create_realization(cat_id, start_time, end_time)
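A note on the slope handling added in make_lstm_config above: the hydrofabric's mean.slope is in degrees with 90 meaning flat, so the code flips it and converts it to a gradient in metres per kilometre. A minimal standalone sketch of that same arithmetic (the sample values are illustrative, not taken from any catchment):

    import numpy as np

    # mean.slope in the hydrofabric: 90 degrees = flat, 0 degrees = vertical
    slope_deg = np.array([90.0, 89.0, 85.0])          # illustrative values only
    flipped = np.abs(slope_deg - 90)                  # now 0 = flat, 90 = vertical
    slope_m_per_km = np.tan(np.radians(flipped)) * 1000
    print(slope_m_per_km)                             # roughly [0.0, 17.5, 87.5] m of rise per km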
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/data_processing/file_paths.py
RENAMED
@@ -1,6 +1,7 @@
+from datetime import datetime
 from pathlib import Path
 from typing import Optional
-
+

 class file_paths:
     """
@@ -27,11 +28,10 @@ class file_paths:
     dev_file = Path(__file__).parent.parent.parent / ".dev"
     template_troute_config = data_sources / "ngen-routing-template.yaml"
     template_cfe_nowpm_realization_config = data_sources / "cfe-nowpm-realization-template.json"
-
+    template_lstm_realization_config = data_sources / "lstm-realization-template.json"
     template_noahowp_config = data_sources / "noah-owp-modular-init.namelist.input"
     template_cfe_config = data_sources / "cfe-template.ini"
-
-    template_em_model_config = data_sources / "em-config.yml"
+    template_lstm_config = data_sources / "lstm-catchment-template.yml"

     def __init__(self, folder_name: Optional[str] = None, output_dir: Optional[Path] = None):
         """
ngiab_data_preprocess-4.5.1/modules/data_sources/lstm-catchment-template.yml
@@ -0,0 +1,17 @@
+time_step: "1 hour"
+area_sqkm: {area_sqkm} # areasqkm
+basin_id: {divide_id}
+basin_name: {divide_id}
+elev_mean: {elevation_mean} # mean.elevation
+initial_state: zero
+lat: {lat} # needs calulating
+lon: {lon} # needs calulating
+slope_mean: {slope_mean} # mean.slope
+train_cfg_file:
+- /ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_7/config.yml
+- /ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_8/config.yml
+- /ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_9/config.yml
+- /ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_seq999_seed101_0701_143442/config.yml
+- /ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_seq999_seed103_2701_171540/config.yml
+- /ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_slope_elev_precip_temp_seq999_seed101_2801_191806/config.yml
+verbose: 0
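make_lstm_config (see the create_realization.py diff above) fills the placeholders in this template with plain str.format, one file per divide. A minimal sketch of that step, assuming the template file sits in the current directory and using made-up attribute values:

    from pathlib import Path

    # Illustrative stand-ins for one row of the divide-attributes query.
    row = {
        "divide_id": "cat-1643991",
        "areasqkm": 12.3,
        "latitude": 33.2,
        "longitude": -87.5,
        "mean_slope_mpkm": 17.5,
        "elevation_mean_m": 210.0,
    }

    template = Path("lstm-catchment-template.yml").read_text()
    config = template.format(
        area_sqkm=row["areasqkm"],
        divide_id=row["divide_id"],
        lat=row["latitude"],
        lon=row["longitude"],
        slope_mean=row["mean_slope_mpkm"],
        elevation_mean=row["elevation_mean_m"],
    )
    Path(f"{row['divide_id']}.yml").write_text(config)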
ngiab_data_preprocess-4.4.0/modules/data_sources/em-realization-template.json → ngiab_data_preprocess-4.5.1/modules/data_sources/lstm-realization-template.json
RENAMED
@@ -5,25 +5,22 @@
         "name": "bmi_multi",
         "params": {
             "name": "bmi_multi",
-            "model_type_name": "
+            "model_type_name": "lstm",
             "forcing_file": "",
             "init_config": "",
             "allow_exceed_end_time": true,
             "main_output_variable": "land_surface_water__runoff_depth",
-            "modules": [
+            "modules": [
                 {
                     "name": "bmi_python",
                     "params": {
                         "name": "bmi_python",
                         "python_type": "lstm.bmi_lstm.bmi_LSTM",
-                        "model_type_name": "
-                        "init_config": "./config/cat_config/
+                        "model_type_name": "bmi_lstm",
+                        "init_config": "./config/cat_config/lstm/{{id}}.yml",
                         "allow_exceed_end_time": true,
                         "main_output_variable": "land_surface_water__runoff_depth",
-                        "uses_forcing_file": false
-                        "variables_names_map": {
-                            "atmosphere_water__liquid_equivalent_precipitation_rate": "APCP_surface"
-                        }
+                        "uses_forcing_file": false
                     }
                 }
             ]
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_cli/__main__.py
RENAMED
@@ -8,9 +8,10 @@ with rich.status.Status("loading") as status:
     import logging
     import subprocess
     import time
+    from multiprocessing import cpu_count

     import geopandas as gpd
-    from data_processing.create_realization import
+    from data_processing.create_realization import create_lstm_realization, create_realization
     from data_processing.dask_utils import shutdown_cluster
     from data_processing.dataset_utils import save_and_clip_dataset
     from data_processing.datasets import load_aorc_zarr, load_v3_retrospective_zarr
@@ -22,6 +23,7 @@ with rich.status.Status("loading") as status:
     from data_sources.source_validation import validate_hydrofabric, validate_output_dir
     from ngiab_data_cli.arguments import parse_arguments
     from ngiab_data_cli.custom_logging import set_logging_to_critical_only, setup_logging
+


 def validate_input(args: argparse.Namespace) -> Tuple[str, str]:
@@ -184,8 +186,8 @@ def main() -> None:
         gage_id = None
         if args.gage:
             gage_id = args.input_feature
-        if args.
-
+        if args.lstm:
+            create_lstm_realization(
                 output_folder, start_time=args.start_date, end_time=args.end_date
             )
         else:
@@ -200,17 +202,13 @@ def main() -> None:

     if args.run:
         logging.info("Running Next Gen using NGIAB...")
-
-        with open(paths.metadata_dir / "num_partitions", "r") as f:
-            num_partitions = int(f.read())
-
+
         try:
             subprocess.run("docker pull awiciroh/ciroh-ngen-image:latest", shell=True)
         except:
             logging.error("Docker is not running, please start Docker and try again.")
         try:
-
-            command = f'docker run --rm -it -v "{str(paths.subset_dir)}:/ngen/ngen/data" awiciroh/ciroh-ngen-image:latest /ngen/ngen/data/ auto {num_partitions} local'
+            command = f'docker run --rm -it -v "{str(paths.subset_dir)}:/ngen/ngen/data" awiciroh/ciroh-ngen-image:latest /ngen/ngen/data/ auto {cpu_count()} local'
             subprocess.run(command, shell=True)
             logging.info("Next Gen run complete.")
         except:
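With the num_partitions metadata file gone, the run step above simply asks NGIAB for one partition per CPU. A minimal sketch of how that docker command string is assembled (the output folder path is illustrative; the image tag and argument order are the ones shown in the diff):

    from multiprocessing import cpu_count
    from pathlib import Path

    subset_dir = Path("/tmp/ngiab/cat-1643991")  # illustrative output folder
    command = (
        f'docker run --rm -it -v "{subset_dir}:/ngen/ngen/data" '
        f"awiciroh/ciroh-ngen-image:latest /ngen/ngen/data/ auto {cpu_count()} local"
    )
    print(command)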
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_cli/arguments.py
RENAMED
@@ -106,10 +106,9 @@ def parse_arguments() -> argparse.Namespace:
         help="enable debug logging",
     )
     parser.add_argument(
-        "--
-        "--em",
+        "--lstm",
         action="store_true",
-        help="enable
+        help="enable LSTM model realization and forcings",
     )
     parser.add_argument(
         "--nwm_gw",
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.4.0
+Version: 4.5.1
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt
RENAMED
@@ -17,10 +17,9 @@ modules/data_processing/s3fs_utils.py
 modules/data_processing/subset.py
 modules/data_sources/cfe-nowpm-realization-template.json
 modules/data_sources/cfe-template.ini
-modules/data_sources/em-catchment-template.yml
-modules/data_sources/em-config.yml
-modules/data_sources/em-realization-template.json
 modules/data_sources/forcing_template.nc
+modules/data_sources/lstm-catchment-template.yml
+modules/data_sources/lstm-realization-template.json
 modules/data_sources/ngen-routing-template.yaml
 modules/data_sources/noah-owp-modular-init.namelist.input
 modules/data_sources/source_validation.py
{ngiab_data_preprocess-4.4.0 → ngiab_data_preprocess-4.5.1}/pyproject.toml
RENAMED
@@ -19,7 +19,7 @@ filterwarnings = [
 ]
 [project]
 name = "ngiab_data_preprocess"
-version = "v4.4.0"
+version = "v4.5.1"
 authors = [{ name = "Josh Cunningham", email = "jcunningham8@ua.edu" }]
 description = "Graphical Tools for creating Next Gen Water model input data."
 readme = "README.md"
ngiab_data_preprocess-4.4.0/modules/data_sources/em-catchment-template.yml
@@ -1,10 +0,0 @@
-area_sqkm: {area_sqkm} # areasqkm
-basin_id: {divide_id}
-basin_name: {divide_id}
-elev_mean: {elevation_mean} # mean.elevation
-initial_state: zero
-lat: {lat} # needs calulating
-lon: {lon} # needs calulating
-slope_mean: {slope_mean} # mean.slope
-train_cfg_file: ./config/em-config.yml
-verbose: 0
ngiab_data_preprocess-4.4.0/modules/data_sources/em-config.yml
@@ -1,60 +0,0 @@
-batch_size: 256
-clip_gradient_norm: 1
-clip_targets_to_zero:
-- QObs(mm/d)
-data_dir: ./data/
-dataset: hourly_camels_us
-device: cpu
-dynamic_inputs:
-- total_precipitation
-- temperature
-epochs: 9
-experiment_name: hourly_slope_mean_precip_temp
-forcings: nldas_hourly
-head: regression
-hidden_size: 64
-img_log_dir: /ngen/ngen/extern/lstm/trained_neuralhydrology_models/hourly_slope_mean_precip_temp/img_log
-initial_forget_bias: 3
-learning_rate:
-  0: 0.0005
-  10: 0.0001
-  25: 5e-05
-log_interval: 5
-log_n_figures: 0
-log_tensorboard: false
-loss: NSE
-mass_inputs:
-metrics:
-- NSE
-- KGE
-- Alpha-NSE
-- Beta-NSE
-model: cudalstm
-num_workers: 8
-number_of_basins: 516
-optimizer: Adam
-output_activation: linear
-output_dropout: 0.4
-package_version: 1.0.0-beta1
-predict_last_n: 24
-run_dir: /ngen/ngen/extern/lstm/trained_neuralhydrology_models/hourly_slope_mean_precip_temp
-save_weights_every: 1
-seed: 102
-seq_length: 336
-static_attributes:
-- elev_mean
-- slope_mean
-target_variables:
-- QObs(mm/d)
-test_basin_file: 516_basins.txt
-test_end_date: 31/12/2002
-test_start_date: 01/01/2000
-train_basin_file: 516_basins.txt
-train_dir: trained_elsewhere
-train_end_date: 30/09/2018
-train_start_date: 01/10/1980
-validate_every: 1
-validate_n_random_basins: 15
-validation_basin_file: 516_basins.txt
-validation_end_date: 30/09/2018
-validation_start_date: 01/10/1980