ngiab-data-preprocess 3.2.4__tar.gz → 3.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/.gitignore +2 -1
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/PKG-INFO +1 -1
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/create_realization.py +65 -32
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/file_paths.py +10 -3
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/forcings.py +34 -15
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/zarr_utils.py +6 -6
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-nowpm-realization-template.json +1 -1
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-template.ini +6 -5
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-realization-template.json +1 -1
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/ngen-routing-template.yaml +5 -5
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/noah-owp-modular-init.namelist.input +6 -6
- ngiab_data_preprocess-3.3.0/modules/ngiab_data_cli/forcing_cli.py +97 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +1 -1
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +1 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +1 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/pyproject.toml +3 -2
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/.github/workflows/build_only.yml +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/.github/workflows/publish.yml +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/LICENSE +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/README.md +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/map.html +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/gpkg_utils.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/graph_utils.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/s3fs_utils.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/subset.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-catchment-template.yml +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-config.yml +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/forcing_template.nc +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/source_validation.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/template.sql +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/triggers.sql +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/__init__.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/__main__.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/css/console.css +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/css/main.css +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/console.js +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/data_processing.js +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/main.js +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/dark-style.json +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/light-style.json +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/loading.gif +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/screenshot.png +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/templates/index.html +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/map_app/views.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/__main__.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/arguments.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/custom_logging.py +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/requires.txt +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/output/.gitkeep +0 -0
- {ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/setup.cfg +0 -0
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ngiab_data_preprocess
-Version: 3.2.4
+Version: 3.3.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/create_realization.py
RENAMED

@@ -74,7 +74,7 @@ def make_cfe_config(
         bexp=row["mode.bexp_soil_layers_stag=2"],
         dksat=row["geom_mean.dksat_soil_layers_stag=2"],
         psisat=row["geom_mean.psisat_soil_layers_stag=2"],
-        slope=row["mean.slope"],
+        slope=row["mean.slope_1km"],
         smcmax=row["mean.smcmax_soil_layers_stag=2"],
         smcwlt=row["mean.smcwlt_soil_layers_stag=2"],
         max_gw_storage=row["mean.Zmax"]/1000 if row["mean.Zmax"] is not None else "0.011[m]",  # mean.Zmax is in mm!
@@ -107,10 +107,10 @@ def make_noahowp_config(
         template.format(
             start_datetime=start_datetime,
             end_datetime=end_datetime,
-            lat=divide_conf_df.loc[divide, "
-            lon=divide_conf_df.loc[divide, "
-            terrain_slope=divide_conf_df.loc[divide, "mean.slope"],
-            azimuth=divide_conf_df.loc[divide, "circ_mean.aspect"],
+            lat=divide_conf_df.loc[divide, "latitude"],
+            lon=divide_conf_df.loc[divide, "longitude"],
+            terrain_slope= divide_conf_df.loc[divide, "mean.slope_1km"],
+            azimuth= divide_conf_df.loc[divide, "circ_mean.aspect"],
             ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),
             IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),
         )
@@ -123,24 +123,25 @@ def get_model_attributes_modspatialite(hydrofabric: Path):
     with GeoPackage(hydrofabric) as conn:
         sql = """WITH source_crs AS (
             SELECT organization || ':' || organization_coordsys_id AS crs_string
-            FROM gpkg_spatial_ref_sys
+                FROM gpkg_spatial_ref_sys
             WHERE srs_id = (
-                SELECT srs_id
-                FROM gpkg_geometry_columns
+                    SELECT srs_id
+                    FROM gpkg_geometry_columns
                 WHERE table_name = 'divides'
             )
         )
-        SELECT
-            d.divide_id,
-            d.areasqkm,
-            da."mean.slope",
+            SELECT
+                d.divide_id,
+                d.areasqkm,
+                da."mean.slope",
+                da."mean.slope_1km",
             da."mean.elevation",
-            ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
+                ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
             (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
-            ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
+                ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
             (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude
-        FROM divides AS d
-        JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+            FROM divides AS d
+            JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
     """
         divide_conf_df = pandas.read_sql_query(sql, conn)
         divide_conf_df.set_index("divide_id", inplace=True)
@@ -151,15 +152,16 @@ def get_model_attributes_pyproj(hydrofabric: Path):
     # if modspatialite is not available, use pyproj
     with sqlite3.connect(hydrofabric) as conn:
         sql = """
-        SELECT
-            d.divide_id,
-            d.areasqkm,
-            da."mean.slope",
+            SELECT
+                d.divide_id,
+                d.areasqkm,
+                da."mean.slope",
+                da."mean.slope_1km",
             da."mean.elevation",
             da.centroid_x,
             da.centroid_y
-        FROM divides AS d
-        JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+            FROM divides AS d
+            JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
     """
         divide_conf_df = pandas.read_sql_query(sql, conn)
@@ -179,6 +181,40 @@ def get_model_attributes_pyproj(hydrofabric: Path):
 
     return divide_conf_df
 
+def get_model_attributes(hydrofabric: Path):
+    try:
+        with GeoPackage(hydrofabric) as conn:
+            conf_df = pandas.read_sql_query(
+                """WITH source_crs AS (
+                    SELECT organization || ':' || organization_coordsys_id AS crs_string
+                    FROM gpkg_spatial_ref_sys
+                    WHERE srs_id = (
+                        SELECT srs_id
+                        FROM gpkg_geometry_columns
+                        WHERE table_name = 'divides'
+                    )
+                )
+                SELECT
+                    *,
+                    ST_X(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
+                    (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
+                    ST_Y(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
+                    (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude FROM 'divide-attributes';""",
+                conn,
+            )
+    except pandas.errors.DatabaseError:
+        with sqlite3.connect(hydrofabric) as conn:
+            conf_df = pandas.read_sql_query("SELECT* FROM 'divide-attributes';", conn,)
+            source_crs = get_table_crs_short(hydrofabric, "divides")
+            transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
+            lon, lat = transformer.transform(
+                conf_df["centroid_x"].values, conf_df["centroid_y"].values
+            )
+            conf_df["longitude"] = lon
+            conf_df["latitude"] = lat
+
+    conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
+    return conf_df
 
 def make_em_config(
     hydrofabric: Path,
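Note: the new get_model_attributes() collapses the two backend-specific readers above into one entry point. It first tries the mod_spatialite SQL path and, on pandas.errors.DatabaseError, falls back to pyproj, returning every column of 'divide-attributes' plus computed WGS84 latitude/longitude either way. A usage sketch (the geopackage name is hypothetical):

    from pathlib import Path

    # Either backend yields the same frame: all 'divide-attributes' columns
    # plus WGS84 coordinates, with centroid_x/centroid_y dropped.
    conf_df = get_model_attributes(Path("cat-2853886_subset.gpkg"))
    print(conf_df[["divide_id", "latitude", "longitude", "mean.slope_1km"]].head())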
@@ -224,8 +260,6 @@ def configure_troute(
         troute_template = file.read()
     time_step_size = 300
     nts = (end_time - start_time).total_seconds() / time_step_size
-    seconds_in_hour = 3600
-    number_of_hourly_steps = nts * time_step_size / seconds_in_hour
     filled_template = troute_template.format(
         # hard coded to 5 minutes
         time_step_size=time_step_size,
@@ -234,8 +268,7 @@ def configure_troute(
         geo_file_path=f"./config/{cat_id}_subset.gpkg",
         start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
         nts=nts,
-        max_loop_size=nts,
-        stream_output_time=number_of_hourly_steps,
+        max_loop_size=nts,
     )
 
     with open(config_dir / "troute.yaml", "w") as file:
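Note: the removed stream_output_time format argument pairs with the ngen-routing-template.yaml change further down, where stream_output_time is now hardcoded to -1 (all outputs in a single file) instead of being filled in from the computed number of hourly steps.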
@@ -243,7 +276,7 @@ def configure_troute(
 
 
 def make_ngen_realization_json(
-    config_dir: Path, template_path: Path, start_time: datetime, end_time: datetime
+    config_dir: Path, template_path: Path, start_time: datetime, end_time: datetime
 ) -> None:
     with open(template_path, "r") as file:
         realization = json.load(file)
@@ -281,8 +314,8 @@ def create_realization(cat_id: str, start_time: datetime, end_time: datetime, us
 
     # get approximate groundwater levels from nwm output
     template_path = paths.template_cfe_nowpm_realization_config
-
-
+
+    conf_df = get_model_attributes(paths.geopackage_path)
 
     if use_nwm_gw:
         gw_levels = get_approximate_gw_storage(paths, start_time)
@@ -310,10 +343,10 @@ def create_partitions(paths: Path, num_partitions: int = None) -> None:
     cat_to_nex_pairs = get_cat_to_nex_flowpairs(hydrofabric=paths.geopackage_path)
     nexus = defaultdict(list)
 
-    for cat, nex in cat_to_nex_pairs:
-        nexus[nex].append(cat)
+    # for cat, nex in cat_to_nex_pairs:
+    #     nexus[nex].append(cat)
 
-    num_partitions = min(num_partitions, len(nexus))
+    num_partitions = min(num_partitions, len(cat_to_nex_pairs))
     # partition_size = ceil(len(nexus) / num_partitions)
     # num_nexus = len(nexus)
     # nexus = list(nexus.items())
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/file_paths.py
RENAMED

@@ -97,12 +97,19 @@ class file_paths:
 
     @property
     def cached_nc_file(self) -> Path:
-        return self.
+        return self.forcings_dir / "raw_gridded_data.nc"
+
+    def append_cli_command(self, command: list[str]) -> None:
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        command_string = " ".join(command)
+        history_file = self.metadata_dir / "cli_commands_history.txt"
+        if not history_file.parent.exists():
+            history_file.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.metadata_dir / "cli_commands_history.txt", "a") as f:
+            f.write(f"{current_time}| {command_string}\n")
 
     def setup_run_folders(self) -> None:
         folders = [
-            "restart",
-            "lakeout",
             "outputs",
             "outputs/ngen",
             "outputs/troute",
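The new append_cli_command() gives every run a plain-text audit trail of the commands that produced it. A minimal usage sketch (the catchment id is hypothetical):

    import sys
    from data_processing.file_paths import file_paths

    paths = file_paths("cat-2853886")   # hypothetical catchment id
    paths.append_cli_command(sys.argv)  # appends "<timestamp>| <command>" to
                                        # cli_commands_history.txt under metadata_dir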
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/forcings.py
RENAMED

@@ -76,6 +76,8 @@ def add_APCP_SURFACE_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
     # technically should be kg/m^2/s at 1kg = 1l it equates to mm/s
     # nom says qinsur output is m/s, hopefully qinsur is converted to mm/h by ngen
     dataset["APCP_surface"] = dataset["precip_rate"] * 3600
+    dataset["APCP_surface"].attrs["units"] = "mm h^-1"  # ^-1 notation copied from source data
+    dataset["APCP_surface"].attrs["source_note"] = "This is just the precip_rate variable converted to mm/h by multiplying by 3600"
     return dataset
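A quick sanity check of the conversion these new attributes document, assuming precip_rate is in mm/s (1 kg/m^2/s of liquid water over a square metre is 1 mm/s of depth):

    # a rate in mm/s times 3600 s/h gives mm/h
    precip_rate = 0.0005                # mm/s
    apcp_surface = precip_rate * 3600
    assert apcp_surface == 1.8          # mm/h, matching the new "mm h^-1" units attribute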
@@ -140,6 +142,12 @@ def get_cell_weights_parallel(gdf, input_forcings, num_partitions):
         catchments = pool.starmap(get_cell_weights, args)
     return pd.concat(catchments)
 
+def get_units(dataset: xr.Dataset) -> dict:
+    units = {}
+    for var in dataset.data_vars:
+        if dataset[var].attrs["units"]:
+            units[var] = dataset[var].attrs["units"]
+    return units
 
 def compute_zonal_stats(
     gdf: gpd.GeoDataFrame, merged_data: xr.Dataset, forcings_dir: Path
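Note that get_units() indexes attrs["units"] directly, so a source variable lacking a units attribute would raise KeyError rather than being skipped; the truthiness check only guards against empty strings, and the gridded source data presumably carries units on every variable. A tolerant variant using .get() — an illustrative sketch, not what 3.3.0 ships — would look like:

    import xarray as xr

    def get_units_tolerant(dataset: xr.Dataset) -> dict:
        # .attrs.get() returns None instead of raising when "units" is absent
        return {
            var: dataset[var].attrs["units"]
            for var in dataset.data_vars
            if dataset[var].attrs.get("units")
        }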
@@ -152,6 +160,8 @@ def compute_zonal_stats(
 
     catchments = get_cell_weights_parallel(gdf, merged_data, num_partitions)
 
+    units = get_units(merged_data)
+
     variables = {
         "LWDOWN": "DLWRF_surface",
         "PSFC": "PRES_surface",
@@ -224,12 +234,12 @@ def compute_zonal_stats(
             # Merge the chunks back together
             datasets = [xr.open_dataset(forcings_dir / "temp" / f"{variable}_{i}.nc") for i in range(len(time_chunks))]
             result = xr.concat(datasets, dim="time")
-            result.to_netcdf(forcings_dir / f"{variable}.nc")
+            result.to_netcdf(forcings_dir / "temp" / f"{variable}.nc")
             # close the datasets
             result.close()
             _ = [dataset.close() for dataset in datasets]
 
-            for file in forcings_dir.glob("temp
+            for file in forcings_dir.glob("temp/*_*.nc"):
                 file.unlink()
             progress.remove_task(chunk_task)
             progress.update(
@@ -240,10 +250,10 @@ def compute_zonal_stats(
     logger.info(
         f"Forcing generation complete! Zonal stats computed in {time.time() - timer_start:2f} seconds"
     )
-    write_outputs(forcings_dir, variables)
+    write_outputs(forcings_dir, variables, units)
 
 
-def write_outputs(forcings_dir, variables):
+def write_outputs(forcings_dir, variables, units):
 
     # start a dask cluster if there isn't one already running
     try:
@@ -251,12 +261,15 @@ def write_outputs(forcings_dir, variables):
     except ValueError:
         cluster = LocalCluster()
     client = Client(cluster)
-
+    temp_forcings_dir = forcings_dir / "temp"
     # Combine all variables into a single dataset using dask
-    results = [xr.open_dataset(file, chunks="auto") for file in
+    results = [xr.open_dataset(file, chunks="auto") for file in temp_forcings_dir.glob("*.nc")]
     final_ds = xr.merge(results)
-
-
+    for var in final_ds.data_vars:
+        if var in units:
+            final_ds[var].attrs["units"] = units[var]
+        else:
+            logger.warning(f"Variable {var} has no units")
 
     rename_dict = {}
     for key, value in variables.items():
@@ -294,19 +307,25 @@ def write_outputs(forcings_dir, variables):
     final_ds["Time"].attrs["units"] = "s"
     final_ds["Time"].attrs["epoch_start"] = "01/01/1970 00:00:00"  # not needed but suppresses the ngen warning
 
-    final_ds.to_netcdf(
+    final_ds.to_netcdf(forcings_dir / "forcings.nc", engine="netcdf4")
     # close the datasets
     _ = [result.close() for result in results]
     final_ds.close()
 
+    # clean up the temp files
+    for file in temp_forcings_dir.glob("*.*"):
+        file.unlink()
+    temp_forcings_dir.rmdir()
+
 
 def setup_directories(cat_id: str) -> file_paths:
     forcing_paths = file_paths(cat_id)
-
-
-
-
-
+    # delete everything in the forcing folder except the cached nc file
+    for file in forcing_paths.forcings_dir.glob("*.*"):
+        if file != forcing_paths.cached_nc_file:
+            file.unlink()
+
+    os.makedirs(forcing_paths.forcings_dir / "temp", exist_ok=True)
 
     return forcing_paths
@@ -326,7 +345,7 @@ def create_forcings(
     if type(end_time) == datetime:
         end_time = end_time.strftime("%Y-%m-%d %H:%M")
 
-    merged_data = get_forcing_data(forcing_paths, start_time, end_time, gdf, forcing_vars)
+    merged_data = get_forcing_data(forcing_paths.cached_nc_path, start_time, end_time, gdf, forcing_vars)
     compute_zonal_stats(gdf, merged_data, forcing_paths.forcings_dir)
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_processing/zarr_utils.py
RENAMED

@@ -100,18 +100,18 @@ def compute_store(stores: xr.Dataset, cached_nc_path: Path) -> xr.Dataset:
 
 
 def get_forcing_data(
-
+    cached_nc_path: Path,
     start_time: str,
     end_time: str,
     gdf: gpd.GeoDataFrame,
     forcing_vars: list[str] = None,
 ) -> xr.Dataset:
     merged_data = None
-    if os.path.exists(
+    if os.path.exists(cached_nc_path):
         logger.info("Found cached nc file")
         # open the cached file and check that the time range is correct
         cached_data = xr.open_mfdataset(
-
+            cached_nc_path, parallel=True, engine="h5netcdf"
         )
         range_in_cache = cached_data.time[0].values <= np.datetime64(
             start_time
@@ -138,14 +138,14 @@ def get_forcing_data(
 
     if range_in_cache:
         logger.info("Time range is within cached data")
-        logger.debug(f"Opened cached nc file: [{
+        logger.debug(f"Opened cached nc file: [{cached_nc_path}]")
         merged_data = clip_dataset_to_bounds(
             cached_data, gdf.total_bounds, start_time, end_time
         )
         logger.debug("Clipped stores")
     else:
         logger.info("Time range is incorrect")
-        os.remove(
+        os.remove(cached_nc_path)
         logger.debug("Removed cached nc file")
 
     if merged_data is None:
@@ -155,7 +155,7 @@ def get_forcing_data(
         logger.debug("Got zarr stores")
         clipped_store = clip_dataset_to_bounds(lazy_store, gdf.total_bounds, start_time, end_time)
         logger.info("Clipped forcing data to bounds")
-        merged_data = compute_store(clipped_store,
+        merged_data = compute_store(clipped_store, cached_nc_path)
        logger.info("Forcing data loaded and cached")
         # close the event loop
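get_forcing_data() now takes the cache location explicitly instead of a file_paths object, which is what lets the new standalone forcing CLI (below) reuse it against an arbitrary geopackage. A minimal sketch of the new call shape (paths and dates are hypothetical):

    from pathlib import Path
    import geopandas as gpd
    from data_processing.zarr_utils import get_forcing_data

    gdf = gpd.read_file("cat-2853886_subset.gpkg", layer="divides")  # hypothetical subset
    merged = get_forcing_data(
        Path("forcings/raw_gridded_data.nc"),  # cache to read/write, the new first argument
        "2020-01-01 00:00",
        "2020-01-31 23:00",
        gdf,
    )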
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-template.ini
RENAMED

@@ -1,5 +1,6 @@
 forcing_file=BMI
-
+surface_water_partitioning_scheme=Schaake
+surface_runoff_scheme=GIUH
 
 # ----------------
 # State Parameters
@@ -40,13 +41,13 @@ alpha_fc=0.33
 # decimal fraction of maximum soil water storage (smcmax * depth) for the initial timestep
 soil_storage=0.05[m/m]
 # number of Nash lf reservoirs (optional, defaults to 2, ignored if storage values present)
-
+K_nash_subsurface=0.03[]
 # Nash Config param - primary reservoir
 K_lf=0.01[]
 # Nash Config param - secondary reservoir
-
+nash_storage_subsurface=0.0,0.0
 # Giuh ordinates in dt time steps
-giuh_ordinates=
+giuh_ordinates=0.55,0.25,0.2
 
 # ---------------------
 # Time Info
@@ -58,4 +59,4 @@ verbosity=0
 DEBUG=0
 # Parameter in the surface runoff parameterization
 # (https://mikejohnson51.github.io/hyAggregate/#Routing_Attributes)
-refkdt={refkdt}
+refkdt={refkdt}
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/ngen-routing-template.yaml
RENAMED

@@ -98,12 +98,12 @@ compute_parameters:
 output_parameters:
     #----------
     #test_output: outputs/lcr_flowveldepth.pkl
-    lite_restart:
-        #----------
-        lite_restart_output_directory: restart/
-    lakeout_output: lakeout/
+    # lite_restart:
+    #     #----------
+    #     lite_restart_output_directory: restart/
+    # lakeout_output: lakeout/
     stream_output:
         stream_output_directory: outputs/troute/
-        stream_output_time: {stream_output_time}
+        stream_output_time: -1 # -1 adds all outputs to a single file
         stream_output_type: ".nc" #please select only between netcdf '.nc' or '.csv' or '.pkl'
         stream_output_internal_frequency: 60 #[min] it should be order of 5 minutes. For instance if you want to output every hour put 60
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/data_sources/noah-owp-modular-init.namelist.input
RENAMED

@@ -28,10 +28,10 @@
 /
 
 &model_options
-  precip_phase_option =
-  snow_albedo_option =
-  dynamic_veg_option =
-  runoff_option =
+  precip_phase_option = 1
+  snow_albedo_option = 1 ! 1 = BATS, 2 = CLASS
+  dynamic_veg_option = 4
+  runoff_option = 3
   drainage_option = 8
   frozen_soil_option = 1
   dynamic_vic_option = 1
@@ -43,8 +43,8 @@
   soil_temp_boundary_option = 2
   supercooled_water_option = 1
   stomatal_resistance_option = 1
-  evap_srfc_resistance_option =
-  subsurface_option =
+  evap_srfc_resistance_option = 4
+  subsurface_option = 2
 /
 
 &structure
ngiab_data_preprocess-3.3.0/modules/ngiab_data_cli/forcing_cli.py
ADDED

@@ -0,0 +1,97 @@
+from data_sources.source_validation import validate_all
+from ngiab_data_cli.custom_logging import setup_logging
+from data_processing.forcings import compute_zonal_stats
+from data_processing.zarr_utils import get_forcing_data
+from data_processing.file_paths import file_paths
+import argparse
+import logging
+import time
+import xarray as xr
+import geopandas as gpd
+from datetime import datetime
+from pathlib import Path
+import shutil
+
+# Constants
+DATE_FORMAT = "%Y-%m-%d"  # used for datetime parsing
+DATE_FORMAT_HINT = "YYYY-MM-DD"  # printed in help message
+
+
+def parse_arguments() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Subsetting hydrofabrics, forcing generation, and realization creation"
+    )
+    parser.add_argument(
+        "-i",
+        "--input_file",
+        type=Path,
+        help="path to the input hydrofabric geopackage",
+        required=True,
+    )
+    parser.add_argument(
+        "-o",
+        "--output_file",
+        type=Path,
+        help="path to the forcing output file, e.g. /path/to/forcings.nc",
+        required=True,
+    )
+    parser.add_argument(
+        "--start_date",
+        "--start",
+        type=lambda s: datetime.strptime(s, DATE_FORMAT),
+        help=f"Start date for forcings/realization (format {DATE_FORMAT_HINT})",
+        required=True,
+    )
+    parser.add_argument(
+        "--end_date",
+        "--end",
+        type=lambda s: datetime.strptime(s, DATE_FORMAT),
+        help=f"End date for forcings/realization (format {DATE_FORMAT_HINT})",
+        required=True,
+    )
+    parser.add_argument(
+        "-D",
+        "--debug",
+        action="store_true",
+        help="enable debug logging",
+    )
+
+    return parser.parse_args()
+
+
+def main() -> None:
+    time.sleep(0.01)
+    setup_logging()
+    validate_all()
+    args = parse_arguments()
+    projection = xr.open_dataset(file_paths.template_nc, engine="h5netcdf").crs.esri_pe_string
+    logging.debug("Got projection from grid file")
+
+    gdf = gpd.read_file(args.input_file, layer="divides").to_crs(projection)
+    logging.debug(f"gdf bounds: {gdf.total_bounds}")
+
+    start_time = args.start_date.strftime("%Y-%m-%d %H:%M")
+    end_time = args.end_date.strftime("%Y-%m-%d %H:%M")
+
+    cached_nc_path = args.output_file.parent / (args.input_file.stem + "-raw-gridded-data.nc")
+    print(cached_nc_path)
+    merged_data = get_forcing_data(cached_nc_path, start_time, end_time, gdf)
+    forcing_working_dir = args.output_file.parent / (args.input_file.stem + "-working-dir")
+    if not forcing_working_dir.exists():
+        forcing_working_dir.mkdir(parents=True, exist_ok=True)
+
+    temp_dir = forcing_working_dir / "temp"
+    if not temp_dir.exists():
+        temp_dir.mkdir(parents=True, exist_ok=True)
+
+    compute_zonal_stats(gdf, merged_data, forcing_working_dir)
+
+    shutil.copy(forcing_working_dir / "forcings.nc", args.output_file)
+    logging.info(f"Created forcings file: {args.output_file}")
+    # remove the working directory
+    shutil.rmtree(forcing_working_dir)
+
+
+if __name__ == "__main__":
+    main()
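pyproject.toml (below) registers this module as a `forcings` console script, so once 3.3.0 is installed the gridded-to-catchment forcing pipeline can be run against any hydrofabric geopackage, e.g. `forcings -i cat-2853886_subset.gpkg -o ./forcings.nc --start 2022-01-01 --end 2022-02-28` (the geopackage name and dates are illustrative). The same invocation from Python, as a sketch:

    # Hypothetical programmatic invocation of the new CLI; argument values are examples only.
    import sys
    from ngiab_data_cli import forcing_cli

    sys.argv = ["forcings", "-i", "cat-2853886_subset.gpkg", "-o", "forcings.nc",
                "--start", "2022-01-01", "--end", "2022-02-28"]
    forcing_cli.main()  # main() parses sys.argv via parse_arguments()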
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ngiab_data_preprocess
-Version: 3.2.4
+Version: 3.3.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt
RENAMED

@@ -40,6 +40,7 @@ modules/map_app/templates/index.html
 modules/ngiab_data_cli/__main__.py
 modules/ngiab_data_cli/arguments.py
 modules/ngiab_data_cli/custom_logging.py
+modules/ngiab_data_cli/forcing_cli.py
 modules/ngiab_data_preprocess.egg-info/PKG-INFO
 modules/ngiab_data_preprocess.egg-info/SOURCES.txt
 modules/ngiab_data_preprocess.egg-info/dependency_links.txt
{ngiab_data_preprocess-3.2.4 → ngiab_data_preprocess-3.3.0}/pyproject.toml
RENAMED

@@ -12,7 +12,7 @@ exclude = ["tests*"]
 
 [project]
 name = "ngiab_data_preprocess"
-version = "v3.2.4"
+version = "v3.3.0"
 authors = [{ name = "Josh Cunningham", email = "jcunningham8@ua.edu" }]
 description = "Graphical Tools for creating Next Gen Water model input data."
 readme = "README.md"

@@ -43,7 +43,7 @@ dependencies = [
     "tqdm==4.66.4",
     "rich==13.7.1",
     "colorama==0.4.6",
-    "bokeh==3.5.1"
+    "bokeh==3.5.1",
 ]
 
 [project.optional-dependencies]

@@ -57,6 +57,7 @@ Issues = "https://github.com/CIROH-UA/NGIAB_data_preprocess/issues"
 [project.scripts]
 cli = "ngiab_data_cli.__main__:main"
 map_app = "map_app.__main__:main"
+forcings = "ngiab_data_cli.forcing_cli:main"
 
 [build-system]
 # scm adds files tracked by git to the package