ngiab-data-preprocess 4.2.0__py3-none-any.whl → 4.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_processing/create_realization.py +28 -30
- data_processing/forcings.py +68 -70
- data_processing/gpkg_utils.py +17 -9
- data_processing/subset.py +4 -4
- map_app/static/css/main.css +19 -7
- map_app/static/css/toggle.css +8 -5
- map_app/static/js/main.js +44 -21
- map_app/templates/index.html +22 -9
- map_app/views.py +7 -7
- {ngiab_data_preprocess-4.2.0.dist-info → ngiab_data_preprocess-4.2.2.dist-info}/METADATA +1 -3
- {ngiab_data_preprocess-4.2.0.dist-info → ngiab_data_preprocess-4.2.2.dist-info}/RECORD +15 -17
- {ngiab_data_preprocess-4.2.0.dist-info → ngiab_data_preprocess-4.2.2.dist-info}/WHEEL +1 -1
- map_app/static/resources/dark-style.json +0 -11068
- map_app/static/resources/light-style.json +0 -11068
- {ngiab_data_preprocess-4.2.0.dist-info → ngiab_data_preprocess-4.2.2.dist-info}/entry_points.txt +0 -0
- {ngiab_data_preprocess-4.2.0.dist-info → ngiab_data_preprocess-4.2.2.dist-info}/licenses/LICENSE +0 -0
- {ngiab_data_preprocess-4.2.0.dist-info → ngiab_data_preprocess-4.2.2.dist-info}/top_level.txt +0 -0

data_processing/create_realization.py
CHANGED
@@ -1,29 +1,30 @@
 import json
+import logging
 import multiprocessing
+import shutil
 import sqlite3
+from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-import shutil
-import requests

 import pandas
+import requests
 import s3fs
 import xarray as xr
-import logging
-from collections import defaultdict
 from dask.distributed import Client, LocalCluster
 from data_processing.file_paths import file_paths
 from data_processing.gpkg_utils import (
     GeoPackage,
+    get_cat_to_nex_flowpairs,
     get_cat_to_nhd_feature_id,
     get_table_crs_short,
-    get_cat_to_nex_flowpairs,
 )
-from tqdm.rich import tqdm
 from pyproj import Transformer
+from tqdm.rich import tqdm

 logger = logging.getLogger(__name__)

+
 def get_approximate_gw_storage(paths: file_paths, start_date: datetime):
     # get the gw levels from the NWM output on a given start date
     # this kind of works in place of warmstates for now
@@ -78,7 +79,9 @@ def make_cfe_config(
         slope=row["mean.slope_1km"],
         smcmax=row["mean.smcmax_soil_layers_stag=2"],
         smcwlt=row["mean.smcwlt_soil_layers_stag=2"],
-        max_gw_storage=row["mean.Zmax"]/1000
+        max_gw_storage=row["mean.Zmax"] / 1000
+        if row["mean.Zmax"] is not None
+        else "0.011[m]",  # mean.Zmax is in mm!
         gw_Coeff=row["mean.Coeff"] if row["mean.Coeff"] is not None else "0.0018[m h-1]",
         gw_Expon=row["mode.Expon"],
         gw_storage="{:.5}".format(gw_storage_ratio),
@@ -92,7 +95,6 @@ def make_cfe_config(
 def make_noahowp_config(
     base_dir: Path, divide_conf_df: pandas.DataFrame, start_time: datetime, end_time: datetime
 ) -> None:
-
     divide_conf_df.set_index("divide_id", inplace=True)
     start_datetime = start_time.strftime("%Y%m%d%H%M")
     end_datetime = end_time.strftime("%Y%m%d%H%M")
@@ -110,8 +112,8 @@ def make_noahowp_config(
             end_datetime=end_datetime,
             lat=divide_conf_df.loc[divide, "latitude"],
             lon=divide_conf_df.loc[divide, "longitude"],
-            terrain_slope=
-            azimuth=
+            terrain_slope=divide_conf_df.loc[divide, "mean.slope_1km"],
+            azimuth=divide_conf_df.loc[divide, "circ_mean.aspect"],
             ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),
             IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),
         )
@@ -182,6 +184,7 @@ def get_model_attributes_pyproj(hydrofabric: Path):

     return divide_conf_df

+
 def get_model_attributes(hydrofabric: Path):
     try:
         with GeoPackage(hydrofabric) as conn:
@@ -205,30 +208,31 @@ def get_model_attributes(hydrofabric: Path):
             )
     except sqlite3.OperationalError:
         with sqlite3.connect(hydrofabric) as conn:
-            conf_df = pandas.read_sql_query(
+            conf_df = pandas.read_sql_query(
+                "SELECT* FROM 'divide-attributes';",
+                conn,
+            )
         source_crs = get_table_crs_short(hydrofabric, "divides")
         transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
-        lon, lat = transformer.transform(
-            conf_df["centroid_x"].values, conf_df["centroid_y"].values
-        )
+        lon, lat = transformer.transform(conf_df["centroid_x"].values, conf_df["centroid_y"].values)
         conf_df["longitude"] = lon
         conf_df["latitude"] = lat

     conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
     return conf_df

+
 def make_em_config(
     hydrofabric: Path,
     output_dir: Path,
     template_path: Path = file_paths.template_em_config,
 ):
-
     # test if modspatialite is available
     try:
         divide_conf_df = get_model_attributes_modspatialite(hydrofabric)
     except Exception as e:
         logger.warning(f"mod_spatialite not available, using pyproj instead: {e}")
-        logger.warning(
+        logger.warning("Install mod_spatialite for improved performance")
         divide_conf_df = get_model_attributes_pyproj(hydrofabric)

     cat_config_dir = output_dir / "cat_config" / "empirical_model"
@@ -256,7 +260,6 @@ def make_em_config(
 def configure_troute(
     cat_id: str, config_dir: Path, start_time: datetime, end_time: datetime
 ) -> int:
-
     with open(file_paths.template_troute_config, "r") as file:
         troute_template = file.read()
     time_step_size = 300
@@ -269,7 +272,7 @@ def configure_troute(
         geo_file_path=f"./config/{cat_id}_subset.gpkg",
         start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
         nts=nts,
-        max_loop_size=nts,
+        max_loop_size=nts,
     )

     with open(config_dir / "troute.yaml", "w") as file:
@@ -301,9 +304,7 @@ def create_em_realization(cat_id: str, start_time: datetime, end_time: datetime)
         f.write(em_config)

     configure_troute(cat_id, paths.config_dir, start_time, end_time)
-    make_ngen_realization_json(
-        paths.config_dir, template_path, start_time, end_time
-    )
+    make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)
     make_em_config(paths.geopackage_path, paths.config_dir)
     # create some partitions for parallelization
     paths.setup_run_folders()
@@ -324,15 +325,14 @@ def create_realization(
     if gage_id is not None:
         # try and download s3:communityhydrofabric/hydrofabrics/community/gage_parameters/gage_id
         # if it doesn't exist, use the default
-
-
-
+        url = f"https://communityhydrofabric.s3.us-east-1.amazonaws.com/hydrofabrics/community/gage_parameters/{gage_id}.json"
+        response = requests.get(url)
+        if response.status_code == 200:
             new_template = requests.get(url).json()
-            template_path = paths.config_dir / "
+            template_path = paths.config_dir / "downloaded_params.json"
             with open(template_path, "w") as f:
                 json.dump(new_template, f)
-
-        logger.warning(f"Failed to download gage parameters")
+            logger.info(f"downloaded calibrated parameters for {gage_id}")

     conf_df = get_model_attributes(paths.geopackage_path)

@@ -347,9 +347,7 @@ def create_realization(

     configure_troute(cat_id, paths.config_dir, start_time, end_time)

-    make_ngen_realization_json(
-        paths.config_dir, template_path, start_time, end_time
-    )
+    make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)

     # create some partitions for parallelization
     paths.setup_run_folders()

data_processing/forcings.py
CHANGED
@@ -3,32 +3,29 @@ import multiprocessing
 import os
 import time
 import warnings
-from datetime import datetime
 from functools import partial
 from math import ceil
 from multiprocessing import shared_memory
 from pathlib import Path
-
-from dask.distributed import Client, LocalCluster
+from typing import Tuple

 import geopandas as gpd
 import numpy as np
 import pandas as pd
 import psutil
 import xarray as xr
-from
+from dask.distributed import Client, LocalCluster
 from data_processing.dataset_utils import validate_dataset_format
+from data_processing.file_paths import file_paths
 from exactextract import exact_extract
 from exactextract.raster import NumPyRasterSource
 from rich.progress import (
-    Progress,
     BarColumn,
+    Progress,
     TextColumn,
     TimeElapsedColumn,
     TimeRemainingColumn,
 )
-from typing import Tuple
-

 logger = logging.getLogger(__name__)
 # Suppress the specific warning from numpy to keep the cli output clean
@@ -40,13 +37,13 @@ warnings.filterwarnings(
 )


-def weighted_sum_of_cells(
-
-
-
+def weighted_sum_of_cells(
+    flat_raster: np.ndarray, cell_ids: np.ndarray, factors: np.ndarray
+) -> np.ndarray:
+    """
     Take an average of each forcing variable in a catchment. Create an output
-    array initialized with zeros, and then sum up the forcing variable and
-    divide by the sum of the cell weights to get an averaged forcing variable
+    array initialized with zeros, and then sum up the forcing variable and
+    divide by the sum of the cell weights to get an averaged forcing variable
     for the entire catchment.

     Parameters
@@ -65,7 +62,7 @@ def weighted_sum_of_cells(flat_raster: np.ndarray,
     An one-dimensional array, where each element corresponds to a timestep.
     Each element contains the averaged forcing value for the whole catchment
     over one timestep.
-
+    """
     result = np.zeros(flat_raster.shape[0])
     result = np.sum(flat_raster[:, cell_ids] * factors, axis=1)
     sum_of_weights = np.sum(factors)
@@ -73,12 +70,10 @@ def weighted_sum_of_cells(flat_raster: np.ndarray,
     return result


-def get_cell_weights(raster: xr.Dataset,
-
-
-
-    Get the cell weights (coverage) for each cell in a divide. Coverage is
-    defined as the fraction (a float in [0,1]) of a raster cell that overlaps
+def get_cell_weights(raster: xr.Dataset, gdf: gpd.GeoDataFrame, wkt: str) -> pd.DataFrame:
+    """
+    Get the cell weights (coverage) for each cell in a divide. Coverage is
+    defined as the fraction (a float in [0,1]) of a raster cell that overlaps
     with the polygon in the passed gdf.

     Parameters
@@ -96,7 +91,7 @@ def get_cell_weights(raster: xr.Dataset,
     pd.DataFrame
         DataFrame indexed by divide_id that contains information about coverage
         for each raster cell in gridded forcing file.
-
+    """
     xmin = raster.x[0]
     xmax = raster.x[-1]
     ymin = raster.y[0]
@@ -116,15 +111,17 @@ def get_cell_weights(raster: xr.Dataset,


 def add_APCP_SURFACE_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
-
+    """Convert precipitation value to correct units."""
     # precip_rate is mm/s
     # cfe says input atmosphere_water__liquid_equivalent_precipitation_rate is mm/h
     # nom says prcpnonc input is mm/s
     # technically should be kg/m^2/s at 1kg = 1l it equates to mm/s
     # nom says qinsur output is m/s, hopefully qinsur is converted to mm/h by ngen
     dataset["APCP_surface"] = dataset["precip_rate"] * 3600
-    dataset["APCP_surface"].attrs["units"] = "mm h^-1"
-    dataset["APCP_surface"].attrs["source_note"] =
+    dataset["APCP_surface"].attrs["units"] = "mm h^-1"  # ^-1 notation copied from source data
+    dataset["APCP_surface"].attrs["source_note"] = (
+        "This is just the precip_rate variable converted to mm/h by multiplying by 3600"
+    )
     return dataset


@@ -132,14 +129,14 @@ def add_precip_rate_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
     # the inverse of the function above
     dataset["precip_rate"] = dataset["APCP_surface"] / 3600
     dataset["precip_rate"].attrs["units"] = "mm s^-1"
-    dataset["precip_rate"].attrs[
-        "
-
+    dataset["precip_rate"].attrs["source_note"] = (
+        "This is just the APCP_surface variable converted to mm/s by dividing by 3600"
+    )
     return dataset


 def get_index_chunks(data: xr.DataArray) -> list[tuple[int, int]]:
-
+    """
     Take a DataArray and calculate the start and end index for each chunk based
     on the available memory.

@@ -153,7 +150,7 @@ def get_index_chunks(data: xr.DataArray) -> list[tuple[int, int]]:
     list[Tuple[int, int]]
         Each element in the list represents a chunk of data. The tuple within
         the chunk indicates the start index and end index of the chunk.
-
+    """
     array_memory_usage = data.nbytes
     free_memory = psutil.virtual_memory().available * 0.8  # 80% of available memory
     # limit the chunk to 20gb, makes things more stable
@@ -166,15 +163,13 @@ def get_index_chunks(data: xr.DataArray) -> list[tuple[int, int]]:
     return index_chunks


-def create_shared_memory(
-
-
-
-
-    '''
-    Create a shared memory object so that multiple processes can access loaded
+def create_shared_memory(
+    lazy_array: xr.Dataset,
+) -> Tuple[shared_memory.SharedMemory, np.dtype, np.dtype]:
+    """
+    Create a shared memory object so that multiple processes can access loaded
     data.
-
+
     Parameters
     ----------
     lazy_array : xr.Dataset
@@ -183,22 +178,22 @@ def create_shared_memory(lazy_array: xr.Dataset) -> Tuple[
     Returns
     -------
     shared_memory.SharedMemory
-        A specific block of memory allocated by the OS of the size of
+        A specific block of memory allocated by the OS of the size of
         lazy_array.
     np.dtype.shape
         A shape object with dimensions (# timesteps, # of raster cells) in
         reference to lazy_array.
     np.dtype
         Data type of objects in lazy_array.
-
-    logger.debug(f"Creating shared memory size {lazy_array.nbytes/ 10**6} Mb.")
+    """
+    logger.debug(f"Creating shared memory size {lazy_array.nbytes / 10**6} Mb.")
     shm = shared_memory.SharedMemory(create=True, size=lazy_array.nbytes)
     shared_array = np.ndarray(lazy_array.shape, dtype=np.float32, buffer=shm.buf)
     # if your data is not float32, xarray will do an automatic conversion here
     # which consumes a lot more memory, forcings downloaded with this tool will work
     for start, end in get_index_chunks(lazy_array):
-
-
+        # copy data from lazy to shared memory one chunk at a time
+        shared_array[start:end] = lazy_array[start:end]

     time, x, y = shared_array.shape
     shared_array = shared_array.reshape(time, -1)
@@ -206,14 +201,16 @@ def create_shared_memory(lazy_array: xr.Dataset) -> Tuple[
     return shm, shared_array.shape, shared_array.dtype


-def process_chunk_shared(
-
-
-
-
-
-
-
+def process_chunk_shared(
+    variable: str,
+    times: np.ndarray,
+    shm_name: str,
+    shape: np.dtype.shape,
+    dtype: np.dtype,
+    chunk: gpd.GeoDataFrame,
+) -> xr.DataArray:
+    """
+    Process the gridded forcings chunk loaded into a SharedMemory block.

     Parameters
     ----------
@@ -235,7 +232,7 @@ def process_chunk_shared(variable: str,
     -------
     xr.DataArray
         Averaged forcings data for each timestep for each catchment.
-
+    """
     existing_shm = shared_memory.SharedMemory(name=shm_name)
     raster = np.ndarray(shape, dtype=dtype, buffer=existing_shm.buf)
     results = []
@@ -256,10 +253,10 @@ def process_chunk_shared(variable: str,
     return xr.concat(results, dim="catchment")


-def get_cell_weights_parallel(
-
-
-
+def get_cell_weights_parallel(
+    gdf: gpd.GeoDataFrame, input_forcings: xr.Dataset, num_partitions: int
+) -> pd.DataFrame:
+    """
     Execute get_cell_weights with multiprocessing, with chunking for the passed
     GeoDataFrame to conserve memory usage.

@@ -277,7 +274,7 @@ def get_cell_weights_parallel(gdf: gpd.GeoDataFrame,
     pd.DataFrame
         DataFrame indexed by divide_id that contains information about coverage
         for each raster cell and each timestep in gridded forcing file.
-
+    """
     gdf_chunks = np.array_split(gdf, num_partitions)
     wkt = gdf.crs.to_wkt()
     one_timestep = input_forcings.isel(time=0).compute()
@@ -286,20 +283,21 @@ def get_cell_weights_parallel(gdf: gpd.GeoDataFrame,
         catchments = pool.starmap(get_cell_weights, args)
     return pd.concat(catchments)

+
 def get_units(dataset: xr.Dataset) -> dict:
-
+    """
     Return dictionary of units for each variable in dataset.
-
+
     Parameters
     ----------
     dataset : xr.Dataset
         Dataset with variables and units.
-
+
     Returns
     -------
-    dict
+    dict
         {variable name: unit}
-
+    """
     units = {}
     for var in dataset.data_vars:
         if dataset[var].attrs["units"]:
@@ -310,9 +308,9 @@ def get_units(dataset: xr.Dataset) -> dict:
 def compute_zonal_stats(
     gdf: gpd.GeoDataFrame, gridded_data: xr.Dataset, forcings_dir: Path
 ) -> None:
-
-    Compute zonal statistics in parallel for all timesteps over all desired
-    catchments. Create chunks of catchments and within those, chunks of
+    """
+    Compute zonal statistics in parallel for all timesteps over all desired
+    catchments. Create chunks of catchments and within those, chunks of
     timesteps for memory management.

     Parameters
@@ -323,7 +321,7 @@ def compute_zonal_stats(
         Gridded forcing data that intersects with desired catchments.
     forcings_dir : Path
         Path to directory where outputs are to be stored.
-
+    """
     logger.info("Computing zonal stats in parallel for all timesteps")
     timer_start = time.time()
     num_partitions = multiprocessing.cpu_count() - 1
@@ -414,7 +412,7 @@ def compute_zonal_stats(


 def write_outputs(forcings_dir: Path, units: dict) -> None:
-
+    """
     Write outputs to disk in the form of a NetCDF file, using dask clusters to
     facilitate parallel computing.

@@ -423,13 +421,13 @@ def write_outputs(forcings_dir: Path, units: dict) -> None:
     forcings_dir : Path
         Path to directory where outputs are to be stored.
     variables : dict
-        Preset dictionary where the keys are forcing variable names and the
+        Preset dictionary where the keys are forcing variable names and the
         values are units.
     units : dict
-        Dictionary where the keys are forcing variable names and the values are
+        Dictionary where the keys are forcing variable names and the values are
         units. Differs from variables, as this dictionary depends on the gridded
         forcing dataset.
-
+    """

     # start a dask cluster if there isn't one already running
     try:
@@ -508,7 +506,7 @@ def setup_directories(cat_id: str) -> file_paths:
 def create_forcings(dataset: xr.Dataset, output_folder_name: str) -> None:
     validate_dataset_format(dataset)
     forcing_paths = setup_directories(output_folder_name)
-
+    logger.debug(f"forcing path {output_folder_name} {forcing_paths.forcings_dir}")
     gdf = gpd.read_file(forcing_paths.geopackage_path, layer="divides")
     logger.debug(f"gdf bounds: {gdf.total_bounds}")
     gdf = gdf.to_crs(dataset.crs)

data_processing/gpkg_utils.py
CHANGED
@@ -290,6 +290,7 @@ def update_geopackage_metadata(gpkg: Path) -> None:

     con.close()

+
 def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_name: Path) -> None:
     """
     Subset the specified table from the hydrofabric database by vpuid and save it to the subset geopackage.
@@ -306,9 +307,9 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     dest_db = sqlite3.connect(subset_gpkg_name)

     if vpu == "03":
-        vpus = ["03N","03S","03W"]
+        vpus = ["03N", "03S", "03W"]
     elif vpu == "10":
-        vpus = ["10L","10U"]
+        vpus = ["10L", "10U"]
     else:
         vpus = [vpu]

@@ -320,22 +321,21 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     if table == "network":
         # Look for the network entry that has a toid not in the flowpath or nexus tables
         network_toids = [x[2] for x in contents]
-
+        logger.debug(f"Network toids: {len(network_toids)}")
         sql = "SELECT id FROM flowpaths"
         flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
-
+        logger.debug(f"Flowpath ids: {len(flowpath_ids)}")
         sql = "SELECT id FROM nexus"
         nexus_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
-
+        logger.debug(f"Nexus ids: {len(nexus_ids)}")
         bad_ids = set(network_toids) - set(flowpath_ids + nexus_ids)
-
-
+        logger.debug(bad_ids)
+        logger.info(f"Removing {len(bad_ids)} network entries that are not in flowpaths or nexuses")
         # id column is second after fid
         contents = [x for x in contents if x[1] not in bad_ids]

     insert_data(dest_db, table, contents)

-
     if table in get_feature_tables(file_paths.conus_hydrofabric):
         fids = [str(x[0]) for x in contents]
         copy_rTree_tables(table, fids, source_db, dest_db)
@@ -372,6 +372,14 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
         sql_query = "SELECT divide_id FROM 'divides'"
         contents = dest_db.execute(sql_query).fetchall()
         ids = [str(x[0]) for x in contents]
+
+    if table == "nexus":
+        # add the nexuses in the toid column from the flowpaths table
+        sql_query = "SELECT toid FROM 'flowpaths'"
+        contents = dest_db.execute(sql_query).fetchall()
+        new_ids = [str(x[0]) for x in contents]
+        ids.extend(new_ids)
+
     ids = [f"'{x}'" for x in ids]
     key_name = "id"
     if table in table_keys:
@@ -409,6 +417,7 @@ def get_table_crs_short(gpkg: str, table: str) -> str:
         crs = con.execute(sql_query).fetchone()[0]
     return crs

+
 def get_table_crs(gpkg: str, table: str) -> str:
     """
     Get the CRS of the specified table in the specified geopackage.
@@ -510,7 +519,6 @@ def get_available_tables(gpkg: Path) -> List[str]:


 def get_cat_to_nhd_feature_id(gpkg: Path = file_paths.conus_hydrofabric) -> dict:
-
     available_tables = get_available_tables(gpkg)
     possible_tables = ["flowpath_edge_list", "network"]

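
Note: the new nexus branch in subset_table above pulls in every nexus id referenced by the toid column of the subset flowpaths, so gage-based delineations keep their downstream nexuses. A minimal sketch against an in-memory database; the miniature schema and sample rows are hypothetical stand-ins for the hydrofabric geopackage:

import sqlite3

# hypothetical miniature 'flowpaths' table standing in for the subset geopackage
db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE flowpaths (id TEXT, toid TEXT)")
db.executemany(
    "INSERT INTO flowpaths VALUES (?, ?)",
    [("wb-1", "nex-10"), ("wb-2", "nex-11")],
)

ids = ["nex-10"]  # nexus ids requested directly
# mirror the new branch: extend with every toid referenced by the flowpaths table
contents = db.execute("SELECT toid FROM 'flowpaths'").fetchall()
ids.extend(str(row[0]) for row in contents)
# quote for the SQL IN (...) clause used later in subset_table
ids = [f"'{x}'" for x in ids]
print(ids)  # includes nex-11 even though it was not requested explicitly
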

data_processing/subset.py
CHANGED
@@ -22,7 +22,7 @@ subset_tables = [
     "flowpath-attributes-ml",
     "flowpaths",
     "hydrolocations",
-    "nexus",
+    "nexus",  # depends on flowpaths in some cases e.g. gage delineation
     "pois",  # requires flowpaths
     "lakes",  # requires pois
     "network",
@@ -52,8 +52,9 @@ def create_subset_gpkg(
     update_geopackage_metadata(output_gpkg_path)


-def subset_vpu(
-
+def subset_vpu(
+    vpu_id: str, output_gpkg_path: Path, hydrofabric: Path = file_paths.conus_hydrofabric
+):
     if output_gpkg_path.exists():
         os.remove(output_gpkg_path)

@@ -68,7 +69,6 @@ def subset(
     output_gpkg_path: Path = Path(),
     include_outlet: bool = True,
 ):
-    print(cat_ids)
     upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))

     if not output_gpkg_path:

map_app/static/css/main.css
CHANGED
@@ -109,7 +109,7 @@ h2 {
 }

 #selected-basins,
-#cli-command {
+#cli-command,#cli-prefix {
     background: var(--code-bg);
     padding: 16px;
     border-radius: var(--border-radius);
@@ -119,6 +119,7 @@ h2 {
     color: var(--text-color);
 }

+
 button {
     background-color: var(--primary-color);
     color: light-dark(white, #f1f5f9);
@@ -204,6 +205,11 @@ input[type="datetime-local"] {
     display: inline-block;
 }

+#command-builder{
+    display: inline-block;
+    padding:16px ;
+}
+
 .command-container {
     background: var(--surface-color);
     border: 1px solid var(--border-color);
@@ -222,7 +228,7 @@ input[type="datetime-local"] {
     border-top-right-radius: var(--border-radius);
 }

-.command-header
+.command-header>span {
     font-size: 0.875rem;
     color: var(--secondary-text);
     font-weight: 500;
@@ -254,7 +260,8 @@ input[type="datetime-local"] {
 }

 .command-content {
-
+    display:inline;
+    padding: 0px !important;
     background: var(--code-bg);
     font-family: 'Monaco', 'Consolas', monospace;
     font-size: 0.875rem;
@@ -263,6 +270,11 @@ input[type="datetime-local"] {
     border-bottom-left-radius: var(--border-radius);
     border-bottom-right-radius: var(--border-radius);
     color: var(--text-color);
+
+}
+
+#cli-prefix{
+    opacity: 0;
 }

 .copy-button.copied {
@@ -281,17 +293,17 @@ input[type="datetime-local"] {
     body {
         padding: 16px;
     }
-
+
     main {
         width: 90vw;
     }
-
+
     .time-input {
         flex-direction: column;
         align-items: flex-start;
     }
-
+
     input[type="datetime-local"] {
         width: 100%;
     }
-}
+}