ngiab-data-preprocess 4.1.1-py3-none-any.whl → 4.2.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- data_processing/datasets.py +32 -0
- data_processing/gpkg_utils.py +12 -12
- data_processing/graph_utils.py +8 -1
- data_processing/subset.py +6 -17
- ngiab_data_cli/__main__.py +5 -2
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/METADATA +1 -1
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/RECORD +11 -11
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/WHEEL +0 -0
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/entry_points.txt +0 -0
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/licenses/LICENSE +0 -0
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/top_level.txt +0 -0
data_processing/datasets.py
CHANGED
@@ -85,3 +85,35 @@ def load_aorc_zarr(start_year: int = None, end_year: int = None) -> xr.Dataset:
 
     validate_dataset_format(dataset)
     return dataset
+
+
+def load_swe_zarr() -> xr.Dataset:
+    """Load the swe zarr dataset from S3."""
+    s3_urls = [
+        f"s3://noaa-nwm-retrospective-3-0-pds/CONUS/zarr/ldasout.zarr"
+    ]
+    # default cache is readahead which is detrimental to performance in this case
+    fs = S3ParallelFileSystem(anon=True, default_cache_type="none")  # default_block_size
+    s3_stores = [s3fs.S3Map(url, s3=fs) for url in s3_urls]
+    # the cache option here just holds accessed data in memory to prevent s3 being queried multiple times
+    # most of the data is read once and written to disk but some of the coordinate data is read multiple times
+    dataset = xr.open_mfdataset(s3_stores, parallel=True, engine="zarr", cache=True)
+
+    # set the crs attribute to conform with the format
+    esri_pe_string = dataset.crs.esri_pe_string
+    dataset = dataset.drop_vars(["crs"])
+    dataset.attrs["crs"] = esri_pe_string
+    # drop everything except SNEQV
+    vars_to_drop = list(dataset.data_vars)
+    vars_to_drop.remove('SNEQV')
+    dataset = dataset.drop_vars(vars_to_drop)
+    dataset.attrs["name"] = "v3_swe_zarr"
+
+    # rename the data vars to work with ngen
+    variables = {
+        "SNEQV": "swe"
+    }
+    dataset = dataset.rename_vars(variables)
+
+    validate_dataset_format(dataset)
+    return dataset
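A minimal usage sketch of the new loader, assuming anonymous S3 access and the module layout recorded in this wheel; the printed values follow from the transformations above:

from data_processing.datasets import load_swe_zarr

swe = load_swe_zarr()
print(list(swe.data_vars))  # ["swe"], the lone SNEQV variable renamed for ngen
print(swe.attrs["name"])    # "v3_swe_zarr"
print(swe.attrs["crs"])     # the ESRI PE string lifted from the dropped crs variable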
data_processing/gpkg_utils.py
CHANGED
@@ -69,7 +69,7 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric) -> None:
     con.close()
 
 
-def create_empty_gpkg(gpkg: str) -> None:
+def create_empty_gpkg(gpkg: Path) -> None:
     """
     Create an empty geopackage with the necessary tables and indices.
     """
@@ -80,7 +80,7 @@ def create_empty_gpkg(gpkg: str) -> None:
     conn.executescript(sql_script)
 
 
-def add_triggers_to_gpkg(gpkg: str) -> None:
+def add_triggers_to_gpkg(gpkg: Path) -> None:
     """
     Adds geopackage triggers required to maintain spatial index integrity
     """
@@ -256,7 +256,7 @@ def insert_data(con: sqlite3.Connection, table: str, contents: List[Tuple]) -> N
     con.commit()
 
 
-def update_geopackage_metadata(gpkg: str) -> None:
+def update_geopackage_metadata(gpkg: Path) -> None:
     """
     Update the contents of the gpkg_contents table in the specified geopackage.
     """
@@ -318,10 +318,10 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     contents = source_db.execute(sql_query).fetchall()
 
     if table == "network":
-        # Look for the network entry that has a toid not in the flowpath or nexus tables
+        # Look for the network entry that has a toid not in the flowpath or nexus tables
         network_toids = [x[2] for x in contents]
         print(f"Network toids: {len(network_toids)}")
-        sql = "SELECT id FROM flowpaths"
+        sql = "SELECT id FROM flowpaths"
         flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
         print(f"Flowpath ids: {len(flowpath_ids)}")
         sql = "SELECT id FROM nexus"
@@ -342,8 +342,8 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
 
     dest_db.commit()
     source_db.close()
-    dest_db.close()
-
+    dest_db.close()
+
 
 def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name: Path) -> None:
     """
@@ -359,7 +359,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     source_db = sqlite3.connect(f"file:{hydrofabric}?mode=ro", uri=True)
     dest_db = sqlite3.connect(subset_gpkg_name)
 
-    table_keys = {"
+    table_keys = {"divide-attributes": "divide_id", "lakes": "poi_id"}
 
     if table == "lakes":
         # lakes subset we get from the pois table which was already subset by water body id
@@ -377,7 +377,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     if table in table_keys:
         key_name = table_keys[table]
         sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
-        contents = source_db.execute(sql_query).fetchall()
+        contents = source_db.execute(sql_query).fetchall()
 
     insert_data(dest_db, table, contents)
 
@@ -429,16 +429,16 @@ def get_table_crs(gpkg: str, table: str) -> str:
 
 def get_cat_from_gage_id(gage_id: str, gpkg: Path = file_paths.conus_hydrofabric) -> str:
     """
-    Get the
+    Get the catchment id associated with a gage id.
 
     Args:
         gage_id (str): The gage ID.
 
     Returns:
-        str: The
+        str: The catchment id of the watershed containing the gage ID.
 
     Raises:
-        IndexError: If
+        IndexError: If no catchment is found for the given gage ID.
 
     """
     gage_id = "".join([x for x in gage_id if x.isdigit()])
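The substantive change in this file is the signature tightening from str to pathlib.Path on three helpers. A minimal call sketch under that assumption (the file name is illustrative; sqlite3 accepts Path objects on modern Python, so behavior is unchanged):

from pathlib import Path
from data_processing.gpkg_utils import create_empty_gpkg, add_triggers_to_gpkg

gpkg = Path("my_subset.gpkg")  # callers now pass a Path rather than a str
create_empty_gpkg(gpkg)        # creates the required tables and indices
add_triggers_to_gpkg(gpkg)     # installs the spatial-index maintenance triggers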
data_processing/graph_utils.py
CHANGED
@@ -165,6 +165,10 @@ def get_upstream_cats(names: Union[str, List[str]]) -> Set[str]:
         if name in parent_ids:
             continue
         try:
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             node_index = graph.vs.find(cat=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
@@ -205,7 +209,10 @@ def get_upstream_ids(names: Union[str, List[str]], include_outlet: bool = True)
         if name in parent_ids:
             continue
         try:
-
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
                 parent_ids.add(graph.vs[node]["name"])
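Both hunks add the same id-aware vertex lookup: ids containing "cat" resolve against a "cat" vertex attribute, anything else against igraph's default "name" attribute. A self-contained sketch of that dispatch on a toy graph (vertex ids are illustrative):

import igraph as ig

graph = ig.Graph(directed=True)
graph.add_vertices(2)
graph.vs["name"] = ["wb-1001", "nex-2002"]  # flowpath / nexus ids
graph.vs["cat"] = ["cat-1001", ""]          # catchment ids where present

def find_node_index(name: str) -> int:
    # mirrors the added branch: catchment ids live in the "cat" attribute
    if "cat" in name:
        return graph.vs.find(cat=name).index
    return graph.vs.find(name=name).index

print(find_node_index("cat-1001"))  # 0
print(find_node_index("nex-2002"))  # 1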
data_processing/subset.py
CHANGED
@@ -31,7 +31,7 @@ subset_tables = [
 
 def create_subset_gpkg(
     ids: Union[List[str], str], hydrofabric: Path, output_gpkg_path: Path, is_vpu: bool = False
-)
+):
     # ids is a list of nexus and wb ids, or a single vpu id
     if not isinstance(ids, list):
         ids = [ids]
@@ -65,10 +65,11 @@ def subset_vpu(vpu_id: str, output_gpkg_path: Path, hydrofabric: Path = file_pat
 def subset(
     cat_ids: List[str],
     hydrofabric: Path = file_paths.conus_hydrofabric,
-    output_gpkg_path: Path =
-
-
-
+    output_gpkg_path: Path = Path(),
+    include_outlet: bool = True,
+):
+    print(cat_ids)
+    upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))
 
     if not output_gpkg_path:
         # if the name isn't provided, use the first upstream id
@@ -80,15 +81,3 @@ def subset(
     create_subset_gpkg(upstream_ids, hydrofabric, output_gpkg_path)
     logger.info(f"Subset complete for {len(upstream_ids)} features (catchments + nexuses)")
     logger.debug(f"Subset complete for {upstream_ids} catchments")
-
-
-def move_files_to_config_dir(subset_output_dir: str) -> None:
-    config_dir = subset_output_dir / "config"
-    config_dir.mkdir(parents=True, exist_ok=True)
-
-    files = [x for x in subset_output_dir.iterdir()]
-    for file in files:
-        if file.suffix in [".csv", ".json", ".geojson"]:
-            if "partitions" in file.name:
-                continue
-            os.system(f"mv {file} {config_dir}")
ngiab_data_cli/__main__.py
CHANGED
@@ -147,7 +147,10 @@ def main() -> None:
         logging.info("Subsetting complete.")
     else:
         logging.info(f"Subsetting hydrofabric")
-
+        include_outlet = True
+        if args.gage:
+            include_outlet = False
+        subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)
         logging.info("Subsetting complete.")
 
     if args.forcings:
@@ -237,7 +240,7 @@ def main() -> None:
 
     if args.vis:
         try:
-            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.
+            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.1'
             subprocess.run(command, shell=True)
         except:
             logging.error("Failed to launch docker container.")
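Condensed, the new gage handling is equivalent to the sketch below; args, paths, and feature_to_subset stand in for the CLI's parsed state:

# gage-based subsets exclude the outlet feature; everything else keeps it
include_outlet = not args.gage
subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)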
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.1.1
+Version: 4.2.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/RECORD
CHANGED

@@ -1,12 +1,12 @@
 data_processing/create_realization.py,sha256=AxUDVSORjwd6IAImZpgDr8GRHKSUQF8emQi1ikfIno4,14899
 data_processing/dataset_utils.py,sha256=4qmRmK3qMWPkiWZHXhXv3c-ISbtOwr7QhciEl2ok6Ao,7314
-data_processing/datasets.py,sha256=
+data_processing/datasets.py,sha256=BB_X0IvGnUrjBmj-wryn6-Nv5cd0Lje3zly3p896eU4,4822
 data_processing/file_paths.py,sha256=Cp3BxbO6sD50464ciTshjb3Z0vTvL0ZeSbOJgNdOqQ0,4698
 data_processing/forcings.py,sha256=6Q9fSXa67OrXg_r9li0K9widsclN2DQUp1-oUH2tZwo,19208
-data_processing/gpkg_utils.py,sha256=
-data_processing/graph_utils.py,sha256=
+data_processing/gpkg_utils.py,sha256=j-j6TkCwlkOzM5ATMHi44raYS-xrJMzk5V0XQZ_0dzQ,20220
+data_processing/graph_utils.py,sha256=I9IFkU08SkXb8pfVASTvkm7T8BYt-92CaNj7TV6wJFE,8244
 data_processing/s3fs_utils.py,sha256=WoqqwxkHpv0Qq4I5f5-gUZBCFtVQ68ehXbdOjWRKTDQ,2752
-data_processing/subset.py,sha256=
+data_processing/subset.py,sha256=GbvfAaSJTfEvYOAZ6K0OgFA81xG_Y5UxiGDxukxO9gE,2558
 data_sources/cfe-nowpm-realization-template.json,sha256=8an6q1drWD8wU1ocvdPab-GvZDvlQ-0di_-NommH3QI,3528
 data_sources/cfe-template.ini,sha256=6e5-usqjWtm3MWVvtm8CTeZTJJMxO1ZswkOXq0L9mnc,2033
 data_sources/em-catchment-template.yml,sha256=M08ixazEUHYI2PNavtI0xPZeSzcQ9bg2g0XzNT-8_u4,292
@@ -32,13 +32,13 @@ map_app/static/resources/light-style.json,sha256=DaE52qKpAkjiWSKY_z7LxreqA2rW4Zy
 map_app/static/resources/loading.gif,sha256=ggdkZf1AD7rSwIpSJwfiIqANgmVV1WHlxGuKxQKv7uY,72191
 map_app/static/resources/screenshot.jpg,sha256=Ia358aX-OHM9BP4B8lX05cLnguF2fHUIimno9bnFLYw,253730
 map_app/templates/index.html,sha256=ITRzQEYn15sFN4qRACjaNj5muhalOeP9n_IwcdsIlUs,6631
-ngiab_data_cli/__main__.py,sha256=
+ngiab_data_cli/__main__.py,sha256=V-g0dwa000e5Qye7PBMTtHTK7Cl7as3JS0UoAR0QrZ4,10503
 ngiab_data_cli/arguments.py,sha256=7ptImy8tLM1XvjfDr13tZszkjGVtenXo0KqllJeE3Mw,4372
 ngiab_data_cli/custom_logging.py,sha256=iS2XozaxudcxQj17qAsrCgbVK9LJAYAPmarJuVWJo1k,1280
 ngiab_data_cli/forcing_cli.py,sha256=lkcqWDk5H8IPyGv0DwLIZMQldqTUXpfwSX0C_RIuIJ8,3890
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
+ngiab_data_preprocess-4.2.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
+ngiab_data_preprocess-4.2.0.dist-info/METADATA,sha256=Y2NVmd92S38QMwKXEkLvLKyWQ2IGy25okHV8sDy6c2c,10310
+ngiab_data_preprocess-4.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ngiab_data_preprocess-4.2.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
+ngiab_data_preprocess-4.2.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
+ngiab_data_preprocess-4.2.0.dist-info/RECORD,,
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/WHEEL
RENAMED

File without changes
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/entry_points.txt
RENAMED

File without changes

{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/licenses/LICENSE
RENAMED

File without changes

{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/top_level.txt
RENAMED

File without changes