ngiab-data-preprocess 4.1.1__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,3 +85,35 @@ def load_aorc_zarr(start_year: int = None, end_year: int = None) -> xr.Dataset:
85
85
 
86
86
  validate_dataset_format(dataset)
87
87
  return dataset
88
+
89
+
90
+ def load_swe_zarr() -> xr.Dataset:
91
+ """Load the swe zarr dataset from S3."""
92
+ s3_urls = [
93
+ f"s3://noaa-nwm-retrospective-3-0-pds/CONUS/zarr/ldasout.zarr"
94
+ ]
95
+ # default cache is readahead which is detrimental to performance in this case
96
+ fs = S3ParallelFileSystem(anon=True, default_cache_type="none") # default_block_size
97
+ s3_stores = [s3fs.S3Map(url, s3=fs) for url in s3_urls]
98
+ # the cache option here just holds accessed data in memory to prevent s3 being queried multiple times
99
+ # most of the data is read once and written to disk but some of the coordinate data is read multiple times
100
+ dataset = xr.open_mfdataset(s3_stores, parallel=True, engine="zarr", cache=True)
101
+
102
+ # set the crs attribute to conform with the format
103
+ esri_pe_string = dataset.crs.esri_pe_string
104
+ dataset = dataset.drop_vars(["crs"])
105
+ dataset.attrs["crs"] = esri_pe_string
106
+ # drop everything except SNEQV
107
+ vars_to_drop = list(dataset.data_vars)
108
+ vars_to_drop.remove('SNEQV')
109
+ dataset = dataset.drop_vars(vars_to_drop)
110
+ dataset.attrs["name"] = "v3_swe_zarr"
111
+
112
+ # rename the data vars to work with ngen
113
+ variables = {
114
+ "SNEQV": "swe"
115
+ }
116
+ dataset = dataset.rename_vars(variables)
117
+
118
+ validate_dataset_format(dataset)
119
+ return dataset
@@ -69,7 +69,7 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric) -> None:
69
69
  con.close()
70
70
 
71
71
 
72
- def create_empty_gpkg(gpkg: str) -> None:
72
+ def create_empty_gpkg(gpkg: Path) -> None:
73
73
  """
74
74
  Create an empty geopackage with the necessary tables and indices.
75
75
  """
@@ -80,7 +80,7 @@ def create_empty_gpkg(gpkg: str) -> None:
80
80
  conn.executescript(sql_script)
81
81
 
82
82
 
83
- def add_triggers_to_gpkg(gpkg: str) -> None:
83
+ def add_triggers_to_gpkg(gpkg: Path) -> None:
84
84
  """
85
85
  Adds geopackage triggers required to maintain spatial index integrity
86
86
  """
@@ -256,7 +256,7 @@ def insert_data(con: sqlite3.Connection, table: str, contents: List[Tuple]) -> N
256
256
  con.commit()
257
257
 
258
258
 
259
- def update_geopackage_metadata(gpkg: str) -> None:
259
+ def update_geopackage_metadata(gpkg: Path) -> None:
260
260
  """
261
261
  Update the contents of the gpkg_contents table in the specified geopackage.
262
262
  """
@@ -318,10 +318,10 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
318
318
  contents = source_db.execute(sql_query).fetchall()
319
319
 
320
320
  if table == "network":
321
- # Look for the network entry that has a toid not in the flowpath or nexus tables
321
+ # Look for the network entry that has a toid not in the flowpath or nexus tables
322
322
  network_toids = [x[2] for x in contents]
323
323
  print(f"Network toids: {len(network_toids)}")
324
- sql = "SELECT id FROM flowpaths"
324
+ sql = "SELECT id FROM flowpaths"
325
325
  flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
326
326
  print(f"Flowpath ids: {len(flowpath_ids)}")
327
327
  sql = "SELECT id FROM nexus"
@@ -342,8 +342,8 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
342
342
 
343
343
  dest_db.commit()
344
344
  source_db.close()
345
- dest_db.close()
346
-
345
+ dest_db.close()
346
+
347
347
 
348
348
  def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name: Path) -> None:
349
349
  """
@@ -359,7 +359,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
359
359
  source_db = sqlite3.connect(f"file:{hydrofabric}?mode=ro", uri=True)
360
360
  dest_db = sqlite3.connect(subset_gpkg_name)
361
361
 
362
- table_keys = {"divides": "toid", "divide-attributes": "divide_id", "lakes": "poi_id"}
362
+ table_keys = {"divide-attributes": "divide_id", "lakes": "poi_id"}
363
363
 
364
364
  if table == "lakes":
365
365
  # lakes subset we get from the pois table which was already subset by water body id
@@ -377,7 +377,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
377
377
  if table in table_keys:
378
378
  key_name = table_keys[table]
379
379
  sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
380
- contents = source_db.execute(sql_query).fetchall()
380
+ contents = source_db.execute(sql_query).fetchall()
381
381
 
382
382
  insert_data(dest_db, table, contents)
383
383
 
@@ -429,16 +429,16 @@ def get_table_crs(gpkg: str, table: str) -> str:
429
429
 
430
430
  def get_cat_from_gage_id(gage_id: str, gpkg: Path = file_paths.conus_hydrofabric) -> str:
431
431
  """
432
- Get the nexus id of associated with a gage id.
432
+ Get the catchment id associated with a gage id.
433
433
 
434
434
  Args:
435
435
  gage_id (str): The gage ID.
436
436
 
437
437
  Returns:
438
- str: The nexus id of the watershed containing the gage ID.
438
+ str: The catchment id of the watershed containing the gage ID.
439
439
 
440
440
  Raises:
441
- IndexError: If nexus is found for the given gage ID.
441
+ IndexError: If no catchment is found for the given gage ID.
442
442
 
443
443
  """
444
444
  gage_id = "".join([x for x in gage_id if x.isdigit()])
@@ -165,6 +165,10 @@ def get_upstream_cats(names: Union[str, List[str]]) -> Set[str]:
165
165
  if name in parent_ids:
166
166
  continue
167
167
  try:
168
+ if "cat" in name:
169
+ node_index = graph.vs.find(cat=name).index
170
+ else:
171
+ node_index = graph.vs.find(name=name).index
168
172
  node_index = graph.vs.find(cat=name).index
169
173
  upstream_nodes = graph.subcomponent(node_index, mode="IN")
170
174
  for node in upstream_nodes:
@@ -205,7 +209,10 @@ def get_upstream_ids(names: Union[str, List[str]], include_outlet: bool = True)
205
209
  if name in parent_ids:
206
210
  continue
207
211
  try:
208
- node_index = graph.vs.find(name=name).index
212
+ if "cat" in name:
213
+ node_index = graph.vs.find(cat=name).index
214
+ else:
215
+ node_index = graph.vs.find(name=name).index
209
216
  upstream_nodes = graph.subcomponent(node_index, mode="IN")
210
217
  for node in upstream_nodes:
211
218
  parent_ids.add(graph.vs[node]["name"])
data_processing/subset.py CHANGED
@@ -31,7 +31,7 @@ subset_tables = [
31
31
 
32
32
  def create_subset_gpkg(
33
33
  ids: Union[List[str], str], hydrofabric: Path, output_gpkg_path: Path, is_vpu: bool = False
34
- ) -> Path:
34
+ ):
35
35
  # ids is a list of nexus and wb ids, or a single vpu id
36
36
  if not isinstance(ids, list):
37
37
  ids = [ids]
@@ -65,10 +65,11 @@ def subset_vpu(vpu_id: str, output_gpkg_path: Path, hydrofabric: Path = file_pat
65
65
  def subset(
66
66
  cat_ids: List[str],
67
67
  hydrofabric: Path = file_paths.conus_hydrofabric,
68
- output_gpkg_path: Path = None,
69
- ) -> str:
70
-
71
- upstream_ids = list(get_upstream_ids(cat_ids))
68
+ output_gpkg_path: Path = Path(),
69
+ include_outlet: bool = True,
70
+ ):
71
+ print(cat_ids)
72
+ upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))
72
73
 
73
74
  if not output_gpkg_path:
74
75
  # if the name isn't provided, use the first upstream id
@@ -80,15 +81,3 @@ def subset(
80
81
  create_subset_gpkg(upstream_ids, hydrofabric, output_gpkg_path)
81
82
  logger.info(f"Subset complete for {len(upstream_ids)} features (catchments + nexuses)")
82
83
  logger.debug(f"Subset complete for {upstream_ids} catchments")
83
-
84
-
85
- def move_files_to_config_dir(subset_output_dir: str) -> None:
86
- config_dir = subset_output_dir / "config"
87
- config_dir.mkdir(parents=True, exist_ok=True)
88
-
89
- files = [x for x in subset_output_dir.iterdir()]
90
- for file in files:
91
- if file.suffix in [".csv", ".json", ".geojson"]:
92
- if "partitions" in file.name:
93
- continue
94
- os.system(f"mv {file} {config_dir}")
@@ -147,7 +147,10 @@ def main() -> None:
147
147
  logging.info("Subsetting complete.")
148
148
  else:
149
149
  logging.info(f"Subsetting hydrofabric")
150
- subset(feature_to_subset, output_gpkg_path=paths.geopackage_path)
150
+ include_outlet = True
151
+ if args.gage:
152
+ include_outlet = False
153
+ subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)
151
154
  logging.info("Subsetting complete.")
152
155
 
153
156
  if args.forcings:
@@ -237,7 +240,7 @@ def main() -> None:
237
240
 
238
241
  if args.vis:
239
242
  try:
240
- command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.0'
243
+ command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.1'
241
244
  subprocess.run(command, shell=True)
242
245
  except:
243
246
  logging.error("Failed to launch docker container.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ngiab_data_preprocess
3
- Version: 4.1.1
3
+ Version: 4.2.0
4
4
  Summary: Graphical Tools for creating Next Gen Water model input data.
5
5
  Author-email: Josh Cunningham <jcunningham8@ua.edu>
6
6
  Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -1,12 +1,12 @@
1
1
  data_processing/create_realization.py,sha256=AxUDVSORjwd6IAImZpgDr8GRHKSUQF8emQi1ikfIno4,14899
2
2
  data_processing/dataset_utils.py,sha256=4qmRmK3qMWPkiWZHXhXv3c-ISbtOwr7QhciEl2ok6Ao,7314
3
- data_processing/datasets.py,sha256=kAtTKj2yQINUSzI0vX-kuXMPVeQBtZVfGkfpPh6Qi_0,3528
3
+ data_processing/datasets.py,sha256=BB_X0IvGnUrjBmj-wryn6-Nv5cd0Lje3zly3p896eU4,4822
4
4
  data_processing/file_paths.py,sha256=Cp3BxbO6sD50464ciTshjb3Z0vTvL0ZeSbOJgNdOqQ0,4698
5
5
  data_processing/forcings.py,sha256=6Q9fSXa67OrXg_r9li0K9widsclN2DQUp1-oUH2tZwo,19208
6
- data_processing/gpkg_utils.py,sha256=SdniJrwhta7fPchVSCliuThPENgcs0m9Elx615yttAU,20248
7
- data_processing/graph_utils.py,sha256=TsCSZmQ8SPm5F2YzDKv32BxdzaHlPrC6UijZxUJbZvI,7966
6
+ data_processing/gpkg_utils.py,sha256=j-j6TkCwlkOzM5ATMHi44raYS-xrJMzk5V0XQZ_0dzQ,20220
7
+ data_processing/graph_utils.py,sha256=I9IFkU08SkXb8pfVASTvkm7T8BYt-92CaNj7TV6wJFE,8244
8
8
  data_processing/s3fs_utils.py,sha256=WoqqwxkHpv0Qq4I5f5-gUZBCFtVQ68ehXbdOjWRKTDQ,2752
9
- data_processing/subset.py,sha256=WklS0CT5O4oz29fhTT46NmSOe_EFMCC93gLeg6nQ88I,2914
9
+ data_processing/subset.py,sha256=GbvfAaSJTfEvYOAZ6K0OgFA81xG_Y5UxiGDxukxO9gE,2558
10
10
  data_sources/cfe-nowpm-realization-template.json,sha256=8an6q1drWD8wU1ocvdPab-GvZDvlQ-0di_-NommH3QI,3528
11
11
  data_sources/cfe-template.ini,sha256=6e5-usqjWtm3MWVvtm8CTeZTJJMxO1ZswkOXq0L9mnc,2033
12
12
  data_sources/em-catchment-template.yml,sha256=M08ixazEUHYI2PNavtI0xPZeSzcQ9bg2g0XzNT-8_u4,292
@@ -32,13 +32,13 @@ map_app/static/resources/light-style.json,sha256=DaE52qKpAkjiWSKY_z7LxreqA2rW4Zy
32
32
  map_app/static/resources/loading.gif,sha256=ggdkZf1AD7rSwIpSJwfiIqANgmVV1WHlxGuKxQKv7uY,72191
33
33
  map_app/static/resources/screenshot.jpg,sha256=Ia358aX-OHM9BP4B8lX05cLnguF2fHUIimno9bnFLYw,253730
34
34
  map_app/templates/index.html,sha256=ITRzQEYn15sFN4qRACjaNj5muhalOeP9n_IwcdsIlUs,6631
35
- ngiab_data_cli/__main__.py,sha256=X_imHhWo2RBTJzJ9bUwEcAnAtZSyjKJXB60dBxea6ck,10361
35
+ ngiab_data_cli/__main__.py,sha256=V-g0dwa000e5Qye7PBMTtHTK7Cl7as3JS0UoAR0QrZ4,10503
36
36
  ngiab_data_cli/arguments.py,sha256=7ptImy8tLM1XvjfDr13tZszkjGVtenXo0KqllJeE3Mw,4372
37
37
  ngiab_data_cli/custom_logging.py,sha256=iS2XozaxudcxQj17qAsrCgbVK9LJAYAPmarJuVWJo1k,1280
38
38
  ngiab_data_cli/forcing_cli.py,sha256=lkcqWDk5H8IPyGv0DwLIZMQldqTUXpfwSX0C_RIuIJ8,3890
39
- ngiab_data_preprocess-4.1.1.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
40
- ngiab_data_preprocess-4.1.1.dist-info/METADATA,sha256=-LH19uwmvO9AB0NBtN4JECrQrwSLVZUXKtoSVqaNoWA,10310
41
- ngiab_data_preprocess-4.1.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
42
- ngiab_data_preprocess-4.1.1.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
43
- ngiab_data_preprocess-4.1.1.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
44
- ngiab_data_preprocess-4.1.1.dist-info/RECORD,,
39
+ ngiab_data_preprocess-4.2.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
40
+ ngiab_data_preprocess-4.2.0.dist-info/METADATA,sha256=Y2NVmd92S38QMwKXEkLvLKyWQ2IGy25okHV8sDy6c2c,10310
41
+ ngiab_data_preprocess-4.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
42
+ ngiab_data_preprocess-4.2.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
43
+ ngiab_data_preprocess-4.2.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
44
+ ngiab_data_preprocess-4.2.0.dist-info/RECORD,,