ngiab-data-preprocess 4.1.0__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data_processing/datasets.py CHANGED
@@ -85,3 +85,35 @@ def load_aorc_zarr(start_year: int = None, end_year: int = None) -> xr.Dataset:
 
     validate_dataset_format(dataset)
     return dataset
+
+
+def load_swe_zarr() -> xr.Dataset:
+    """Load the SWE zarr dataset from S3."""
+    s3_urls = [
+        "s3://noaa-nwm-retrospective-3-0-pds/CONUS/zarr/ldasout.zarr"
+    ]
+    # the default cache is readahead, which is detrimental to performance in this case
+    fs = S3ParallelFileSystem(anon=True, default_cache_type="none")  # default_block_size
+    s3_stores = [s3fs.S3Map(url, s3=fs) for url in s3_urls]
+    # cache=True holds accessed data in memory to prevent S3 being queried multiple times;
+    # most of the data is read once and written to disk, but some coordinate data is read repeatedly
+    dataset = xr.open_mfdataset(s3_stores, parallel=True, engine="zarr", cache=True)
+
+    # set the crs attribute to conform with the expected format
+    esri_pe_string = dataset.crs.esri_pe_string
+    dataset = dataset.drop_vars(["crs"])
+    dataset.attrs["crs"] = esri_pe_string
+    # drop everything except SNEQV
+    vars_to_drop = list(dataset.data_vars)
+    vars_to_drop.remove("SNEQV")
+    dataset = dataset.drop_vars(vars_to_drop)
+    dataset.attrs["name"] = "v3_swe_zarr"
+
+    # rename the data vars to work with ngen
+    variables = {
+        "SNEQV": "swe"
+    }
+    dataset = dataset.rename_vars(variables)
+
+    validate_dataset_format(dataset)
+    return dataset
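
For orientation, a hedged sketch of how the new loader might be used downstream; the import path matches the RECORD above, but the time window and output file are illustrative and assume the store's `time` coordinate:

```python
from data_processing.datasets import load_swe_zarr

swe = load_swe_zarr()                    # lazily opens the NWM v3 LDASOUT zarr store
print(swe.attrs["name"])                 # "v3_swe_zarr"
year = swe["swe"].sel(time=slice("2010-01-01", "2010-12-31"))
year.to_netcdf("swe_2010.nc")            # the actual S3 reads happen here
```
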
data_processing/gpkg_utils.py CHANGED
@@ -69,7 +69,7 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric) -> None:
     con.close()
 
 
-def create_empty_gpkg(gpkg: str) -> None:
+def create_empty_gpkg(gpkg: Path) -> None:
     """
     Create an empty geopackage with the necessary tables and indices.
     """
@@ -80,7 +80,7 @@ def create_empty_gpkg(gpkg: str) -> None:
     conn.executescript(sql_script)
 
 
-def add_triggers_to_gpkg(gpkg: str) -> None:
+def add_triggers_to_gpkg(gpkg: Path) -> None:
     """
     Adds geopackage triggers required to maintain spatial index integrity
     """
@@ -256,7 +256,7 @@ def insert_data(con: sqlite3.Connection, table: str, contents: List[Tuple]) -> N
     con.commit()
 
 
-def update_geopackage_metadata(gpkg: str) -> None:
+def update_geopackage_metadata(gpkg: Path) -> None:
     """
     Update the contents of the gpkg_contents table in the specified geopackage.
     """
@@ -318,10 +318,10 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     contents = source_db.execute(sql_query).fetchall()
 
     if table == "network":
-        # Look for the network entry that has a toid not in the flowpath or nexus tables
+        # Look for the network entry that has a toid not in the flowpath or nexus tables
         network_toids = [x[2] for x in contents]
         print(f"Network toids: {len(network_toids)}")
-        sql = "SELECT id FROM flowpaths"
+        sql = "SELECT id FROM flowpaths"
         flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
         print(f"Flowpath ids: {len(flowpath_ids)}")
         sql = "SELECT id FROM nexus"
@@ -342,8 +342,8 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
 
     dest_db.commit()
     source_db.close()
-    dest_db.close()
-
+    dest_db.close()
+
 
 
 def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name: Path) -> None:
     """
@@ -359,7 +359,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     source_db = sqlite3.connect(f"file:{hydrofabric}?mode=ro", uri=True)
     dest_db = sqlite3.connect(subset_gpkg_name)
 
-    table_keys = {"divides": "toid", "divide-attributes": "divide_id", "lakes": "poi_id"}
+    table_keys = {"divide-attributes": "divide_id", "lakes": "poi_id"}
 
     if table == "lakes":
         # lakes subset we get from the pois table which was already subset by water body id
@@ -377,7 +377,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     if table in table_keys:
         key_name = table_keys[table]
         sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
-        contents = source_db.execute(sql_query).fetchall()
+        contents = source_db.execute(sql_query).fetchall()
 
     insert_data(dest_db, table, contents)
 
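
The query construction above interpolates the ids directly, which only yields valid SQL if each id is already a quoted string — an assumption the bare `','.join(ids)` implies. A quick sketch with hypothetical values:

```python
# Reproduces the f-string from subset_table; table and ids are illustrative.
table = "divide-attributes"
key_name = "divide_id"
ids = ["'cat-11223'", "'cat-11224'"]  # assumed pre-quoted by the caller

sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
print(sql_query)
# SELECT * FROM 'divide-attributes' WHERE divide_id IN ('cat-11223','cat-11224')
```
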
@@ -429,16 +429,16 @@ def get_table_crs(gpkg: str, table: str) -> str:
 
 def get_cat_from_gage_id(gage_id: str, gpkg: Path = file_paths.conus_hydrofabric) -> str:
     """
-    Get the nexus id of associated with a gage id.
+    Get the catchment id associated with a gage id.
 
     Args:
         gage_id (str): The gage ID.
 
     Returns:
-        str: The nexus id of the watershed containing the gage ID.
+        str: The catchment id of the watershed containing the gage ID.
 
     Raises:
-        IndexError: If nexus is found for the given gage ID.
+        IndexError: If no catchment is found for the given gage ID.
 
     """
     gage_id = "".join([x for x in gage_id if x.isdigit()])
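
The last context line strips the gage id down to its digits before the lookup, so prefixed ids normalise to the bare gauge number. For example (the input value is hypothetical):

```python
gage_id = "USGS-01646500"
gage_id = "".join([x for x in gage_id if x.isdigit()])
print(gage_id)  # 01646500
```
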
data_processing/graph_utils.py CHANGED
@@ -165,6 +165,10 @@ def get_upstream_cats(names: Union[str, List[str]]) -> Set[str]:
         if name in parent_ids:
             continue
         try:
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             node_index = graph.vs.find(cat=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
@@ -205,7 +209,10 @@ def get_upstream_ids(names: Union[str, List[str]], include_outlet: bool = True)
         if name in parent_ids:
             continue
         try:
-            node_index = graph.vs.find(name=name).index
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
                 parent_ids.add(graph.vs[node]["name"])
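
Both functions now dispatch on the id prefix: "cat-..." ids are resolved through a `cat` vertex attribute, anything else through igraph's built-in `name` index. A self-contained sketch of that lookup on a toy graph (the vertex data here is invented; the package builds its graph from the hydrofabric):

```python
import igraph as ig

g = ig.Graph(directed=True)
g.add_vertices(3)
g.vs["name"] = ["wb-1", "nex-1", "wb-2"]   # igraph's name attribute
g.vs["cat"] = ["cat-1", None, "cat-2"]     # catchment ids as a custom attribute
g.add_edges([(0, 1), (1, 2)])              # wb-1 -> nex-1 -> wb-2

for name in ["cat-2", "nex-1"]:
    if "cat" in name:
        node_index = g.vs.find(cat=name).index
    else:
        node_index = g.vs.find(name=name).index
    # everything upstream of (i.e. with a path into) the node, itself included
    upstream_nodes = g.subcomponent(node_index, mode="IN")
    print(name, [g.vs[n]["name"] for n in upstream_nodes])
```
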
data_processing/subset.py CHANGED
@@ -31,7 +31,7 @@ subset_tables = [
 
 
 def create_subset_gpkg(
     ids: Union[List[str], str], hydrofabric: Path, output_gpkg_path: Path, is_vpu: bool = False
-) -> Path:
+):
     # ids is a list of nexus and wb ids, or a single vpu id
     if not isinstance(ids, list):
         ids = [ids]
@@ -65,10 +65,11 @@ def subset_vpu(vpu_id: str, output_gpkg_path: Path, hydrofabric: Path = file_pat
 def subset(
     cat_ids: List[str],
     hydrofabric: Path = file_paths.conus_hydrofabric,
-    output_gpkg_path: Path = None,
-) -> str:
-
-    upstream_ids = list(get_upstream_ids(cat_ids))
+    output_gpkg_path: Path = Path(),
+    include_outlet: bool = True,
+):
+    print(cat_ids)
+    upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))
 
     if not output_gpkg_path:
         # if the name isn't provided, use the first upstream id
83
-
84
-
85
- def move_files_to_config_dir(subset_output_dir: str) -> None:
86
- config_dir = subset_output_dir / "config"
87
- config_dir.mkdir(parents=True, exist_ok=True)
88
-
89
- files = [x for x in subset_output_dir.iterdir()]
90
- for file in files:
91
- if file.suffix in [".csv", ".json", ".geojson"]:
92
- if "partitions" in file.name:
93
- continue
94
- os.system(f"mv {file} {config_dir}")
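
With the new keyword in place, a caller can subset everything upstream of a feature while excluding the outlet itself. A hypothetical call sketch (the feature id and output path are illustrative):

```python
from pathlib import Path

from data_processing.subset import subset

subset(
    ["wb-2917533"],                                   # example feature id
    output_gpkg_path=Path("wb-2917533_subset.gpkg"),
    include_outlet=False,                             # new in 4.2.0; defaults to True
)
```
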
ngiab_data_cli/__main__.py CHANGED
@@ -147,7 +147,10 @@ def main() -> None:
         logging.info("Subsetting complete.")
     else:
         logging.info(f"Subsetting hydrofabric")
-        subset(feature_to_subset, output_gpkg_path=paths.geopackage_path)
+        include_outlet = True
+        if args.gage:
+            include_outlet = False
+        subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)
         logging.info("Subsetting complete.")
 
     if args.forcings:
@@ -237,7 +240,7 @@ def main() -> None:
 
     if args.vis:
         try:
-            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.0'
+            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.1'
             subprocess.run(command, shell=True)
         except:
             logging.error("Failed to launch docker container.")
ngiab_data_preprocess-4.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.1.0
+Version: 4.2.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -8,11 +8,11 @@ Project-URL: Issues, https://github.com/CIROH-UA/NGIAB_data_preprocess/issues
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python: <3.13,>=3.10
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: pyogrio==0.7.2
-Requires-Dist: pyproj==3.6.1
+Requires-Dist: pyogrio>=0.7.2
+Requires-Dist: pyproj>=3.6.1
 Requires-Dist: Flask==3.0.2
 Requires-Dist: geopandas>=1.0.0
 Requires-Dist: requests==2.32.2
@@ -20,19 +20,20 @@ Requires-Dist: igraph==0.11.4
 Requires-Dist: s3fs==2024.3.1
 Requires-Dist: xarray==2024.2.0
 Requires-Dist: zarr==2.17.1
-Requires-Dist: netCDF4==1.6.5
+Requires-Dist: netCDF4>=1.6.5
 Requires-Dist: dask==2024.4.1
 Requires-Dist: dask[distributed]==2024.4.1
 Requires-Dist: black==24.3.0
 Requires-Dist: isort==5.13.2
 Requires-Dist: h5netcdf==1.3.0
 Requires-Dist: exactextract==0.2.0
-Requires-Dist: numpy==1.26.4
+Requires-Dist: numpy>=1.26.4
 Requires-Dist: tqdm==4.66.4
 Requires-Dist: rich==13.7.1
 Requires-Dist: colorama==0.4.6
 Requires-Dist: bokeh==3.5.1
 Requires-Dist: boto3
+Requires-Dist: numcodecs<0.16.0
 Provides-Extra: eval
 Requires-Dist: ngiab_eval; extra == "eval"
 Provides-Extra: plot
@@ -85,6 +86,41 @@ For automatic interactive visualisation, please run [NGIAB](https://github.com/C
 * This tool is officially supported on macOS or Ubuntu (tested on 22.04 & 24.04). To use it on Windows, please install [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
 
 ## Installation and Running
+It is highly recommended to use [Astral UV](https://docs.astral.sh/uv/) to install and run this tool. It works similarly to pip and conda, and it is useful enough that I would recommend it for your other Python projects too.
+
+```bash
+# Install UV
+curl -LsSf https://astral.sh/uv/install.sh | sh
+# If that fails, it can be installed via pip
+# pip install uv
+
+# Create a virtual environment in the current directory
+uv venv
+
+# Install the tool in the virtual environment
+uv pip install ngiab_data_preprocess
+
+# To run the cli
+uv run cli --help
+
+# To run the map
+uv run map_app
+```
+
+UV automatically detects any virtual environment in the current directory and uses it when you run `uv run`.
+
+### Running without installing
+This package supports pipx and uvx, which means you can run the tool without installing it. No virtual environment needed, just UV.
+```bash
+# run this from anywhere
+uvx --from ngiab_data_preprocess cli --help
+# for the map
+uvx --from ngiab_data_preprocess map_app
+```
+
+## Legacy pip installation
+<details>
+<summary>Click here to expand</summary>
 
 ```bash
 # If you're installing this on jupyterhub / 2i2c you HAVE TO DEACTIVATE THE CONDA ENV
@@ -102,8 +138,7 @@ pip install 'ngiab_data_preprocess'
 python -m map_app
 # CLI instructions at the bottom of the README
 ```
-
-The first time you run this command, it will download the hydrofabric from Lynker Spatial. If you already have it, place `conus_nextgen.gpkg` into `~/.ngiab/hydrofabric/v2.2/`.
+</details>
 
 ## Development Installation
 
@@ -117,24 +152,23 @@ To install and run the tool, follow these steps:
 git clone https://github.com/CIROH-UA/NGIAB_data_preprocess
 cd NGIAB_data_preprocess
 ```
-2. Create a virtual environment and activate it:
+2. Create a virtual environment:
 ```bash
-python3 -m venv env
-source env/bin/activate
+uv venv
 ```
 3. Install the tool:
 ```bash
-pip install -e .
+uv pip install -e .
 ```
 4. Run the map app:
 ```bash
-python -m map_app
+uv run map_app
 ```
 </details>
 
 ## Usage
 
-Running the command `python -m map_app` will open the app in a new browser tab.
+Running the command `uv run map_app` will open the app in a new browser tab.
 
 To use the tool:
 1. Select the catchment you're interested in on the map.
ngiab_data_preprocess-4.2.0.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
 data_processing/create_realization.py,sha256=AxUDVSORjwd6IAImZpgDr8GRHKSUQF8emQi1ikfIno4,14899
 data_processing/dataset_utils.py,sha256=4qmRmK3qMWPkiWZHXhXv3c-ISbtOwr7QhciEl2ok6Ao,7314
-data_processing/datasets.py,sha256=kAtTKj2yQINUSzI0vX-kuXMPVeQBtZVfGkfpPh6Qi_0,3528
+data_processing/datasets.py,sha256=BB_X0IvGnUrjBmj-wryn6-Nv5cd0Lje3zly3p896eU4,4822
 data_processing/file_paths.py,sha256=Cp3BxbO6sD50464ciTshjb3Z0vTvL0ZeSbOJgNdOqQ0,4698
 data_processing/forcings.py,sha256=6Q9fSXa67OrXg_r9li0K9widsclN2DQUp1-oUH2tZwo,19208
-data_processing/gpkg_utils.py,sha256=SdniJrwhta7fPchVSCliuThPENgcs0m9Elx615yttAU,20248
-data_processing/graph_utils.py,sha256=TsCSZmQ8SPm5F2YzDKv32BxdzaHlPrC6UijZxUJbZvI,7966
+data_processing/gpkg_utils.py,sha256=j-j6TkCwlkOzM5ATMHi44raYS-xrJMzk5V0XQZ_0dzQ,20220
+data_processing/graph_utils.py,sha256=I9IFkU08SkXb8pfVASTvkm7T8BYt-92CaNj7TV6wJFE,8244
 data_processing/s3fs_utils.py,sha256=WoqqwxkHpv0Qq4I5f5-gUZBCFtVQ68ehXbdOjWRKTDQ,2752
-data_processing/subset.py,sha256=WklS0CT5O4oz29fhTT46NmSOe_EFMCC93gLeg6nQ88I,2914
+data_processing/subset.py,sha256=GbvfAaSJTfEvYOAZ6K0OgFA81xG_Y5UxiGDxukxO9gE,2558
 data_sources/cfe-nowpm-realization-template.json,sha256=8an6q1drWD8wU1ocvdPab-GvZDvlQ-0di_-NommH3QI,3528
 data_sources/cfe-template.ini,sha256=6e5-usqjWtm3MWVvtm8CTeZTJJMxO1ZswkOXq0L9mnc,2033
 data_sources/em-catchment-template.yml,sha256=M08ixazEUHYI2PNavtI0xPZeSzcQ9bg2g0XzNT-8_u4,292
@@ -32,13 +32,13 @@ map_app/static/resources/light-style.json,sha256=DaE52qKpAkjiWSKY_z7LxreqA2rW4Zy
 map_app/static/resources/loading.gif,sha256=ggdkZf1AD7rSwIpSJwfiIqANgmVV1WHlxGuKxQKv7uY,72191
 map_app/static/resources/screenshot.jpg,sha256=Ia358aX-OHM9BP4B8lX05cLnguF2fHUIimno9bnFLYw,253730
 map_app/templates/index.html,sha256=ITRzQEYn15sFN4qRACjaNj5muhalOeP9n_IwcdsIlUs,6631
-ngiab_data_cli/__main__.py,sha256=X_imHhWo2RBTJzJ9bUwEcAnAtZSyjKJXB60dBxea6ck,10361
+ngiab_data_cli/__main__.py,sha256=V-g0dwa000e5Qye7PBMTtHTK7Cl7as3JS0UoAR0QrZ4,10503
 ngiab_data_cli/arguments.py,sha256=7ptImy8tLM1XvjfDr13tZszkjGVtenXo0KqllJeE3Mw,4372
 ngiab_data_cli/custom_logging.py,sha256=iS2XozaxudcxQj17qAsrCgbVK9LJAYAPmarJuVWJo1k,1280
 ngiab_data_cli/forcing_cli.py,sha256=lkcqWDk5H8IPyGv0DwLIZMQldqTUXpfwSX0C_RIuIJ8,3890
-ngiab_data_preprocess-4.1.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
-ngiab_data_preprocess-4.1.0.dist-info/METADATA,sha256=PiNJhT7iKzPwVbU9i1kv8A8hR1aciMEEqnKfcM3W9OI,9433
-ngiab_data_preprocess-4.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-ngiab_data_preprocess-4.1.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
-ngiab_data_preprocess-4.1.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
-ngiab_data_preprocess-4.1.0.dist-info/RECORD,,
+ngiab_data_preprocess-4.2.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
+ngiab_data_preprocess-4.2.0.dist-info/METADATA,sha256=Y2NVmd92S38QMwKXEkLvLKyWQ2IGy25okHV8sDy6c2c,10310
+ngiab_data_preprocess-4.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ngiab_data_preprocess-4.2.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
+ngiab_data_preprocess-4.2.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
+ngiab_data_preprocess-4.2.0.dist-info/RECORD,,