ngiab-data-preprocess 4.1.0__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data_processing/datasets.py CHANGED
@@ -85,3 +85,35 @@ def load_aorc_zarr(start_year: int = None, end_year: int = None) -> xr.Dataset:
 
     validate_dataset_format(dataset)
     return dataset
+
+
+def load_swe_zarr() -> xr.Dataset:
+    """Load the SWE zarr dataset from S3."""
+    s3_urls = [
+        "s3://noaa-nwm-retrospective-3-0-pds/CONUS/zarr/ldasout.zarr"
+    ]
+    # the default cache is readahead, which is detrimental to performance in this case
+    fs = S3ParallelFileSystem(anon=True, default_cache_type="none")  # default_block_size
+    s3_stores = [s3fs.S3Map(url, s3=fs) for url in s3_urls]
+    # cache=True holds accessed data in memory to prevent S3 being queried multiple times;
+    # most of the data is read once and written to disk, but some coordinate data is read repeatedly
+    dataset = xr.open_mfdataset(s3_stores, parallel=True, engine="zarr", cache=True)
+
+    # set the crs attribute to conform with the expected format
+    esri_pe_string = dataset.crs.esri_pe_string
+    dataset = dataset.drop_vars(["crs"])
+    dataset.attrs["crs"] = esri_pe_string
+    # drop everything except SNEQV
+    vars_to_drop = list(dataset.data_vars)
+    vars_to_drop.remove("SNEQV")
+    dataset = dataset.drop_vars(vars_to_drop)
+    dataset.attrs["name"] = "v3_swe_zarr"
+
+    # rename the data vars to work with ngen
+    variables = {
+        "SNEQV": "swe"
+    }
+    dataset = dataset.rename_vars(variables)
+
+    validate_dataset_format(dataset)
+    return dataset
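
For orientation, a hedged sketch of how the new loader might be used downstream; the import path matches the RECORD above, but the time window and output file are illustrative and assume the store's `time` coordinate:

```python
from data_processing.datasets import load_swe_zarr

swe = load_swe_zarr()                    # lazily opens the NWM v3 LDASOUT zarr store
print(swe.attrs["name"])                 # "v3_swe_zarr"
year = swe["swe"].sel(time=slice("2010-01-01", "2010-12-31"))
year.to_netcdf("swe_2010.nc")            # the actual S3 reads happen here
```
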
data_processing/gpkg_utils.py CHANGED
@@ -69,7 +69,7 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric) -> None:
     con.close()
 
 
-def create_empty_gpkg(gpkg: str) -> None:
+def create_empty_gpkg(gpkg: Path) -> None:
     """
     Create an empty geopackage with the necessary tables and indices.
     """
@@ -80,7 +80,7 @@ def create_empty_gpkg(gpkg: str) -> None:
     conn.executescript(sql_script)
 
 
-def add_triggers_to_gpkg(gpkg: str) -> None:
+def add_triggers_to_gpkg(gpkg: Path) -> None:
     """
     Adds geopackage triggers required to maintain spatial index integrity
     """
@@ -256,7 +256,7 @@ def insert_data(con: sqlite3.Connection, table: str, contents: List[Tuple]) -> N
     con.commit()
 
 
-def update_geopackage_metadata(gpkg: str) -> None:
+def update_geopackage_metadata(gpkg: Path) -> None:
     """
     Update the contents of the gpkg_contents table in the specified geopackage.
     """
@@ -318,10 +318,10 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     contents = source_db.execute(sql_query).fetchall()
 
     if table == "network":
-        # Look for the network entry that has a toid not in the flowpath or nexus tables
+        # Look for the network entry that has a toid not in the flowpath or nexus tables
         network_toids = [x[2] for x in contents]
         print(f"Network toids: {len(network_toids)}")
-        sql = "SELECT id FROM flowpaths"
+        sql = "SELECT id FROM flowpaths"
         flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
         print(f"Flowpath ids: {len(flowpath_ids)}")
         sql = "SELECT id FROM nexus"
@@ -342,8 +342,8 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
 
     dest_db.commit()
     source_db.close()
-    dest_db.close()
-
+    dest_db.close()
+
 
 
 def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name: Path) -> None:
     """
@@ -359,7 +359,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     source_db = sqlite3.connect(f"file:{hydrofabric}?mode=ro", uri=True)
     dest_db = sqlite3.connect(subset_gpkg_name)
 
-    table_keys = {"divides": "toid", "divide-attributes": "divide_id", "lakes": "poi_id"}
+    table_keys = {"divide-attributes": "divide_id", "lakes": "poi_id"}
 
     if table == "lakes":
         # lakes subset we get from the pois table which was already subset by water body id
@@ -377,7 +377,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     if table in table_keys:
         key_name = table_keys[table]
         sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
-        contents = source_db.execute(sql_query).fetchall()
+        contents = source_db.execute(sql_query).fetchall()
 
     insert_data(dest_db, table, contents)
 
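
The query construction above interpolates the ids directly, which only yields valid SQL if each id is already a quoted string — an assumption the bare `','.join(ids)` implies. A quick sketch with hypothetical values:

```python
# Reproduces the f-string from subset_table; table and ids are illustrative.
table = "divide-attributes"
key_name = "divide_id"
ids = ["'cat-11223'", "'cat-11224'"]  # assumed pre-quoted by the caller

sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
print(sql_query)
# SELECT * FROM 'divide-attributes' WHERE divide_id IN ('cat-11223','cat-11224')
```
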
@@ -429,16 +429,16 @@ def get_table_crs(gpkg: str, table: str) -> str:
 
 def get_cat_from_gage_id(gage_id: str, gpkg: Path = file_paths.conus_hydrofabric) -> str:
     """
-    Get the nexus id of associated with a gage id.
+    Get the catchment id associated with a gage id.
 
     Args:
         gage_id (str): The gage ID.
 
     Returns:
-        str: The nexus id of the watershed containing the gage ID.
+        str: The catchment id of the watershed containing the gage ID.
 
     Raises:
-        IndexError: If nexus is found for the given gage ID.
+        IndexError: If no catchment is found for the given gage ID.
 
     """
     gage_id = "".join([x for x in gage_id if x.isdigit()])
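
The last context line strips the gage id down to its digits before the lookup, so prefixed ids normalise to the bare gauge number. For example (the input value is hypothetical):

```python
gage_id = "USGS-01646500"
gage_id = "".join([x for x in gage_id if x.isdigit()])
print(gage_id)  # 01646500
```
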
data_processing/graph_utils.py CHANGED
@@ -165,6 +165,10 @@ def get_upstream_cats(names: Union[str, List[str]]) -> Set[str]:
         if name in parent_ids:
             continue
         try:
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             node_index = graph.vs.find(cat=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
@@ -205,7 +209,10 @@ def get_upstream_ids(names: Union[str, List[str]], include_outlet: bool = True)
         if name in parent_ids:
             continue
         try:
-            node_index = graph.vs.find(name=name).index
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
                 parent_ids.add(graph.vs[node]["name"])
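
Both functions now dispatch on the id prefix: "cat-..." ids are resolved through a `cat` vertex attribute, anything else through igraph's built-in `name` index. A self-contained sketch of that lookup on a toy graph (the vertex data here is invented; the package builds its graph from the hydrofabric):

```python
import igraph as ig

g = ig.Graph(directed=True)
g.add_vertices(3)
g.vs["name"] = ["wb-1", "nex-1", "wb-2"]   # igraph's name attribute
g.vs["cat"] = ["cat-1", None, "cat-2"]     # catchment ids as a custom attribute
g.add_edges([(0, 1), (1, 2)])              # wb-1 -> nex-1 -> wb-2

for name in ["cat-2", "nex-1"]:
    if "cat" in name:
        node_index = g.vs.find(cat=name).index
    else:
        node_index = g.vs.find(name=name).index
    # everything upstream of (i.e. with a path into) the node, itself included
    upstream_nodes = g.subcomponent(node_index, mode="IN")
    print(name, [g.vs[n]["name"] for n in upstream_nodes])
```
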
data_processing/subset.py CHANGED
@@ -31,7 +31,7 @@ subset_tables = [
 
 
 def create_subset_gpkg(
     ids: Union[List[str], str], hydrofabric: Path, output_gpkg_path: Path, is_vpu: bool = False
-) -> Path:
+):
     # ids is a list of nexus and wb ids, or a single vpu id
     if not isinstance(ids, list):
         ids = [ids]
@@ -65,10 +65,11 @@ def subset_vpu(vpu_id: str, output_gpkg_path: Path, hydrofabric: Path = file_pat
 def subset(
     cat_ids: List[str],
     hydrofabric: Path = file_paths.conus_hydrofabric,
-    output_gpkg_path: Path = None,
-) -> str:
-
-    upstream_ids = list(get_upstream_ids(cat_ids))
+    output_gpkg_path: Path = Path(),
+    include_outlet: bool = True,
+):
+    print(cat_ids)
+    upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))
 
     if not output_gpkg_path:
         # if the name isn't provided, use the first upstream id
83
-
84
-
85
- def move_files_to_config_dir(subset_output_dir: str) -> None:
86
- config_dir = subset_output_dir / "config"
87
- config_dir.mkdir(parents=True, exist_ok=True)
88
-
89
- files = [x for x in subset_output_dir.iterdir()]
90
- for file in files:
91
- if file.suffix in [".csv", ".json", ".geojson"]:
92
- if "partitions" in file.name:
93
- continue
94
- os.system(f"mv {file} {config_dir}")
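
With the new keyword in place, a caller can subset everything upstream of a feature while excluding the outlet itself. A hypothetical call sketch (the feature id and output path are illustrative):

```python
from pathlib import Path

from data_processing.subset import subset

subset(
    ["wb-2917533"],                                   # example feature id
    output_gpkg_path=Path("wb-2917533_subset.gpkg"),
    include_outlet=False,                             # new in 4.2.0; defaults to True
)
```
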
ngiab_data_cli/__main__.py CHANGED
@@ -147,7 +147,10 @@ def main() -> None:
         logging.info("Subsetting complete.")
     else:
         logging.info(f"Subsetting hydrofabric")
-        subset(feature_to_subset, output_gpkg_path=paths.geopackage_path)
+        include_outlet = True
+        if args.gage:
+            include_outlet = False
+        subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)
         logging.info("Subsetting complete.")
 
     if args.forcings:
@@ -237,7 +240,7 @@ def main() -> None:
 
     if args.vis:
         try:
-            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.0'
+            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.1'
             subprocess.run(command, shell=True)
         except:
             logging.error("Failed to launch docker container.")
ngiab_data_preprocess-4.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.1.0
+Version: 4.2.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -8,11 +8,11 @@ Project-URL: Issues, https://github.com/CIROH-UA/NGIAB_data_preprocess/issues
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python: <3.13,>=3.10
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: pyogrio==0.7.2
-Requires-Dist: pyproj==3.6.1
+Requires-Dist: pyogrio>=0.7.2
+Requires-Dist: pyproj>=3.6.1
 Requires-Dist: Flask==3.0.2
 Requires-Dist: geopandas>=1.0.0
 Requires-Dist: requests==2.32.2
@@ -20,19 +20,20 @@ Requires-Dist: igraph==0.11.4
 Requires-Dist: s3fs==2024.3.1
 Requires-Dist: xarray==2024.2.0
 Requires-Dist: zarr==2.17.1
-Requires-Dist: netCDF4==1.6.5
+Requires-Dist: netCDF4>=1.6.5
 Requires-Dist: dask==2024.4.1
 Requires-Dist: dask[distributed]==2024.4.1
 Requires-Dist: black==24.3.0
 Requires-Dist: isort==5.13.2
 Requires-Dist: h5netcdf==1.3.0
 Requires-Dist: exactextract==0.2.0
-Requires-Dist: numpy==1.26.4
+Requires-Dist: numpy>=1.26.4
 Requires-Dist: tqdm==4.66.4
 Requires-Dist: rich==13.7.1
 Requires-Dist: colorama==0.4.6
 Requires-Dist: bokeh==3.5.1
 Requires-Dist: boto3
+Requires-Dist: numcodecs<0.16.0
 Provides-Extra: eval
 Requires-Dist: ngiab_eval; extra == "eval"
 Provides-Extra: plot
@@ -85,6 +86,41 @@ For automatic interactive visualisation, please run [NGIAB](https://github.com/C
 * This tool is officially supported on macOS or Ubuntu (tested on 22.04 & 24.04). To use it on Windows, please install [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
 
 ## Installation and Running
+It is highly recommended to use [Astral UV](https://docs.astral.sh/uv/) to install and run this tool. It works similarly to pip and conda, and it is useful enough that I would recommend it for your other Python projects too.
+
+```bash
+# Install UV
+curl -LsSf https://astral.sh/uv/install.sh | sh
+# If that fails, it can be installed via pip
+# pip install uv
+
+# Create a virtual environment in the current directory
+uv venv
+
+# Install the tool in the virtual environment
+uv pip install ngiab_data_preprocess
+
+# To run the cli
+uv run cli --help
+
+# To run the map
+uv run map_app
+```
+
+UV automatically detects any virtual environment in the current directory and uses it when you run `uv run`.
+
+### Running without installing
+This package supports pipx and uvx, which means you can run the tool without installing it. No virtual environment needed, just UV.
+```bash
+# run this from anywhere
+uvx --from ngiab_data_preprocess cli --help
+# for the map
+uvx --from ngiab_data_preprocess map_app
+```
+
+## Legacy pip installation
+<details>
+<summary>Click here to expand</summary>
 
 ```bash
 # If you're installing this on jupyterhub / 2i2c you HAVE TO DEACTIVATE THE CONDA ENV
@@ -102,8 +138,7 @@ pip install 'ngiab_data_preprocess'
 python -m map_app
 # CLI instructions at the bottom of the README
 ```
-
-The first time you run this command, it will download the hydrofabric from Lynker Spatial. If you already have it, place `conus_nextgen.gpkg` into `~/.ngiab/hydrofabric/v2.2/`.
+</details>
 
 ## Development Installation
 
@@ -117,24 +152,23 @@ To install and run the tool, follow these steps:
 git clone https://github.com/CIROH-UA/NGIAB_data_preprocess
 cd NGIAB_data_preprocess
 ```
-2. Create a virtual environment and activate it:
+2. Create a virtual environment:
 ```bash
-python3 -m venv env
-source env/bin/activate
+uv venv
 ```
 3. Install the tool:
 ```bash
-pip install -e .
+uv pip install -e .
 ```
 4. Run the map app:
 ```bash
-python -m map_app
+uv run map_app
 ```
 </details>
 
 ## Usage
 
-Running the command `python -m map_app` will open the app in a new browser tab.
+Running the command `uv run map_app` will open the app in a new browser tab.
 
 To use the tool:
 1. Select the catchment you're interested in on the map.
ngiab_data_preprocess-4.2.0.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
 data_processing/create_realization.py,sha256=AxUDVSORjwd6IAImZpgDr8GRHKSUQF8emQi1ikfIno4,14899
 data_processing/dataset_utils.py,sha256=4qmRmK3qMWPkiWZHXhXv3c-ISbtOwr7QhciEl2ok6Ao,7314
-data_processing/datasets.py,sha256=kAtTKj2yQINUSzI0vX-kuXMPVeQBtZVfGkfpPh6Qi_0,3528
+data_processing/datasets.py,sha256=BB_X0IvGnUrjBmj-wryn6-Nv5cd0Lje3zly3p896eU4,4822
 data_processing/file_paths.py,sha256=Cp3BxbO6sD50464ciTshjb3Z0vTvL0ZeSbOJgNdOqQ0,4698
 data_processing/forcings.py,sha256=6Q9fSXa67OrXg_r9li0K9widsclN2DQUp1-oUH2tZwo,19208
-data_processing/gpkg_utils.py,sha256=SdniJrwhta7fPchVSCliuThPENgcs0m9Elx615yttAU,20248
-data_processing/graph_utils.py,sha256=TsCSZmQ8SPm5F2YzDKv32BxdzaHlPrC6UijZxUJbZvI,7966
+data_processing/gpkg_utils.py,sha256=j-j6TkCwlkOzM5ATMHi44raYS-xrJMzk5V0XQZ_0dzQ,20220
+data_processing/graph_utils.py,sha256=I9IFkU08SkXb8pfVASTvkm7T8BYt-92CaNj7TV6wJFE,8244
 data_processing/s3fs_utils.py,sha256=WoqqwxkHpv0Qq4I5f5-gUZBCFtVQ68ehXbdOjWRKTDQ,2752
-data_processing/subset.py,sha256=WklS0CT5O4oz29fhTT46NmSOe_EFMCC93gLeg6nQ88I,2914
+data_processing/subset.py,sha256=GbvfAaSJTfEvYOAZ6K0OgFA81xG_Y5UxiGDxukxO9gE,2558
 data_sources/cfe-nowpm-realization-template.json,sha256=8an6q1drWD8wU1ocvdPab-GvZDvlQ-0di_-NommH3QI,3528
 data_sources/cfe-template.ini,sha256=6e5-usqjWtm3MWVvtm8CTeZTJJMxO1ZswkOXq0L9mnc,2033
 data_sources/em-catchment-template.yml,sha256=M08ixazEUHYI2PNavtI0xPZeSzcQ9bg2g0XzNT-8_u4,292
@@ -32,13 +32,13 @@ map_app/static/resources/light-style.json,sha256=DaE52qKpAkjiWSKY_z7LxreqA2rW4Zy
 map_app/static/resources/loading.gif,sha256=ggdkZf1AD7rSwIpSJwfiIqANgmVV1WHlxGuKxQKv7uY,72191
 map_app/static/resources/screenshot.jpg,sha256=Ia358aX-OHM9BP4B8lX05cLnguF2fHUIimno9bnFLYw,253730
 map_app/templates/index.html,sha256=ITRzQEYn15sFN4qRACjaNj5muhalOeP9n_IwcdsIlUs,6631
-ngiab_data_cli/__main__.py,sha256=X_imHhWo2RBTJzJ9bUwEcAnAtZSyjKJXB60dBxea6ck,10361
+ngiab_data_cli/__main__.py,sha256=V-g0dwa000e5Qye7PBMTtHTK7Cl7as3JS0UoAR0QrZ4,10503
 ngiab_data_cli/arguments.py,sha256=7ptImy8tLM1XvjfDr13tZszkjGVtenXo0KqllJeE3Mw,4372
 ngiab_data_cli/custom_logging.py,sha256=iS2XozaxudcxQj17qAsrCgbVK9LJAYAPmarJuVWJo1k,1280
 ngiab_data_cli/forcing_cli.py,sha256=lkcqWDk5H8IPyGv0DwLIZMQldqTUXpfwSX0C_RIuIJ8,3890
-ngiab_data_preprocess-4.1.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
-ngiab_data_preprocess-4.1.0.dist-info/METADATA,sha256=PiNJhT7iKzPwVbU9i1kv8A8hR1aciMEEqnKfcM3W9OI,9433
-ngiab_data_preprocess-4.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-ngiab_data_preprocess-4.1.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
-ngiab_data_preprocess-4.1.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
-ngiab_data_preprocess-4.1.0.dist-info/RECORD,,
+ngiab_data_preprocess-4.2.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
+ngiab_data_preprocess-4.2.0.dist-info/METADATA,sha256=Y2NVmd92S38QMwKXEkLvLKyWQ2IGy25okHV8sDy6c2c,10310
+ngiab_data_preprocess-4.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ngiab_data_preprocess-4.2.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
+ngiab_data_preprocess-4.2.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
+ngiab_data_preprocess-4.2.0.dist-info/RECORD,,