ngiab-data-preprocess 4.1.1-py3-none-any.whl → 4.2.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- data_processing/datasets.py +32 -0
- data_processing/gpkg_utils.py +12 -12
- data_processing/graph_utils.py +8 -1
- data_processing/subset.py +6 -17
- ngiab_data_cli/__main__.py +5 -2
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/METADATA +1 -1
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/RECORD +11 -11
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/WHEEL +0 -0
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/entry_points.txt +0 -0
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/licenses/LICENSE +0 -0
- {ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/top_level.txt +0 -0
data_processing/datasets.py
CHANGED
@@ -85,3 +85,35 @@ def load_aorc_zarr(start_year: int = None, end_year: int = None) -> xr.Dataset:
 
     validate_dataset_format(dataset)
     return dataset
+
+
+def load_swe_zarr() -> xr.Dataset:
+    """Load the swe zarr dataset from S3."""
+    s3_urls = [
+        f"s3://noaa-nwm-retrospective-3-0-pds/CONUS/zarr/ldasout.zarr"
+    ]
+    # default cache is readahead which is detrimental to performance in this case
+    fs = S3ParallelFileSystem(anon=True, default_cache_type="none")  # default_block_size
+    s3_stores = [s3fs.S3Map(url, s3=fs) for url in s3_urls]
+    # the cache option here just holds accessed data in memory to prevent s3 being queried multiple times
+    # most of the data is read once and written to disk but some of the coordinate data is read multiple times
+    dataset = xr.open_mfdataset(s3_stores, parallel=True, engine="zarr", cache=True)
+
+    # set the crs attribute to conform with the format
+    esri_pe_string = dataset.crs.esri_pe_string
+    dataset = dataset.drop_vars(["crs"])
+    dataset.attrs["crs"] = esri_pe_string
+    # drop everything except SNEQV
+    vars_to_drop = list(dataset.data_vars)
+    vars_to_drop.remove('SNEQV')
+    dataset = dataset.drop_vars(vars_to_drop)
+    dataset.attrs["name"] = "v3_swe_zarr"
+
+    # rename the data vars to work with ngen
+    variables = {
+        "SNEQV": "swe"
+    }
+    dataset = dataset.rename_vars(variables)
+
+    validate_dataset_format(dataset)
+    return dataset
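A minimal usage sketch of the new loader, assuming anonymous S3 access and the module layout recorded in this wheel; the printed values follow from the transformations above:

from data_processing.datasets import load_swe_zarr

swe = load_swe_zarr()
print(list(swe.data_vars))  # ["swe"], the lone SNEQV variable renamed for ngen
print(swe.attrs["name"])    # "v3_swe_zarr"
print(swe.attrs["crs"])     # the ESRI PE string lifted from the dropped crs variable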
data_processing/gpkg_utils.py
CHANGED
@@ -69,7 +69,7 @@ def verify_indices(gpkg: str = file_paths.conus_hydrofabric) -> None:
     con.close()
 
 
-def create_empty_gpkg(gpkg: str) -> None:
+def create_empty_gpkg(gpkg: Path) -> None:
     """
     Create an empty geopackage with the necessary tables and indices.
     """
@@ -80,7 +80,7 @@ def create_empty_gpkg(gpkg: str) -> None:
     conn.executescript(sql_script)
 
 
-def add_triggers_to_gpkg(gpkg: str) -> None:
+def add_triggers_to_gpkg(gpkg: Path) -> None:
     """
     Adds geopackage triggers required to maintain spatial index integrity
     """
@@ -256,7 +256,7 @@ def insert_data(con: sqlite3.Connection, table: str, contents: List[Tuple]) -> N
     con.commit()
 
 
-def update_geopackage_metadata(gpkg: str) -> None:
+def update_geopackage_metadata(gpkg: Path) -> None:
     """
     Update the contents of the gpkg_contents table in the specified geopackage.
     """
@@ -318,10 +318,10 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     contents = source_db.execute(sql_query).fetchall()
 
     if table == "network":
-        # Look for the network entry that has a toid not in the flowpath or nexus tables
+        # Look for the network entry that has a toid not in the flowpath or nexus tables
         network_toids = [x[2] for x in contents]
         print(f"Network toids: {len(network_toids)}")
-        sql = "SELECT id FROM flowpaths"
+        sql = "SELECT id FROM flowpaths"
         flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
         print(f"Flowpath ids: {len(flowpath_ids)}")
         sql = "SELECT id FROM nexus"
@@ -342,8 +342,8 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
 
     dest_db.commit()
     source_db.close()
-    dest_db.close()
-
+    dest_db.close()
+
 
 def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name: Path) -> None:
     """
@@ -359,7 +359,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     source_db = sqlite3.connect(f"file:{hydrofabric}?mode=ro", uri=True)
     dest_db = sqlite3.connect(subset_gpkg_name)
 
-    table_keys = {"
+    table_keys = {"divide-attributes": "divide_id", "lakes": "poi_id"}
 
     if table == "lakes":
         # lakes subset we get from the pois table which was already subset by water body id
@@ -377,7 +377,7 @@ def subset_table(table: str, ids: List[str], hydrofabric: Path, subset_gpkg_name
     if table in table_keys:
         key_name = table_keys[table]
         sql_query = f"SELECT * FROM '{table}' WHERE {key_name} IN ({','.join(ids)})"
-        contents = source_db.execute(sql_query).fetchall()
+        contents = source_db.execute(sql_query).fetchall()
 
     insert_data(dest_db, table, contents)
 
@@ -429,16 +429,16 @@ def get_table_crs(gpkg: str, table: str) -> str:
 
 def get_cat_from_gage_id(gage_id: str, gpkg: Path = file_paths.conus_hydrofabric) -> str:
     """
-    Get the
+    Get the catchment id associated with a gage id.
 
     Args:
         gage_id (str): The gage ID.
 
     Returns:
-        str: The
+        str: The catchment id of the watershed containing the gage ID.
 
     Raises:
-        IndexError: If
+        IndexError: If no catchment is found for the given gage ID.
 
     """
     gage_id = "".join([x for x in gage_id if x.isdigit()])
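The substantive change in this file is the signature tightening from str to pathlib.Path on three helpers. A minimal call sketch under that assumption (the file name is illustrative; sqlite3 accepts Path objects on modern Python, so behavior is unchanged):

from pathlib import Path
from data_processing.gpkg_utils import create_empty_gpkg, add_triggers_to_gpkg

gpkg = Path("my_subset.gpkg")  # callers now pass a Path rather than a str
create_empty_gpkg(gpkg)        # creates the required tables and indices
add_triggers_to_gpkg(gpkg)     # installs the spatial-index maintenance triggers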
data_processing/graph_utils.py
CHANGED
@@ -165,6 +165,10 @@ def get_upstream_cats(names: Union[str, List[str]]) -> Set[str]:
         if name in parent_ids:
             continue
         try:
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             node_index = graph.vs.find(cat=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
@@ -205,7 +209,10 @@ def get_upstream_ids(names: Union[str, List[str]], include_outlet: bool = True)
         if name in parent_ids:
             continue
         try:
-
+            if "cat" in name:
+                node_index = graph.vs.find(cat=name).index
+            else:
+                node_index = graph.vs.find(name=name).index
             upstream_nodes = graph.subcomponent(node_index, mode="IN")
             for node in upstream_nodes:
                 parent_ids.add(graph.vs[node]["name"])
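Both hunks add the same id-aware vertex lookup: ids containing "cat" resolve against a "cat" vertex attribute, anything else against igraph's default "name" attribute. A self-contained sketch of that dispatch on a toy graph (vertex ids are illustrative):

import igraph as ig

graph = ig.Graph(directed=True)
graph.add_vertices(2)
graph.vs["name"] = ["wb-1001", "nex-2002"]  # flowpath / nexus ids
graph.vs["cat"] = ["cat-1001", ""]          # catchment ids where present

def find_node_index(name: str) -> int:
    # mirrors the added branch: catchment ids live in the "cat" attribute
    if "cat" in name:
        return graph.vs.find(cat=name).index
    return graph.vs.find(name=name).index

print(find_node_index("cat-1001"))  # 0
print(find_node_index("nex-2002"))  # 1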
data_processing/subset.py
CHANGED
@@ -31,7 +31,7 @@ subset_tables = [
 
 def create_subset_gpkg(
     ids: Union[List[str], str], hydrofabric: Path, output_gpkg_path: Path, is_vpu: bool = False
-)
+):
     # ids is a list of nexus and wb ids, or a single vpu id
     if not isinstance(ids, list):
         ids = [ids]
@@ -65,10 +65,11 @@ def subset_vpu(vpu_id: str, output_gpkg_path: Path, hydrofabric: Path = file_pat
 def subset(
     cat_ids: List[str],
     hydrofabric: Path = file_paths.conus_hydrofabric,
-    output_gpkg_path: Path =
-
-
-
+    output_gpkg_path: Path = Path(),
+    include_outlet: bool = True,
+):
+    print(cat_ids)
+    upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))
 
     if not output_gpkg_path:
         # if the name isn't provided, use the first upstream id
@@ -80,15 +81,3 @@ def subset(
     create_subset_gpkg(upstream_ids, hydrofabric, output_gpkg_path)
     logger.info(f"Subset complete for {len(upstream_ids)} features (catchments + nexuses)")
     logger.debug(f"Subset complete for {upstream_ids} catchments")
-
-
-def move_files_to_config_dir(subset_output_dir: str) -> None:
-    config_dir = subset_output_dir / "config"
-    config_dir.mkdir(parents=True, exist_ok=True)
-
-    files = [x for x in subset_output_dir.iterdir()]
-    for file in files:
-        if file.suffix in [".csv", ".json", ".geojson"]:
-            if "partitions" in file.name:
-                continue
-            os.system(f"mv {file} {config_dir}")
ngiab_data_cli/__main__.py
CHANGED
@@ -147,7 +147,10 @@ def main() -> None:
         logging.info("Subsetting complete.")
     else:
         logging.info(f"Subsetting hydrofabric")
-
+        include_outlet = True
+        if args.gage:
+            include_outlet = False
+        subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)
         logging.info("Subsetting complete.")
 
     if args.forcings:
@@ -237,7 +240,7 @@ def main() -> None:
 
     if args.vis:
         try:
-            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.
+            command = f'docker run --rm -it -p 3000:3000 -v "{str(paths.subset_dir)}:/ngen/ngen/data/" joshcu/ngiab_grafana:v0.2.1'
             subprocess.run(command, shell=True)
         except:
             logging.error("Failed to launch docker container.")
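Condensed, the new gage handling is equivalent to the sketch below; args, paths, and feature_to_subset stand in for the CLI's parsed state:

# gage-based subsets exclude the outlet feature; everything else keeps it
include_outlet = not args.gage
subset(feature_to_subset, output_gpkg_path=paths.geopackage_path, include_outlet=include_outlet)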
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.1.1
+Version: 4.2.0
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/RECORD
CHANGED

@@ -1,12 +1,12 @@
 data_processing/create_realization.py,sha256=AxUDVSORjwd6IAImZpgDr8GRHKSUQF8emQi1ikfIno4,14899
 data_processing/dataset_utils.py,sha256=4qmRmK3qMWPkiWZHXhXv3c-ISbtOwr7QhciEl2ok6Ao,7314
-data_processing/datasets.py,sha256=
+data_processing/datasets.py,sha256=BB_X0IvGnUrjBmj-wryn6-Nv5cd0Lje3zly3p896eU4,4822
 data_processing/file_paths.py,sha256=Cp3BxbO6sD50464ciTshjb3Z0vTvL0ZeSbOJgNdOqQ0,4698
 data_processing/forcings.py,sha256=6Q9fSXa67OrXg_r9li0K9widsclN2DQUp1-oUH2tZwo,19208
-data_processing/gpkg_utils.py,sha256=
-data_processing/graph_utils.py,sha256=
+data_processing/gpkg_utils.py,sha256=j-j6TkCwlkOzM5ATMHi44raYS-xrJMzk5V0XQZ_0dzQ,20220
+data_processing/graph_utils.py,sha256=I9IFkU08SkXb8pfVASTvkm7T8BYt-92CaNj7TV6wJFE,8244
 data_processing/s3fs_utils.py,sha256=WoqqwxkHpv0Qq4I5f5-gUZBCFtVQ68ehXbdOjWRKTDQ,2752
-data_processing/subset.py,sha256=
+data_processing/subset.py,sha256=GbvfAaSJTfEvYOAZ6K0OgFA81xG_Y5UxiGDxukxO9gE,2558
 data_sources/cfe-nowpm-realization-template.json,sha256=8an6q1drWD8wU1ocvdPab-GvZDvlQ-0di_-NommH3QI,3528
 data_sources/cfe-template.ini,sha256=6e5-usqjWtm3MWVvtm8CTeZTJJMxO1ZswkOXq0L9mnc,2033
 data_sources/em-catchment-template.yml,sha256=M08ixazEUHYI2PNavtI0xPZeSzcQ9bg2g0XzNT-8_u4,292
@@ -32,13 +32,13 @@ map_app/static/resources/light-style.json,sha256=DaE52qKpAkjiWSKY_z7LxreqA2rW4Zy
 map_app/static/resources/loading.gif,sha256=ggdkZf1AD7rSwIpSJwfiIqANgmVV1WHlxGuKxQKv7uY,72191
 map_app/static/resources/screenshot.jpg,sha256=Ia358aX-OHM9BP4B8lX05cLnguF2fHUIimno9bnFLYw,253730
 map_app/templates/index.html,sha256=ITRzQEYn15sFN4qRACjaNj5muhalOeP9n_IwcdsIlUs,6631
-ngiab_data_cli/__main__.py,sha256=
+ngiab_data_cli/__main__.py,sha256=V-g0dwa000e5Qye7PBMTtHTK7Cl7as3JS0UoAR0QrZ4,10503
 ngiab_data_cli/arguments.py,sha256=7ptImy8tLM1XvjfDr13tZszkjGVtenXo0KqllJeE3Mw,4372
 ngiab_data_cli/custom_logging.py,sha256=iS2XozaxudcxQj17qAsrCgbVK9LJAYAPmarJuVWJo1k,1280
 ngiab_data_cli/forcing_cli.py,sha256=lkcqWDk5H8IPyGv0DwLIZMQldqTUXpfwSX0C_RIuIJ8,3890
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
-ngiab_data_preprocess-4.
+ngiab_data_preprocess-4.2.0.dist-info/licenses/LICENSE,sha256=6dMSprwwnsRzEm02mEDbKHD9dUbL8bPIt9Vhrhb0Ulk,1081
+ngiab_data_preprocess-4.2.0.dist-info/METADATA,sha256=Y2NVmd92S38QMwKXEkLvLKyWQ2IGy25okHV8sDy6c2c,10310
+ngiab_data_preprocess-4.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ngiab_data_preprocess-4.2.0.dist-info/entry_points.txt,sha256=spwlhKEJ3ZnNETQsJGeTjD7Vwy8O_zGHb9GdX8ACCtw,128
+ngiab_data_preprocess-4.2.0.dist-info/top_level.txt,sha256=CjhYAUZrdveR2fOK6rxffU09VIN2IuPD7hk4V3l3pV0,52
+ngiab_data_preprocess-4.2.0.dist-info/RECORD,,
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/WHEEL
RENAMED

File without changes
{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/entry_points.txt
RENAMED

File without changes

{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/licenses/LICENSE
RENAMED

File without changes

{ngiab_data_preprocess-4.1.1.dist-info → ngiab_data_preprocess-4.2.0.dist-info}/top_level.txt
RENAMED

File without changes