ngiab-data-preprocess 3.2.3__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/.gitignore +2 -1
  2. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/PKG-INFO +1 -1
  3. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/create_realization.py +65 -32
  4. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/file_paths.py +10 -3
  5. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/forcings.py +34 -15
  6. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/zarr_utils.py +6 -6
  7. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-nowpm-realization-template.json +1 -1
  8. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-template.ini +6 -5
  9. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-realization-template.json +1 -1
  10. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/ngen-routing-template.yaml +5 -5
  11. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/noah-owp-modular-init.namelist.input +6 -6
  12. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/__main__.py +2 -3
  13. ngiab_data_preprocess-3.3.0/modules/ngiab_data_cli/forcing_cli.py +97 -0
  14. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +1 -1
  15. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +1 -0
  16. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +1 -0
  17. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/pyproject.toml +3 -2
  18. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/.github/workflows/build_only.yml +0 -0
  19. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/.github/workflows/publish.yml +0 -0
  20. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/LICENSE +0 -0
  21. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/README.md +0 -0
  22. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/map.html +0 -0
  23. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/gpkg_utils.py +0 -0
  24. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/graph_utils.py +0 -0
  25. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/s3fs_utils.py +0 -0
  26. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/subset.py +0 -0
  27. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-catchment-template.yml +0 -0
  28. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-config.yml +0 -0
  29. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/forcing_template.nc +0 -0
  30. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/source_validation.py +0 -0
  31. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/template.sql +0 -0
  32. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/triggers.sql +0 -0
  33. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/__init__.py +0 -0
  34. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/__main__.py +0 -0
  35. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/css/console.css +0 -0
  36. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/css/main.css +0 -0
  37. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/console.js +0 -0
  38. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/data_processing.js +0 -0
  39. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/main.js +0 -0
  40. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/dark-style.json +0 -0
  41. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/light-style.json +0 -0
  42. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/loading.gif +0 -0
  43. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/screenshot.png +0 -0
  44. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/templates/index.html +0 -0
  45. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/views.py +0 -0
  46. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/arguments.py +0 -0
  47. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/custom_logging.py +0 -0
  48. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
  49. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/requires.txt +0 -0
  50. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
  51. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/output/.gitkeep +0 -0
  52. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/setup.cfg +0 -0
.gitignore
@@ -12,4 +12,5 @@ dist
  **/tiles/vpu*
  *.tar.gz
  *.dat
- uv.lock
+ uv.lock
+ /build
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ngiab_data_preprocess
- Version: 3.2.3
+ Version: 3.3.0
  Summary: Graphical Tools for creating Next Gen Water model input data.
  Author-email: Josh Cunningham <jcunningham8@ua.edu>
  Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
modules/data_processing/create_realization.py
@@ -74,7 +74,7 @@ def make_cfe_config(
  bexp=row["mode.bexp_soil_layers_stag=2"],
  dksat=row["geom_mean.dksat_soil_layers_stag=2"],
  psisat=row["geom_mean.psisat_soil_layers_stag=2"],
- slope=row["mean.slope"],
+ slope=row["mean.slope_1km"],
  smcmax=row["mean.smcmax_soil_layers_stag=2"],
  smcwlt=row["mean.smcwlt_soil_layers_stag=2"],
  max_gw_storage=row["mean.Zmax"]/1000 if row["mean.Zmax"] is not None else "0.011[m]", # mean.Zmax is in mm!
@@ -107,10 +107,10 @@ def make_noahowp_config(
  template.format(
  start_datetime=start_datetime,
  end_datetime=end_datetime,
- lat=divide_conf_df.loc[divide, "centroid_y"],
- lon=divide_conf_df.loc[divide, "centroid_x"],
- terrain_slope=divide_conf_df.loc[divide, "mean.slope"],
- azimuth=divide_conf_df.loc[divide, "circ_mean.aspect"],
+ lat=divide_conf_df.loc[divide, "latitude"],
+ lon=divide_conf_df.loc[divide, "longitude"],
+ terrain_slope= divide_conf_df.loc[divide, "mean.slope_1km"],
+ azimuth= divide_conf_df.loc[divide, "circ_mean.aspect"],
  ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),
  IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),
  )
@@ -123,24 +123,25 @@ def get_model_attributes_modspatialite(hydrofabric: Path):
  with GeoPackage(hydrofabric) as conn:
  sql = """WITH source_crs AS (
  SELECT organization || ':' || organization_coordsys_id AS crs_string
- FROM gpkg_spatial_ref_sys
+ FROM gpkg_spatial_ref_sys
  WHERE srs_id = (
- SELECT srs_id
- FROM gpkg_geometry_columns
+ SELECT srs_id
+ FROM gpkg_geometry_columns
  WHERE table_name = 'divides'
  )
  )
- SELECT
- d.divide_id,
- d.areasqkm,
- da."mean.slope",
+ SELECT
+ d.divide_id,
+ d.areasqkm,
+ da."mean.slope",
+ da."mean.slope_1km",
  da."mean.elevation",
- ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
+ ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
  (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
- ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
+ ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
  (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude
- FROM divides AS d
- JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+ FROM divides AS d
+ JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
  """
  divide_conf_df = pandas.read_sql_query(sql, conn)
  divide_conf_df.set_index("divide_id", inplace=True)
@@ -151,15 +152,16 @@ def get_model_attributes_pyproj(hydrofabric: Path):
  # if modspatialite is not available, use pyproj
  with sqlite3.connect(hydrofabric) as conn:
  sql = """
- SELECT
- d.divide_id,
- d.areasqkm,
- da."mean.slope",
+ SELECT
+ d.divide_id,
+ d.areasqkm,
+ da."mean.slope",
+ da."mean.slope_1km",
  da."mean.elevation",
  da.centroid_x,
  da.centroid_y
- FROM divides AS d
- JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+ FROM divides AS d
+ JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
  """
  divide_conf_df = pandas.read_sql_query(sql, conn)

@@ -179,6 +181,40 @@ def get_model_attributes_pyproj(hydrofabric: Path):

  return divide_conf_df

+ def get_model_attributes(hydrofabric: Path):
+ try:
+ with GeoPackage(hydrofabric) as conn:
+ conf_df = pandas.read_sql_query(
+ """WITH source_crs AS (
+ SELECT organization || ':' || organization_coordsys_id AS crs_string
+ FROM gpkg_spatial_ref_sys
+ WHERE srs_id = (
+ SELECT srs_id
+ FROM gpkg_geometry_columns
+ WHERE table_name = 'divides'
+ )
+ )
+ SELECT
+ *,
+ ST_X(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
+ (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
+ ST_Y(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
+ (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude FROM 'divide-attributes';""",
+ conn,
+ )
+ except pandas.errors.DatabaseError:
+ with sqlite3.connect(hydrofabric) as conn:
+ conf_df = pandas.read_sql_query("SELECT* FROM 'divide-attributes';", conn,)
+ source_crs = get_table_crs_short(hydrofabric, "divides")
+ transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
+ lon, lat = transformer.transform(
+ conf_df["centroid_x"].values, conf_df["centroid_y"].values
+ )
+ conf_df["longitude"] = lon
+ conf_df["latitude"] = lat
+
+ conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
+ return conf_df

  def make_em_config(
  hydrofabric: Path,
@@ -224,8 +260,6 @@ def configure_troute(
  troute_template = file.read()
  time_step_size = 300
  nts = (end_time - start_time).total_seconds() / time_step_size
- seconds_in_hour = 3600
- number_of_hourly_steps = nts * time_step_size / seconds_in_hour
  filled_template = troute_template.format(
  # hard coded to 5 minutes
  time_step_size=time_step_size,
@@ -234,8 +268,7 @@ def configure_troute(
  geo_file_path=f"./config/{cat_id}_subset.gpkg",
  start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
  nts=nts,
- max_loop_size=nts,
- stream_output_time=number_of_hourly_steps,
+ max_loop_size=nts,
  )

  with open(config_dir / "troute.yaml", "w") as file:
@@ -243,7 +276,7 @@ def configure_troute(


  def make_ngen_realization_json(
- config_dir: Path, template_path: Path, start_time: datetime, end_time: datetime
+ config_dir: Path, template_path: Path, start_time: datetime, end_time: datetime
  ) -> None:
  with open(template_path, "r") as file:
  realization = json.load(file)
@@ -281,8 +314,8 @@ def create_realization(cat_id: str, start_time: datetime, end_time: datetime, us

  # get approximate groundwater levels from nwm output
  template_path = paths.template_cfe_nowpm_realization_config
- with sqlite3.connect(paths.geopackage_path) as conn:
- conf_df = pandas.read_sql_query("SELECT * FROM 'divide-attributes';", conn)
+
+ conf_df = get_model_attributes(paths.geopackage_path)

  if use_nwm_gw:
  gw_levels = get_approximate_gw_storage(paths, start_time)
@@ -310,10 +343,10 @@ def create_partitions(paths: Path, num_partitions: int = None) -> None:
  cat_to_nex_pairs = get_cat_to_nex_flowpairs(hydrofabric=paths.geopackage_path)
  nexus = defaultdict(list)

- for cat, nex in cat_to_nex_pairs:
- nexus[nex].append(cat)
+ # for cat, nex in cat_to_nex_pairs:
+ # nexus[nex].append(cat)

- num_partitions = min(num_partitions, len(nexus))
+ num_partitions = min(num_partitions, len(cat_to_nex_pairs))
  # partition_size = ceil(len(nexus) / num_partitions)
  # num_nexus = len(nexus)
  # nexus = list(nexus.items())
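
The new get_model_attributes() above consolidates the two earlier helpers: it first tries the spatialite path and falls back to pyproj when the extension is unavailable, returning every 'divide-attributes' column plus computed WGS84 longitude/latitude. A minimal sketch of calling it downstream; the geopackage path is a placeholder and the printed columns are assumptions based on the queries above, not taken verbatim from this diff:

    from pathlib import Path
    from data_processing.create_realization import get_model_attributes

    # hypothetical subset geopackage produced by an earlier subsetting step
    attrs = get_model_attributes(Path("output/cat-2863657/config/cat-2863657_subset.gpkg"))

    # centroid_x/centroid_y are dropped and replaced by longitude/latitude columns
    print(attrs[["divide_id", "mean.slope_1km", "longitude", "latitude"]].head())
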
modules/data_processing/file_paths.py
@@ -97,12 +97,19 @@ class file_paths:

  @property
  def cached_nc_file(self) -> Path:
- return self.subset_dir / "merged_data.nc"
+ return self.forcings_dir / "raw_gridded_data.nc"
+
+ def append_cli_command(self, command: list[str]) -> None:
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+ command_string = " ".join(command)
+ history_file = self.metadata_dir / "cli_commands_history.txt"
+ if not history_file.parent.exists():
+ history_file.parent.mkdir(parents=True, exist_ok=True)
+ with open(self.metadata_dir / "cli_commands_history.txt", "a") as f:
+ f.write(f"{current_time}| {command_string}\n")

  def setup_run_folders(self) -> None:
  folders = [
- "restart",
- "lakeout",
  "outputs",
  "outputs/ngen",
  "outputs/troute",
modules/data_processing/forcings.py
@@ -76,6 +76,8 @@ def add_APCP_SURFACE_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
  # technically should be kg/m^2/s at 1kg = 1l it equates to mm/s
  # nom says qinsur output is m/s, hopefully qinsur is converted to mm/h by ngen
  dataset["APCP_surface"] = dataset["precip_rate"] * 3600
+ dataset["APCP_surface"].attrs["units"] = "mm h^-1" # ^-1 notation copied from source data
+ dataset["APCP_surface"].attrs["source_note"] = "This is just the precip_rate variable converted to mm/h by multiplying by 3600"
  return dataset


@@ -140,6 +142,12 @@ def get_cell_weights_parallel(gdf, input_forcings, num_partitions):
  catchments = pool.starmap(get_cell_weights, args)
  return pd.concat(catchments)

+ def get_units(dataset: xr.Dataset) -> dict:
+ units = {}
+ for var in dataset.data_vars:
+ if dataset[var].attrs["units"]:
+ units[var] = dataset[var].attrs["units"]
+ return units

  def compute_zonal_stats(
  gdf: gpd.GeoDataFrame, merged_data: xr.Dataset, forcings_dir: Path
@@ -152,6 +160,8 @@ def compute_zonal_stats(


  catchments = get_cell_weights_parallel(gdf, merged_data, num_partitions)
+ units = get_units(merged_data)
+
  variables = {
  "LWDOWN": "DLWRF_surface",
  "PSFC": "PRES_surface",
@@ -224,12 +234,12 @@ def compute_zonal_stats(
  # Merge the chunks back together
  datasets = [xr.open_dataset(forcings_dir / "temp" / f"{variable}_{i}.nc") for i in range(len(time_chunks))]
  result = xr.concat(datasets, dim="time")
- result.to_netcdf(forcings_dir / f"{variable}.nc")
+ result.to_netcdf(forcings_dir / "temp" / f"{variable}.nc")
  # close the datasets
  result.close()
  _ = [dataset.close() for dataset in datasets]

- for file in forcings_dir.glob("temp/*.nc"):
+ for file in forcings_dir.glob("temp/*_*.nc"):
  file.unlink()
  progress.remove_task(chunk_task)
  progress.update(
@@ -240,10 +250,10 @@ def compute_zonal_stats(
  logger.info(
  f"Forcing generation complete! Zonal stats computed in {time.time() - timer_start:2f} seconds"
  )
- write_outputs(forcings_dir, variables)
+ write_outputs(forcings_dir, variables, units)


- def write_outputs(forcings_dir, variables):
+ def write_outputs(forcings_dir, variables, units):

  # start a dask cluster if there isn't one already running
  try:
@@ -251,12 +261,15 @@ def write_outputs(forcings_dir, variables):
  except ValueError:
  cluster = LocalCluster()
  client = Client(cluster)
-
+ temp_forcings_dir = forcings_dir / "temp"
  # Combine all variables into a single dataset using dask
- results = [xr.open_dataset(file, chunks="auto") for file in forcings_dir.glob("*.nc")]
+ results = [xr.open_dataset(file, chunks="auto") for file in temp_forcings_dir.glob("*.nc")]
  final_ds = xr.merge(results)
-
- output_folder = forcings_dir / "by_catchment"
+ for var in final_ds.data_vars:
+ if var in units:
+ final_ds[var].attrs["units"] = units[var]
+ else:
+ logger.warning(f"Variable {var} has no units")

  rename_dict = {}
  for key, value in variables.items():
@@ -294,19 +307,25 @@ def write_outputs(forcings_dir, variables):
  final_ds["Time"].attrs["units"] = "s"
  final_ds["Time"].attrs["epoch_start"] = "01/01/1970 00:00:00" # not needed but suppresses the ngen warning

- final_ds.to_netcdf(output_folder / "forcings.nc", engine="netcdf4")
+ final_ds.to_netcdf(forcings_dir / "forcings.nc", engine="netcdf4")
  # close the datasets
  _ = [result.close() for result in results]
  final_ds.close()

+ # clean up the temp files
+ for file in temp_forcings_dir.glob("*.*"):
+ file.unlink()
+ temp_forcings_dir.rmdir()
+

  def setup_directories(cat_id: str) -> file_paths:
  forcing_paths = file_paths(cat_id)
- if forcing_paths.forcings_dir.exists():
- logger.info("Forcings directory already exists, deleting")
- shutil.rmtree(forcing_paths.forcings_dir)
- for folder in ["by_catchment", "temp"]:
- os.makedirs(forcing_paths.forcings_dir / folder, exist_ok=True)
+ # delete everything in the forcing folder except the cached nc file
+ for file in forcing_paths.forcings_dir.glob("*.*"):
+ if file != forcing_paths.cached_nc_file:
+ file.unlink()
+
+ os.makedirs(forcing_paths.forcings_dir / "temp", exist_ok=True)

  return forcing_paths

@@ -326,7 +345,7 @@ def create_forcings(
  if type(end_time) == datetime:
  end_time = end_time.strftime("%Y-%m-%d %H:%M")

- merged_data = get_forcing_data(forcing_paths, start_time, end_time, gdf, forcing_vars)
+ merged_data = get_forcing_data(forcing_paths.cached_nc_path, start_time, end_time, gdf, forcing_vars)
  compute_zonal_stats(gdf, merged_data, forcing_paths.forcings_dir)

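
The units plumbing added to forcings.py is a simple copy-through: get_units() reads each variable's "units" attribute from the gridded source before zonal averaging (which drops attributes), and write_outputs() reapplies them to the merged per-catchment dataset. A self-contained sketch of the same pattern, using an invented variable name and unit string:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"T2D": ("time", np.zeros(3))})
    ds["T2D"].attrs["units"] = "K"  # illustrative unit string

    # harvest units before any reduction strips the attributes
    units = {var: ds[var].attrs["units"] for var in ds.data_vars if ds[var].attrs.get("units")}

    averaged = ds.mean("time")  # stands in for the zonal-stats step; attrs are dropped here
    for var in averaged.data_vars:
        averaged[var].attrs["units"] = units.get(var, "")
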
modules/data_processing/zarr_utils.py
@@ -100,18 +100,18 @@ def compute_store(stores: xr.Dataset, cached_nc_path: Path) -> xr.Dataset:


  def get_forcing_data(
- forcing_paths: file_paths,
+ cached_nc_path: Path,
  start_time: str,
  end_time: str,
  gdf: gpd.GeoDataFrame,
  forcing_vars: list[str] = None,
  ) -> xr.Dataset:
  merged_data = None
- if os.path.exists(forcing_paths.cached_nc_file):
+ if os.path.exists(cached_nc_path):
  logger.info("Found cached nc file")
  # open the cached file and check that the time range is correct
  cached_data = xr.open_mfdataset(
- forcing_paths.cached_nc_file, parallel=True, engine="h5netcdf"
+ cached_nc_path, parallel=True, engine="h5netcdf"
  )
  range_in_cache = cached_data.time[0].values <= np.datetime64(
  start_time
@@ -138,14 +138,14 @@

  if range_in_cache:
  logger.info("Time range is within cached data")
- logger.debug(f"Opened cached nc file: [{forcing_paths.cached_nc_file}]")
+ logger.debug(f"Opened cached nc file: [{cached_nc_path}]")
  merged_data = clip_dataset_to_bounds(
  cached_data, gdf.total_bounds, start_time, end_time
  )
  logger.debug("Clipped stores")
  else:
  logger.info("Time range is incorrect")
- os.remove(forcing_paths.cached_nc_file)
+ os.remove(cached_nc_path)
  logger.debug("Removed cached nc file")

  if merged_data is None:
@@ -155,7 +155,7 @@
  logger.debug("Got zarr stores")
  clipped_store = clip_dataset_to_bounds(lazy_store, gdf.total_bounds, start_time, end_time)
  logger.info("Clipped forcing data to bounds")
- merged_data = compute_store(clipped_store, forcing_paths.cached_nc_file)
+ merged_data = compute_store(clipped_store, cached_nc_path)
  logger.info("Forcing data loaded and cached")
  # close the event loop

modules/data_sources/cfe-nowpm-realization-template.json
@@ -80,7 +80,7 @@
  }
  ],
  "forcing": {
- "path": "./forcings/by_catchment/forcings.nc",
+ "path": "./forcings/forcings.nc",
  "provider": "NetCDF",
  "enable_cache": false
  }
modules/data_sources/cfe-template.ini
@@ -1,5 +1,6 @@
  forcing_file=BMI
- surface_partitioning_scheme=Schaake
+ surface_water_partitioning_scheme=Schaake
+ surface_runoff_scheme=GIUH

  # ----------------
  # State Parameters
@@ -40,13 +41,13 @@ alpha_fc=0.33
  # decimal fraction of maximum soil water storage (smcmax * depth) for the initial timestep
  soil_storage=0.05[m/m]
  # number of Nash lf reservoirs (optional, defaults to 2, ignored if storage values present)
- K_nash=0.03[]
+ K_nash_subsurface=0.03[]
  # Nash Config param - primary reservoir
  K_lf=0.01[]
  # Nash Config param - secondary reservoir
- nash_storage=0.0,0.0
+ nash_storage_subsurface=0.0,0.0
  # Giuh ordinates in dt time steps
- giuh_ordinates=1.00,0.00
+ giuh_ordinates=0.55,0.25,0.2

  # ---------------------
  # Time Info
@@ -58,4 +59,4 @@ verbosity=0
  DEBUG=0
  # Parameter in the surface runoff parameterization
  # (https://mikejohnson51.github.io/hyAggregate/#Routing_Attributes)
- refkdt={refkdt}
+ refkdt={refkdt}
modules/data_sources/em-realization-template.json
@@ -53,7 +53,7 @@
  }
  ],
  "forcing": {
- "path": "./forcings/by_catchment/forcings.nc",
+ "path": "./forcings/forcings.nc",
  "provider": "NetCDF",
  "enable_cache": false
  }
modules/data_sources/ngen-routing-template.yaml
@@ -98,12 +98,12 @@ compute_parameters:
  output_parameters:
  #----------
  #test_output: outputs/lcr_flowveldepth.pkl
- lite_restart:
- #----------
- lite_restart_output_directory: restart/
- lakeout_output: lakeout/
+ # lite_restart:
+ # #----------
+ # lite_restart_output_directory: restart/
+ # lakeout_output: lakeout/
  stream_output:
  stream_output_directory: outputs/troute/
- stream_output_time: {stream_output_time} #number of internal_frequency timesteps per output file
+ stream_output_time: -1 # -1 adds all outputs to a single file
  stream_output_type: ".nc" #please select only between netcdf '.nc' or '.csv' or '.pkl'
  stream_output_internal_frequency: 60 #[min] it should be order of 5 minutes. For instance if you want to output every hour put 60
modules/data_sources/noah-owp-modular-init.namelist.input
@@ -28,10 +28,10 @@
  /

  &model_options
- precip_phase_option = 2
- snow_albedo_option = 2 ! 1 = BATS, 2 = CLASS
- dynamic_veg_option = 1
- runoff_option = 8
+ precip_phase_option = 1
+ snow_albedo_option = 1 ! 1 = BATS, 2 = CLASS
+ dynamic_veg_option = 4
+ runoff_option = 3
  drainage_option = 8
  frozen_soil_option = 1
  dynamic_vic_option = 1
@@ -43,8 +43,8 @@
  soil_temp_boundary_option = 2
  supercooled_water_option = 1
  stomatal_resistance_option = 1
- evap_srfc_resistance_option = 1
- subsurface_option = 1
+ evap_srfc_resistance_option = 4
+ subsurface_option = 2
  /

  &structure
modules/ngiab_data_cli/__main__.py
@@ -119,7 +119,7 @@ def validate_run_directory(args, paths: file_paths):

  def main() -> None:
  setup_logging()
-
+ validate_all()
  try:
  args = parse_arguments()
  if args.debug:
@@ -128,7 +128,7 @@ def main() -> None:
  paths = file_paths(output_folder)
  args = set_dependent_flags(args, paths) # --validate
  if feature_to_subset:
- logging.info(f"Subsetting {feature_to_subset} to {paths.output_dir}")
+ logging.info(f"Processing {feature_to_subset} in {paths.output_dir}")
  if not args.vpu:
  upstream_count = len(get_upstream_cats(feature_to_subset))
  logging.info(f"Upstream catchments: {upstream_count}")
@@ -243,5 +243,4 @@ def main() -> None:


  if __name__ == "__main__":
- validate_all()
  main()
modules/ngiab_data_cli/forcing_cli.py (new file)
@@ -0,0 +1,97 @@
+ from data_sources.source_validation import validate_all
+ from ngiab_data_cli.custom_logging import setup_logging
+ from data_processing.forcings import compute_zonal_stats
+ from data_processing.zarr_utils import get_forcing_data
+ from data_processing.file_paths import file_paths
+ import argparse
+ import logging
+ import time
+ import xarray as xr
+ import geopandas as gpd
+ from datetime import datetime
+ from pathlib import Path
+ import shutil
+
+ # Constants
+ DATE_FORMAT = "%Y-%m-%d" # used for datetime parsing
+ DATE_FORMAT_HINT = "YYYY-MM-DD" # printed in help message
+
+
+ def parse_arguments() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Subsetting hydrofabrics, forcing generation, and realization creation"
+ )
+ parser.add_argument(
+ "-i",
+ "--input_file",
+ type=Path,
+ help="path to the input hydrofabric geopackage",
+ required=True,
+ )
+ parser.add_argument(
+ "-o",
+ "--output_file",
+ type=Path,
+ help="path to the forcing output file, e.g. /path/to/forcings.nc",
+ required=True,
+ )
+ parser.add_argument(
+ "--start_date",
+ "--start",
+ type=lambda s: datetime.strptime(s, DATE_FORMAT),
+ help=f"Start date for forcings/realization (format {DATE_FORMAT_HINT})",
+ required=True,
+ )
+ parser.add_argument(
+ "--end_date",
+ "--end",
+ type=lambda s: datetime.strptime(s, DATE_FORMAT),
+ help=f"End date for forcings/realization (format {DATE_FORMAT_HINT})",
+ required=True,
+ )
+ parser.add_argument(
+ "-D",
+ "--debug",
+ action="store_true",
+ help="enable debug logging",
+ )
+
+ return parser.parse_args()
+
+ def main() -> None:
+ time.sleep(0.01)
+ setup_logging()
+ validate_all()
+ args = parse_arguments()
+ projection = xr.open_dataset(file_paths.template_nc, engine="h5netcdf").crs.esri_pe_string
+ logging.debug("Got projection from grid file")
+
+ gdf = gpd.read_file(args.input_file, layer="divides").to_crs(projection)
+ logging.debug(f"gdf bounds: {gdf.total_bounds}")
+
+ start_time = args.start_date.strftime("%Y-%m-%d %H:%M")
+ end_time = args.end_date.strftime("%Y-%m-%d %H:%M")
+
+ cached_nc_path = args.output_file.parent / (args.input_file.stem + "-raw-gridded-data.nc")
+ print(cached_nc_path)
+ merged_data = get_forcing_data(cached_nc_path, start_time, end_time, gdf)
+ forcing_working_dir = args.output_file.parent / (args.input_file.stem + "-working-dir")
+ if not forcing_working_dir.exists():
+ forcing_working_dir.mkdir(parents=True, exist_ok=True)
+
+ temp_dir = forcing_working_dir / "temp"
+ if not temp_dir.exists():
+ temp_dir.mkdir(parents=True, exist_ok=True)
+
+
+ compute_zonal_stats(gdf, merged_data, forcing_working_dir)
+
+ shutil.copy(forcing_working_dir / "forcings.nc", args.output_file)
+ logging.info(f"Created forcings file: {args.output_file}")
+ # remove the working directory
+ shutil.rmtree(forcing_working_dir)
+
+
+ if __name__ == "__main__":
+ main()
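
This new module is also registered as a `forcings` console script in entry_points.txt and pyproject.toml further down, so forcing generation can be driven without the map app or the full cli. A hypothetical invocation from Python, using the arguments defined above; all paths and dates are placeholders:

    import subprocess

    subprocess.run(
        [
            "python", "-m", "ngiab_data_cli.forcing_cli",
            "-i", "/path/to/hydrofabric_subset.gpkg",  # placeholder subset geopackage
            "-o", "/path/to/forcings.nc",              # placeholder output file
            "--start", "2022-01-01",
            "--end", "2022-02-01",
        ],
        check=True,
    )

Once the package is installed, the equivalent shell form would be the registered `forcings` command with the same flags.
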
modules/ngiab_data_preprocess.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ngiab_data_preprocess
- Version: 3.2.3
+ Version: 3.3.0
  Summary: Graphical Tools for creating Next Gen Water model input data.
  Author-email: Josh Cunningham <jcunningham8@ua.edu>
  Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
modules/ngiab_data_preprocess.egg-info/SOURCES.txt
@@ -40,6 +40,7 @@ modules/map_app/templates/index.html
  modules/ngiab_data_cli/__main__.py
  modules/ngiab_data_cli/arguments.py
  modules/ngiab_data_cli/custom_logging.py
+ modules/ngiab_data_cli/forcing_cli.py
  modules/ngiab_data_preprocess.egg-info/PKG-INFO
  modules/ngiab_data_preprocess.egg-info/SOURCES.txt
  modules/ngiab_data_preprocess.egg-info/dependency_links.txt
modules/ngiab_data_preprocess.egg-info/entry_points.txt
@@ -1,3 +1,4 @@
  [console_scripts]
  cli = ngiab_data_cli.__main__:main
+ forcings = ngiab_data_cli.forcing_cli:main
  map_app = map_app.__main__:main
pyproject.toml
@@ -12,7 +12,7 @@ exclude = ["tests*"]

  [project]
  name = "ngiab_data_preprocess"
- version = "v3.2.3"
+ version = "v3.3.0"
  authors = [{ name = "Josh Cunningham", email = "jcunningham8@ua.edu" }]
  description = "Graphical Tools for creating Next Gen Water model input data."
  readme = "README.md"
@@ -43,7 +43,7 @@ dependencies = [
  "tqdm==4.66.4",
  "rich==13.7.1",
  "colorama==0.4.6",
- "bokeh==3.5.1"
+ "bokeh==3.5.1",
  ]

  [project.optional-dependencies]
@@ -57,6 +57,7 @@ Issues = "https://github.com/CIROH-UA/NGIAB_data_preprocess/issues"
  [project.scripts]
  cli = "ngiab_data_cli.__main__:main"
  map_app = "map_app.__main__:main"
+ forcings = "ngiab_data_cli.forcing_cli:main"

  [build-system]
  # scm adds files tracked by git to the package