ngiab-data-preprocess 3.2.3__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/.gitignore +2 -1
  2. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/PKG-INFO +1 -1
  3. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/create_realization.py +65 -32
  4. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/file_paths.py +10 -3
  5. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/forcings.py +34 -15
  6. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/zarr_utils.py +6 -6
  7. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-nowpm-realization-template.json +1 -1
  8. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/cfe-template.ini +6 -5
  9. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-realization-template.json +1 -1
  10. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/ngen-routing-template.yaml +5 -5
  11. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/noah-owp-modular-init.namelist.input +6 -6
  12. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/__main__.py +2 -3
  13. ngiab_data_preprocess-3.3.0/modules/ngiab_data_cli/forcing_cli.py +97 -0
  14. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +1 -1
  15. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +1 -0
  16. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +1 -0
  17. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/pyproject.toml +3 -2
  18. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/.github/workflows/build_only.yml +0 -0
  19. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/.github/workflows/publish.yml +0 -0
  20. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/LICENSE +0 -0
  21. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/README.md +0 -0
  22. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/map.html +0 -0
  23. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/gpkg_utils.py +0 -0
  24. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/graph_utils.py +0 -0
  25. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/s3fs_utils.py +0 -0
  26. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_processing/subset.py +0 -0
  27. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-catchment-template.yml +0 -0
  28. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/em-config.yml +0 -0
  29. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/forcing_template.nc +0 -0
  30. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/source_validation.py +0 -0
  31. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/template.sql +0 -0
  32. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/data_sources/triggers.sql +0 -0
  33. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/__init__.py +0 -0
  34. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/__main__.py +0 -0
  35. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/css/console.css +0 -0
  36. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/css/main.css +0 -0
  37. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/console.js +0 -0
  38. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/data_processing.js +0 -0
  39. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/js/main.js +0 -0
  40. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/dark-style.json +0 -0
  41. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/light-style.json +0 -0
  42. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/loading.gif +0 -0
  43. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/static/resources/screenshot.png +0 -0
  44. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/templates/index.html +0 -0
  45. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/map_app/views.py +0 -0
  46. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/arguments.py +0 -0
  47. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_cli/custom_logging.py +0 -0
  48. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
  49. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/requires.txt +0 -0
  50. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
  51. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/output/.gitkeep +0 -0
  52. {ngiab_data_preprocess-3.2.3 → ngiab_data_preprocess-3.3.0}/setup.cfg +0 -0
.gitignore
@@ -12,4 +12,5 @@ dist
  **/tiles/vpu*
  *.tar.gz
  *.dat
- uv.lock
+ uv.lock
+ /build
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ngiab_data_preprocess
- Version: 3.2.3
+ Version: 3.3.0
  Summary: Graphical Tools for creating Next Gen Water model input data.
  Author-email: Josh Cunningham <jcunningham8@ua.edu>
  Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
modules/data_processing/create_realization.py
@@ -74,7 +74,7 @@ def make_cfe_config(
  bexp=row["mode.bexp_soil_layers_stag=2"],
  dksat=row["geom_mean.dksat_soil_layers_stag=2"],
  psisat=row["geom_mean.psisat_soil_layers_stag=2"],
- slope=row["mean.slope"],
+ slope=row["mean.slope_1km"],
  smcmax=row["mean.smcmax_soil_layers_stag=2"],
  smcwlt=row["mean.smcwlt_soil_layers_stag=2"],
  max_gw_storage=row["mean.Zmax"]/1000 if row["mean.Zmax"] is not None else "0.011[m]", # mean.Zmax is in mm!
@@ -107,10 +107,10 @@ def make_noahowp_config(
  template.format(
  start_datetime=start_datetime,
  end_datetime=end_datetime,
- lat=divide_conf_df.loc[divide, "centroid_y"],
- lon=divide_conf_df.loc[divide, "centroid_x"],
- terrain_slope=divide_conf_df.loc[divide, "mean.slope"],
- azimuth=divide_conf_df.loc[divide, "circ_mean.aspect"],
+ lat=divide_conf_df.loc[divide, "latitude"],
+ lon=divide_conf_df.loc[divide, "longitude"],
+ terrain_slope= divide_conf_df.loc[divide, "mean.slope_1km"],
+ azimuth= divide_conf_df.loc[divide, "circ_mean.aspect"],
  ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),
  IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),
  )
@@ -123,24 +123,25 @@ def get_model_attributes_modspatialite(hydrofabric: Path):
  with GeoPackage(hydrofabric) as conn:
  sql = """WITH source_crs AS (
  SELECT organization || ':' || organization_coordsys_id AS crs_string
- FROM gpkg_spatial_ref_sys
+ FROM gpkg_spatial_ref_sys
  WHERE srs_id = (
- SELECT srs_id
- FROM gpkg_geometry_columns
+ SELECT srs_id
+ FROM gpkg_geometry_columns
  WHERE table_name = 'divides'
  )
  )
- SELECT
- d.divide_id,
- d.areasqkm,
- da."mean.slope",
+ SELECT
+ d.divide_id,
+ d.areasqkm,
+ da."mean.slope",
+ da."mean.slope_1km",
  da."mean.elevation",
- ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
+ ST_X(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
  (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
- ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
+ ST_Y(Transform(MakePoint(da.centroid_x, da.centroid_y), 4326, NULL,
  (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude
- FROM divides AS d
- JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+ FROM divides AS d
+ JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
  """
  divide_conf_df = pandas.read_sql_query(sql, conn)
  divide_conf_df.set_index("divide_id", inplace=True)
@@ -151,15 +152,16 @@ def get_model_attributes_pyproj(hydrofabric: Path):
  # if modspatialite is not available, use pyproj
  with sqlite3.connect(hydrofabric) as conn:
  sql = """
- SELECT
- d.divide_id,
- d.areasqkm,
- da."mean.slope",
+ SELECT
+ d.divide_id,
+ d.areasqkm,
+ da."mean.slope",
+ da."mean.slope_1km",
  da."mean.elevation",
  da.centroid_x,
  da.centroid_y
- FROM divides AS d
- JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
+ FROM divides AS d
+ JOIN 'divide-attributes' AS da ON d.divide_id = da.divide_id
  """
  divide_conf_df = pandas.read_sql_query(sql, conn)

@@ -179,6 +181,40 @@ def get_model_attributes_pyproj(hydrofabric: Path):

  return divide_conf_df

+ def get_model_attributes(hydrofabric: Path):
+ try:
+ with GeoPackage(hydrofabric) as conn:
+ conf_df = pandas.read_sql_query(
+ """WITH source_crs AS (
+ SELECT organization || ':' || organization_coordsys_id AS crs_string
+ FROM gpkg_spatial_ref_sys
+ WHERE srs_id = (
+ SELECT srs_id
+ FROM gpkg_geometry_columns
+ WHERE table_name = 'divides'
+ )
+ )
+ SELECT
+ *,
+ ST_X(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
+ (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS longitude,
+ ST_Y(Transform(MakePoint(centroid_x, centroid_y), 4326, NULL,
+ (SELECT crs_string FROM source_crs), 'EPSG:4326')) AS latitude FROM 'divide-attributes';""",
+ conn,
+ )
+ except pandas.errors.DatabaseError:
+ with sqlite3.connect(hydrofabric) as conn:
+ conf_df = pandas.read_sql_query("SELECT* FROM 'divide-attributes';", conn,)
+ source_crs = get_table_crs_short(hydrofabric, "divides")
+ transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
+ lon, lat = transformer.transform(
+ conf_df["centroid_x"].values, conf_df["centroid_y"].values
+ )
+ conf_df["longitude"] = lon
+ conf_df["latitude"] = lat
+
+ conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
+ return conf_df

  def make_em_config(
  hydrofabric: Path,
@@ -224,8 +260,6 @@ def configure_troute(
  troute_template = file.read()
  time_step_size = 300
  nts = (end_time - start_time).total_seconds() / time_step_size
- seconds_in_hour = 3600
- number_of_hourly_steps = nts * time_step_size / seconds_in_hour
  filled_template = troute_template.format(
  # hard coded to 5 minutes
  time_step_size=time_step_size,
@@ -234,8 +268,7 @@ def configure_troute(
  geo_file_path=f"./config/{cat_id}_subset.gpkg",
  start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
  nts=nts,
- max_loop_size=nts,
- stream_output_time=number_of_hourly_steps,
+ max_loop_size=nts,
  )

  with open(config_dir / "troute.yaml", "w") as file:
@@ -243,7 +276,7 @@ def configure_troute(


  def make_ngen_realization_json(
- config_dir: Path, template_path: Path, start_time: datetime, end_time: datetime
+ config_dir: Path, template_path: Path, start_time: datetime, end_time: datetime
  ) -> None:
  with open(template_path, "r") as file:
  realization = json.load(file)
@@ -281,8 +314,8 @@ def create_realization(cat_id: str, start_time: datetime, end_time: datetime, us

  # get approximate groundwater levels from nwm output
  template_path = paths.template_cfe_nowpm_realization_config
- with sqlite3.connect(paths.geopackage_path) as conn:
- conf_df = pandas.read_sql_query("SELECT * FROM 'divide-attributes';", conn)
+
+ conf_df = get_model_attributes(paths.geopackage_path)

  if use_nwm_gw:
  gw_levels = get_approximate_gw_storage(paths, start_time)
@@ -310,10 +343,10 @@ def create_partitions(paths: Path, num_partitions: int = None) -> None:
  cat_to_nex_pairs = get_cat_to_nex_flowpairs(hydrofabric=paths.geopackage_path)
  nexus = defaultdict(list)

- for cat, nex in cat_to_nex_pairs:
- nexus[nex].append(cat)
+ # for cat, nex in cat_to_nex_pairs:
+ # nexus[nex].append(cat)

- num_partitions = min(num_partitions, len(nexus))
+ num_partitions = min(num_partitions, len(cat_to_nex_pairs))
  # partition_size = ceil(len(nexus) / num_partitions)
  # num_nexus = len(nexus)
  # nexus = list(nexus.items())
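
The new get_model_attributes() above consolidates the two earlier helpers: it first tries the spatialite path and falls back to pyproj when the extension is unavailable, returning every 'divide-attributes' column plus computed WGS84 longitude/latitude. A minimal sketch of calling it downstream; the geopackage path is a placeholder and the printed columns are assumptions based on the queries above, not taken verbatim from this diff:

    from pathlib import Path
    from data_processing.create_realization import get_model_attributes

    # hypothetical subset geopackage produced by an earlier subsetting step
    attrs = get_model_attributes(Path("output/cat-2863657/config/cat-2863657_subset.gpkg"))

    # centroid_x/centroid_y are dropped and replaced by longitude/latitude columns
    print(attrs[["divide_id", "mean.slope_1km", "longitude", "latitude"]].head())
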
modules/data_processing/file_paths.py
@@ -97,12 +97,19 @@ class file_paths:

  @property
  def cached_nc_file(self) -> Path:
- return self.subset_dir / "merged_data.nc"
+ return self.forcings_dir / "raw_gridded_data.nc"
+
+ def append_cli_command(self, command: list[str]) -> None:
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+ command_string = " ".join(command)
+ history_file = self.metadata_dir / "cli_commands_history.txt"
+ if not history_file.parent.exists():
+ history_file.parent.mkdir(parents=True, exist_ok=True)
+ with open(self.metadata_dir / "cli_commands_history.txt", "a") as f:
+ f.write(f"{current_time}| {command_string}\n")

  def setup_run_folders(self) -> None:
  folders = [
- "restart",
- "lakeout",
  "outputs",
  "outputs/ngen",
  "outputs/troute",
modules/data_processing/forcings.py
@@ -76,6 +76,8 @@ def add_APCP_SURFACE_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
  # technically should be kg/m^2/s at 1kg = 1l it equates to mm/s
  # nom says qinsur output is m/s, hopefully qinsur is converted to mm/h by ngen
  dataset["APCP_surface"] = dataset["precip_rate"] * 3600
+ dataset["APCP_surface"].attrs["units"] = "mm h^-1" # ^-1 notation copied from source data
+ dataset["APCP_surface"].attrs["source_note"] = "This is just the precip_rate variable converted to mm/h by multiplying by 3600"
  return dataset


@@ -140,6 +142,12 @@ def get_cell_weights_parallel(gdf, input_forcings, num_partitions):
  catchments = pool.starmap(get_cell_weights, args)
  return pd.concat(catchments)

+ def get_units(dataset: xr.Dataset) -> dict:
+ units = {}
+ for var in dataset.data_vars:
+ if dataset[var].attrs["units"]:
+ units[var] = dataset[var].attrs["units"]
+ return units

  def compute_zonal_stats(
  gdf: gpd.GeoDataFrame, merged_data: xr.Dataset, forcings_dir: Path
@@ -152,6 +160,8 @@ def compute_zonal_stats(


  catchments = get_cell_weights_parallel(gdf, merged_data, num_partitions)
+ units = get_units(merged_data)
+
  variables = {
  "LWDOWN": "DLWRF_surface",
  "PSFC": "PRES_surface",
@@ -224,12 +234,12 @@ def compute_zonal_stats(
  # Merge the chunks back together
  datasets = [xr.open_dataset(forcings_dir / "temp" / f"{variable}_{i}.nc") for i in range(len(time_chunks))]
  result = xr.concat(datasets, dim="time")
- result.to_netcdf(forcings_dir / f"{variable}.nc")
+ result.to_netcdf(forcings_dir / "temp" / f"{variable}.nc")
  # close the datasets
  result.close()
  _ = [dataset.close() for dataset in datasets]

- for file in forcings_dir.glob("temp/*.nc"):
+ for file in forcings_dir.glob("temp/*_*.nc"):
  file.unlink()
  progress.remove_task(chunk_task)
  progress.update(
@@ -240,10 +250,10 @@ def compute_zonal_stats(
  logger.info(
  f"Forcing generation complete! Zonal stats computed in {time.time() - timer_start:2f} seconds"
  )
- write_outputs(forcings_dir, variables)
+ write_outputs(forcings_dir, variables, units)


- def write_outputs(forcings_dir, variables):
+ def write_outputs(forcings_dir, variables, units):

  # start a dask cluster if there isn't one already running
  try:
@@ -251,12 +261,15 @@ def write_outputs(forcings_dir, variables):
  except ValueError:
  cluster = LocalCluster()
  client = Client(cluster)
-
+ temp_forcings_dir = forcings_dir / "temp"
  # Combine all variables into a single dataset using dask
- results = [xr.open_dataset(file, chunks="auto") for file in forcings_dir.glob("*.nc")]
+ results = [xr.open_dataset(file, chunks="auto") for file in temp_forcings_dir.glob("*.nc")]
  final_ds = xr.merge(results)
-
- output_folder = forcings_dir / "by_catchment"
+ for var in final_ds.data_vars:
+ if var in units:
+ final_ds[var].attrs["units"] = units[var]
+ else:
+ logger.warning(f"Variable {var} has no units")

  rename_dict = {}
  for key, value in variables.items():
@@ -294,19 +307,25 @@ def write_outputs(forcings_dir, variables):
  final_ds["Time"].attrs["units"] = "s"
  final_ds["Time"].attrs["epoch_start"] = "01/01/1970 00:00:00" # not needed but suppresses the ngen warning

- final_ds.to_netcdf(output_folder / "forcings.nc", engine="netcdf4")
+ final_ds.to_netcdf(forcings_dir / "forcings.nc", engine="netcdf4")
  # close the datasets
  _ = [result.close() for result in results]
  final_ds.close()

+ # clean up the temp files
+ for file in temp_forcings_dir.glob("*.*"):
+ file.unlink()
+ temp_forcings_dir.rmdir()
+

  def setup_directories(cat_id: str) -> file_paths:
  forcing_paths = file_paths(cat_id)
- if forcing_paths.forcings_dir.exists():
- logger.info("Forcings directory already exists, deleting")
- shutil.rmtree(forcing_paths.forcings_dir)
- for folder in ["by_catchment", "temp"]:
- os.makedirs(forcing_paths.forcings_dir / folder, exist_ok=True)
+ # delete everything in the forcing folder except the cached nc file
+ for file in forcing_paths.forcings_dir.glob("*.*"):
+ if file != forcing_paths.cached_nc_file:
+ file.unlink()
+
+ os.makedirs(forcing_paths.forcings_dir / "temp", exist_ok=True)

  return forcing_paths

@@ -326,7 +345,7 @@ def create_forcings(
  if type(end_time) == datetime:
  end_time = end_time.strftime("%Y-%m-%d %H:%M")

- merged_data = get_forcing_data(forcing_paths, start_time, end_time, gdf, forcing_vars)
+ merged_data = get_forcing_data(forcing_paths.cached_nc_path, start_time, end_time, gdf, forcing_vars)
  compute_zonal_stats(gdf, merged_data, forcing_paths.forcings_dir)

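
The units plumbing added to forcings.py is a simple copy-through: get_units() reads each variable's "units" attribute from the gridded source before zonal averaging (which drops attributes), and write_outputs() reapplies them to the merged per-catchment dataset. A self-contained sketch of the same pattern, using an invented variable name and unit string:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"T2D": ("time", np.zeros(3))})
    ds["T2D"].attrs["units"] = "K"  # illustrative unit string

    # harvest units before any reduction strips the attributes
    units = {var: ds[var].attrs["units"] for var in ds.data_vars if ds[var].attrs.get("units")}

    averaged = ds.mean("time")  # stands in for the zonal-stats step; attrs are dropped here
    for var in averaged.data_vars:
        averaged[var].attrs["units"] = units.get(var, "")
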
modules/data_processing/zarr_utils.py
@@ -100,18 +100,18 @@ def compute_store(stores: xr.Dataset, cached_nc_path: Path) -> xr.Dataset:


  def get_forcing_data(
- forcing_paths: file_paths,
+ cached_nc_path: Path,
  start_time: str,
  end_time: str,
  gdf: gpd.GeoDataFrame,
  forcing_vars: list[str] = None,
  ) -> xr.Dataset:
  merged_data = None
- if os.path.exists(forcing_paths.cached_nc_file):
+ if os.path.exists(cached_nc_path):
  logger.info("Found cached nc file")
  # open the cached file and check that the time range is correct
  cached_data = xr.open_mfdataset(
- forcing_paths.cached_nc_file, parallel=True, engine="h5netcdf"
+ cached_nc_path, parallel=True, engine="h5netcdf"
  )
  range_in_cache = cached_data.time[0].values <= np.datetime64(
  start_time
@@ -138,14 +138,14 @@

  if range_in_cache:
  logger.info("Time range is within cached data")
- logger.debug(f"Opened cached nc file: [{forcing_paths.cached_nc_file}]")
+ logger.debug(f"Opened cached nc file: [{cached_nc_path}]")
  merged_data = clip_dataset_to_bounds(
  cached_data, gdf.total_bounds, start_time, end_time
  )
  logger.debug("Clipped stores")
  else:
  logger.info("Time range is incorrect")
- os.remove(forcing_paths.cached_nc_file)
+ os.remove(cached_nc_path)
  logger.debug("Removed cached nc file")

  if merged_data is None:
@@ -155,7 +155,7 @@
  logger.debug("Got zarr stores")
  clipped_store = clip_dataset_to_bounds(lazy_store, gdf.total_bounds, start_time, end_time)
  logger.info("Clipped forcing data to bounds")
- merged_data = compute_store(clipped_store, forcing_paths.cached_nc_file)
+ merged_data = compute_store(clipped_store, cached_nc_path)
  logger.info("Forcing data loaded and cached")
  # close the event loop

modules/data_sources/cfe-nowpm-realization-template.json
@@ -80,7 +80,7 @@
  }
  ],
  "forcing": {
- "path": "./forcings/by_catchment/forcings.nc",
+ "path": "./forcings/forcings.nc",
  "provider": "NetCDF",
  "enable_cache": false
  }
modules/data_sources/cfe-template.ini
@@ -1,5 +1,6 @@
  forcing_file=BMI
- surface_partitioning_scheme=Schaake
+ surface_water_partitioning_scheme=Schaake
+ surface_runoff_scheme=GIUH

  # ----------------
  # State Parameters
@@ -40,13 +41,13 @@ alpha_fc=0.33
  # decimal fraction of maximum soil water storage (smcmax * depth) for the initial timestep
  soil_storage=0.05[m/m]
  # number of Nash lf reservoirs (optional, defaults to 2, ignored if storage values present)
- K_nash=0.03[]
+ K_nash_subsurface=0.03[]
  # Nash Config param - primary reservoir
  K_lf=0.01[]
  # Nash Config param - secondary reservoir
- nash_storage=0.0,0.0
+ nash_storage_subsurface=0.0,0.0
  # Giuh ordinates in dt time steps
- giuh_ordinates=1.00,0.00
+ giuh_ordinates=0.55,0.25,0.2

  # ---------------------
  # Time Info
@@ -58,4 +59,4 @@ verbosity=0
  DEBUG=0
  # Parameter in the surface runoff parameterization
  # (https://mikejohnson51.github.io/hyAggregate/#Routing_Attributes)
- refkdt={refkdt}
+ refkdt={refkdt}
modules/data_sources/em-realization-template.json
@@ -53,7 +53,7 @@
  }
  ],
  "forcing": {
- "path": "./forcings/by_catchment/forcings.nc",
+ "path": "./forcings/forcings.nc",
  "provider": "NetCDF",
  "enable_cache": false
  }
modules/data_sources/ngen-routing-template.yaml
@@ -98,12 +98,12 @@ compute_parameters:
  output_parameters:
  #----------
  #test_output: outputs/lcr_flowveldepth.pkl
- lite_restart:
- #----------
- lite_restart_output_directory: restart/
- lakeout_output: lakeout/
+ # lite_restart:
+ # #----------
+ # lite_restart_output_directory: restart/
+ # lakeout_output: lakeout/
  stream_output:
  stream_output_directory: outputs/troute/
- stream_output_time: {stream_output_time} #number of internal_frequency timesteps per output file
+ stream_output_time: -1 # -1 adds all outputs to a single file
  stream_output_type: ".nc" #please select only between netcdf '.nc' or '.csv' or '.pkl'
  stream_output_internal_frequency: 60 #[min] it should be order of 5 minutes. For instance if you want to output every hour put 60
modules/data_sources/noah-owp-modular-init.namelist.input
@@ -28,10 +28,10 @@
  /

  &model_options
- precip_phase_option = 2
- snow_albedo_option = 2 ! 1 = BATS, 2 = CLASS
- dynamic_veg_option = 1
- runoff_option = 8
+ precip_phase_option = 1
+ snow_albedo_option = 1 ! 1 = BATS, 2 = CLASS
+ dynamic_veg_option = 4
+ runoff_option = 3
  drainage_option = 8
  frozen_soil_option = 1
  dynamic_vic_option = 1
@@ -43,8 +43,8 @@
  soil_temp_boundary_option = 2
  supercooled_water_option = 1
  stomatal_resistance_option = 1
- evap_srfc_resistance_option = 1
- subsurface_option = 1
+ evap_srfc_resistance_option = 4
+ subsurface_option = 2
  /

  &structure
modules/ngiab_data_cli/__main__.py
@@ -119,7 +119,7 @@ def validate_run_directory(args, paths: file_paths):

  def main() -> None:
  setup_logging()
-
+ validate_all()
  try:
  args = parse_arguments()
  if args.debug:
@@ -128,7 +128,7 @@ def main() -> None:
  paths = file_paths(output_folder)
  args = set_dependent_flags(args, paths) # --validate
  if feature_to_subset:
- logging.info(f"Subsetting {feature_to_subset} to {paths.output_dir}")
+ logging.info(f"Processing {feature_to_subset} in {paths.output_dir}")
  if not args.vpu:
  upstream_count = len(get_upstream_cats(feature_to_subset))
  logging.info(f"Upstream catchments: {upstream_count}")
@@ -243,5 +243,4 @@ def main() -> None:


  if __name__ == "__main__":
- validate_all()
  main()
modules/ngiab_data_cli/forcing_cli.py (new file)
@@ -0,0 +1,97 @@
+ from data_sources.source_validation import validate_all
+ from ngiab_data_cli.custom_logging import setup_logging
+ from data_processing.forcings import compute_zonal_stats
+ from data_processing.zarr_utils import get_forcing_data
+ from data_processing.file_paths import file_paths
+ import argparse
+ import logging
+ import time
+ import xarray as xr
+ import geopandas as gpd
+ from datetime import datetime
+ from pathlib import Path
+ import shutil
+
+ # Constants
+ DATE_FORMAT = "%Y-%m-%d" # used for datetime parsing
+ DATE_FORMAT_HINT = "YYYY-MM-DD" # printed in help message
+
+
+ def parse_arguments() -> argparse.Namespace:
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Subsetting hydrofabrics, forcing generation, and realization creation"
+ )
+ parser.add_argument(
+ "-i",
+ "--input_file",
+ type=Path,
+ help="path to the input hydrofabric geopackage",
+ required=True,
+ )
+ parser.add_argument(
+ "-o",
+ "--output_file",
+ type=Path,
+ help="path to the forcing output file, e.g. /path/to/forcings.nc",
+ required=True,
+ )
+ parser.add_argument(
+ "--start_date",
+ "--start",
+ type=lambda s: datetime.strptime(s, DATE_FORMAT),
+ help=f"Start date for forcings/realization (format {DATE_FORMAT_HINT})",
+ required=True,
+ )
+ parser.add_argument(
+ "--end_date",
+ "--end",
+ type=lambda s: datetime.strptime(s, DATE_FORMAT),
+ help=f"End date for forcings/realization (format {DATE_FORMAT_HINT})",
+ required=True,
+ )
+ parser.add_argument(
+ "-D",
+ "--debug",
+ action="store_true",
+ help="enable debug logging",
+ )
+
+ return parser.parse_args()
+
+ def main() -> None:
+ time.sleep(0.01)
+ setup_logging()
+ validate_all()
+ args = parse_arguments()
+ projection = xr.open_dataset(file_paths.template_nc, engine="h5netcdf").crs.esri_pe_string
+ logging.debug("Got projection from grid file")
+
+ gdf = gpd.read_file(args.input_file, layer="divides").to_crs(projection)
+ logging.debug(f"gdf bounds: {gdf.total_bounds}")
+
+ start_time = args.start_date.strftime("%Y-%m-%d %H:%M")
+ end_time = args.end_date.strftime("%Y-%m-%d %H:%M")
+
+ cached_nc_path = args.output_file.parent / (args.input_file.stem + "-raw-gridded-data.nc")
+ print(cached_nc_path)
+ merged_data = get_forcing_data(cached_nc_path, start_time, end_time, gdf)
+ forcing_working_dir = args.output_file.parent / (args.input_file.stem + "-working-dir")
+ if not forcing_working_dir.exists():
+ forcing_working_dir.mkdir(parents=True, exist_ok=True)
+
+ temp_dir = forcing_working_dir / "temp"
+ if not temp_dir.exists():
+ temp_dir.mkdir(parents=True, exist_ok=True)
+
+
+ compute_zonal_stats(gdf, merged_data, forcing_working_dir)
+
+ shutil.copy(forcing_working_dir / "forcings.nc", args.output_file)
+ logging.info(f"Created forcings file: {args.output_file}")
+ # remove the working directory
+ shutil.rmtree(forcing_working_dir)
+
+
+ if __name__ == "__main__":
+ main()
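
This new module is also registered as a `forcings` console script in entry_points.txt and pyproject.toml further down, so forcing generation can be driven without the map app or the full cli. A hypothetical invocation from Python, using the arguments defined above; all paths and dates are placeholders:

    import subprocess

    subprocess.run(
        [
            "python", "-m", "ngiab_data_cli.forcing_cli",
            "-i", "/path/to/hydrofabric_subset.gpkg",  # placeholder subset geopackage
            "-o", "/path/to/forcings.nc",              # placeholder output file
            "--start", "2022-01-01",
            "--end", "2022-02-01",
        ],
        check=True,
    )

Once the package is installed, the equivalent shell form would be the registered `forcings` command with the same flags.
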
modules/ngiab_data_preprocess.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ngiab_data_preprocess
- Version: 3.2.3
+ Version: 3.3.0
  Summary: Graphical Tools for creating Next Gen Water model input data.
  Author-email: Josh Cunningham <jcunningham8@ua.edu>
  Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
modules/ngiab_data_preprocess.egg-info/SOURCES.txt
@@ -40,6 +40,7 @@ modules/map_app/templates/index.html
  modules/ngiab_data_cli/__main__.py
  modules/ngiab_data_cli/arguments.py
  modules/ngiab_data_cli/custom_logging.py
+ modules/ngiab_data_cli/forcing_cli.py
  modules/ngiab_data_preprocess.egg-info/PKG-INFO
  modules/ngiab_data_preprocess.egg-info/SOURCES.txt
  modules/ngiab_data_preprocess.egg-info/dependency_links.txt
modules/ngiab_data_preprocess.egg-info/entry_points.txt
@@ -1,3 +1,4 @@
  [console_scripts]
  cli = ngiab_data_cli.__main__:main
+ forcings = ngiab_data_cli.forcing_cli:main
  map_app = map_app.__main__:main
pyproject.toml
@@ -12,7 +12,7 @@ exclude = ["tests*"]

  [project]
  name = "ngiab_data_preprocess"
- version = "v3.2.3"
+ version = "v3.3.0"
  authors = [{ name = "Josh Cunningham", email = "jcunningham8@ua.edu" }]
  description = "Graphical Tools for creating Next Gen Water model input data."
  readme = "README.md"
@@ -43,7 +43,7 @@ dependencies = [
  "tqdm==4.66.4",
  "rich==13.7.1",
  "colorama==0.4.6",
- "bokeh==3.5.1"
+ "bokeh==3.5.1",
  ]

  [project.optional-dependencies]
@@ -57,6 +57,7 @@ Issues = "https://github.com/CIROH-UA/NGIAB_data_preprocess/issues"
  [project.scripts]
  cli = "ngiab_data_cli.__main__:main"
  map_app = "map_app.__main__:main"
+ forcings = "ngiab_data_cli.forcing_cli:main"

  [build-system]
  # scm adds files tracked by git to the package