ngiab-data-preprocess 4.2.1.tar.gz → 4.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/PKG-INFO +1 -3
  2. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/create_realization.py +28 -30
  3. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/forcings.py +68 -70
  4. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/gpkg_utils.py +5 -5
  5. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/subset.py +0 -1
  6. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/css/main.css +19 -7
  7. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/css/toggle.css +8 -5
  8. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/js/main.js +44 -21
  9. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/templates/index.html +22 -9
  10. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/views.py +7 -7
  11. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +1 -3
  12. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +1 -5
  13. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_preprocess.egg-info/requires.txt +0 -2
  14. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/pyproject.toml +41 -5
  15. ngiab_data_preprocess-4.2.1/map.html +0 -98
  16. ngiab_data_preprocess-4.2.1/modules/map_app/static/resources/dark-style.json +0 -11068
  17. ngiab_data_preprocess-4.2.1/modules/map_app/static/resources/light-style.json +0 -11068
  18. ngiab_data_preprocess-4.2.1/output/.gitkeep +0 -0
  19. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/.github/workflows/build_only.yml +0 -0
  20. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/.github/workflows/publish.yml +0 -0
  21. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/.gitignore +0 -0
  22. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/LICENSE +0 -0
  23. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/README.md +0 -0
  24. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/dataset_utils.py +0 -0
  25. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/datasets.py +0 -0
  26. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/file_paths.py +0 -0
  27. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/graph_utils.py +0 -0
  28. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/s3fs_utils.py +0 -0
  29. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/cfe-nowpm-realization-template.json +0 -0
  30. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/cfe-template.ini +0 -0
  31. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/em-catchment-template.yml +0 -0
  32. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/em-config.yml +0 -0
  33. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/em-realization-template.json +0 -0
  34. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/forcing_template.nc +0 -0
  35. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/ngen-routing-template.yaml +0 -0
  36. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/noah-owp-modular-init.namelist.input +0 -0
  37. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/source_validation.py +0 -0
  38. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/template.sql +0 -0
  39. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_sources/triggers.sql +0 -0
  40. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/__init__.py +0 -0
  41. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/__main__.py +0 -0
  42. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/css/console.css +0 -0
  43. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/js/console.js +0 -0
  44. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/js/data_processing.js +0 -0
  45. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/resources/loading.gif +0 -0
  46. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/resources/screenshot.jpg +0 -0
  47. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_cli/__main__.py +0 -0
  48. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_cli/arguments.py +0 -0
  49. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_cli/custom_logging.py +0 -0
  50. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_cli/forcing_cli.py +0 -0
  51. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
  52. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +0 -0
  53. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
  54. {ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/setup.cfg +0 -0
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.2.1
+Version: 4.2.2
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -23,8 +23,6 @@ Requires-Dist: zarr==2.17.1
 Requires-Dist: netCDF4>=1.6.5
 Requires-Dist: dask==2024.4.1
 Requires-Dist: dask[distributed]==2024.4.1
-Requires-Dist: black==24.3.0
-Requires-Dist: isort==5.13.2
 Requires-Dist: h5netcdf==1.3.0
 Requires-Dist: exactextract==0.2.0
 Requires-Dist: numpy>=1.26.4
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/create_realization.py

@@ -1,29 +1,30 @@
 import json
+import logging
 import multiprocessing
+import shutil
 import sqlite3
+from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-import shutil
-import requests

 import pandas
+import requests
 import s3fs
 import xarray as xr
-import logging
-from collections import defaultdict
 from dask.distributed import Client, LocalCluster
 from data_processing.file_paths import file_paths
 from data_processing.gpkg_utils import (
     GeoPackage,
+    get_cat_to_nex_flowpairs,
     get_cat_to_nhd_feature_id,
     get_table_crs_short,
-    get_cat_to_nex_flowpairs,
 )
-from tqdm.rich import tqdm
 from pyproj import Transformer
+from tqdm.rich import tqdm

 logger = logging.getLogger(__name__)

+
 def get_approximate_gw_storage(paths: file_paths, start_date: datetime):
     # get the gw levels from the NWM output on a given start date
     # this kind of works in place of warmstates for now
@@ -78,7 +79,9 @@ def make_cfe_config(
         slope=row["mean.slope_1km"],
         smcmax=row["mean.smcmax_soil_layers_stag=2"],
         smcwlt=row["mean.smcwlt_soil_layers_stag=2"],
-        max_gw_storage=row["mean.Zmax"]/1000 if row["mean.Zmax"] is not None else "0.011[m]", # mean.Zmax is in mm!
+        max_gw_storage=row["mean.Zmax"] / 1000
+        if row["mean.Zmax"] is not None
+        else "0.011[m]",  # mean.Zmax is in mm!
         gw_Coeff=row["mean.Coeff"] if row["mean.Coeff"] is not None else "0.0018[m h-1]",
         gw_Expon=row["mode.Expon"],
         gw_storage="{:.5}".format(gw_storage_ratio),
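The conditional above converts mean.Zmax from millimeters to meters before it is written into the CFE config, falling back to a literal default when the attribute is missing. Schematically (column name as in the diff; the sample value is invented):

row = {"mean.Zmax": 55.0}  # divide attribute, stored in mm
max_gw_storage = row["mean.Zmax"] / 1000 if row["mean.Zmax"] is not None else "0.011[m]"
print(max_gw_storage)  # 0.055, i.e. meters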
@@ -92,7 +95,6 @@ def make_cfe_config(
 def make_noahowp_config(
     base_dir: Path, divide_conf_df: pandas.DataFrame, start_time: datetime, end_time: datetime
 ) -> None:
-
     divide_conf_df.set_index("divide_id", inplace=True)
     start_datetime = start_time.strftime("%Y%m%d%H%M")
     end_datetime = end_time.strftime("%Y%m%d%H%M")
@@ -110,8 +112,8 @@ def make_noahowp_config(
             end_datetime=end_datetime,
             lat=divide_conf_df.loc[divide, "latitude"],
             lon=divide_conf_df.loc[divide, "longitude"],
-            terrain_slope= divide_conf_df.loc[divide, "mean.slope_1km"],
-            azimuth= divide_conf_df.loc[divide, "circ_mean.aspect"],
+            terrain_slope=divide_conf_df.loc[divide, "mean.slope_1km"],
+            azimuth=divide_conf_df.loc[divide, "circ_mean.aspect"],
             ISLTYP=int(divide_conf_df.loc[divide, "mode.ISLTYP"]),
             IVGTYP=int(divide_conf_df.loc[divide, "mode.IVGTYP"]),
         )
@@ -182,6 +184,7 @@ def get_model_attributes_pyproj(hydrofabric: Path):

     return divide_conf_df

+
 def get_model_attributes(hydrofabric: Path):
     try:
         with GeoPackage(hydrofabric) as conn:
@@ -205,30 +208,31 @@ def get_model_attributes(hydrofabric: Path):
             )
     except sqlite3.OperationalError:
         with sqlite3.connect(hydrofabric) as conn:
-            conf_df = pandas.read_sql_query("SELECT* FROM 'divide-attributes';", conn,)
+            conf_df = pandas.read_sql_query(
+                "SELECT* FROM 'divide-attributes';",
+                conn,
+            )
     source_crs = get_table_crs_short(hydrofabric, "divides")
     transformer = Transformer.from_crs(source_crs, "EPSG:4326", always_xy=True)
-    lon, lat = transformer.transform(
-        conf_df["centroid_x"].values, conf_df["centroid_y"].values
-    )
+    lon, lat = transformer.transform(conf_df["centroid_x"].values, conf_df["centroid_y"].values)
     conf_df["longitude"] = lon
     conf_df["latitude"] = lat

     conf_df.drop(columns=["centroid_x", "centroid_y"], axis=1, inplace=True)
     return conf_df

+
 def make_em_config(
     hydrofabric: Path,
     output_dir: Path,
     template_path: Path = file_paths.template_em_config,
 ):
-
     # test if modspatialite is available
     try:
         divide_conf_df = get_model_attributes_modspatialite(hydrofabric)
     except Exception as e:
         logger.warning(f"mod_spatialite not available, using pyproj instead: {e}")
-        logger.warning(f"Install mod_spatialite for improved performance")
+        logger.warning("Install mod_spatialite for improved performance")
         divide_conf_df = get_model_attributes_pyproj(hydrofabric)

     cat_config_dir = output_dir / "cat_config" / "empirical_model"
@@ -256,7 +260,6 @@ def make_em_config(
 def configure_troute(
     cat_id: str, config_dir: Path, start_time: datetime, end_time: datetime
 ) -> int:
-
     with open(file_paths.template_troute_config, "r") as file:
         troute_template = file.read()
     time_step_size = 300
@@ -269,7 +272,7 @@
         geo_file_path=f"./config/{cat_id}_subset.gpkg",
         start_datetime=start_time.strftime("%Y-%m-%d %H:%M:%S"),
         nts=nts,
-        max_loop_size=nts,
+        max_loop_size=nts,
     )

     with open(config_dir / "troute.yaml", "w") as file:
@@ -301,9 +304,7 @@ def create_em_realization(cat_id: str, start_time: datetime, end_time: datetime)
         f.write(em_config)

     configure_troute(cat_id, paths.config_dir, start_time, end_time)
-    make_ngen_realization_json(
-        paths.config_dir, template_path, start_time, end_time
-    )
+    make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)
     make_em_config(paths.geopackage_path, paths.config_dir)
     # create some partitions for parallelization
     paths.setup_run_folders()
@@ -324,15 +325,14 @@ def create_realization(
     if gage_id is not None:
         # try and download s3:communityhydrofabric/hydrofabrics/community/gage_parameters/gage_id
         # if it doesn't exist, use the default
-        try:
-            url = f"https://communityhydrofabric.s3.us-east-1.amazonaws.com/hydrofabrics/community/gage_parameters/{gage_id}.json"
-
+        url = f"https://communityhydrofabric.s3.us-east-1.amazonaws.com/hydrofabrics/community/gage_parameters/{gage_id}.json"
+        response = requests.get(url)
+        if response.status_code == 200:
             new_template = requests.get(url).json()
-            template_path = paths.config_dir / "calibrated_params.json"
+            template_path = paths.config_dir / "downloaded_params.json"
             with open(template_path, "w") as f:
                 json.dump(new_template, f)
-        except Exception as e:
-            logger.warning(f"Failed to download gage parameters")
+            logger.info(f"downloaded calibrated parameters for {gage_id}")

     conf_df = get_model_attributes(paths.geopackage_path)

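The gage-parameter hunk above replaces a blanket try/except with an explicit HTTP status check: a missing S3 object now falls through to the default realization template, while genuine failures still surface. A minimal sketch of the pattern, assuming the URL and file name from the diff (the helper name is ours, and reusing response.json() avoids the second requests.get call the diff still performs):

import json
import requests
from pathlib import Path

def fetch_gage_parameters(gage_id: str, config_dir: Path):
    # hypothetical helper; mirrors the status-code pattern in create_realization
    url = (
        "https://communityhydrofabric.s3.us-east-1.amazonaws.com/"
        f"hydrofabrics/community/gage_parameters/{gage_id}.json"
    )
    response = requests.get(url)
    if response.status_code != 200:
        return None  # caller keeps the default template
    template_path = config_dir / "downloaded_params.json"
    with open(template_path, "w") as f:
        json.dump(response.json(), f)
    return template_path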
@@ -347,9 +347,7 @@

     configure_troute(cat_id, paths.config_dir, start_time, end_time)

-    make_ngen_realization_json(
-        paths.config_dir, template_path, start_time, end_time
-    )
+    make_ngen_realization_json(paths.config_dir, template_path, start_time, end_time)

     # create some partitions for parallelization
     paths.setup_run_folders()
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/forcings.py

@@ -3,32 +3,29 @@ import multiprocessing
 import os
 import time
 import warnings
-from datetime import datetime
 from functools import partial
 from math import ceil
 from multiprocessing import shared_memory
 from pathlib import Path
-
-from dask.distributed import Client, LocalCluster
+from typing import Tuple

 import geopandas as gpd
 import numpy as np
 import pandas as pd
 import psutil
 import xarray as xr
-from data_processing.file_paths import file_paths
+from dask.distributed import Client, LocalCluster
 from data_processing.dataset_utils import validate_dataset_format
+from data_processing.file_paths import file_paths
 from exactextract import exact_extract
 from exactextract.raster import NumPyRasterSource
 from rich.progress import (
-    Progress,
     BarColumn,
+    Progress,
     TextColumn,
     TimeElapsedColumn,
     TimeRemainingColumn,
 )
-
-

 logger = logging.getLogger(__name__)
 # Suppress the specific warning from numpy to keep the cli output clean
@@ -40,13 +37,13 @@ warnings.filterwarnings(
 )


-def weighted_sum_of_cells(flat_raster: np.ndarray,
-                          cell_ids: np.ndarray,
-                          factors: np.ndarray) -> np.ndarray:
-    '''
+def weighted_sum_of_cells(
+    flat_raster: np.ndarray, cell_ids: np.ndarray, factors: np.ndarray
+) -> np.ndarray:
+    """
     Take an average of each forcing variable in a catchment. Create an output
-    array initialized with zeros, and then sum up the forcing variable and
-    divide by the sum of the cell weights to get an averaged forcing variable
+    array initialized with zeros, and then sum up the forcing variable and
+    divide by the sum of the cell weights to get an averaged forcing variable
     for the entire catchment.

     Parameters
@@ -65,7 +62,7 @@ def weighted_sum_of_cells(flat_raster: np.ndarray,
     An one-dimensional array, where each element corresponds to a timestep.
     Each element contains the averaged forcing value for the whole catchment
     over one timestep.
-    '''
+    """
     result = np.zeros(flat_raster.shape[0])
     result = np.sum(flat_raster[:, cell_ids] * factors, axis=1)
     sum_of_weights = np.sum(factors)
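For reference, the coverage-weighted average that weighted_sum_of_cells implements reduces to two NumPy operations; a self-contained sketch with toy numbers (the shapes follow the docstring, the values are made up):

import numpy as np

flat_raster = np.array([[1.0, 2.0, 3.0, 4.0],
                        [5.0, 6.0, 7.0, 8.0]])  # (timesteps, flattened raster cells)
cell_ids = np.array([1, 2])      # cells that intersect the catchment
factors = np.array([0.5, 0.25])  # coverage fraction of each cell

weighted = np.sum(flat_raster[:, cell_ids] * factors, axis=1)  # per-timestep weighted sum
average = weighted / np.sum(factors)  # normalize by total coverage
print(average)  # [2.33333333 6.33333333]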
@@ -73,12 +70,10 @@
     return result


-def get_cell_weights(raster: xr.Dataset,
-                     gdf: gpd.GeoDataFrame,
-                     wkt: str) -> pd.DataFrame:
-    '''
-    Get the cell weights (coverage) for each cell in a divide. Coverage is
-    defined as the fraction (a float in [0,1]) of a raster cell that overlaps
+def get_cell_weights(raster: xr.Dataset, gdf: gpd.GeoDataFrame, wkt: str) -> pd.DataFrame:
+    """
+    Get the cell weights (coverage) for each cell in a divide. Coverage is
+    defined as the fraction (a float in [0,1]) of a raster cell that overlaps
     with the polygon in the passed gdf.

     Parameters
@@ -96,7 +91,7 @@
     pd.DataFrame
         DataFrame indexed by divide_id that contains information about coverage
         for each raster cell in gridded forcing file.
-    '''
+    """
     xmin = raster.x[0]
     xmax = raster.x[-1]
     ymin = raster.y[0]
@@ -116,15 +111,17 @@


 def add_APCP_SURFACE_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
-    '''Convert precipitation value to correct units.'''
+    """Convert precipitation value to correct units."""
     # precip_rate is mm/s
     # cfe says input atmosphere_water__liquid_equivalent_precipitation_rate is mm/h
     # nom says prcpnonc input is mm/s
     # technically should be kg/m^2/s at 1kg = 1l it equates to mm/s
     # nom says qinsur output is m/s, hopefully qinsur is converted to mm/h by ngen
     dataset["APCP_surface"] = dataset["precip_rate"] * 3600
-    dataset["APCP_surface"].attrs["units"] = "mm h^-1" # ^-1 notation copied from source data
-    dataset["APCP_surface"].attrs["source_note"] = "This is just the precip_rate variable converted to mm/h by multiplying by 3600"
+    dataset["APCP_surface"].attrs["units"] = "mm h^-1"  # ^-1 notation copied from source data
+    dataset["APCP_surface"].attrs["source_note"] = (
+        "This is just the precip_rate variable converted to mm/h by multiplying by 3600"
+    )
     return dataset

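The conversion above is a plain rescaling from mm/s to mm/h, and the next hunk applies the exact inverse; a quick sanity check (values are illustrative):

import numpy as np
import xarray as xr

precip_rate = xr.DataArray(np.array([0.001, 0.002]))  # mm/s
apcp = precip_rate * 3600  # mm/h, as in add_APCP_SURFACE_to_dataset
assert np.allclose(apcp / 3600, precip_rate)  # add_precip_rate_to_dataset undoes it
print(apcp.values)  # [3.6 7.2]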
@@ -132,14 +129,14 @@ def add_precip_rate_to_dataset(dataset: xr.Dataset) -> xr.Dataset:
     # the inverse of the function above
     dataset["precip_rate"] = dataset["APCP_surface"] / 3600
     dataset["precip_rate"].attrs["units"] = "mm s^-1"
-    dataset["precip_rate"].attrs[
-        "source_note"
-    ] = "This is just the APCP_surface variable converted to mm/s by dividing by 3600"
+    dataset["precip_rate"].attrs["source_note"] = (
+        "This is just the APCP_surface variable converted to mm/s by dividing by 3600"
+    )
     return dataset


 def get_index_chunks(data: xr.DataArray) -> list[tuple[int, int]]:
-    '''
+    """
     Take a DataArray and calculate the start and end index for each chunk based
     on the available memory.

@@ -153,7 +150,7 @@ def get_index_chunks(data: xr.DataArray) -> list[tuple[int, int]]:
     list[Tuple[int, int]]
         Each element in the list represents a chunk of data. The tuple within
         the chunk indicates the start index and end index of the chunk.
-    '''
+    """
     array_memory_usage = data.nbytes
     free_memory = psutil.virtual_memory().available * 0.8  # 80% of available memory
     # limit the chunk to 20gb, makes things more stable
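A rough sketch of the arithmetic get_index_chunks describes, splitting the time axis so each slice stays under a memory budget (the 80% headroom and 20 GB cap come from the code above; the standalone helper below is our illustration):

from math import ceil

def index_chunks(n_timesteps: int, array_bytes: int, budget_bytes: int) -> list[tuple[int, int]]:
    # number of chunks needed so that each slice fits the budget
    num_chunks = max(1, ceil(array_bytes / budget_bytes))
    chunk_size = ceil(n_timesteps / num_chunks)
    return [(i, min(i + chunk_size, n_timesteps)) for i in range(0, n_timesteps, chunk_size)]

# a 48 GB array over 720 timesteps against a 20 GB cap -> three chunks of 240
print(index_chunks(720, 48 * 2**30, 20 * 2**30))  # [(0, 240), (240, 480), (480, 720)]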
@@ -166,15 +163,13 @@
     return index_chunks


-def create_shared_memory(lazy_array: xr.Dataset) -> Tuple[
-    shared_memory.SharedMemory,
-    np.dtype,
-    np.dtype
-]:
-    '''
-    Create a shared memory object so that multiple processes can access loaded
+def create_shared_memory(
+    lazy_array: xr.Dataset,
+) -> Tuple[shared_memory.SharedMemory, np.dtype, np.dtype]:
+    """
+    Create a shared memory object so that multiple processes can access loaded
     data.
-
+
     Parameters
     ----------
     lazy_array : xr.Dataset
@@ -183,22 +178,22 @@ def create_shared_memory(lazy_array: xr.Dataset) -> Tuple[
     Returns
     -------
     shared_memory.SharedMemory
-        A specific block of memory allocated by the OS of the size of
+        A specific block of memory allocated by the OS of the size of
         lazy_array.
     np.dtype.shape
         A shape object with dimensions (# timesteps, # of raster cells) in
         reference to lazy_array.
     np.dtype
         Data type of objects in lazy_array.
-    '''
-    logger.debug(f"Creating shared memory size {lazy_array.nbytes/ 10**6} Mb.")
+    """
+    logger.debug(f"Creating shared memory size {lazy_array.nbytes / 10**6} Mb.")
     shm = shared_memory.SharedMemory(create=True, size=lazy_array.nbytes)
     shared_array = np.ndarray(lazy_array.shape, dtype=np.float32, buffer=shm.buf)
     # if your data is not float32, xarray will do an automatic conversion here
     # which consumes a lot more memory, forcings downloaded with this tool will work
     for start, end in get_index_chunks(lazy_array):
-      # copy data from lazy to shared memory one chunk at a time
-      shared_array[start:end] = lazy_array[start:end]
+        # copy data from lazy to shared memory one chunk at a time
+        shared_array[start:end] = lazy_array[start:end]

     time, x, y = shared_array.shape
     shared_array = shared_array.reshape(time, -1)
@@ -206,14 +201,16 @@
     return shm, shared_array.shape, shared_array.dtype


-def process_chunk_shared(variable: str,
-                         times: np.ndarray,
-                         shm_name: str,
-                         shape: np.dtype.shape,
-                         dtype: np.dtype,
-                         chunk: gpd.GeoDataFrame) -> xr.DataArray:
-    '''
-    Process the gridded forcings chunk loaded into a SharedMemory block.
+def process_chunk_shared(
+    variable: str,
+    times: np.ndarray,
+    shm_name: str,
+    shape: np.dtype.shape,
+    dtype: np.dtype,
+    chunk: gpd.GeoDataFrame,
+) -> xr.DataArray:
+    """
+    Process the gridded forcings chunk loaded into a SharedMemory block.

     Parameters
     ----------
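create_shared_memory and process_chunk_shared form a producer/consumer pair around multiprocessing.shared_memory: the parent copies the loaded raster into a named block, and each worker attaches to that block by name without copying it again. A minimal self-contained sketch of the same round trip (toy shapes and values):

import numpy as np
from multiprocessing import shared_memory

data = np.arange(12, dtype=np.float32).reshape(3, 4)  # (timesteps, cells)

# producer: allocate a named block and copy the array into it
shm = shared_memory.SharedMemory(create=True, size=data.nbytes)
np.ndarray(data.shape, dtype=data.dtype, buffer=shm.buf)[:] = data

# consumer (normally another process): attach by name, no second copy
existing = shared_memory.SharedMemory(name=shm.name)
view = np.ndarray(data.shape, dtype=data.dtype, buffer=existing.buf)
assert np.array_equal(view, data)

existing.close()
shm.close()
shm.unlink()  # release the block once all readers are done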
@@ -235,7 +232,7 @@
     -------
     xr.DataArray
         Averaged forcings data for each timestep for each catchment.
-    '''
+    """
     existing_shm = shared_memory.SharedMemory(name=shm_name)
     raster = np.ndarray(shape, dtype=dtype, buffer=existing_shm.buf)
     results = []
@@ -256,10 +253,10 @@
     return xr.concat(results, dim="catchment")


-def get_cell_weights_parallel(gdf: gpd.GeoDataFrame,
-                              input_forcings: xr.Dataset,
-                              num_partitions: int) -> pd.DataFrame:
-    '''
+def get_cell_weights_parallel(
+    gdf: gpd.GeoDataFrame, input_forcings: xr.Dataset, num_partitions: int
+) -> pd.DataFrame:
+    """
     Execute get_cell_weights with multiprocessing, with chunking for the passed
     GeoDataFrame to conserve memory usage.

@@ -277,7 +274,7 @@ def get_cell_weights_parallel(gdf: gpd.GeoDataFrame,
     pd.DataFrame
         DataFrame indexed by divide_id that contains information about coverage
         for each raster cell and each timestep in gridded forcing file.
-    '''
+    """
     gdf_chunks = np.array_split(gdf, num_partitions)
     wkt = gdf.crs.to_wkt()
     one_timestep = input_forcings.isel(time=0).compute()
@@ -286,20 +283,21 @@
         catchments = pool.starmap(get_cell_weights, args)
     return pd.concat(catchments)

+
 def get_units(dataset: xr.Dataset) -> dict:
-    '''
+    """
     Return dictionary of units for each variable in dataset.
-
+
     Parameters
     ----------
     dataset : xr.Dataset
         Dataset with variables and units.
-
+
     Returns
     -------
-    dict
+    dict
         {variable name: unit}
-    '''
+    """
     units = {}
     for var in dataset.data_vars:
         if dataset[var].attrs["units"]:
@@ -310,9 +308,9 @@ def get_units(dataset: xr.Dataset) -> dict:
 def compute_zonal_stats(
     gdf: gpd.GeoDataFrame, gridded_data: xr.Dataset, forcings_dir: Path
 ) -> None:
-    '''
-    Compute zonal statistics in parallel for all timesteps over all desired
-    catchments. Create chunks of catchments and within those, chunks of
+    """
+    Compute zonal statistics in parallel for all timesteps over all desired
+    catchments. Create chunks of catchments and within those, chunks of
     timesteps for memory management.

     Parameters
@@ -323,7 +321,7 @@
         Gridded forcing data that intersects with desired catchments.
     forcings_dir : Path
         Path to directory where outputs are to be stored.
-    '''
+    """
     logger.info("Computing zonal stats in parallel for all timesteps")
     timer_start = time.time()
     num_partitions = multiprocessing.cpu_count() - 1
@@ -414,7 +412,7 @@


 def write_outputs(forcings_dir: Path, units: dict) -> None:
-    '''
+    """
     Write outputs to disk in the form of a NetCDF file, using dask clusters to
     facilitate parallel computing.

@@ -423,13 +421,13 @@ def write_outputs(forcings_dir: Path, units: dict) -> None:
     forcings_dir : Path
         Path to directory where outputs are to be stored.
     variables : dict
-        Preset dictionary where the keys are forcing variable names and the
+        Preset dictionary where the keys are forcing variable names and the
         values are units.
     units : dict
-        Dictionary where the keys are forcing variable names and the values are
+        Dictionary where the keys are forcing variable names and the values are
         units. Differs from variables, as this dictionary depends on the gridded
         forcing dataset.
-    '''
+    """

     # start a dask cluster if there isn't one already running
     try:
@@ -508,7 +506,7 @@ def setup_directories(cat_id: str) -> file_paths:
 def create_forcings(dataset: xr.Dataset, output_folder_name: str) -> None:
     validate_dataset_format(dataset)
     forcing_paths = setup_directories(output_folder_name)
-    print(f"forcing path {output_folder_name} {forcing_paths.forcings_dir}")
+    logger.debug(f"forcing path {output_folder_name} {forcing_paths.forcings_dir}")
     gdf = gpd.read_file(forcing_paths.geopackage_path, layer="divides")
     logger.debug(f"gdf bounds: {gdf.total_bounds}")
     gdf = gdf.to_crs(dataset.crs)
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/gpkg_utils.py

@@ -321,16 +321,16 @@ def subset_table_by_vpu(table: str, vpu: str, hydrofabric: Path, subset_gpkg_nam
     if table == "network":
         # Look for the network entry that has a toid not in the flowpath or nexus tables
         network_toids = [x[2] for x in contents]
-        print(f"Network toids: {len(network_toids)}")
+        logger.debug(f"Network toids: {len(network_toids)}")
         sql = "SELECT id FROM flowpaths"
         flowpath_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
-        print(f"Flowpath ids: {len(flowpath_ids)}")
+        logger.debug(f"Flowpath ids: {len(flowpath_ids)}")
         sql = "SELECT id FROM nexus"
         nexus_ids = [x[0] for x in dest_db.execute(sql).fetchall()]
-        print(f"Nexus ids: {len(nexus_ids)}")
+        logger.debug(f"Nexus ids: {len(nexus_ids)}")
         bad_ids = set(network_toids) - set(flowpath_ids + nexus_ids)
-        print(bad_ids)
-        print(f"Removing {len(bad_ids)} network entries that are not in flowpaths or nexuses")
+        logger.debug(bad_ids)
+        logger.info(f"Removing {len(bad_ids)} network entries that are not in flowpaths or nexuses")
         # id column is second after fid
         contents = [x for x in contents if x[1] not in bad_ids]

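The print-to-logger changes above assume the module-level logger pattern that create_realization.py and forcings.py already use; for completeness, a minimal setup (the example messages are invented):

import logging

logger = logging.getLogger(__name__)  # one logger per module

# debug output stays hidden unless the application raises verbosity:
logging.basicConfig(level=logging.DEBUG)
logger.debug("Nexus ids: 120")
logger.info("Removing 3 network entries that are not in flowpaths or nexuses")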
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/data_processing/subset.py

@@ -69,7 +69,6 @@ def subset(
     output_gpkg_path: Path = Path(),
     include_outlet: bool = True,
 ):
-    print(cat_ids)
     upstream_ids = list(get_upstream_ids(cat_ids, include_outlet))

     if not output_gpkg_path:
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/css/main.css

@@ -109,7 +109,7 @@ h2 {
 }

 #selected-basins,
-#cli-command {
+#cli-command,#cli-prefix {
     background: var(--code-bg);
     padding: 16px;
     border-radius: var(--border-radius);
@@ -119,6 +119,7 @@ h2 {
     color: var(--text-color);
 }

+
 button {
     background-color: var(--primary-color);
     color: light-dark(white, #f1f5f9);
@@ -204,6 +205,11 @@ input[type="datetime-local"] {
     display: inline-block;
 }

+#command-builder{
+    display: inline-block;
+    padding:16px ;
+}
+
 .command-container {
     background: var(--surface-color);
     border: 1px solid var(--border-color);
@@ -222,7 +228,7 @@ input[type="datetime-local"] {
     border-top-right-radius: var(--border-radius);
 }

-.command-header span {
+.command-header>span {
     font-size: 0.875rem;
     color: var(--secondary-text);
     font-weight: 500;
@@ -254,7 +260,8 @@ input[type="datetime-local"] {
 }

 .command-content {
-    padding: 16px;
+    display:inline;
+    padding: 0px !important;
     background: var(--code-bg);
     font-family: 'Monaco', 'Consolas', monospace;
     font-size: 0.875rem;
@@ -263,6 +270,11 @@ input[type="datetime-local"] {
     border-bottom-left-radius: var(--border-radius);
     border-bottom-right-radius: var(--border-radius);
     color: var(--text-color);
+
+}
+
+#cli-prefix{
+    opacity: 0;
 }

 .copy-button.copied {
@@ -281,17 +293,17 @@ input[type="datetime-local"] {
     body {
         padding: 16px;
     }
-
+
     main {
         width: 90vw;
     }
-
+
     .time-input {
         flex-direction: column;
         align-items: flex-start;
     }
-
+
     input[type="datetime-local"] {
         width: 100%;
     }
-}
+}
{ngiab_data_preprocess-4.2.1 → ngiab_data_preprocess-4.2.2}/modules/map_app/static/css/toggle.css

@@ -34,24 +34,27 @@
     background-color: white;
     border-radius: 18px;
     box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
-    transition: transform 0.3s ease, box-shadow 0.3s ease;
+    transition:
+        transform 0.3s ease,
+        box-shadow 0.3s ease;
     display: flex;
     justify-content: center;
     align-items: center;
     font-size: 14px;
     font-weight: bold;
-    color: #4CAF50; /* Default color for the selected text */
+    color: #4caf50; /* Default color for the selected text */
 }

 /* Toggle Text (NWM and AORC labels) */
 .toggle-text {
     position: absolute;
     top: 50%;
+    min-width: 40px;
+    text-align: center;
     transform: translateY(-50%);
     font-size: 14px;
     font-weight: bold;
     color: #888; /* Grey color for non-selected text */
-    transition: color 0.3s ease;
 }

 .toggle-text-left {
@@ -70,7 +73,7 @@
 .toggle-input:checked + .toggle-label .toggle-handle {
     transform: translateX(56px);
     box-shadow: 0 0 10px rgba(0, 123, 255, 0.8); /* Blue glow effect */
-    color: #007BFF; /* Blue color for the selected text */
+    color: #007bff; /* Blue color for the selected text */
 }

 .toggle-input:checked + .toggle-label .toggle-text-left {
@@ -79,4 +82,4 @@

 .toggle-input:checked + .toggle-label .toggle-text-right {
     color: #888; /* Grey color for non-selected text */
-}
+}