ngiab-data-preprocess 4.6.5.tar.gz → 4.6.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/PKG-INFO +13 -12
  2. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/README.md +12 -11
  3. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/dataset_utils.py +6 -2
  4. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/gpkg_utils.py +2 -1
  5. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/css/toggle.css +3 -3
  6. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/js/data_processing.js +8 -7
  7. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_cli/__main__.py +1 -1
  8. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_cli/arguments.py +10 -2
  9. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_preprocess.egg-info/PKG-INFO +13 -12
  10. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_preprocess.egg-info/SOURCES.txt +2 -1
  11. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/pyproject.toml +1 -1
  12. ngiab_data_preprocess-4.6.7/tests/test_ngiab_data_cli_regression.py +375 -0
  13. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/.github/pull_request_template.md +0 -0
  14. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/.github/workflows/build_only.yml +0 -0
  15. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/.github/workflows/publish.yml +0 -0
  16. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/.gitignore +0 -0
  17. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/LICENSE +0 -0
  18. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/ciroh-bgsafe.png +0 -0
  19. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/create_realization.py +0 -0
  20. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/dask_utils.py +0 -0
  21. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/datasets.py +0 -0
  22. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/file_paths.py +0 -0
  23. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/forcings.py +0 -0
  24. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/graph_utils.py +0 -0
  25. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/s3fs_utils.py +0 -0
  26. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_processing/subset.py +0 -0
  27. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/cfe-nowpm-realization-template.json +0 -0
  28. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/cfe-template.ini +0 -0
  29. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/forcing_template.nc +0 -0
  30. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/lstm-catchment-template.yml +0 -0
  31. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/lstm-realization-template.json +0 -0
  32. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/lstm-rust-realization-template.json +0 -0
  33. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/ngen-routing-template.yaml +0 -0
  34. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/noah-owp-modular-init.namelist.input +0 -0
  35. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/source_validation.py +0 -0
  36. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/template.sql +0 -0
  37. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/data_sources/triggers.sql +0 -0
  38. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/__init__.py +0 -0
  39. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/__main__.py +0 -0
  40. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/css/console.css +0 -0
  41. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/css/main.css +0 -0
  42. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/js/console.js +0 -0
  43. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/js/main.js +0 -0
  44. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/resources/loading.gif +0 -0
  45. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/static/resources/screenshot.jpg +0 -0
  46. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/templates/index.html +0 -0
  47. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/map_app/views.py +0 -0
  48. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_cli/custom_logging.py +0 -0
  49. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_cli/forcing_cli.py +0 -0
  50. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_preprocess.egg-info/dependency_links.txt +0 -0
  51. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_preprocess.egg-info/entry_points.txt +0 -0
  52. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_preprocess.egg-info/requires.txt +0 -0
  53. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/modules/ngiab_data_preprocess.egg-info/top_level.txt +0 -0
  54. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/setup.cfg +0 -0
  55. {ngiab_data_preprocess-4.6.5 → ngiab_data_preprocess-4.6.7}/tests/test_nan_impute.py +0 -0
PKG-INFO (+13 -12)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.6.5
+Version: 4.6.7
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -75,13 +75,13 @@ This repository contains tools for preparing data to run a [NextGen](https://git

 ## What does this tool do?

-This tool prepares data to run a NextGen-based simulation by creating a run package that can be used with NGIAB.
-It uses geometry and model attributes from the [v2.2 hydrofabric](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/conus/conus_nextgen.gpkg) more information on [all data sources here](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/hfv2.2-data_model.html).
+This tool prepares data to run a NextGen-based simulation by creating a run package that can be used with NGIAB.
+It uses geometry and model attributes from the [v2.2 hydrofabric](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/conus/conus_nextgen.gpkg) more information on [all data sources here](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/hfv2.2-data_model.html).
 The raw forcing data is [nwm retrospective v3 forcing](https://noaa-nwm-retrospective-3-0-pds.s3.amazonaws.com/index.html#CONUS/zarr/forcing/) data or the [AORC 1km gridded data](https://noaa-nws-aorc-v1-1-1km.s3.amazonaws.com/index.html) depending on user input

-1. **Subsets** (delineates) everything upstream of your point of interest (catchment, gage, flowpath etc) from the hydrofabric. This subset is output as a geopackage (.gpkg).
-2. Calculates **forcings** as a weighted mean of the gridded NWM or AORC forcings. Weights are calculated using [exact extract](https://isciences.github.io/exactextract/) and computed with numpy.
-3. Creates **configuration files** for a default NGIAB model run.
+1. **Subsets** (delineates) everything upstream of your point of interest (catchment, gage, flowpath etc) from the hydrofabric. This subset is output as a geopackage (.gpkg).
+2. Calculates **forcings** as a weighted mean of the gridded NWM or AORC forcings. Weights are calculated using [exact extract](https://isciences.github.io/exactextract/) and computed with numpy.
+3. Creates **configuration files** for a default NGIAB model run.
 - realization.json - ngen model configuration
 - troute.yaml - routing configuration.
 - **per catchment** model configuration
@@ -136,13 +136,13 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 # Create a virtual environment in the current directory
 uv venv

-# Install the tool in the virtual environment
+# Install the tool in the virtual environment
 uv pip install ngiab_data_preprocess

 # To run the cli
 uv run cli --help

-# To run the map
+# To run the map
 uv run map_app
 ```

@@ -160,7 +160,7 @@ UV automatically detects any virtual environments in the current directory and w
 (notebook) jovyan@jupyter-user:~$ conda deactivate
 jovyan@jupyter-user:~$
 # The interactive map won't work on 2i2c
-```
+```

 ```bash
 # This tool is likely to not work without a virtual environment
@@ -205,7 +205,7 @@ To install and run the tool, follow these steps:

 Running the `map_app` tool will open the app in a new browser tab.

-Install-free: `uvx --from ngiab-data-preprocess map_app`
+Install-free: `uvx --from ngiab-data-preprocess map_app`
 Installed with uv: `uv run map_app`

 ## Using the map interface
@@ -225,7 +225,7 @@ Once all the steps are finished, you can run NGIAB on the folder shown underneat

 ## Running the CLI

-Install-free: `uvx ngiab-prep`
+Install-free: `uvx ngiab-prep`
 Installed with uv: `uv run cli`

 ## Arguments
@@ -236,6 +236,7 @@ Installed with uv: `uv run cli`
 - `-l`, `--latlon`: Use latitude and longitude instead of catid. Expects comma-separated values via the CLI, e.g., `python -m ngiab_data_cli -i 54.33,-69.4 -l -s`.
 - `-g`, `--gage`: Use gage ID instead of catid. Expects a single gage ID via the CLI, e.g., `python -m ngiab_data_cli -i 01646500 -g -s`.
 - `-s`, `--subset`: Subset the hydrofabric to the given feature.
+- `--subset_type`: Specify the subset type. `nexus`: get everything flowing into the downstream nexus of the selected catchment. `catchment`: get everything flowing into the selected catchment.
 - `-f`, `--forcings`: Generate forcings for the given feature.
 - `-r`, `--realization`: Create a realization for the given feature.
 - `--lstm`: Configures the data for the [python lstm](https://github.com/ciroh-ua/lstm/).
@@ -259,7 +260,7 @@ Installed with uv: `uv run cli`

 1. Prepare everything for an NGIAB run at a given gage:
 ```bash
-uvx ngiab-prep -i gage-10154200 -sfr --start 2022-01-01 --end 2022-02-28
+uvx ngiab-prep -i gage-10154200 -sfr --start 2022-01-01 --end 2022-02-28
 # add --run or replace -sfr with --all to run NGIAB, too
 # to name the folder, add -o folder_name
 ```
README.md (+12 -11)
@@ -35,13 +35,13 @@ This repository contains tools for preparing data to run a [NextGen](https://git

 ## What does this tool do?

-This tool prepares data to run a NextGen-based simulation by creating a run package that can be used with NGIAB.
-It uses geometry and model attributes from the [v2.2 hydrofabric](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/conus/conus_nextgen.gpkg) more information on [all data sources here](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/hfv2.2-data_model.html).
+This tool prepares data to run a NextGen-based simulation by creating a run package that can be used with NGIAB.
+It uses geometry and model attributes from the [v2.2 hydrofabric](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/conus/conus_nextgen.gpkg) more information on [all data sources here](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/hfv2.2-data_model.html).
 The raw forcing data is [nwm retrospective v3 forcing](https://noaa-nwm-retrospective-3-0-pds.s3.amazonaws.com/index.html#CONUS/zarr/forcing/) data or the [AORC 1km gridded data](https://noaa-nws-aorc-v1-1-1km.s3.amazonaws.com/index.html) depending on user input

-1. **Subsets** (delineates) everything upstream of your point of interest (catchment, gage, flowpath etc) from the hydrofabric. This subset is output as a geopackage (.gpkg).
-2. Calculates **forcings** as a weighted mean of the gridded NWM or AORC forcings. Weights are calculated using [exact extract](https://isciences.github.io/exactextract/) and computed with numpy.
-3. Creates **configuration files** for a default NGIAB model run.
+1. **Subsets** (delineates) everything upstream of your point of interest (catchment, gage, flowpath etc) from the hydrofabric. This subset is output as a geopackage (.gpkg).
+2. Calculates **forcings** as a weighted mean of the gridded NWM or AORC forcings. Weights are calculated using [exact extract](https://isciences.github.io/exactextract/) and computed with numpy.
+3. Creates **configuration files** for a default NGIAB model run.
 - realization.json - ngen model configuration
 - troute.yaml - routing configuration.
 - **per catchment** model configuration
@@ -96,13 +96,13 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 # Create a virtual environment in the current directory
 uv venv

-# Install the tool in the virtual environment
+# Install the tool in the virtual environment
 uv pip install ngiab_data_preprocess

 # To run the cli
 uv run cli --help

-# To run the map
+# To run the map
 uv run map_app
 ```

@@ -120,7 +120,7 @@ UV automatically detects any virtual environments in the current directory and w
 (notebook) jovyan@jupyter-user:~$ conda deactivate
 jovyan@jupyter-user:~$
 # The interactive map won't work on 2i2c
-```
+```

 ```bash
 # This tool is likely to not work without a virtual environment
@@ -165,7 +165,7 @@ To install and run the tool, follow these steps:

 Running the `map_app` tool will open the app in a new browser tab.

-Install-free: `uvx --from ngiab-data-preprocess map_app`
+Install-free: `uvx --from ngiab-data-preprocess map_app`
 Installed with uv: `uv run map_app`

 ## Using the map interface
@@ -185,7 +185,7 @@ Once all the steps are finished, you can run NGIAB on the folder shown underneat

 ## Running the CLI

-Install-free: `uvx ngiab-prep`
+Install-free: `uvx ngiab-prep`
 Installed with uv: `uv run cli`

 ## Arguments
@@ -196,6 +196,7 @@ Installed with uv: `uv run cli`
 - `-l`, `--latlon`: Use latitude and longitude instead of catid. Expects comma-separated values via the CLI, e.g., `python -m ngiab_data_cli -i 54.33,-69.4 -l -s`.
 - `-g`, `--gage`: Use gage ID instead of catid. Expects a single gage ID via the CLI, e.g., `python -m ngiab_data_cli -i 01646500 -g -s`.
 - `-s`, `--subset`: Subset the hydrofabric to the given feature.
+- `--subset_type`: Specify the subset type. `nexus`: get everything flowing into the downstream nexus of the selected catchment. `catchment`: get everything flowing into the selected catchment.
 - `-f`, `--forcings`: Generate forcings for the given feature.
 - `-r`, `--realization`: Create a realization for the given feature.
 - `--lstm`: Configures the data for the [python lstm](https://github.com/ciroh-ua/lstm/).
@@ -219,7 +220,7 @@ Installed with uv: `uv run cli`

 1. Prepare everything for an NGIAB run at a given gage:
 ```bash
-uvx ngiab-prep -i gage-10154200 -sfr --start 2022-01-01 --end 2022-02-28
+uvx ngiab-prep -i gage-10154200 -sfr --start 2022-01-01 --end 2022-02-28
 # add --run or replace -sfr with --all to run NGIAB, too
 # to name the folder, add -o folder_name
 ```
modules/data_processing/dataset_utils.py (+6 -2)
@@ -107,9 +107,13 @@ def clip_dataset_to_bounds(
     """
     # check time range here in case just this function is imported and not the whole module
     start_time, end_time = validate_time_range(dataset, start_time, end_time)
+    samplex = dataset.x.values[:2]
+    intervalx = samplex[1] - samplex[0]
+    sampley = dataset.y.values[:2]
+    intervaly = sampley[1] - sampley[0]
     dataset = dataset.sel(
-        x=slice(bounds[0], bounds[2]),
-        y=slice(bounds[1], bounds[3]),
+        x=slice(bounds[0]-intervalx, bounds[2]+intervalx),
+        y=slice(bounds[1]-intervaly, bounds[3]+intervaly),
         time=slice(start_time, end_time),
     )
     logger.info("Selected time range and clipped to bounds")
modules/data_processing/gpkg_utils.py (+2 -1)
@@ -530,7 +530,7 @@ def get_cat_to_nhd_feature_id(gpkg: Path = FilePaths.conus_hydrofabric) -> Dict[
     )

     table_name = list(tables)[0]
-    sql_query = f"SELECT divide_id, hf_id FROM {table_name} WHERE divide_id IS NOT NULL AND hf_id IS NOT NULL"
+    sql_query = f"SELECT divide_id, hf_id FROM {table_name} WHERE divide_id IS NOT NULL AND hf_id IS NOT NULL ORDER BY hf_hydroseq DESC"

     with sqlite3.connect(gpkg) as conn:
         result: List[Tuple[str, str]] = conn.execute(sql_query).fetchall()
@@ -539,6 +539,7 @@ def get_cat_to_nhd_feature_id(gpkg: Path = FilePaths.conus_hydrofabric) -> Dict[
     for cat, feature in result:
         # the ids are stored as floats this converts to int to match nwm output
         # numeric ids should be stored as strings.
+        # Because of the ORDER BY above, the lowest hf_hydroseq "wins"
        mapping[cat] = int(feature)

     return mapping
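The `ORDER BY hf_hydroseq DESC` is doing real work here, as the new comment notes: when several rows share a `divide_id`, the loop overwrites `mapping[cat]` on each hit, so the row sorted last, the one with the lowest `hf_hydroseq`, survives. A toy reproduction with an in-memory SQLite table (the schema and values are invented for the demo):

```python
# Sketch only: "last write wins" over a descending sort, mirroring the
# query change in get_cat_to_nhd_feature_id. Table contents are made up.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE net (divide_id TEXT, hf_id REAL, hf_hydroseq REAL)")
conn.executemany(
    "INSERT INTO net VALUES (?, ?, ?)",
    [("cat-1", 101.0, 5.0), ("cat-1", 102.0, 2.0), ("cat-2", 201.0, 7.0)],
)

rows = conn.execute(
    "SELECT divide_id, hf_id FROM net "
    "WHERE divide_id IS NOT NULL AND hf_id IS NOT NULL "
    "ORDER BY hf_hydroseq DESC"
).fetchall()

mapping = {}
for cat, feature in rows:
    mapping[cat] = int(feature)  # later (lower hydroseq) rows overwrite earlier ones

print(mapping)  # {'cat-2': 201, 'cat-1': 102} -- cat-1 keeps its lowest-hydroseq hf_id
```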
modules/map_app/static/css/toggle.css (+3 -3)
@@ -79,7 +79,7 @@
 .toggle-input:checked + .toggle-label .toggle-text-left {
     color: #888; /* Grey color for non-selected text */
 }
-
+
 .toggle-input:checked + .toggle-label .toggle-text-right {
     color: #888; /* Grey color for non-selected text */
 }
@@ -91,7 +91,7 @@
     top: -40px;
 }

-.menu-switch__input {
+#menuToggle .menu-switch__input {
     opacity: 0;
     width: 100%;
     height: 100%;
@@ -162,7 +162,7 @@
     height: 1.4em;
     position: relative;
     top: -0.2em;
-    margin-right: 1em;
+    margin-right: 1em;
     vertical-align: top;
     cursor: pointer;
     text-align: center;
modules/map_app/static/js/data_processing.js (+8 -7)
@@ -19,12 +19,13 @@ async function subset() {
             document.getElementById('output-path').innerHTML = "Subset canceled. Geopackage located at " + filename;
             return;
         }
-    }
+    }
     // check what kind of subset
-    // get the position of the subset toggle
-    // false means subset by nexus, true means subset by catchment
-    var nexus_catchment = document.getElementById('subset-toggle').checked;
-    var subset_type = nexus_catchment ? 'catchment' : 'nexus';
+    if (document.getElementById('radio-nexus').checked) {
+        var subset_type = 'nexus'
+    } else {
+        var subset_type = 'catchment'
+    }

     const startTime = performance.now(); // Start the timer
     fetch('/subset', {
@@ -126,7 +127,7 @@ async function forcings() {
         body: JSON.stringify(forcing_dir),
     })
     .then(async (response) => response.text())
-    .then(progressFile => {
+    .then(progressFile => {
         pollForcingsProgress(progressFile); // Start polling for progress
     })
     fetch('/forcings', {
@@ -138,7 +139,7 @@ async function forcings() {
     .catch(error => {
         console.error('Error:', error);
     }).finally(() => {
-        document.getElementById('forcings-button').disabled = false;
+        document.getElementById('forcings-button').disabled = false;
     });
     } else {
         alert('No existing geopackage found. Please subset the data before getting forcings');
modules/ngiab_data_cli/__main__.py (+1 -1)
@@ -177,7 +177,7 @@ def main() -> None:
     else:
         logging.info("Subsetting hydrofabric")
         include_outlet = True
-        if args.gage:
+        if args.gage or args.subset_type == "catchment":
             include_outlet = False
         subset(
             feature_to_subset,
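Read together with the `--subset_type` flag added below in `arguments.py`, this means the outlet is now excluded in two cases rather than one: gage-based subsets (as before) and explicit `--subset_type catchment` runs, while the default `nexus` subset type keeps the outlet.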
modules/ngiab_data_cli/arguments.py (+10 -2)
@@ -3,6 +3,7 @@ from datetime import datetime

 # Constants
 DATE_FORMAT = "%Y-%m-%d" # used for datetime parsing
+DATE_FORMAT2 = "%Y-%m-%d %H:%M" # used for datetime parsing
 DATE_FORMAT_HINT = "YYYY-MM-DD" # printed in help message


@@ -91,13 +92,13 @@ def parse_arguments() -> argparse.Namespace:
     parser.add_argument(
         "--start_date",
         "--start",
-        type=lambda s: datetime.strptime(s, DATE_FORMAT),
+        type=lambda s: datetime.strptime(s, DATE_FORMAT) if len(s) == 10 else datetime.strptime(s, DATE_FORMAT2),
         help=f"Start date for forcings/realization (format {DATE_FORMAT_HINT})",
     )
     parser.add_argument(
         "--end_date",
         "--end",
-        type=lambda s: datetime.strptime(s, DATE_FORMAT),
+        type=lambda s: datetime.strptime(s, DATE_FORMAT) if len(s) == 10 else datetime.strptime(s, DATE_FORMAT2),
         help=f"End date for forcings/realization (format {DATE_FORMAT_HINT})",
     )
     parser.add_argument(
@@ -147,6 +148,13 @@ def parse_arguments() -> argparse.Namespace:
         choices=["aorc", "nwm"],
         default="nwm",
     )
+    parser.add_argument(
+        "--subset_type",
+        type=str,
+        help="By nexus: get everything flowing into the downstream nexus of the selected catchment. By catchment: get everything flowing into the selected catchment.",
+        choices=["nexus", "catchment"],
+        default="nexus",
+    )
     parser.add_argument(
         "-a",
         "--all",
modules/ngiab_data_preprocess.egg-info/PKG-INFO (+13 -12)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngiab_data_preprocess
-Version: 4.6.5
+Version: 4.6.7
 Summary: Graphical Tools for creating Next Gen Water model input data.
 Author-email: Josh Cunningham <jcunningham8@ua.edu>
 Project-URL: Homepage, https://github.com/CIROH-UA/NGIAB_data_preprocess
@@ -75,13 +75,13 @@ This repository contains tools for preparing data to run a [NextGen](https://git

 ## What does this tool do?

-This tool prepares data to run a NextGen-based simulation by creating a run package that can be used with NGIAB.
-It uses geometry and model attributes from the [v2.2 hydrofabric](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/conus/conus_nextgen.gpkg) more information on [all data sources here](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/hfv2.2-data_model.html).
+This tool prepares data to run a NextGen-based simulation by creating a run package that can be used with NGIAB.
+It uses geometry and model attributes from the [v2.2 hydrofabric](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/conus/conus_nextgen.gpkg) more information on [all data sources here](https://lynker-spatial.s3-us-west-2.amazonaws.com/hydrofabric/v2.2/hfv2.2-data_model.html).
 The raw forcing data is [nwm retrospective v3 forcing](https://noaa-nwm-retrospective-3-0-pds.s3.amazonaws.com/index.html#CONUS/zarr/forcing/) data or the [AORC 1km gridded data](https://noaa-nws-aorc-v1-1-1km.s3.amazonaws.com/index.html) depending on user input

-1. **Subsets** (delineates) everything upstream of your point of interest (catchment, gage, flowpath etc) from the hydrofabric. This subset is output as a geopackage (.gpkg).
-2. Calculates **forcings** as a weighted mean of the gridded NWM or AORC forcings. Weights are calculated using [exact extract](https://isciences.github.io/exactextract/) and computed with numpy.
-3. Creates **configuration files** for a default NGIAB model run.
+1. **Subsets** (delineates) everything upstream of your point of interest (catchment, gage, flowpath etc) from the hydrofabric. This subset is output as a geopackage (.gpkg).
+2. Calculates **forcings** as a weighted mean of the gridded NWM or AORC forcings. Weights are calculated using [exact extract](https://isciences.github.io/exactextract/) and computed with numpy.
+3. Creates **configuration files** for a default NGIAB model run.
 - realization.json - ngen model configuration
 - troute.yaml - routing configuration.
 - **per catchment** model configuration
@@ -136,13 +136,13 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 # Create a virtual environment in the current directory
 uv venv

-# Install the tool in the virtual environment
+# Install the tool in the virtual environment
 uv pip install ngiab_data_preprocess

 # To run the cli
 uv run cli --help

-# To run the map
+# To run the map
 uv run map_app
 ```

@@ -160,7 +160,7 @@ UV automatically detects any virtual environments in the current directory and w
 (notebook) jovyan@jupyter-user:~$ conda deactivate
 jovyan@jupyter-user:~$
 # The interactive map won't work on 2i2c
-```
+```

 ```bash
 # This tool is likely to not work without a virtual environment
@@ -205,7 +205,7 @@ To install and run the tool, follow these steps:

 Running the `map_app` tool will open the app in a new browser tab.

-Install-free: `uvx --from ngiab-data-preprocess map_app`
+Install-free: `uvx --from ngiab-data-preprocess map_app`
 Installed with uv: `uv run map_app`

 ## Using the map interface
@@ -225,7 +225,7 @@ Once all the steps are finished, you can run NGIAB on the folder shown underneat

 ## Running the CLI

-Install-free: `uvx ngiab-prep`
+Install-free: `uvx ngiab-prep`
 Installed with uv: `uv run cli`

 ## Arguments
@@ -236,6 +236,7 @@ Installed with uv: `uv run cli`
 - `-l`, `--latlon`: Use latitude and longitude instead of catid. Expects comma-separated values via the CLI, e.g., `python -m ngiab_data_cli -i 54.33,-69.4 -l -s`.
 - `-g`, `--gage`: Use gage ID instead of catid. Expects a single gage ID via the CLI, e.g., `python -m ngiab_data_cli -i 01646500 -g -s`.
 - `-s`, `--subset`: Subset the hydrofabric to the given feature.
+- `--subset_type`: Specify the subset type. `nexus`: get everything flowing into the downstream nexus of the selected catchment. `catchment`: get everything flowing into the selected catchment.
 - `-f`, `--forcings`: Generate forcings for the given feature.
 - `-r`, `--realization`: Create a realization for the given feature.
 - `--lstm`: Configures the data for the [python lstm](https://github.com/ciroh-ua/lstm/).
@@ -259,7 +260,7 @@ Installed with uv: `uv run cli`

 1. Prepare everything for an NGIAB run at a given gage:
 ```bash
-uvx ngiab-prep -i gage-10154200 -sfr --start 2022-01-01 --end 2022-02-28
+uvx ngiab-prep -i gage-10154200 -sfr --start 2022-01-01 --end 2022-02-28
 # add --run or replace -sfr with --all to run NGIAB, too
 # to name the folder, add -o folder_name
 ```
modules/ngiab_data_preprocess.egg-info/SOURCES.txt (+2 -1)
@@ -49,4 +49,5 @@ modules/ngiab_data_preprocess.egg-info/dependency_links.txt
 modules/ngiab_data_preprocess.egg-info/entry_points.txt
 modules/ngiab_data_preprocess.egg-info/requires.txt
 modules/ngiab_data_preprocess.egg-info/top_level.txt
-tests/test_nan_impute.py
+tests/test_nan_impute.py
+tests/test_ngiab_data_cli_regression.py
pyproject.toml (+1 -1)
@@ -19,7 +19,7 @@ filterwarnings = [
 ]
 [project]
 name = "ngiab_data_preprocess"
-version = "v4.6.5"
+version = "v4.6.7"
 authors = [{ name = "Josh Cunningham", email = "jcunningham8@ua.edu" }]
 description = "Graphical Tools for creating Next Gen Water model input data."
 readme = "README.md"
tests/test_ngiab_data_cli_regression.py (new file, +375 -0)
@@ -0,0 +1,375 @@
+import logging
+import shutil
+import subprocess
+from pathlib import Path
+
+import geopandas as gpd
+import numpy as np
+import pytest
+import xarray as xr
+from data_processing.file_paths import FilePaths
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+CONFIG_PATH = FilePaths.config_file
+
+
+def run_cli(input_id, start_date, end_date, output_name, source="aorc"):
+    """Run the CLI and return output paths."""
+    # Read config to get output root
+    with open(CONFIG_PATH, "r") as f:
+        output_root = Path(f.readline().strip()).expanduser()
+
+    output_path = output_root / output_name
+
+    # Clean up any existing output directory
+    if output_path.exists():
+        shutil.rmtree(output_path)
+
+    cmd = [
+        "uv",
+        "run",
+        "cli",
+        "-i",
+        input_id,
+        "-s",
+        "-f",
+        "--start_date",
+        start_date,
+        "--end_date",
+        end_date,
+        "--source",
+        source,
+        "-o",
+        output_name,
+    ]
+    try:
+        subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=600)
+    except subprocess.CalledProcessError as e:
+        pytest.fail(f"CLI failed: {e.stderr}")
+    except subprocess.TimeoutExpired:
+        pytest.fail("CLI timed out")
+
+    assert output_path.exists(), f"Output directory not created: {output_path}"
+    return {
+        "output_dir": output_path,
+        "start_date": start_date,
+        "end_date": end_date,
+        "gpkg_path": output_path / "config" / f"{output_name}_subset.gpkg",
+        "raw_nc": output_path / "forcings" / "raw_gridded_data.nc",
+        "forcings_nc": output_path / "forcings" / "forcings.nc",
+    }
+
+
+@pytest.fixture(scope="module")
+def cat_1555522_output():
+    """Single catchment test: cat-1555522, 1 day."""
+    return run_cli("cat-1555522", "2020-01-01", "2020-01-02", "test_cat_1555522")
+
+
+@pytest.fixture(scope="module")
+def gage_10109001_output():
+    """Multi-catchment gage test: gage-10109001, 9 days."""
+    return run_cli("gage-10109001", "2019-10-01", "2019-10-10", "test_gage_10109001")
+
+
+# =============================================================================
+# Test configurations
+# =============================================================================
+
+GEOPACKAGE_LAYERS = [
+    "divides",
+    "divide-attributes",
+    "flowpath-attributes",
+    "flowpath-attributes-ml",
+    "flowpaths",
+    "hydrolocations",
+    "nexus",
+    "pois",
+    "lakes",
+    "network",
+]
+
+FORCING_VARS = [
+    "SPFH_2maboveground",
+    "DSWRF_surface",
+    "VGRD_10maboveground",
+    "DLWRF_surface",
+    "APCP_surface",
+    "UGRD_10maboveground",
+    "PRES_surface",
+    "TMP_2maboveground",
+    "precip_rate",
+    "ids",
+    "Time",
+]
+
+PHYSICAL_RANGES = {
+    "TMP_2maboveground": (200, 330),
+    "PRES_surface": (50000, 110000),
+    "SPFH_2maboveground": (0, 0.05),
+    "DSWRF_surface": (0, 1400),
+    "DLWRF_surface": (0, 600),
+    "APCP_surface": (0, 500),
+    "precip_rate": (0, 0.2),
+}
+
+CAT_1555522_REGRESSION = {
+    "dims": {"catchment-id": 1, "time": 25},
+    "catchment_ids": ["cat-1555522"],
+    "table_counts": {"divides": 1, "flowpaths": 1, "nexus": 1},
+    "stats": {
+        "TMP_2maboveground": {"min": 270.04, "max": 287.06, "mean": 276.30},
+        "PRES_surface": {"min": 96235.0, "max": 97941.0, "mean": 97159.5},
+        "DSWRF_surface": {"min": 0.0, "max": 366.91, "mean": 85.40},
+        "DLWRF_surface": {"min": 207.95, "max": 248.33, "mean": 222.67},
+        "SPFH_2maboveground": {"min": 0.0024, "max": 0.00464, "mean": 0.00313},
+    },
+    "sample_values": {
+        "TMP_2maboveground": [275.938, 274.598, 273.735, 272.985, 272.476],
+        "PRES_surface": [97941.0, 97872.3, 97857.2, 97852.3, 97829.1],
+    },
+    "time_values": [1577836800, 1577840400, 1577844000, 1577847600, 1577851200],
+}
+
+GAGE_10109001_REGRESSION = {
+    "dims": {"catchment-id": 88, "time": 217},
+    "catchment_ids": [
+        "cat-2861379",
+        "cat-2861380",
+        "cat-2861387",
+        "cat-2861414",
+        "cat-2861421",
+        "cat-2861429",
+        "cat-2861431",
+        "cat-2861436",
+        "cat-2861438",
+        "cat-2861442",
+    ],  # First 10 for spot check
+    "table_counts": {"divides": 88, "flowpaths": 88, "nexus": 38},
+    "stats": {
+        "TMP_2maboveground": {"min": 266.08, "max": 293.25, "mean": 276.13},
+        "PRES_surface": {"min": 72895.4, "max": 85003.4, "mean": 77537.8},
+        "DSWRF_surface": {"min": 0.0, "max": 711.17, "mean": 179.39},
+        "DLWRF_surface": {"min": 177.51, "max": 322.51, "mean": 222.13},
+        "SPFH_2maboveground": {"min": 0.00122, "max": 0.00588, "mean": 0.00333},
+        "APCP_surface": {"min": 0.0, "max": 4.696, "mean": 0.0233},
+    },
+    "sample_values": {
+        "TMP_2maboveground": [274.370, 272.429, 270.498, 268.974, 269.294],
+        "PRES_surface": [74866.3, 74861.7, 74884.5, 74898.7, 74877.5],
+    },
+    "time_values": [1569888000, 1569891600, 1569895200, 1569898800, 1569902400],
+}
+
+
+# =============================================================================
+# cat-1555522 Tests (Single Catchment)
+# =============================================================================
+
+
+class TestCat1555522Geopackage:
+    """Geopackage tests for cat-1555522."""
+
+    def test_geopackage_layers(self, cat_1555522_output):
+        gpkg = cat_1555522_output["gpkg_path"]
+        assert gpkg.exists()
+        actual = set(gpd.list_layers(gpkg)["name"])
+        assert not (set(GEOPACKAGE_LAYERS) - actual), (
+            f"Missing layers: {set(GEOPACKAGE_LAYERS) - actual}"
+        )
+
+    @pytest.mark.parametrize("layer", ["divides", "flowpaths", "nexus"])
+    def test_table_row_counts(self, cat_1555522_output, layer):
+        gdf = gpd.read_file(cat_1555522_output["gpkg_path"], layer=layer)
+        assert len(gdf) == CAT_1555522_REGRESSION["table_counts"][layer]
+
+
+class TestCat1555522GriddedForcings:
+    """Raw gridded forcing tests for cat-1555522."""
+
+    def test_netcdf_structure(self, cat_1555522_output):
+        nc = cat_1555522_output["raw_nc"]
+        assert nc.exists()
+        with xr.open_dataset(nc) as ds:
+            assert "time" in ds.dims
+            assert any(d in ds.dims for d in ("x", "lon"))
+            assert any(d in ds.dims for d in ("y", "lat"))
+
+    def test_netcdf_time_range(self, cat_1555522_output):
+        with xr.open_dataset(cat_1555522_output["raw_nc"]) as ds:
+            assert ds.time.min().values >= np.datetime64(cat_1555522_output["start_date"])
+            assert ds.time.max().values <= np.datetime64(cat_1555522_output["end_date"])
+
+
+class TestCat1555522ProcessedForcings:
+    """Processed forcing tests for cat-1555522."""
+
+    def test_structure(self, cat_1555522_output):
+        nc = cat_1555522_output["forcings_nc"]
+        assert nc.exists()
+        with xr.open_dataset(nc) as ds:
+            assert ds.sizes["catchment-id"] == CAT_1555522_REGRESSION["dims"]["catchment-id"]
+            assert ds.sizes["time"] == CAT_1555522_REGRESSION["dims"]["time"]
+            for var in FORCING_VARS:
+                assert var in ds.data_vars or var in ds.coords
+
+    def test_catchment_ids(self, cat_1555522_output):
+        gpkg_ids = set(gpd.read_file(cat_1555522_output["gpkg_path"], layer="divides")["divide_id"])
+        with xr.open_dataset(cat_1555522_output["forcings_nc"]) as ds:
+            nc_ids = set(ds["ids"].values)
+        assert gpkg_ids == nc_ids
+
+    def test_value_ranges(self, cat_1555522_output):
+        with xr.open_dataset(cat_1555522_output["forcings_nc"]) as ds:
+            for var, (lo, hi) in PHYSICAL_RANGES.items():
+                if var in ds.data_vars:
+                    data = ds[var].values
+                    assert np.nanmin(data) >= lo, f"{var} below min"
+                    assert np.nanmax(data) <= hi, f"{var} above max"
+
+    def test_regression_stats(self, cat_1555522_output):
+        with xr.open_dataset(cat_1555522_output["forcings_nc"]) as ds:
+            for var, expected in CAT_1555522_REGRESSION["stats"].items():
+                data = ds[var].values
+                np.testing.assert_allclose(np.nanmin(data), expected["min"], rtol=0.01)
+                np.testing.assert_allclose(np.nanmax(data), expected["max"], rtol=0.01)
+                np.testing.assert_allclose(np.nanmean(data), expected["mean"], rtol=0.01)
+
+    def test_regression_sample_values(self, cat_1555522_output):
+        with xr.open_dataset(cat_1555522_output["forcings_nc"]) as ds:
+            for var, expected in CAT_1555522_REGRESSION["sample_values"].items():
+                actual = ds[var].isel({"catchment-id": 0, "time": slice(0, 5)}).values
+                np.testing.assert_allclose(actual, expected, rtol=0.001)
+
+    def test_regression_time_values(self, cat_1555522_output):
+        with xr.open_dataset(cat_1555522_output["forcings_nc"]) as ds:
+            actual = ds["Time"].isel({"catchment-id": 0, "time": slice(0, 5)}).values.tolist()
+        assert actual == CAT_1555522_REGRESSION["time_values"]
+
+
+# =============================================================================
+# gage-10109001 Tests (Multi-Catchment)
+# =============================================================================
+
+
+class TestGage10109001Geopackage:
+    """Geopackage tests for gage-10109001."""
+
+    def test_geopackage_layers(self, gage_10109001_output):
+        gpkg = gage_10109001_output["gpkg_path"]
+        assert gpkg.exists()
+        actual = set(gpd.list_layers(gpkg)["name"])
+        assert not (set(GEOPACKAGE_LAYERS) - actual)
+
+    @pytest.mark.parametrize("layer", ["divides", "flowpaths", "nexus"])
+    def test_table_row_counts(self, gage_10109001_output, layer):
+        gdf = gpd.read_file(gage_10109001_output["gpkg_path"], layer=layer)
+        assert len(gdf) == GAGE_10109001_REGRESSION["table_counts"][layer]
+
+
+class TestGage10109001GriddedForcings:
+    """Raw gridded forcing tests for gage-10109001."""
+
+    def test_netcdf_structure(self, gage_10109001_output):
+        nc = gage_10109001_output["raw_nc"]
+        assert nc.exists()
+        with xr.open_dataset(nc) as ds:
+            assert "time" in ds.dims
+            assert any(d in ds.dims for d in ("x", "lon"))
+            assert any(d in ds.dims for d in ("y", "lat"))
+
+    def test_netcdf_time_range(self, gage_10109001_output):
+        with xr.open_dataset(gage_10109001_output["raw_nc"]) as ds:
+            assert ds.time.min().values >= np.datetime64(gage_10109001_output["start_date"])
+            assert ds.time.max().values <= np.datetime64(gage_10109001_output["end_date"])
+
+
+class TestGage10109001ProcessedForcings:
+    """Processed forcing tests for gage-10109001."""
+
+    def test_structure(self, gage_10109001_output):
+        nc = gage_10109001_output["forcings_nc"]
+        assert nc.exists()
+        with xr.open_dataset(nc) as ds:
+            assert ds.sizes["catchment-id"] == GAGE_10109001_REGRESSION["dims"]["catchment-id"]
+            assert ds.sizes["time"] == GAGE_10109001_REGRESSION["dims"]["time"]
+            for var in FORCING_VARS:
+                assert var in ds.data_vars or var in ds.coords
+
+    def test_catchment_ids_subset(self, gage_10109001_output):
+        """Check that expected catchment IDs are present (spot check first 10)."""
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            nc_ids = set(ds["ids"].values)
+        for cat_id in GAGE_10109001_REGRESSION["catchment_ids"]:
+            assert cat_id in nc_ids
+
+    def test_catchment_ids_match_gpkg(self, gage_10109001_output):
+        gpkg_ids = set(
+            gpd.read_file(gage_10109001_output["gpkg_path"], layer="divides")["divide_id"]
+        )
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            nc_ids = set(ds["ids"].values)
+        assert gpkg_ids == nc_ids
+
+    def test_value_ranges(self, gage_10109001_output):
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            for var, (lo, hi) in PHYSICAL_RANGES.items():
+                if var in ds.data_vars:
+                    data = ds[var].values
+                    assert np.nanmin(data) >= lo, f"{var} below min"
+                    assert np.nanmax(data) <= hi, f"{var} above max"
+
+    def test_no_all_nan(self, gage_10109001_output):
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            for var in ds.data_vars:
+                if ds[var].dtype in (np.float32, np.float64):
+                    assert not np.all(np.isnan(ds[var].values)), f"{var} is all NaN"
+
+    def test_regression_stats(self, gage_10109001_output):
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            for var, expected in GAGE_10109001_REGRESSION["stats"].items():
+                data = ds[var].values
+                np.testing.assert_allclose(np.nanmin(data), expected["min"], rtol=0.01)
+                np.testing.assert_allclose(np.nanmax(data), expected["max"], rtol=0.01)
+                np.testing.assert_allclose(np.nanmean(data), expected["mean"], rtol=0.01)
+
+    def test_regression_sample_values(self, gage_10109001_output):
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            for var, expected in GAGE_10109001_REGRESSION["sample_values"].items():
+                actual = ds[var].isel({"catchment-id": 0, "time": slice(0, 5)}).values
+                np.testing.assert_allclose(actual, expected, rtol=0.001)
+
+    def test_regression_time_values(self, gage_10109001_output):
+        with xr.open_dataset(gage_10109001_output["forcings_nc"]) as ds:
+            actual = ds["Time"].isel({"catchment-id": 0, "time": slice(0, 5)}).values.tolist()
+        assert actual == GAGE_10109001_REGRESSION["time_values"]
+
+
+# =============================================================================
+# End-to-End Tests
+# =============================================================================
+
+
+class TestEndToEnd:
+    """End-to-end integration tests."""
+
+    @pytest.mark.parametrize("fixture_name", ["cat_1555522_output", "gage_10109001_output"])
+    def test_complete_pipeline(self, fixture_name, request):
+        output = request.getfixturevalue(fixture_name)
+        assert output["gpkg_path"].exists()
+        assert output["raw_nc"].exists()
+        assert output["forcings_nc"].exists()
+
+    @pytest.mark.parametrize("fixture_name", ["cat_1555522_output", "gage_10109001_output"])
+    def test_output_size_reasonable(self, fixture_name, request):
+        output = request.getfixturevalue(fixture_name)
+        size_mb = sum(f.stat().st_size for f in output["output_dir"].rglob("*") if f.is_file()) / (
+            1024 * 1024
+        )
+        assert 0.1 < size_mb < 1000, f"Suspicious output size: {size_mb:.2f} MB"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])
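The suite shells out to the real CLI (`uv run cli ...` with a 600-second timeout per invocation), so it downloads hydrofabric and forcing data and is closer to an integration test than a unit test. Assuming the repository's normal dev environment, it should be runnable with pytest in the usual way, e.g. `uv run pytest tests/test_ngiab_data_cli_regression.py -v`, or directly via the `pytest.main` hook at the bottom of the file.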