roms-tools 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. roms_tools/__init__.py +8 -1
  2. roms_tools/analysis/cdr_analysis.py +203 -0
  3. roms_tools/analysis/cdr_ensemble.py +198 -0
  4. roms_tools/analysis/roms_output.py +80 -46
  5. roms_tools/data/grids/GLORYS_global_grid.nc +0 -0
  6. roms_tools/download.py +4 -0
  7. roms_tools/plot.py +131 -30
  8. roms_tools/regrid.py +6 -1
  9. roms_tools/setup/boundary_forcing.py +94 -44
  10. roms_tools/setup/cdr_forcing.py +123 -15
  11. roms_tools/setup/cdr_release.py +161 -8
  12. roms_tools/setup/datasets.py +709 -341
  13. roms_tools/setup/grid.py +167 -139
  14. roms_tools/setup/initial_conditions.py +113 -48
  15. roms_tools/setup/mask.py +63 -7
  16. roms_tools/setup/nesting.py +67 -42
  17. roms_tools/setup/river_forcing.py +45 -19
  18. roms_tools/setup/surface_forcing.py +16 -10
  19. roms_tools/setup/tides.py +1 -2
  20. roms_tools/setup/topography.py +4 -4
  21. roms_tools/setup/utils.py +134 -22
  22. roms_tools/tests/test_analysis/test_cdr_analysis.py +144 -0
  23. roms_tools/tests/test_analysis/test_cdr_ensemble.py +202 -0
  24. roms_tools/tests/test_analysis/test_roms_output.py +61 -3
  25. roms_tools/tests/test_setup/test_boundary_forcing.py +111 -52
  26. roms_tools/tests/test_setup/test_cdr_forcing.py +54 -0
  27. roms_tools/tests/test_setup/test_cdr_release.py +118 -1
  28. roms_tools/tests/test_setup/test_datasets.py +458 -34
  29. roms_tools/tests/test_setup/test_grid.py +238 -121
  30. roms_tools/tests/test_setup/test_initial_conditions.py +94 -41
  31. roms_tools/tests/test_setup/test_surface_forcing.py +28 -3
  32. roms_tools/tests/test_setup/test_utils.py +91 -1
  33. roms_tools/tests/test_setup/test_validation.py +21 -15
  34. roms_tools/tests/test_setup/utils.py +71 -0
  35. roms_tools/tests/test_tiling/test_join.py +241 -0
  36. roms_tools/tests/test_tiling/test_partition.py +45 -0
  37. roms_tools/tests/test_utils.py +224 -2
  38. roms_tools/tiling/join.py +189 -0
  39. roms_tools/tiling/partition.py +44 -30
  40. roms_tools/utils.py +488 -161
  41. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/METADATA +15 -4
  42. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/RECORD +45 -37
  43. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/WHEEL +0 -0
  44. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/licenses/LICENSE +0 -0
  45. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/top_level.txt +0 -0
roms_tools/utils.py CHANGED
@@ -1,26 +1,213 @@
 import glob
 import logging
 import re
+import textwrap
 import warnings
+from collections.abc import Callable, Iterable, Sequence
+from dataclasses import dataclass
 from importlib.util import find_spec
 from pathlib import Path
+from typing import TypeAlias

 import numpy as np
 import xarray as xr

 from roms_tools.constants import R_EARTH

+FilePaths: TypeAlias = str | Path | list[Path | str]

-def _load_data(
-    filename,
-    dim_names,
-    use_dask,
-    time_chunking=True,
-    decode_times=True,
-    force_combine_nested=False,
-    read_zarr: bool = False,
-):
-    """Load dataset from the specified file.
+
+def _path_list_from_input(files: FilePaths) -> list[Path]:
+    """Converts a generic user input to a list of Paths.
+
+    Takes a list of strings or paths, or wildcard pattern, and
+    returns a list of pathlib.Path objects
+
+    Parameters
+    ----------
+    files: FilePaths
+        A list of files (str, Path), single path as a str or Path, or a wildcard string
+
+    Returns
+    -------
+    List[Path]
+        A list of pathlib.Paths
+    """
+    if isinstance(files, str):
+        filepaths = sorted(Path(files).parent.glob(Path(files).name))
+        if not filepaths:
+            raise FileNotFoundError(f"No files matched: {files}")
+    elif isinstance(files, Path):
+        filepaths = [
+            files,
+        ]
+    elif isinstance(files, list):
+        filepaths = [Path(f) for f in files]
+    else:
+        raise TypeError("'files' should be str, Path, or List[Path | str]")
+
+    return filepaths
+
+
+@dataclass
+class FileMatchResult:
+    """The result of performing a wildcard search."""
+
+    contains_wildcard: bool
+    """Return `True` if the search contained a wildcard."""
+    matches: list[str]
+    """The items matching the wildcard search."""
+
+
+def _get_file_matches(
+    filename: str | Path | Sequence[str | Path],
+) -> FileMatchResult:
+    """Filter the filename using an optional wildcard search in the filename.
+
+    Parameters
+    ----------
+    filename : str | Path | Sequence[str | Path]
+        An item to search for matches.
+    """
+    # Precompile the regex for matching wildcard characters
+    wildcard_regex = re.compile(r"[\*\?\[\]]")
+
+    # Normalize input into a list of strings
+    if isinstance(filename, (str | Path)):
+        filenames: list[str] = [str(filename)]
+    elif isinstance(filename, Sequence):
+        filenames = [str(f) for f in filename]
+    else:
+        msg = "filename must be a string, Path, or a sequence of strings/Paths."
+        raise ValueError(msg)
+
+    contains_wildcard = any(wildcard_regex.search(f) for f in filenames)
+    matching_files: list[str] = []
+
+    for f in filenames:
+        if wildcard_regex.search(f):
+            files = glob.glob(f)
+            if not files:
+                raise FileNotFoundError(f"No files found matching the pattern '{f}'.")
+            matching_files.extend(files)
+        else:
+            matching_files.append(f)
+
+    return FileMatchResult(
+        contains_wildcard=contains_wildcard,
+        matches=sorted(matching_files),
+    )
+
+
+def _get_ds_combination_params(
+    force_combine_nested: bool,
+    dim_names: dict[str, str],
+    match_result: FileMatchResult,
+) -> dict[str, str]:
+    """Determine the non-base parameters for combining datasets.
+
+    Parameters
+    ----------
+    force_combine_nested: bool, optional
+        If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
+        Defaults to False.
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+        Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
+        For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
+    match_result : FileMatchResult
+        The result of an optional wildcard search of dataset filename(s).
+
+    Returns
+    -------
+    dict[str, str]
+        The default dataset combination parameters
+
+    """
+    if force_combine_nested:
+        load_kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
+    elif match_result.contains_wildcard or len(match_result.matches) == 1:
+        load_kwargs = {"combine": "by_coords"}
+    else:
+        load_kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
+
+    return load_kwargs
+
+
+def _get_ds_combine_base_params() -> dict[str, str]:
+    """Return the base parameters used when combining an xr.Dataset.
+
+    Returns
+    -------
+    dict[str, str]
+        The default dataset combination parameters
+
+    """
+    return {
+        "coords": "minimal",
+        "compat": "override",
+        "combine_attrs": "override",
+    }
+
+
+def get_dask_chunks(
+    dim_names: dict[str, str], time_chunking: bool = True
+) -> dict[str, int]:
+    """Return the default dask chunks for ROMS datasets.
+
+    Parameters
+    ----------
+    dim_names : dict[str, str]
+        Dictionary specifying the names of dimensions in the dataset.
+        - For lat-lon datasets, provide keys "latitude" and "longitude" (and optionally "depth" and "time").
+        - For ROMS datasets, the default ROMS dimensions are assumed ("eta_rho", "xi_rho", "s_rho", etc.).
+    time_chunking : bool, optional
+        Whether to chunk along the time dimension.
+        - True: chunk time dimension with size 1 (useful for processing large time-series data with Dask).
+        - False: do not explicitly chunk time; Dask will use default auto-chunking.
+        Defaults to True.
+
+    Returns
+    -------
+    dict[str, int]
+        The default dask chunks for ROMS datasets.
+    """
+    if "latitude" in dim_names and "longitude" in dim_names:
+        # for lat-lon datasets
+        chunks = {
+            dim_names["latitude"]: -1,
+            dim_names["longitude"]: -1,
+        }
+    else:
+        # For ROMS datasets
+        chunks = {
+            "eta_rho": -1,
+            "eta_v": -1,
+            "xi_rho": -1,
+            "xi_u": -1,
+            "s_rho": -1,
+        }
+
+    if "depth" in dim_names:
+        chunks[dim_names["depth"]] = -1
+    if "time" in dim_names and time_chunking:
+        chunks[dim_names["time"]] = 1
+    if "ntides" in dim_names:
+        chunks[dim_names["ntides"]] = 1
+
+    return chunks
+
+
+def _load_data_dask(
+    filenames: list[str],
+    dim_names: dict[str, str],
+    time_chunking: bool = True,
+    decode_times: bool = True,
+    decode_timedelta: bool = True,
+    read_zarr: bool = True,
+    load_kwargs: dict[str, str] | None = None,
+) -> xr.Dataset:
+    """Load dataset from the specified file using Dask.

     Parameters
     ----------
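
The new `get_dask_chunks` derives its chunking scheme from the dataset's dimension names instead of a grid location. A minimal sketch of its behavior (the dimension names "lat" and "lon" are illustrative, not taken from this diff):

    from roms_tools.utils import get_dask_chunks

    # Spatial dimensions are left whole (-1); the time dimension is chunked
    # with size 1 so Dask can stream one record at a time.
    dims = {"latitude": "lat", "longitude": "lon", "time": "time"}
    print(get_dask_chunks(dims, time_chunking=True))
    # {'lat': -1, 'lon': -1, 'time': 1}
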
@@ -31,8 +218,6 @@ def _load_data(
         Dictionary specifying the names of dimensions in the dataset.
         Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
         For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
-    use_dask: bool
-        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
     time_chunking : bool, optional
         If True and `use_dask=True`, the data will be chunked along the time dimension with a chunk size of 1.
         If False, the data will not be chunked explicitly along the time dimension, but will follow the default auto chunking scheme. This option is useful for ROMS restart files.
@@ -40,9 +225,9 @@ def _load_data(
     decode_times: bool, optional
         If True, decode times and timedeltas encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
         Defaults to True.
-    force_combine_nested: bool, optional
-        If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
-        Defaults to False.
+    decode_timedelta: bool, optional
+        If True, decode timedeltas encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
+        Defaults to True.
     read_zarr: bool, optional
         If True, use the zarr engine to read the dataset, and don't use mfdataset.
         Defaults to False.
@@ -58,155 +243,230 @@ def _load_data(
         If the specified file does not exist.
     ValueError
         If a list of files is provided but dim_names["time"] is not available or use_dask=False.
+
     """
-    if dim_names is None:
-        dim_names = {}
+    chunks = get_dask_chunks(dim_names, time_chunking)

-    if use_dask:
-        if not _has_dask():
-            raise RuntimeError(
-                "Dask is required but not installed. Install it with:\n"
-                " `pip install roms-tools[dask]` or\n"
-                " • `conda install dask`\n"
-                "Alternatively, install `roms-tools` with conda to include all dependencies."
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            category=UserWarning,
+            message=r"^The specified chunks separate.*",
+        )
+
+        if read_zarr:
+            return xr.open_zarr(
+                filenames[0],
+                decode_times=decode_times,
+                decode_timedelta=decode_timedelta,
+                chunks=chunks,
+                consolidated=None,
+                storage_options={"token": "anon"},
             )
+
+        kwargs = {**_get_ds_combine_base_params(), **(load_kwargs or {})}
+        return xr.open_mfdataset(
+            filenames,
+            decode_times=decode_times,
+            decode_timedelta=decode_timedelta,
+            chunks=chunks,
+            **kwargs,
+        )
+
+
+def _check_load_data_dask(use_dask: bool) -> None:
+    """Determine if dask is installed.
+
+    Parameters
+    ----------
+    use_dask: bool
+        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+
+    Raises
+    ------
+    RuntimeError
+        If dask is requested but not installed.
+    """
+    if use_dask and not has_dask():
+        msg = (
+            "Dask is required but not installed. Install it with:\n"
+            " • `pip install roms-tools[dask]` or\n"
+            " • `conda install dask`\n"
+            "Alternatively, install `roms-tools` with conda to include all dependencies."
+        )
+        raise RuntimeError(msg)
+
+
+def _check_load_data_zarr(
+    use_dask: bool, read_zarr: bool, filename: str | Path | list[str | Path]
+) -> None:
+    """Determine if zarr streaming will conflict with the current request configuration.
+
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    use_dask: bool
+        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+    read_zarr: bool, optional
+        If True, use the zarr engine to read the dataset, and don't use mfdataset.
+        Defaults to False.
+
+    Raises
+    ------
+    RuntimeError
+        If read_zarr is requested, but:
+        - the request doesn't specify a dependency on dask
+        - the request includes a list of filenames
+
+    """
     if read_zarr:
         if isinstance(filename, list):
-            raise ValueError("read_zarr requires a single path, not a list of paths")
+            msg = "read_zarr requires a single path, not a list of paths"
+            raise ValueError(msg)
+
         if not use_dask:
-            raise ValueError("read_zarr must be used with use_dask")
+            msg = "read_zarr must be used with use_dask"
+            raise ValueError(msg)

-    # Precompile the regex for matching wildcard characters
-    wildcard_regex = re.compile(r"[\*\?\[\]]")

-    # Convert Path objects to strings
-    if isinstance(filename, str | Path):
-        filename_str = str(filename)
-    elif isinstance(filename, list):
-        filename_str = [str(f) for f in filename]
-    else:
-        raise ValueError("filename must be a string, Path, or a list of strings/Paths.")
-
-    # Handle the case when filename is a string
-    contains_wildcard = False
-    if isinstance(filename_str, str):
-        contains_wildcard = bool(wildcard_regex.search(filename_str))
-        if contains_wildcard:
-            matching_files = glob.glob(filename_str)
-            if not matching_files:
-                raise FileNotFoundError(
-                    f"No files found matching the pattern '{filename_str}'."
-                )
-        else:
-            matching_files = [filename_str]
-
-    # Handle the case when filename is a list
-    elif isinstance(filename_str, list):
-        contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
-        if contains_wildcard:
-            matching_files = []
-            for f in filename_str:
-                files = glob.glob(f)
-                if not files:
-                    raise FileNotFoundError(
-                        f"No files found matching the pattern '{f}'."
-                    )
-                matching_files.extend(files)
-        else:
-            matching_files = filename_str
+def _check_load_data_filename(
+    filename: str | Path | list[str | Path], dim_names: Iterable[str]
+) -> None:
+    """Determine if time dimension is available when multiple files are provided.

-    # Sort the matching files
-    matching_files = sorted(matching_files)
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+        Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
+        For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
+
+    Raises
+    ------
+    ValueError
+        If time dimension is not found and a list of files is provided.

-    # Check if time dimension is available when multiple files are provided
-    if isinstance(filename_str, list) and "time" not in dim_names:
-        raise ValueError(
+    """
+    if isinstance(filename, list) and "time" not in dim_names:
+        msg = (
             "A list of files is provided, but time dimension is not available. "
             "A time dimension must be available to concatenate the files."
         )
+        raise ValueError(msg)

-    # Determine the kwargs for combining datasets
-    if force_combine_nested:
-        kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
-    elif contains_wildcard or len(matching_files) == 1:
-        kwargs = {"combine": "by_coords"}
-    else:
-        kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}

-    # Base kwargs used for dataset combination
-    combine_kwargs = {
-        "coords": "minimal",
-        "compat": "override",
-        "combine_attrs": "override",
-    }
+def load_data(
+    filename: str | Path | list[str | Path],
+    dim_names: dict[str, str] | None = None,
+    use_dask: bool = False,
+    time_chunking: bool = True,
+    decode_times: bool = True,
+    decode_timedelta: bool = True,
+    force_combine_nested: bool = False,
+    read_zarr: bool = False,
+    ds_loader_fn: Callable[[], xr.Dataset] | None = None,
+):
+    """Load dataset from the specified file.

-    if use_dask:
-        if "latitude" in dim_names and "longitude" in dim_names:
-            # for lat-lon datasets
-            chunks = {
-                dim_names["latitude"]: -1,
-                dim_names["longitude"]: -1,
-            }
-        else:
-            # For ROMS datasets
-            chunks = {
-                "eta_rho": -1,
-                "eta_v": -1,
-                "xi_rho": -1,
-                "xi_u": -1,
-                "s_rho": -1,
-            }
-
-        if "depth" in dim_names:
-            chunks[dim_names["depth"]] = -1
-        if "time" in dim_names and time_chunking:
-            chunks[dim_names["time"]] = 1
-        if "ntides" in dim_names:
-            chunks[dim_names["ntides"]] = 1
-
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore",
-                category=UserWarning,
-                message=r"^The specified chunks separate.*",
-            )
+    Parameters
+    ----------
+    filename : str | Path | list[str | Path]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    dim_names : dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+        Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
+        For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
+    use_dask: bool, optional
+        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+    time_chunking : bool, optional
+        If True and `use_dask=True`, the data will be chunked along the time dimension with a chunk size of 1.
+        If False, the data will not be chunked explicitly along the time dimension, but will follow the default auto chunking scheme. This option is useful for ROMS restart files.
+        Defaults to True.
+    decode_times: bool, optional
+        If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
+        Defaults to True.
+    decode_timedelta: bool, optional
+        If True, decode timedeltas encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
+        Defaults to True.
+    force_combine_nested: bool, optional
+        If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
+        Defaults to False.
+    read_zarr: bool, optional
+        If True, use the zarr engine to read the dataset, and don't use mfdataset.
+        Defaults to False.

-        if read_zarr:
-            ds = xr.open_zarr(
-                matching_files[0],
-                decode_times=decode_times,
-                chunks=chunks,
-                consolidated=None,
-                storage_options=dict(token="anon"),
-            )
-        else:
-            ds = xr.open_mfdataset(
-                matching_files,
-                decode_times=decode_times,
-                decode_timedelta=decode_times,
-                chunks=chunks,
-                **combine_kwargs,
-                **kwargs,
-            )
+    Returns
+    -------
+    ds : xr.Dataset
+        The loaded xarray Dataset containing the forcing data.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the specified file does not exist.
+    ValueError
+        If a list of files is provided but dim_names["time"] is not available or use_dask=False.
+    RuntimeError
+        If loading the dataset fails
+    """
+    dim_names = dim_names or {}
+
+    _check_load_data_dask(use_dask)
+    _check_load_data_zarr(use_dask, read_zarr, filename)
+    _check_load_data_filename(filename, dim_names.keys())
+
+    match_result = _get_file_matches(filename)

+    load_kwargs = _get_ds_combination_params(
+        force_combine_nested,
+        dim_names,
+        match_result,
+    )
+
+    ds: xr.Dataset | xr.DataArray | None = None
+
+    if ds_loader_fn is not None:
+        ds = ds_loader_fn()
+    elif use_dask:
+        ds = _load_data_dask(
+            match_result.matches,
+            dim_names,
+            time_chunking,
+            decode_times,
+            decode_timedelta,
+            read_zarr,
+            load_kwargs,
+        )
     else:
         ds_list = []
-        for file in matching_files:
+        for file in match_result.matches:
             ds = xr.open_dataset(
                 file,
                 decode_times=decode_times,
-                decode_timedelta=decode_times,
+                decode_timedelta=decode_timedelta,
                 chunks=None,
             )
             ds_list.append(ds)

-        if kwargs["combine"] == "by_coords":
+        combine_kwargs = _get_ds_combine_base_params()
+
+        if load_kwargs["combine"] == "by_coords":
             ds = xr.combine_by_coords(ds_list, **combine_kwargs)
-        elif kwargs["combine"] == "nested":
+        elif load_kwargs["combine"] == "nested":
             ds = xr.combine_nested(
-                ds_list, concat_dim=kwargs["concat_dim"], **combine_kwargs
+                ds_list, concat_dim=load_kwargs["concat_dim"], **combine_kwargs
             )

+    if ds is None:
+        msg = "A dataset was not loaded."
+        raise RuntimeError(msg)
+
     if "time" in dim_names and dim_names["time"] not in ds.dims:
         ds = ds.expand_dims(dim_names["time"])

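
With this hunk the old monolithic `_load_data` is fully replaced: validation moves into the `_check_load_data_*` helpers, Dask-backed reads into `_load_data_dask`, and the public `load_data` orchestrates them. A hedged usage sketch (the file pattern and dimension names below are placeholders, not data shipped with the package):

    from roms_tools.utils import load_data

    # A wildcard string is expanded by _get_file_matches; because a wildcard was
    # used, the matches are combined with xarray's combine="by_coords" strategy,
    # and use_dask=True routes the call through _load_data_dask / open_mfdataset.
    ds = load_data(
        "/data/glorys/GLORYS_2012_*.nc",  # hypothetical paths
        dim_names={"latitude": "latitude", "longitude": "longitude", "time": "time"},
        use_dask=True,
    )
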
@@ -366,7 +626,44 @@ def save_datasets(dataset_list, output_filenames, use_dask=False, verbose=True):
     List[Path]
         A list of Path objects for the filenames that were saved.
     """
+
+    def _patch_1d_encodings(dataset_list: list[xr.Dataset]) -> None:
+        """Replaces problematic encodings in 1D variables.
+
+        ROMS' Fortran-based tools fail with certain encoding types that are common
+        in roms-tools' exported 1D vars (e.g. `abs_time`, `river_name`). This function
+        replaces int64 -> int32 (for true integers), int64 -> float64
+        (for non-integer vars encoded as int64 on disk), and NC_STRING -> char.
+
+        Parameters
+        ----------
+        dataset_list: list[xr.Dataset]
+            List of datasets to be saved
+
+        """
+        for ds in dataset_list:
+            for name in ds.variables:
+                da = ds[name]
+                if da.ndim != 1:
+                    continue
+
+                enc_var = xr.conventions.encode_cf_variable(da.variable, name=name)
+                enc_dtype = enc_var.dtype
+
+                # NC_STRING → fixed-width char
+                if enc_dtype.kind in ("O", "U", "S"):
+                    da.encoding["dtype"] = "S1"
+                    continue
+
+                # NC_INT64 → int32 for true integers; float64 otherwise
+                if enc_dtype == np.int64:
+                    if da.dtype.kind in ("i", "u"):
+                        da.encoding["dtype"] = "int32"
+                    else:
+                        da.encoding["dtype"] = "float64"
+
     saved_filenames = []
+    _patch_1d_encodings(dataset_list)

     output_filenames = [f"{filename}.nc" for filename in output_filenames]
     if verbose:
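
The `_patch_1d_encodings` helper runs automatically inside `save_datasets`; the sketch below only illustrates the dtype decisions it makes, using invented variable names:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {
            # integer-valued 1D variable: would be written as int32 instead of int64
            "river_index": ("nriver", np.array([1, 2, 3], dtype=np.int64)),
            # string-valued 1D variable: would be written as fixed-width chars ("S1")
            "river_name": ("nriver", np.array(["Elbe", "Rhine", "Po"])),
        }
    )
    # Passing [ds] through save_datasets(...) sets ds["river_index"].encoding["dtype"]
    # to "int32" and ds["river_name"].encoding["dtype"] to "S1" before writing.
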
@@ -387,30 +684,7 @@ def save_datasets(dataset_list, output_filenames, use_dask=False, verbose=True):
     return saved_filenames


-def get_dask_chunks(location, chunk_size):
-    """Returns the appropriate Dask chunking dictionary based on grid location.
-
-    Parameters
-    ----------
-    location : str
-        The grid location, one of "rho", "u", or "v".
-    chunk_size : int
-        The chunk size to apply.
-
-    Returns
-    -------
-    dict
-        Dictionary specifying the chunking strategy.
-    """
-    chunk_mapping = {
-        "rho": {"eta_rho": chunk_size, "xi_rho": chunk_size},
-        "u": {"eta_rho": chunk_size, "xi_u": chunk_size},
-        "v": {"eta_v": chunk_size, "xi_rho": chunk_size},
-    }
-    return chunk_mapping.get(location, {})
-
-
-def _generate_coordinate_range(min_val: float, max_val: float, resolution: float):
+def generate_coordinate_range(min_val: float, max_val: float, resolution: float):
     """Generate an array of target coordinates (e.g., latitude or longitude) within a
     specified range, with a resolution that is rounded to the nearest value of the form
     `1/n` (or integer).
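
Note that the old location-based `get_dask_chunks(location, chunk_size)` is removed (its name is reused by the new dimension-based helper above) and `_generate_coordinate_range` becomes public. Code that imported the underscore name would need the new spelling:

    # roms-tools 3.1.1
    from roms_tools.utils import _generate_coordinate_range
    # roms-tools 3.2.0
    from roms_tools.utils import generate_coordinate_range
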
@@ -472,7 +746,7 @@ def _generate_coordinate_range(min_val: float, max_val: float, resolution: float
     return target.astype(np.float32)


-def _generate_focused_coordinate_range(
+def generate_focused_coordinate_range(
     center: float,
     sc: float,
     min_val: float,
@@ -558,7 +832,7 @@ def _generate_focused_coordinate_range(
     return centers, faces


-def _remove_edge_nans(
+def remove_edge_nans(
     field: xr.DataArray, xdim: str, layer_depth: xr.DataArray | None = None
 ) -> tuple[xr.DataArray, xr.DataArray | None]:
     """Remove NaN-only slices at the edges of a specified dimension.
@@ -634,14 +908,42 @@ def _remove_edge_nans(
     return field, layer_depth


-def _has_dask() -> bool:
+def has_dask() -> bool:
+    """Determine if the Dask package is installed.
+
+    Returns
+    -------
+    bool
+        `True` if package is found, `False` otherwise.
+
+    """
     return find_spec("dask") is not None


-def _has_gcsfs() -> bool:
+def has_gcsfs() -> bool:
+    """Determine if the GCSFS package is installed.
+
+    Returns
+    -------
+    bool
+        `True` if package is found, `False` otherwise.
+
+    """
     return find_spec("gcsfs") is not None


+def has_copernicus() -> bool:
+    """Determine if the Copernicus Marine Toolkit package is installed.
+
+    Returns
+    -------
+    bool
+        `True` if package is found, `False` otherwise.
+
+    """
+    return find_spec("copernicusmarine") is not None
+
+
 def normalize_longitude(lon: float, straddle: bool) -> float:
     """Normalize longitude to the appropriate range depending on whether the grid
     straddles the dateline.
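
The `has_*` helpers only probe `importlib.util.find_spec`, so they are cheap guards to call before importing an optional dependency. A sketch of the intended pattern (the `open_remote_store` function is invented for illustration):

    from roms_tools.utils import has_gcsfs

    def open_remote_store(url: str):
        # Check for the optional dependency before importing it lazily.
        if not has_gcsfs():
            raise RuntimeError("gcsfs is required to read from Google Cloud Storage.")
        import gcsfs

        return gcsfs.GCSFileSystem(token="anon").open(url)
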
@@ -704,3 +1006,28 @@ def infer_nominal_horizontal_resolution(
     resolution_in_degrees = resolution_in_m / (meters_per_degree * np.cos(lat_rad))

     return float(resolution_in_degrees)
+
+
+def get_pkg_error_msg(purpose: str, package_name: str, option_name: str) -> str:
+    """Generate an error message indicating how to install an optional dependency.
+
+    Parameters
+    ----------
+    purpose : str
+        Description of the feature the package enables.
+    package_name : str
+        The package name
+    option_name : str
+        The optional dependency containing the package
+
+    Returns
+    -------
+    str
+        The formatted error message
+    """
+    return textwrap.dedent(f"""\
+        To use {purpose}, {package_name} is required but not installed. Install it with:
+        • `pip install roms-tools[{option_name}]` or
+        • `pip install {package_name}` or
+        • `conda install {package_name}`
+        Alternatively, install `roms-tools` with conda to include all dependencies.""")
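
`get_pkg_error_msg` produces the same style of install hint that `_check_load_data_dask` hard-codes, but parameterized for any optional dependency. A usage sketch (the `purpose` wording and the "download" extras name are assumptions, not confirmed by this diff):

    from roms_tools.utils import get_pkg_error_msg, has_copernicus

    if not has_copernicus():
        raise RuntimeError(
            get_pkg_error_msg(
                purpose="GLORYS downloads",
                package_name="copernicusmarine",
                option_name="download",
            )
        )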