roms-tools 3.1.1-py3-none-any.whl → 3.1.2-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- roms_tools/__init__.py +5 -1
- roms_tools/plot.py +56 -9
- roms_tools/regrid.py +6 -1
- roms_tools/setup/boundary_forcing.py +55 -30
- roms_tools/setup/cdr_forcing.py +1 -7
- roms_tools/setup/datasets.py +96 -14
- roms_tools/setup/grid.py +29 -2
- roms_tools/setup/surface_forcing.py +12 -4
- roms_tools/tests/test_setup/test_boundary_forcing.py +57 -0
- roms_tools/tests/test_setup/test_datasets.py +76 -0
- roms_tools/tests/test_setup/test_grid.py +16 -6
- roms_tools/tests/test_setup/test_surface_forcing.py +26 -2
- roms_tools/tests/test_setup/test_validation.py +21 -15
- roms_tools/tests/test_tiling/test_partition.py +45 -0
- roms_tools/tests/test_utils.py +101 -1
- roms_tools/tiling/partition.py +44 -30
- roms_tools/utils.py +426 -131
- {roms_tools-3.1.1.dist-info → roms_tools-3.1.2.dist-info}/METADATA +4 -3
- {roms_tools-3.1.1.dist-info → roms_tools-3.1.2.dist-info}/RECORD +22 -22
- {roms_tools-3.1.1.dist-info → roms_tools-3.1.2.dist-info}/WHEEL +0 -0
- {roms_tools-3.1.1.dist-info → roms_tools-3.1.2.dist-info}/licenses/LICENSE +0 -0
- {roms_tools-3.1.1.dist-info → roms_tools-3.1.2.dist-info}/top_level.txt +0 -0
roms_tools/utils.py
CHANGED
@@ -1,26 +1,146 @@
 import glob
 import logging
 import re
+import textwrap
 import warnings
+from collections.abc import Callable, Iterable
 from importlib.util import find_spec
 from pathlib import Path
 
 import numpy as np
 import xarray as xr
+from attr import dataclass
 
 from roms_tools.constants import R_EARTH
 
 
-… (10 removed lines not shown in this view)
+@dataclass
+class FileMatchResult:
+    """The result of performing a wildcard search."""
+
+    contains_wildcard: bool
+    """Return `True` if the search contained a wildcard."""
+    matches: list[str]
+    """The items matching the wildcard search."""
+
+
+def _get_file_matches(
+    filename: str | Path | list[str | Path],
+) -> FileMatchResult:
+    """Filter the filename using an optional wildcard search in the filename.
+
+    Parameters
+    ----------
+    filename : str or Path or list of str or Path
+        An item to search for matches.
+    """
+    # Precompile the regex for matching wildcard characters
+    wildcard_regex = re.compile(r"[\*\?\[\]]")
+
+    # Convert Path objects to strings
+    if isinstance(filename, str | Path):
+        filename_str = str(filename)
+    elif isinstance(filename, list):
+        filename_str = [str(f) for f in filename]
+    else:
+        msg = "filename must be a string, Path, or a list of strings/Paths."
+        raise ValueError(msg)
+
+    # Handle the case when filename is a string
+    contains_wildcard = False
+    matching_files = []
+
+    if isinstance(filename_str, str):
+        contains_wildcard = bool(wildcard_regex.search(filename_str))
+        if contains_wildcard:
+            matching_files = glob.glob(filename_str)
+            if not matching_files:
+                msg = f"No files found matching the pattern '{filename_str}'."
+                raise FileNotFoundError(msg)
+        else:
+            matching_files = [filename_str]
+
+    # Handle the case when filename is a list
+    elif isinstance(filename_str, list):
+        # contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
+        if contains_wildcard := any(wildcard_regex.search(f) for f in filename_str):
+            matching_files = []
+            for f in filename_str:
+                files = glob.glob(f)
+                if not files:
+                    msg = f"No files found matching the pattern '{f}'."
+                    raise FileNotFoundError(msg)
+                matching_files.extend(files)
+        else:
+            matching_files = filename_str
+
+    # Sort the matching files
+    return FileMatchResult(
+        contains_wildcard=contains_wildcard,
+        matches=sorted(matching_files),
+    )
+
+
+def _get_ds_combination_params(
+    force_combine_nested: bool,
+    dim_names: dict[str, str],
+    match_result: FileMatchResult,
+) -> dict[str, str]:
+    """Determine the non-base parameters for combining datasets.
+
+    Parameters
+    ----------
+    force_combine_nested: bool, optional
+        If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
+        Defaults to False.
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+        Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
+        For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
+    match_result : FileMatchResult
+        The result of an optional wildcard search of dataset filename(s).
+
+    Returns
+    -------
+    dict[str, str]
+        The default dataset combination parameters
+
+    """
+    if force_combine_nested:
+        load_kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
+    elif match_result.contains_wildcard or len(match_result.matches) == 1:
+        load_kwargs = {"combine": "by_coords"}
+    else:
+        load_kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
+
+    return load_kwargs
+
+
+def _get_ds_combine_base_params() -> dict[str, str]:
+    """Return the base parameters used when combining an xr.Dataset.
+
+    Returns
+    -------
+    dict[str, str]
+        The default dataset combination parameters
+
+    """
+    return {
+        "coords": "minimal",
+        "compat": "override",
+        "combine_attrs": "override",
+    }
+
+
+def _load_data_dask(
+    filenames: list[str],
+    dim_names: dict[str, str],
+    time_chunking: bool = True,
+    decode_times: bool = True,
+    read_zarr: bool = True,
+    load_kwargs: dict[str, str] | None = None,
+) -> xr.Dataset:
+    """Load dataset from the specified file using Dask.
 
     Parameters
     ----------
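A minimal usage sketch (not part of the diff) of the new wildcard resolution above; the temporary directory and file names are invented for illustration, and `_get_file_matches` is a private helper, so treat this as illustrative rather than a supported API:

import tempfile
from pathlib import Path

from roms_tools.utils import _get_file_matches

with tempfile.TemporaryDirectory() as tmp:
    for name in ("forcing_2020.nc", "forcing_2021.nc"):
        (Path(tmp) / name).touch()  # create two empty files to match against

    # A plain path (no wildcard) passes through untouched as a single match.
    single = _get_file_matches(Path(tmp) / "forcing_2020.nc")
    assert not single.contains_wildcard and len(single.matches) == 1

    # A glob pattern expands to the sorted list of matching files;
    # an unmatched pattern raises FileNotFoundError instead.
    globbed = _get_file_matches(str(Path(tmp) / "forcing_*.nc"))
    assert globbed.contains_wildcard and len(globbed.matches) == 2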
@@ -31,8 +151,6 @@ def _load_data(
         Dictionary specifying the names of dimensions in the dataset.
         Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
         For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
-    use_dask: bool
-        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
     time_chunking : bool, optional
         If True and `use_dask=True`, the data will be chunked along the time dimension with a chunk size of 1.
         If False, the data will not be chunked explicitly along the time dimension, but will follow the default auto chunking scheme. This option is useful for ROMS restart files.
@@ -40,9 +158,6 @@ def _load_data(
     decode_times: bool, optional
         If True, decode times and timedeltas encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
         Defaults to True.
-    force_combine_nested: bool, optional
-        If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
-        Defaults to False.
     read_zarr: bool, optional
         If True, use the zarr engine to read the dataset, and don't use mfdataset.
         Defaults to False.
@@ -58,140 +173,224 @@ def _load_data(
         If the specified file does not exist.
     ValueError
         If a list of files is provided but dim_names["time"] is not available or use_dask=False.
+
     """
-    if dim_names …
-… (1 removed line not shown in this view)
+    if "latitude" in dim_names and "longitude" in dim_names:
+        # for lat-lon datasets
+        chunks = {
+            dim_names["latitude"]: -1,
+            dim_names["longitude"]: -1,
+        }
+    else:
+        # For ROMS datasets
+        chunks = {
+            "eta_rho": -1,
+            "eta_v": -1,
+            "xi_rho": -1,
+            "xi_u": -1,
+            "s_rho": -1,
+        }
+
+    if "depth" in dim_names:
+        chunks[dim_names["depth"]] = -1
+    if "time" in dim_names and time_chunking:
+        chunks[dim_names["time"]] = 1
+    if "ntides" in dim_names:
+        chunks[dim_names["ntides"]] = 1
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            category=UserWarning,
+            message=r"^The specified chunks separate.*",
+        )
 
-… (7 removed lines not shown in this view)
+        if read_zarr:
+            return xr.open_zarr(
+                filenames[0],
+                decode_times=decode_times,
+                chunks=chunks,
+                consolidated=None,
+                storage_options={"token": "anon"},
             )
+
+        kwargs = {**_get_ds_combine_base_params(), **(load_kwargs or {})}
+        return xr.open_mfdataset(
+            filenames,
+            decode_times=decode_times,
+            decode_timedelta=decode_times,
+            chunks=chunks,
+            **kwargs,
+        )
+
+
+def _check_load_data_dask(use_dask: bool) -> None:
+    """Determine if dask is installed.
+
+    Parameters
+    ----------
+    use_dask: bool
+        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+
+    Raises
+    ------
+    RuntimeError
+        If dask is requested but not installed.
+    """
+    if use_dask and not _has_dask():
+        msg = (
+            "Dask is required but not installed. Install it with:\n"
+            " • `pip install roms-tools[dask]` or\n"
+            " • `conda install dask`\n"
+            "Alternatively, install `roms-tools` with conda to include all dependencies."
+        )
+        raise RuntimeError(msg)
+
+
+def _check_load_data_zarr(
+    use_dask: bool, read_zarr: bool, filename: str | Path | list[str | Path]
+) -> None:
+    """Determine if zarr streaming will conflict with the current request configuration.
+
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    use_dask: bool
+        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+    read_zarr: bool, optional
+        If True, use the zarr engine to read the dataset, and don't use mfdataset.
+        Defaults to False.
+
+    Raises
+    ------
+    RuntimeError
+        If read_zarr is requested, but:
+        - the request doesn't specify a dependency on dask
+        - the request includes a list of filenames
+
+    """
     if read_zarr:
         if isinstance(filename, list):
-… (2 removed lines not shown in this view)
-            raise ValueError("read_zarr must be used with use_dask")
+            msg = "read_zarr requires a single path, not a list of paths"
+            raise ValueError(msg)
 
-… (2 removed lines not shown in this view)
+        if not use_dask:
+            msg = "read_zarr must be used with use_dask"
+            raise ValueError(msg)
 
-    # Convert Path objects to strings
-    if isinstance(filename, str | Path):
-        filename_str = str(filename)
-    elif isinstance(filename, list):
-        filename_str = [str(f) for f in filename]
-    else:
-        raise ValueError("filename must be a string, Path, or a list of strings/Paths.")
 
-… (4 removed lines not shown in this view)
-        if contains_wildcard:
-            matching_files = glob.glob(filename_str)
-            if not matching_files:
-                raise FileNotFoundError(
-                    f"No files found matching the pattern '{filename_str}'."
-                )
-        else:
-            matching_files = [filename_str]
+def _check_load_data_filename(
+    filename: str | Path | list[str | Path], dim_names: Iterable[str]
+) -> None:
+    """Determine if time dimension is available when multiple files are provided.
 
-… (9 removed lines not shown in this view)
-                    f"No files found matching the pattern '{f}'."
-                )
-            matching_files.extend(files)
-    else:
-        matching_files = filename_str
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+        Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
+        For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
 
-… (2 removed lines not shown in this view)
+    Raises
+    ------
+    ValueError
+        If time dimension is not found and a list of files is provided.
 
-… (1 removed line not shown in this view)
-    if isinstance( …
-… (1 removed line not shown in this view)
+    """
+    if isinstance(filename, list) and "time" not in dim_names:
+        msg = (
             "A list of files is provided, but time dimension is not available. "
             "A time dimension must be available to concatenate the files."
         )
+        raise ValueError(msg)
 
-    # Determine the kwargs for combining datasets
-    if force_combine_nested:
-        kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
-    elif contains_wildcard or len(matching_files) == 1:
-        kwargs = {"combine": "by_coords"}
-    else:
-        kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
 
-… (6 removed lines not shown in this view)
+def _load_data(
+    filename: str | Path | list[str | Path],
+    dim_names: dict[str, str],
+    use_dask: bool,
+    time_chunking: bool = True,
+    decode_times: bool = True,
+    force_combine_nested: bool = False,
+    read_zarr: bool = False,
+    ds_loader_fn: Callable[[], xr.Dataset] | None = None,
+):
+    """Load dataset from the specified file.
 
-… (24 removed lines not shown in this view)
-    with warnings.catch_warnings():
-        warnings.filterwarnings(
-            "ignore",
-            category=UserWarning,
-            message=r"^The specified chunks separate.*",
-        )
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+        Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
+        For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
+    use_dask: bool
+        Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+    time_chunking : bool, optional
+        If True and `use_dask=True`, the data will be chunked along the time dimension with a chunk size of 1.
+        If False, the data will not be chunked explicitly along the time dimension, but will follow the default auto chunking scheme. This option is useful for ROMS restart files.
+        Defaults to True.
+    decode_times: bool, optional
+        If True, decode times and timedeltas encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
+        Defaults to True.
+    force_combine_nested: bool, optional
+        If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
+        Defaults to False.
+    read_zarr: bool, optional
+        If True, use the zarr engine to read the dataset, and don't use mfdataset.
+        Defaults to False.
 
-… (17 removed lines not shown in this view)
+    Returns
+    -------
+    ds : xr.Dataset
+        The loaded xarray Dataset containing the forcing data.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the specified file does not exist.
+    ValueError
+        If a list of files is provided but dim_names["time"] is not available or use_dask=False.
+    RuntimeError
+        If loading the dataset fails
+    """
+    dim_names = dim_names or {}
+
+    _check_load_data_dask(use_dask)
+    _check_load_data_zarr(use_dask, read_zarr, filename)
+    _check_load_data_filename(filename, dim_names.keys())
+
+    match_result = _get_file_matches(filename)
+
+    load_kwargs = _get_ds_combination_params(
+        force_combine_nested,
+        dim_names,
+        match_result,
+    )
 
+    ds: xr.Dataset | xr.DataArray | None = None
+
+    if ds_loader_fn is not None:
+        ds = ds_loader_fn()
+    elif use_dask:
+        ds = _load_data_dask(
+            match_result.matches,
+            dim_names,
+            time_chunking,
+            decode_times,
+            read_zarr,
+            load_kwargs,
+        )
     else:
         ds_list = []
-        for file in …
+        for file in match_result.matches:
             ds = xr.open_dataset(
                 file,
                 decode_times=decode_times,
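A sketch (not part of the diff) of the refactored `_load_data` entry point defined above. The new `ds_loader_fn` hook injects a dataset directly, which keeps the example self-contained: the filename below is a dummy that is validated but never opened, the dataset is synthetic, and later post-processing inside `_load_data` is unchanged by this release, so treat the call as illustrative:

import numpy as np
import xarray as xr

from roms_tools.utils import _load_data

synthetic = xr.Dataset(
    {"temp": (("time", "latitude", "longitude"), np.zeros((1, 2, 2)))},
    coords={"time": [0], "latitude": [0.0, 1.0], "longitude": [0.0, 1.0]},
)

ds = _load_data(
    filename="dummy.nc",  # passes the checks; the injected loader below wins
    dim_names={"time": "time", "latitude": "latitude", "longitude": "longitude"},
    use_dask=False,
    ds_loader_fn=lambda: synthetic,  # bypasses open_dataset/open_mfdataset
)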
@@ -200,13 +399,19 @@ def _load_data(
             )
             ds_list.append(ds)
 
-… (1 removed line not shown in this view)
+        combine_kwargs = _get_ds_combine_base_params()
+
+        if load_kwargs["combine"] == "by_coords":
             ds = xr.combine_by_coords(ds_list, **combine_kwargs)
-        elif …
+        elif load_kwargs["combine"] == "nested":
             ds = xr.combine_nested(
-                ds_list, concat_dim= …
+                ds_list, concat_dim=load_kwargs["concat_dim"], **combine_kwargs
             )
 
+    if ds is None:
+        msg = "A dataset was not loaded."
+        raise RuntimeError(msg)
+
     if "time" in dim_names and dim_names["time"] not in ds.dims:
         ds = ds.expand_dims(dim_names["time"])
 
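For reference, the combine strategy consumed in the eager branch above is chosen by `_get_ds_combination_params` (added earlier in this file): wildcard searches and single files combine `by_coords`, while explicit multi-file lists concatenate along the time dimension via `combine_nested`. A small illustration against the code as shown, with made-up file names:

from roms_tools.utils import FileMatchResult, _get_ds_combination_params

dims = {"time": "time"}

# Explicit two-file list, no wildcard -> nested concatenation along time.
two_files = FileMatchResult(contains_wildcard=False, matches=["a.nc", "b.nc"])
assert _get_ds_combination_params(False, dims, two_files) == {
    "combine": "nested",
    "concat_dim": "time",
}

# Wildcard search -> let xarray align the pieces by coordinates.
globbed = FileMatchResult(contains_wildcard=True, matches=["a.nc", "b.nc"])
assert _get_ds_combination_params(False, dims, globbed) == {"combine": "by_coords"}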
@@ -366,7 +571,44 @@ def save_datasets(dataset_list, output_filenames, use_dask=False, verbose=True):
     List[Path]
         A list of Path objects for the filenames that were saved.
     """
+
+    def _patch_1d_encodings(dataset_list: list[xr.Dataset]) -> None:
+        """Replaces problematic encodings in 1D variables.
+
+        ROMS' Fortran-based tools fail with certain encoding types that are common
+        in roms-tools' exported 1D vars (e.g. `abs_time`, `river_name`). This function
+        replaces int64 -> int32 (for true integers), int64 -> float64
+        (for non-integer vars encoded as int64 on disk), and NC_STRING -> char.
+
+        Parameters
+        ----------
+        dataset_list: list[xr.Dataset]
+            List of datasets to be saved
+
+        """
+        for ds in dataset_list:
+            for name in ds.variables:
+                da = ds[name]
+                if da.ndim != 1:
+                    continue
+
+                enc_var = xr.conventions.encode_cf_variable(da.variable, name=name)
+                enc_dtype = enc_var.dtype
+
+                # NC_STRING → fixed-width char
+                if enc_dtype.kind in ("O", "U", "S"):
+                    da.encoding["dtype"] = "S1"
+                    continue
+
+                # NC_INT64 → int32 for true integers; float64 otherwise
+                if enc_dtype == np.int64:
+                    if da.dtype.kind in ("i", "u"):
+                        da.encoding["dtype"] = "int32"
+                    else:
+                        da.encoding["dtype"] = "float64"
+
     saved_filenames = []
+    _patch_1d_encodings(dataset_list)
 
     output_filenames = [f"{filename}.nc" for filename in output_filenames]
     if verbose:
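`_patch_1d_encodings` is nested inside `save_datasets` and not importable on its own; the sketch below reproduces its dtype probe on made-up 1D variables to show which encodings the patch would rewrite:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "river_index": ("nriver", np.array([1, 2])),           # int64 in memory
        "river_name": ("nriver", np.array(["Amazon", "Po"])),  # NC_STRING by default
    }
)

enc = xr.conventions.encode_cf_variable(ds["river_name"].variable, name="river_name")
print(enc.dtype.kind)  # 'U'/'O'/'S' -> the patch sets encoding["dtype"] = "S1"

enc = xr.conventions.encode_cf_variable(ds["river_index"].variable, name="river_index")
print(enc.dtype)  # int64 holding true integers -> encoding["dtype"] = "int32"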
@@ -635,13 +877,41 @@ def _remove_edge_nans(
 
 
 def _has_dask() -> bool:
+    """Determine if the Dask package is installed.
+
+    Returns
+    -------
+    bool
+        `True` if package is found, `False` otherwise.
+
+    """
     return find_spec("dask") is not None
 
 
 def _has_gcsfs() -> bool:
+    """Determine if the GCSFS package is installed.
+
+    Returns
+    -------
+    bool
+        `True` if package is found, `False` otherwise.
+
+    """
     return find_spec("gcsfs") is not None
 
 
+def _has_copernicus() -> bool:
+    """Determine if the Copernicus Marine Toolkit package is installed.
+
+    Returns
+    -------
+    bool
+        `True` if package is found, `False` otherwise.
+
+    """
+    return find_spec("copernicusmarine") is not None
+
+
 def normalize_longitude(lon: float, straddle: bool) -> float:
     """Normalize longitude to the appropriate range depending on whether the grid
     straddles the dateline.
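All three probes use the same pattern: `importlib.util.find_spec` reports importability without actually importing the package. A quick, illustrative way to see which optional backends are present:

from roms_tools.utils import _has_copernicus, _has_dask, _has_gcsfs

for name, probe in [
    ("dask", _has_dask),
    ("gcsfs", _has_gcsfs),
    ("copernicusmarine", _has_copernicus),
]:
    print(f"{name}: {'available' if probe() else 'missing'}")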
@@ -704,3 +974,28 @@ def infer_nominal_horizontal_resolution(
     resolution_in_degrees = resolution_in_m / (meters_per_degree * np.cos(lat_rad))
 
     return float(resolution_in_degrees)
+
+
+def _get_pkg_error_msg(purpose: str, package_name: str, option_name: str) -> str:
+    """Generate an error message indicating how to install an optional dependency.
+
+    Parameters
+    ----------
+    purpose : str
+        Description of the feature the package enables.
+    package_name : str
+        The package name
+    option_name : str
+        The optional dependency containing the package
+
+    Returns
+    -------
+    str
+        The formatted error message
+    """
+    return textwrap.dedent(f"""\
+        To use {purpose}, {package_name} is required but not installed. Install it with:
+          • `pip install roms-tools[{option_name}]` or
+          • `pip install {package_name}` or
+          • `conda install {package_name}`
+        Alternatively, install `roms-tools` with conda to include all dependencies.""")
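What `_get_pkg_error_msg` renders, with illustrative argument values (the `purpose` string is made up; exact whitespace follows the dedented template above):

from roms_tools.utils import _get_pkg_error_msg

print(_get_pkg_error_msg("data streaming", "copernicusmarine", "stream"))
# To use data streaming, copernicusmarine is required but not installed. Install it with:
#   • `pip install roms-tools[stream]` or
#   • `pip install copernicusmarine` or
#   • `conda install copernicusmarine`
# Alternatively, install `roms-tools` with conda to include all dependencies.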
{roms_tools-3.1.1.dist-info → roms_tools-3.1.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: roms-tools
-Version: 3.1.1
+Version: 3.1.2
 Summary: Tools for running and analysing UCLA-ROMS simulations
 Author-email: Nora Loose <nora.loose@gmail.com>, Thomas Nicholas <tom@cworthy.org>, Scott Eilerman <scott.eilerman@cworthy.org>
 License: Apache-2
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: xarray
+Requires-Dist: xarray<2025.8.0,>=2022.6.0
 Requires-Dist: numpy<2.3,>2.0
 Requires-Dist: pooch
 Requires-Dist: matplotlib
@@ -31,7 +31,7 @@ Requires-Dist: PyYAML
 Requires-Dist: pyamg
 Requires-Dist: bottleneck
 Requires-Dist: regionmask
-Requires-Dist: xgcm
+Requires-Dist: xgcm>=0.9.0
 Requires-Dist: numba>=0.61.2
 Requires-Dist: pydantic<3,>2
 Provides-Extra: dask
@@ -41,6 +41,7 @@ Provides-Extra: stream
 Requires-Dist: dask[diagnostics]; extra == "stream"
 Requires-Dist: gcsfs; extra == "stream"
 Requires-Dist: zarr; extra == "stream"
+Requires-Dist: copernicusmarine; extra == "stream"
 Dynamic: license-file
 
 # ROMS-Tools
|