roms-tools 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff shows the contents of publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
- roms_tools/__init__.py +8 -1
- roms_tools/analysis/cdr_analysis.py +203 -0
- roms_tools/analysis/cdr_ensemble.py +198 -0
- roms_tools/analysis/roms_output.py +80 -46
- roms_tools/data/grids/GLORYS_global_grid.nc +0 -0
- roms_tools/download.py +4 -0
- roms_tools/plot.py +131 -30
- roms_tools/regrid.py +6 -1
- roms_tools/setup/boundary_forcing.py +94 -44
- roms_tools/setup/cdr_forcing.py +123 -15
- roms_tools/setup/cdr_release.py +161 -8
- roms_tools/setup/datasets.py +709 -341
- roms_tools/setup/grid.py +167 -139
- roms_tools/setup/initial_conditions.py +113 -48
- roms_tools/setup/mask.py +63 -7
- roms_tools/setup/nesting.py +67 -42
- roms_tools/setup/river_forcing.py +45 -19
- roms_tools/setup/surface_forcing.py +16 -10
- roms_tools/setup/tides.py +1 -2
- roms_tools/setup/topography.py +4 -4
- roms_tools/setup/utils.py +134 -22
- roms_tools/tests/test_analysis/test_cdr_analysis.py +144 -0
- roms_tools/tests/test_analysis/test_cdr_ensemble.py +202 -0
- roms_tools/tests/test_analysis/test_roms_output.py +61 -3
- roms_tools/tests/test_setup/test_boundary_forcing.py +111 -52
- roms_tools/tests/test_setup/test_cdr_forcing.py +54 -0
- roms_tools/tests/test_setup/test_cdr_release.py +118 -1
- roms_tools/tests/test_setup/test_datasets.py +458 -34
- roms_tools/tests/test_setup/test_grid.py +238 -121
- roms_tools/tests/test_setup/test_initial_conditions.py +94 -41
- roms_tools/tests/test_setup/test_surface_forcing.py +28 -3
- roms_tools/tests/test_setup/test_utils.py +91 -1
- roms_tools/tests/test_setup/test_validation.py +21 -15
- roms_tools/tests/test_setup/utils.py +71 -0
- roms_tools/tests/test_tiling/test_join.py +241 -0
- roms_tools/tests/test_tiling/test_partition.py +45 -0
- roms_tools/tests/test_utils.py +224 -2
- roms_tools/tiling/join.py +189 -0
- roms_tools/tiling/partition.py +44 -30
- roms_tools/utils.py +488 -161
- {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/METADATA +15 -4
- {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/RECORD +45 -37
- {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/WHEEL +0 -0
- {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/licenses/LICENSE +0 -0
- {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/top_level.txt +0 -0
roms_tools/tests/test_utils.py
CHANGED
@@ -1,7 +1,20 @@
+from collections.abc import Callable
+from pathlib import Path
+from unittest import mock
+
 import numpy as np
 import pytest
+import xarray as xr
 
-from roms_tools.utils import _generate_focused_coordinate_range
+from roms_tools.utils import (
+    _path_list_from_input,
+    generate_focused_coordinate_range,
+    get_dask_chunks,
+    has_copernicus,
+    has_dask,
+    has_gcsfs,
+    load_data,
+)
 
 
 @pytest.mark.parametrize(
@@ -14,8 +27,217 @@ from roms_tools.utils import _generate_focused_coordinate_range
     ],
 )
 def test_coordinate_range_monotonicity(min_val, max_val, center, sc, N):
-    centers, faces = _generate_focused_coordinate_range(
+    centers, faces = generate_focused_coordinate_range(
         min_val=min_val, max_val=max_val, center=center, sc=sc, N=N
     )
     assert np.all(np.diff(faces) > 0), "faces is not strictly increasing"
     assert np.all(np.diff(centers) > 0), "centers is not strictly increasing"
+
+
+class TestPathListFromInput:
+    """A collection of tests for the _path_list_from_input function."""
+
+    # Test cases that don't require I/O
+    def test_list_of_strings(self):
+        """Test with a list of file paths as strings."""
+        files_list = ["path/to/file1.txt", "path/to/file2.txt"]
+        result = _path_list_from_input(files_list)
+        assert len(result) == 2
+        assert result[0] == Path("path/to/file1.txt")
+        assert result[1] == Path("path/to/file2.txt")
+
+    def test_list_of_path_objects(self):
+        """Test with a list of pathlib.Path objects."""
+        files_list = [Path("file_a.txt"), Path("file_b.txt")]
+        result = _path_list_from_input(files_list)
+        assert len(result) == 2
+        assert result[0] == Path("file_a.txt")
+        assert result[1] == Path("file_b.txt")
+
+    def test_single_path_object(self):
+        """Test with a single pathlib.Path object."""
+        file_path = Path("a_single_file.csv")
+        result = _path_list_from_input(file_path)
+        assert len(result) == 1
+        assert result[0] == file_path
+
+    def test_invalid_input_type_raises(self):
+        """Test that an invalid input type raises a TypeError."""
+        with pytest.raises(TypeError, match="'files' should be str, Path, or List"):
+            _path_list_from_input(123)
+
+    # Test cases that require I/O and `tmp_path`
+    def test_single_file_as_str(self, tmp_path):
+        """Test with a single file given as a string, requiring a file to exist."""
+        p = tmp_path / "test_file.txt"
+        p.touch()
+        result = _path_list_from_input(str(p))
+        assert len(result) == 1
+        assert result[0] == p
+
+    def test_wildcard_pattern(self, tmp_path, monkeypatch):
+        """Test with a wildcard pattern, requiring files to exist, using monkeypatch."""
+        # Setup
+        d = tmp_path / "data"
+        d.mkdir()
+        (d / "file1.csv").touch()
+        (d / "file2.csv").touch()
+        (d / "other_file.txt").touch()
+
+        # Action: Temporarily change the current working directory
+        monkeypatch.chdir(tmp_path)
+
+        result = _path_list_from_input("data/*.csv")
+
+        # Assertion
+        assert len(result) == 2
+        assert result[0].name == "file1.csv"
+        assert result[1].name == "file2.csv"
+
+    def test_non_matching_pattern_raises(self, tmp_path):
+        """Test that a non-matching pattern raises a FileNotFoundError."""
+        with pytest.raises(FileNotFoundError, match="No files matched"):
+            _path_list_from_input(str(tmp_path / "non_existent_file_*.txt"))
+
+
+def test_has_dask() -> None:
+    """Verify that dask existence is correctly reported when found."""
+    with mock.patch("roms_tools.utils.find_spec", return_value=mock.MagicMock):
+        assert has_dask()
+
+
+def test_has_dask_error_when_missing() -> None:
+    """Verify that dask existence is correctly reported when not found."""
+    with mock.patch("roms_tools.utils.find_spec", return_value=None):
+        assert not has_dask()
+
+
+def test_has_gcfs() -> None:
+    """Verify that GCFS existence is correctly reported when found."""
+    with mock.patch("roms_tools.utils.find_spec", return_value=mock.MagicMock):
+        assert has_gcsfs()
+
+
+def test_has_gcfs_error_when_missing() -> None:
+    """Verify that GCFS existence is correctly reported when not found."""
+    with mock.patch("roms_tools.utils.find_spec", return_value=None):
+        assert not has_gcsfs()
+
+
+def test_has_copernicus() -> None:
+    """Verify that copernicus existence is correctly reported when found."""
+    with mock.patch("roms_tools.utils.find_spec", return_value=mock.MagicMock):
+        assert has_copernicus()
+
+
+def test_has_copernicus_error_when_missing() -> None:
+    """Verify that copernicus existence is correctly reported when not found."""
+    with mock.patch("roms_tools.utils.find_spec", return_value=None):
+        assert not has_copernicus()
+
+
+def test_load_data_dask_not_found() -> None:
+    """Verify that load data raises an exception when dask is requested and missing."""
+    with (
+        mock.patch("roms_tools.utils.has_dask", return_value=False),
+        pytest.raises(RuntimeError),
+    ):
+        load_data("foo.zarr", {"a": "a"}, use_dask=True)
+
+
+def test_load_data_open_zarr_without_dask() -> None:
+    """Verify that load data raises an exception when zarr is requested without dask."""
+    with (
+        mock.patch("roms_tools.utils.has_dask", return_value=False),
+        pytest.raises(ValueError),
+    ):
+        # read_zarr should require use_dask to be True
+        load_data("foo.zarr", {"a": ""}, use_dask=False, read_zarr=True)
+
+
+@pytest.mark.parametrize(
+    ("dataset_name", "expected_dim"),
+    [
+        ("surface_forcing", "time"),
+        ("bgc_surface_forcing", "time"),
+        ("tidal_forcing", "eta_rho"),
+        ("coarse_surface_forcing", "eta_rho"),
+    ],
+)
+def test_load_data_open_dataset(
+    dataset_name: str,
+    expected_dim: str,
+    get_test_data_path: Callable[[str], Path],
+) -> None:
+    """Verify that a zarr file is correctly loaded when not using Dask.
+
+    This must use xr.open_dataset
+    """
+    ds_path = get_test_data_path(dataset_name)
+
+    with mock.patch(
+        "roms_tools.utils.xr.open_dataset",
+        wraps=xr.open_dataset,
+    ) as fn_od:
+        ds = load_data(
+            ds_path,
+            {"latitude": "latitude"},
+            use_dask=False,
+        )
+    assert fn_od.called
+
+    assert expected_dim in ds.dims
+
+
+# test get_dask_chunks
+
+
+def test_latlon_default_chunks():
+    dim_names = {"latitude": "lat", "longitude": "lon"}
+    expected = {"lat": -1, "lon": -1}
+    result = get_dask_chunks(dim_names)
+    assert result == expected
+
+
+def test_latlon_with_depth_and_time():
+    dim_names = {"latitude": "lat", "longitude": "lon", "depth": "z", "time": "t"}
+    expected = {"lat": -1, "lon": -1, "z": -1, "t": 1}
+    result = get_dask_chunks(dim_names)
+    assert result == expected
+
+
+def test_latlon_with_time_chunking_false():
+    dim_names = {"latitude": "lat", "longitude": "lon", "time": "t"}
+    expected = {"lat": -1, "lon": -1}
+    result = get_dask_chunks(dim_names, time_chunking=False)
+    assert result == expected
+
+
+def test_roms_default_chunks():
+    dim_names = {}
+    expected_keys = {"eta_rho", "eta_v", "xi_rho", "xi_u", "s_rho"}
+    result = get_dask_chunks(dim_names)
+    assert set(result.keys()) == expected_keys
+    assert all(v == -1 for v in result.values())
+
+
+def test_roms_with_depth_and_time():
+    dim_names = {"depth": "s_rho", "time": "ocean_time"}
+    result = get_dask_chunks(dim_names)
+    # ROMS default keys + depth + time
+    expected_keys = {"eta_rho", "eta_v", "xi_rho", "xi_u", "s_rho", "ocean_time"}
+    assert set(result.keys()) == expected_keys
+    assert result["ocean_time"] == 1
+    assert result["s_rho"] == -1
+
+
+def test_roms_with_ntides():
+    dim_names = {"ntides": "nt"}
+    result = get_dask_chunks(dim_names)
+    assert result["nt"] == 1
+
+
+def test_time_chunking_false_roms():
+    dim_names = {"time": "ocean_time"}
+    result = get_dask_chunks(dim_names, time_chunking=False)
+    assert "ocean_time" not in result
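The chunking tests above pin down the contract of the new get_dask_chunks helper: spatial dimensions are kept whole (chunk size -1), the time dimension is chunked one step at a time unless time_chunking=False, and an empty dim_names mapping falls back to the ROMS grid dimensions. A minimal sketch of relying on that contract, built only from the behavior these tests assert (the dimension names are illustrative):

from roms_tools.utils import get_dask_chunks

# Lat/lon source data with a time axis: space stays whole, time is
# chunked per step (mirrors test_latlon_with_depth_and_time)
chunks = get_dask_chunks({"latitude": "lat", "longitude": "lon", "time": "t"})
assert chunks == {"lat": -1, "lon": -1, "t": 1}

# Disabling time chunking drops the time entry entirely
# (mirrors test_latlon_with_time_chunking_false)
chunks = get_dask_chunks(
    {"latitude": "lat", "longitude": "lon", "time": "t"}, time_chunking=False
)
assert chunks == {"lat": -1, "lon": -1}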
roms_tools/tiling/join.py
ADDED
@@ -0,0 +1,189 @@
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Literal, cast
+
+import xarray as xr
+
+from roms_tools.utils import FilePaths, _path_list_from_input
+
+
+def open_partitions(files: FilePaths) -> xr.Dataset:
+    """
+    Open partitioned ROMS netCDF files as a single dataset.
+
+    Parameters
+    ----------
+    files: str | List[str | Path]
+        List or wildcard pattern describing files to join,
+        e.g. "roms_rst.20121209133435.*.nc"
+
+    Returns
+    -------
+    xarray.Dataset
+        Dataset containing unified partitioned datasets
+    """
+    filepaths = _path_list_from_input(files)
+    datasets = [xr.open_dataset(p, decode_timedelta=True) for p in sorted(filepaths)]
+    joined = join_datasets(datasets)
+    return joined
+
+
+def join_netcdf(files: FilePaths, output_path: Path | None = None) -> Path:
+    """
+    Join partitioned NetCDFs into a single dataset.
+
+    Parameters
+    ----------
+    files : str | List[str | Path]
+        List or wildcard pattern describing files to join,
+        e.g. "roms_rst.20121209133435.*.nc"
+
+    output_path : Path, optional
+        If provided, the joined dataset will be saved to this path.
+        Otherwise, the common base of pattern (e.g. roms_rst.20121209133435.nc)
+        will be used.
+
+    Returns
+    -------
+    Path
+        The path of the saved file
+    """
+    filepaths = _path_list_from_input(files)
+    # Determine output path if not provided
+    if output_path is None:
+        # e.g. roms_rst.20120101120000.023.nc -> roms_rst.20120101120000.nc
+        output_path = filepaths[0].with_suffix("").with_suffix(".nc")
+
+    joined = open_partitions(cast(FilePaths, filepaths))
+    joined.to_netcdf(output_path)
+    print(f"Saved joined dataset to: {output_path}")
+
+    return output_path
+
+
+def _find_transitions(dim_sizes: list[int]) -> list[int]:
+    """Finds the indices of all transitions in a list of dimension sizes.
+
+    A transition is a point where the dimension size changes from the previous one.
+    This function is used to determine the number of partitions (e.g., np_eta or np_xi).
+
+    Parameters
+    ----------
+    dim_sizes : list[int]
+        A list of integer sizes for a given dimension across multiple datasets.
+
+    Returns
+    -------
+    List[int]
+        A list of indices where a transition was detected.
+    """
+    transitions: list[int] = []
+    if len(dim_sizes) < 2:
+        return transitions
+
+    for i in range(1, len(dim_sizes)):
+        if dim_sizes[i] != dim_sizes[i - 1]:
+            transitions.append(i)
+    return transitions
+
+
+def _find_common_dims(
+    direction: Literal["xi", "eta"], datasets: Sequence[xr.Dataset]
+) -> list[str]:
+    """Finds all common dimensions along the xi or eta direction amongst a list of Datasets.
+
+    Parameters
+    ----------
+    direction: str ("xi" or "eta")
+        The direction in which to seek a common dimension
+    datasets: Sequence[xr.Dataset]:
+        The datasets in which to look
+
+    Returns
+    -------
+    common_dim: list[str]
+        The dimensions common to all specified datasets along 'direction'
+    """
+    if direction not in ["xi", "eta"]:
+        raise ValueError("'direction' must be 'xi' or 'eta'")
+    dims = []
+    for point in ["rho", "u", "v"]:
+        if all(f"{direction}_{point}" in d.dims for d in datasets):
+            dims.append(f"{direction}_{point}")
+    if not dims:
+        raise ValueError(f"No common point found along direction {direction}")
+    return dims
+
+
+def _infer_partition_layout_from_datasets(
+    datasets: Sequence[xr.Dataset],
+) -> tuple[int, int]:
+    """Infer np_eta, np_xi from datasets."""
+    nd = len(datasets)
+    if nd == 1:
+        return 1, 1
+
+    eta_dims = _find_common_dims("eta", datasets)
+    first_eta_transition = nd
+
+    for eta_dim in eta_dims:
+        dim_sizes = [ds.sizes.get(eta_dim, 0) for ds in datasets]
+        eta_transitions = _find_transitions(dim_sizes)
+        if eta_transitions and (min(eta_transitions) < first_eta_transition):
+            first_eta_transition = min(eta_transitions)
+    if first_eta_transition < nd:
+        np_xi = first_eta_transition
+        np_eta = nd // np_xi
+        return np_xi, np_eta
+    # If we did not successfully find np_xi,np_eta using eta points
+    # then we have a single-column grid:
+
+    return nd, 1
+
+
+def join_datasets(datasets: Sequence[xr.Dataset]) -> xr.Dataset:
+    """Take a sequence of partitioned Datasets and return a joined Dataset."""
+    np_xi, np_eta = _infer_partition_layout_from_datasets(datasets)
+
+    # Arrange into grid
+    grid = [[datasets[j + i * np_xi] for j in range(np_xi)] for i in range(np_eta)]
+
+    # Join each row (along xi_*)
+    rows_joined = []
+    for row in grid:
+        all_vars = set().union(*(ds.data_vars for ds in row))
+        row_dataset = xr.Dataset()
+
+        for varname in all_vars:
+            var_slices = [ds[varname] for ds in row if varname in ds]
+            xi_dims = [dim for dim in var_slices[0].dims if dim.startswith("xi_")]
+
+            if not xi_dims:
+                row_dataset[varname] = var_slices[0]
+            else:
+                xi_dim = xi_dims[0]
+                row_dataset[varname] = xr.concat(
+                    var_slices, dim=xi_dim, combine_attrs="override"
+                )
+
+        rows_joined.append(row_dataset)
+
+    # Join all rows (along eta_*)
+    final_dataset = xr.Dataset()
+    all_vars = set().union(*(ds.data_vars for ds in rows_joined))
+
+    for varname in all_vars:
+        var_slices = [ds[varname] for ds in rows_joined if varname in ds]
+        eta_dims = [dim for dim in var_slices[0].dims if dim.startswith("eta_")]
+
+        if not eta_dims:
+            final_dataset[varname] = var_slices[0]
+        else:
+            eta_dim = eta_dims[0]
+            final_dataset[varname] = xr.concat(
+                var_slices, dim=eta_dim, combine_attrs="override"
+            )
+    # Copy attributes from first dataset
+    final_dataset.attrs = datasets[0].attrs
+
+    return final_dataset
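The two public entry points in this new module cover both join workflows: open_partitions stitches the tiles together in memory, while join_netcdf additionally writes the result, deriving the output name by stripping the partition index from the pattern when output_path is omitted. A short usage sketch, assuming the functions are importable from roms_tools.tiling.join as the file path suggests, and reusing the restart-file pattern from the docstrings:

from roms_tools.tiling.join import join_netcdf, open_partitions

# Stitch all tiles matching the pattern into one in-memory Dataset
ds = open_partitions("roms_rst.20121209133435.*.nc")

# Join and save; with no output_path this drops the partition index,
# writes roms_rst.20121209133435.nc, and returns that Path
saved_path = join_netcdf("roms_rst.20121209133435.*.nc")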
roms_tools/tiling/partition.py
CHANGED
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from numbers import Integral
 from pathlib import Path
 
@@ -296,20 +297,21 @@ def partition(
 
 
 def partition_netcdf(
-    filepath: str | Path,
+    filepath: str | Path | Sequence[str | Path],
     np_eta: int = 1,
     np_xi: int = 1,
+    output_dir: str | Path | None = None,
     include_coarse_dims: bool = True,
-) ->
-    """Partition
+) -> list[Path]:
+    """Partition one or more ROMS NetCDF files into smaller spatial tiles and save them to disk.
 
-    This function divides
+    This function divides each dataset into `np_eta` by `np_xi` tiles.
     Each tile is saved as a separate NetCDF file.
 
     Parameters
     ----------
-    filepath :
-
+    filepath : str | Path | Sequence[str | Path]
+        A path or list of paths to input NetCDF files.
 
     np_eta : int, optional
        The number of partitions along the `eta` direction. Must be a positive integer. Default is 1.
@@ -317,6 +319,10 @@ def partition_netcdf(
     np_xi : int, optional
        The number of partitions along the `xi` direction. Must be a positive integer. Default is 1.
 
+    output_dir : str | Path | None, optional
+        Directory or base path to save partitioned files.
+        If None, files are saved alongside the input file.
+
     include_coarse_dims : bool, optional
        Whether to include coarse grid dimensions (`eta_coarse`, `xi_coarse`) in the partitioning.
        If False, these dimensions will not be split. Relevant if none of the coarse resolution variables are actually used by ROMS.
@@ -324,31 +330,39 @@ def partition_netcdf(
 
     Returns
     -------
-
+    list[Path]
        A list of Path objects for the filenames that were saved.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if isinstance(filepath, str | Path):
+        filepaths = [Path(filepath)]
+    else:
+        filepaths = [Path(fp) for fp in filepath]
+
+    all_saved_filenames = []
+
+    for fp in filepaths:
+        input_file = fp.with_suffix(".nc")
+        ds = xr.open_dataset(input_file, decode_timedelta=False)
+
+        file_numbers, partitioned_datasets = partition(
+            ds, np_eta=np_eta, np_xi=np_xi, include_coarse_dims=include_coarse_dims
+        )
+
+        if output_dir:
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+            base_filepath = output_dir / fp.stem
+        else:
+            base_filepath = fp.with_suffix("")
 
-
-
-
-
+        ndigits = len(str(max(file_numbers)))
+        paths_to_partitioned_files = [
+            Path(f"{base_filepath}.{num:0{ndigits}d}") for num in file_numbers
+        ]
+
+        saved = save_datasets(
+            partitioned_datasets, paths_to_partitioned_files, verbose=False
+        )
+        all_saved_filenames.extend(saved)
 
-    return
+    return all_saved_filenames