roms-tools 3.1.1-py3-none-any.whl → 3.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. roms_tools/__init__.py +8 -1
  2. roms_tools/analysis/cdr_analysis.py +203 -0
  3. roms_tools/analysis/cdr_ensemble.py +198 -0
  4. roms_tools/analysis/roms_output.py +80 -46
  5. roms_tools/data/grids/GLORYS_global_grid.nc +0 -0
  6. roms_tools/download.py +4 -0
  7. roms_tools/plot.py +131 -30
  8. roms_tools/regrid.py +6 -1
  9. roms_tools/setup/boundary_forcing.py +94 -44
  10. roms_tools/setup/cdr_forcing.py +123 -15
  11. roms_tools/setup/cdr_release.py +161 -8
  12. roms_tools/setup/datasets.py +709 -341
  13. roms_tools/setup/grid.py +167 -139
  14. roms_tools/setup/initial_conditions.py +113 -48
  15. roms_tools/setup/mask.py +63 -7
  16. roms_tools/setup/nesting.py +67 -42
  17. roms_tools/setup/river_forcing.py +45 -19
  18. roms_tools/setup/surface_forcing.py +16 -10
  19. roms_tools/setup/tides.py +1 -2
  20. roms_tools/setup/topography.py +4 -4
  21. roms_tools/setup/utils.py +134 -22
  22. roms_tools/tests/test_analysis/test_cdr_analysis.py +144 -0
  23. roms_tools/tests/test_analysis/test_cdr_ensemble.py +202 -0
  24. roms_tools/tests/test_analysis/test_roms_output.py +61 -3
  25. roms_tools/tests/test_setup/test_boundary_forcing.py +111 -52
  26. roms_tools/tests/test_setup/test_cdr_forcing.py +54 -0
  27. roms_tools/tests/test_setup/test_cdr_release.py +118 -1
  28. roms_tools/tests/test_setup/test_datasets.py +458 -34
  29. roms_tools/tests/test_setup/test_grid.py +238 -121
  30. roms_tools/tests/test_setup/test_initial_conditions.py +94 -41
  31. roms_tools/tests/test_setup/test_surface_forcing.py +28 -3
  32. roms_tools/tests/test_setup/test_utils.py +91 -1
  33. roms_tools/tests/test_setup/test_validation.py +21 -15
  34. roms_tools/tests/test_setup/utils.py +71 -0
  35. roms_tools/tests/test_tiling/test_join.py +241 -0
  36. roms_tools/tests/test_tiling/test_partition.py +45 -0
  37. roms_tools/tests/test_utils.py +224 -2
  38. roms_tools/tiling/join.py +189 -0
  39. roms_tools/tiling/partition.py +44 -30
  40. roms_tools/utils.py +488 -161
  41. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/METADATA +15 -4
  42. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/RECORD +45 -37
  43. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/WHEEL +0 -0
  44. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/licenses/LICENSE +0 -0
  45. {roms_tools-3.1.1.dist-info → roms_tools-3.2.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.util
1
4
  import logging
2
- import time
5
+ import typing
3
6
  from collections import Counter, defaultdict
7
+ from collections.abc import Callable, Mapping
4
8
  from dataclasses import dataclass, field
5
9
  from datetime import datetime, timedelta
6
10
  from pathlib import Path
11
+ from types import ModuleType
12
+ from typing import Any, ClassVar, Literal, TypeAlias, cast
13
+
14
+ if typing.TYPE_CHECKING:
15
+ from roms_tools.setup.grid import Grid
7
16
 
8
17
  import numpy as np
9
18
  import xarray as xr
@@ -17,15 +26,32 @@ from roms_tools.download import (
17
26
  )
18
27
  from roms_tools.setup.fill import LateralFill
19
28
  from roms_tools.setup.utils import (
29
+ Timed,
20
30
  assign_dates_to_climatology,
21
31
  convert_cftime_to_datetime,
22
32
  gc_dist,
33
+ get_target_coords,
23
34
  get_time_type,
24
35
  interpolate_cyclic_time,
25
36
  interpolate_from_climatology,
26
37
  one_dim_fill,
27
38
  )
28
- from roms_tools.utils import _has_gcsfs, _load_data
39
+ from roms_tools.utils import get_dask_chunks, get_pkg_error_msg, has_gcsfs, load_data
40
+
41
+ TConcatEndTypes = Literal["lower", "upper", "both"]
42
+ REPO_ROOT = Path(__file__).resolve().parents[2]
43
+ GLORYS_GLOBAL_GRID_PATH = (
44
+ REPO_ROOT / "roms_tools" / "data" / "grids" / "GLORYS_global_grid.nc"
45
+ )
46
+ DEFAULT_NR_BUFFER_POINTS = (
47
+ 20 # Default number of buffer points for subdomain selection.
48
+ )
49
+ # Balances performance and accuracy:
50
+ # - Too many points → more expensive computations
51
+ # - Too few points → potential boundary artifacts when lateral refill is performed
52
+ # See discussion: https://github.com/CWorthy-ocean/roms-tools/issues/153
53
+ # This default will be applied consistently across all datasets requiring lateral fill.
54
+ RawDataSource: TypeAlias = dict[str, str | Path | list[str | Path] | bool]
29
55
 
30
56
  # lat-lon datasets
31
57
 
@@ -43,7 +69,7 @@ class Dataset:
43
69
  Start time for selecting relevant data. If not provided, no time-based filtering is applied.
44
70
  end_time : Optional[datetime], optional
45
71
  End time for selecting relevant data. If not provided, the dataset selects the time entry
46
- closest to `start_time` within the range `[start_time, start_time + 24 hours]`.
72
+ closest to `start_time` within the range `[start_time, start_time + 24 hours)`.
47
73
  If `start_time` is also not provided, no time-based filtering is applied.
48
74
  dim_names: Dict[str, str], optional
49
75
  Dictionary specifying the names of dimensions in the dataset.
@@ -58,8 +84,19 @@ class Dataset:
58
84
  Indicates whether land values require lateral filling. If `True`, ocean values will be extended into land areas
59
85
  to replace NaNs or non-ocean values (such as atmospheric values in ERA5 data). If `False`, it is assumed that
60
86
  land values are already correctly assigned, and lateral filling will be skipped. Defaults to `True`.
61
- use_dask: bool
87
+ use_dask: bool, optional
62
88
  Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
89
+ read_zarr: bool, optional
90
+ If True, read the dataset with the zarr engine rather than with `xr.open_mfdataset`.
91
+ Defaults to False.
92
+ allow_flex_time: bool, optional
93
+ Controls how strictly the dataset selects a time entry when `end_time` is not provided (relevant for initial conditions):
94
+
95
+ - If False (default): requires an exact match to `start_time`. Raises a ValueError if no match exists.
96
+ - If True: allows a +24h search window after `start_time` and selects the closest available
97
+ time entry within that window. Raises a ValueError if none are found.
98
+
99
+ Only used when `end_time` is None. Has no effect otherwise.
63
100
  apply_post_processing: bool
64
101
  Indicates whether to post-process the dataset for further use. Defaults to True.
65
102
 
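For readers scanning this hunk, a hedged usage sketch of the new `allow_flex_time` option when pulling a single snapshot for initial conditions. The field names come from this diff; the filename and the exact set of required constructor arguments are illustrative assumptions.

    from datetime import datetime
    from roms_tools.setup.datasets import GLORYSDataset

    # Strict (default): the source must contain a record exactly at start_time,
    # otherwise a ValueError is raised.
    strict = GLORYSDataset(
        filename="glorys_snapshot.nc",   # hypothetical local GLORYS file
        start_time=datetime(2012, 1, 1),
        allow_flex_time=False,
    )

    # Flexible: accept the record closest to start_time within
    # [start_time, start_time + 24 hours).
    flexible = GLORYSDataset(
        filename="glorys_snapshot.nc",
        start_time=datetime(2012, 1, 1),
        allow_flex_time=True,
    )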
@@ -90,23 +127,25 @@ class Dataset:
90
127
  }
91
128
  )
92
129
  var_names: dict[str, str]
93
- opt_var_names: dict[str, str] | None = field(default_factory=dict)
94
- climatology: bool | None = False
130
+ opt_var_names: dict[str, str] = field(default_factory=dict)
131
+ climatology: bool = False
95
132
  needs_lateral_fill: bool | None = True
96
- use_dask: bool | None = False
133
+ use_dask: bool = False
134
+ read_zarr: bool = False
135
+ allow_flex_time: bool = False
97
136
  apply_post_processing: bool | None = True
98
- read_zarr: bool | None = False
99
137
 
138
+ ds_loader_fn: Callable[[], xr.Dataset] | None = None
100
139
  is_global: bool = field(init=False, repr=False)
101
140
  ds: xr.Dataset = field(init=False, repr=False)
102
141
 
103
- def __post_init__(self):
104
- """
105
- Post-initialization processing:
142
+ def __post_init__(self) -> None:
143
+ """Perform post-initialization processing.
144
+
106
145
  1. Loads the dataset from the specified filename.
107
- 2. Applies time filtering based on start_time and end_time if provided.
108
- 3. Selects relevant fields as specified by var_names.
109
- 4. Ensures latitude values and depth values are in ascending order.
146
+ 2. Applies time filtering based on start_time and end_time (if provided).
147
+ 3. Selects relevant fields as specified by `var_names`.
148
+ 4. Ensures latitude, longitude, and depth values are in ascending order.
110
149
  5. Checks if the dataset covers the entire globe and adjusts if necessary.
111
150
  """
112
151
  # Validate start_time and end_time
@@ -167,13 +206,17 @@ class Dataset:
167
206
  ValueError
168
207
  If a list of files is provided but self.dim_names["time"] is not available or use_dask=False.
169
208
  """
170
- ds = _load_data(
171
- self.filename, self.dim_names, self.use_dask, read_zarr=self.read_zarr
209
+ ds = load_data(
210
+ filename=self.filename,
211
+ dim_names=self.dim_names,
212
+ use_dask=self.use_dask,
213
+ read_zarr=self.read_zarr,
214
+ ds_loader_fn=self.ds_loader_fn,
172
215
  )
173
216
 
174
217
  return ds
175
218
 
176
- def clean_up(self, ds: xr.Dataset, **kwargs) -> xr.Dataset:
219
+ def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
177
220
  """Dummy method to be overridden by child classes to clean up the dataset.
178
221
 
179
222
  This method is intended as a placeholder and should be implemented in subclasses
@@ -206,7 +249,7 @@ class Dataset:
206
249
  """
207
250
  _check_dataset(ds, self.dim_names, self.var_names)
208
251
 
209
- def select_relevant_fields(self, ds) -> xr.Dataset:
252
+ def select_relevant_fields(self, ds: xr.Dataset) -> xr.Dataset:
210
253
  """Selects and returns a subset of the dataset containing only the variables
211
254
  specified in `self.var_names`.
212
255
 
@@ -249,7 +292,7 @@ class Dataset:
249
292
  """
250
293
  return ds
251
294
 
252
- def select_relevant_times(self, ds) -> xr.Dataset:
295
+ def select_relevant_times(self, ds: xr.Dataset) -> xr.Dataset:
253
296
  """Select a subset of the dataset based on the specified time range.
254
297
 
255
298
  This method filters the dataset to include all records between `start_time` and `end_time`.
@@ -257,7 +300,7 @@ class Dataset:
257
300
  after `end_time` are included, even if they fall outside the strict time range.
258
301
 
259
302
  If no `end_time` is specified, the method will select the time range of
260
- [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
303
+ [start_time, start_time + 24 hours) and return the closest time entry to `start_time` within that range.
261
304
 
262
305
  Parameters
263
306
  ----------
@@ -296,8 +339,17 @@ class Dataset:
296
339
  """
297
340
  time_dim = self.dim_names["time"]
298
341
 
342
+ # Ensure start_time is not None for type safety
343
+ if self.start_time is None:
344
+ raise ValueError("select_relevant_times called but start_time is None.")
345
+
299
346
  ds = _select_relevant_times(
300
- ds, time_dim, self.start_time, self.end_time, self.climatology
347
+ ds,
348
+ time_dim,
349
+ self.start_time,
350
+ self.end_time,
351
+ self.climatology,
352
+ self.allow_flex_time,
301
353
  )
302
354
 
303
355
  return ds
@@ -344,7 +396,7 @@ class Dataset:
344
396
 
345
397
  return ds
346
398
 
347
- def infer_horizontal_resolution(self, ds: xr.Dataset):
399
+ def infer_horizontal_resolution(self, ds: xr.Dataset) -> None:
348
400
  """Estimate and set the average horizontal resolution of a dataset based on
349
401
  latitude and longitude spacing.
350
402
 
@@ -372,7 +424,7 @@ class Dataset:
372
424
  # Set the computed resolution as an attribute
373
425
  self.resolution = resolution
374
426
 
375
- def compute_minimal_grid_spacing(self, ds: xr.Dataset):
427
+ def compute_minimal_grid_spacing(self, ds: xr.Dataset) -> float:
376
428
  """Compute the minimal grid spacing in a dataset based on latitude and longitude
377
429
  spacing, considering Earth's radius.
378
430
 
@@ -434,7 +486,12 @@ class Dataset:
434
486
 
435
487
  return is_global
436
488
 
437
- def concatenate_longitudes(self, ds, end="upper", verbose=False):
489
+ def concatenate_longitudes(
490
+ self,
491
+ ds: xr.Dataset,
492
+ end: TConcatEndTypes = "upper",
493
+ verbose: bool = False,
494
+ ) -> xr.Dataset:
438
495
  """Concatenates fields in dataset twice along the longitude dimension.
439
496
 
440
497
  Parameters
@@ -457,58 +514,12 @@ class Dataset:
457
514
  ds_concatenated : xr.Dataset
458
515
  The concatenated dataset.
459
516
  """
460
- if verbose:
461
- start_time = time.time()
462
-
463
- ds_concatenated = xr.Dataset()
464
-
465
- lon = ds[self.dim_names["longitude"]]
466
- if end == "lower":
467
- lon_minus360 = lon - 360
468
- lon_concatenated = xr.concat(
469
- [lon_minus360, lon], dim=self.dim_names["longitude"]
470
- )
471
-
472
- elif end == "upper":
473
- lon_plus360 = lon + 360
474
- lon_concatenated = xr.concat(
475
- [lon, lon_plus360], dim=self.dim_names["longitude"]
476
- )
477
-
478
- elif end == "both":
479
- lon_minus360 = lon - 360
480
- lon_plus360 = lon + 360
481
- lon_concatenated = xr.concat(
482
- [lon_minus360, lon, lon_plus360], dim=self.dim_names["longitude"]
483
- )
484
-
485
- for var in ds.data_vars:
486
- if self.dim_names["longitude"] in ds[var].dims:
487
- field = ds[var]
488
-
489
- if end == "both":
490
- field_concatenated = xr.concat(
491
- [field, field, field], dim=self.dim_names["longitude"]
492
- )
493
- else:
494
- field_concatenated = xr.concat(
495
- [field, field], dim=self.dim_names["longitude"]
496
- )
497
-
498
- if self.use_dask:
499
- field_concatenated = field_concatenated.chunk(
500
- {self.dim_names["longitude"]: -1}
501
- )
502
- field_concatenated[self.dim_names["longitude"]] = lon_concatenated
503
- ds_concatenated[var] = field_concatenated
504
- else:
505
- ds_concatenated[var] = ds[var]
506
-
507
- ds_concatenated[self.dim_names["longitude"]] = lon_concatenated
508
-
509
- if verbose:
510
- logging.info(
511
- f"Concatenating the data along the longitude dimension: {time.time() - start_time:.3f} seconds"
517
+ with Timed(
518
+ "=== Concatenating the data along the longitude dimension ===",
519
+ verbose=verbose,
520
+ ):
521
+ ds_concatenated = _concatenate_longitudes(
522
+ ds, self.dim_names, end, self.use_dask
512
523
  )
513
524
 
514
525
  return ds_concatenated
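The hunk above swaps the manual time.time()/logging pattern for the new `Timed` context manager imported from `roms_tools.setup.utils`. A minimal sketch of that pattern, assuming only the message-plus-verbose signature visible in this diff:

    import time
    from roms_tools.setup.utils import Timed

    # Log the elapsed time of a block only when verbose=True, mirroring the
    # usage added in concatenate_longitudes above.
    with Timed("=== Doing something expensive ===", verbose=True):
        time.sleep(0.5)  # stand-in for a slow step such as regridding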
@@ -543,14 +554,16 @@ class Dataset:
543
554
  ds = self.ds.astype({var: "float64" for var in self.ds.data_vars})
544
555
  self.ds = ds
545
556
 
557
+ return None
558
+
546
559
  def choose_subdomain(
547
560
  self,
548
- target_coords,
549
- buffer_points=20,
550
- return_copy=False,
551
- return_coords_only=False,
552
- verbose=False,
553
- ):
561
+ target_coords: dict[str, Any],
562
+ buffer_points: int = DEFAULT_NR_BUFFER_POINTS,
563
+ return_copy: bool = False,
564
+ return_coords_only: bool = False,
565
+ verbose: bool = False,
566
+ ) -> xr.Dataset | Dataset | None:
554
567
  """Selects a subdomain from the xarray Dataset based on specified target
555
568
  coordinates, extending the selection by a defined buffer. Adjusts longitude
556
569
  ranges as necessary to accommodate the dataset's expected range and handles
@@ -587,94 +600,15 @@ class Dataset:
587
600
  ValueError
588
601
  If the selected latitude or longitude range does not intersect with the dataset.
589
602
  """
590
- lat_min = target_coords["lat"].min().values
591
- lat_max = target_coords["lat"].max().values
592
- lon_min = target_coords["lon"].min().values
593
- lon_max = target_coords["lon"].max().values
594
-
595
- margin = self.resolution * buffer_points
596
-
597
- # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is necessary)
598
- subdomain = self.ds.sel(
599
- **{
600
- self.dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
601
- }
603
+ subdomain = choose_subdomain(
604
+ ds=self.ds,
605
+ dim_names=self.dim_names,
606
+ resolution=self.resolution,
607
+ is_global=self.is_global,
608
+ target_coords=target_coords,
609
+ buffer_points=buffer_points,
610
+ use_dask=self.use_dask,
602
611
  )
603
- lon = subdomain[self.dim_names["longitude"]]
604
-
605
- if self.is_global:
606
- concats = []
607
- # Concatenate only if necessary
608
- if lon_max + margin > lon.max():
609
- # See if shifting by +360 degrees helps
610
- if (lon_min - margin > (lon + 360).min()) and (
611
- lon_max + margin < (lon + 360).max()
612
- ):
613
- subdomain[self.dim_names["longitude"]] = lon + 360
614
- lon = subdomain[self.dim_names["longitude"]]
615
- else:
616
- concats.append("upper")
617
- if lon_min - margin < lon.min():
618
- # See if shifting by -360 degrees helps
619
- if (lon_min - margin > (lon - 360).min()) and (
620
- lon_max + margin < (lon - 360).max()
621
- ):
622
- subdomain[self.dim_names["longitude"]] = lon - 360
623
- lon = subdomain[self.dim_names["longitude"]]
624
- else:
625
- concats.append("lower")
626
-
627
- if concats:
628
- end = "both" if len(concats) == 2 else concats[0]
629
- subdomain = self.concatenate_longitudes(
630
- subdomain, end=end, verbose=False
631
- )
632
- lon = subdomain[self.dim_names["longitude"]]
633
-
634
- else:
635
- # Adjust longitude range if needed to match the expected range
636
- if not target_coords["straddle"]:
637
- if lon.min() < -180:
638
- if lon_max + margin > 0:
639
- lon_min -= 360
640
- lon_max -= 360
641
- elif lon.min() < 0:
642
- if lon_max + margin > 180:
643
- lon_min -= 360
644
- lon_max -= 360
645
-
646
- if target_coords["straddle"]:
647
- if lon.max() > 360:
648
- if lon_min - margin < 180:
649
- lon_min += 360
650
- lon_max += 360
651
- elif lon.max() > 180:
652
- if lon_min - margin < 0:
653
- lon_min += 360
654
- lon_max += 360
655
- # Select the subdomain in longitude direction
656
-
657
- subdomain = subdomain.sel(
658
- **{
659
- self.dim_names["longitude"]: slice(lon_min - margin, lon_max + margin),
660
- }
661
- )
662
-
663
- # Check if the selected subdomain has zero dimensions in latitude or longitude
664
- if subdomain[self.dim_names["latitude"]].size == 0:
665
- raise ValueError("Selected latitude range does not intersect with dataset.")
666
-
667
- if subdomain[self.dim_names["longitude"]].size == 0:
668
- raise ValueError(
669
- "Selected longitude range does not intersect with dataset."
670
- )
671
-
672
- # Adjust longitudes to expected range if needed
673
- lon = subdomain[self.dim_names["longitude"]]
674
- if target_coords["straddle"]:
675
- subdomain[self.dim_names["longitude"]] = xr.where(lon > 180, lon - 360, lon)
676
- else:
677
- subdomain[self.dim_names["longitude"]] = xr.where(lon < 0, lon + 360, lon)
678
612
 
679
613
  if return_coords_only:
680
614
  # Create and return a dataset with only latitudes and longitudes
@@ -687,6 +621,7 @@ class Dataset:
687
621
  return Dataset.from_ds(self, subdomain)
688
622
  else:
689
623
  self.ds = subdomain
624
+ return None
690
625
 
691
626
  def apply_lateral_fill(self):
692
627
  """Apply lateral fill to variables using the dataset's mask and grid dimensions.
@@ -706,10 +641,6 @@ class Dataset:
706
641
  point to the same variable in the dataset.
707
642
  """
708
643
  if self.needs_lateral_fill:
709
- logging.info(
710
- "Applying 2D horizontal fill to the source data before regridding."
711
- )
712
-
713
644
  lateral_fill = LateralFill(
714
645
  self.ds["mask"],
715
646
  [self.dim_names["latitude"], self.dim_names["longitude"]],
@@ -740,10 +671,6 @@ class Dataset:
740
671
  else:
741
672
  # Apply standard lateral fill for other variables
742
673
  self.ds[var_name] = lateral_fill.apply(self.ds[var_name])
743
- else:
744
- logging.info(
745
- "2D horizontal fill is skipped because source data already contains filled values."
746
- )
747
674
 
748
675
  def extrapolate_deepest_to_bottom(self):
749
676
  """Extrapolate deepest non-NaN values to fill bottom NaNs along the depth
@@ -760,7 +687,7 @@ class Dataset:
760
687
  )
761
688
 
762
689
  @classmethod
763
- def from_ds(cls, original_dataset: "Dataset", ds: xr.Dataset) -> "Dataset":
690
+ def from_ds(cls, original_dataset: Dataset, ds: xr.Dataset) -> Dataset:
764
691
  """Substitute the internal dataset of a Dataset object with a new xarray
765
692
  Dataset.
766
693
 
@@ -862,7 +789,7 @@ class TPXODataset(Dataset):
862
789
  ValueError
863
790
  If longitude or latitude values do not match the grid.
864
791
  """
865
- ds_grid = _load_data(self.grid_filename, self.dim_names, self.use_dask)
792
+ ds_grid = load_data(self.grid_filename, self.dim_names, self.use_dask)
866
793
 
867
794
  # Define mask and coordinate names based on location
868
795
  if self.location == "h":
@@ -893,21 +820,13 @@ class TPXODataset(Dataset):
893
820
 
894
821
  # Drop all dimensions except 'longitude' and 'latitude'
895
822
  dims_to_keep = {"longitude", "latitude"}
896
- dims_to_drop = [dim for dim in ds_grid.dims if dim not in dims_to_keep]
823
+ dims_to_drop: set[str] = set(ds_grid.dims) - dims_to_keep
897
824
  if dims_to_drop:
898
825
  ds_grid = ds_grid.isel({dim: 0 for dim in dims_to_drop})
899
826
 
900
827
  # Ensure correct dimension order
901
828
  ds_grid = ds_grid.transpose("latitude", "longitude")
902
829
 
903
- dims_to_keep = {"longitude", "latitude"}
904
- dims_to_drop = set(ds_grid.dims) - dims_to_keep
905
- ds_grid = (
906
- ds_grid.isel({dim: 0 for dim in dims_to_drop}) if dims_to_drop else ds_grid
907
- )
908
- # Bring dimensions in correct order
909
- ds_grid = ds_grid.transpose("latitude", "longitude")
910
-
911
830
  ds = ds.rename({"con": "nc"})
912
831
  ds = ds.assign_coords(
913
832
  {
@@ -1042,7 +961,7 @@ class GLORYSDataset(Dataset):
1042
961
  }
1043
962
  )
1044
963
 
1045
- climatology: bool | None = False
964
+ climatology: bool = False
1046
965
 
1047
966
  def post_process(self):
1048
967
  """Apply a mask to the dataset based on the 'zeta' variable, with 0 where 'zeta'
@@ -1058,23 +977,132 @@ class GLORYSDataset(Dataset):
1058
977
  None
1059
978
  The dataset is modified in-place by applying the mask to each variable.
1060
979
  """
1061
- mask = xr.where(
1062
- self.ds[self.var_names["zeta"]].isel({self.dim_names["time"]: 0}).isnull(),
1063
- 0,
1064
- 1,
1065
- )
1066
- mask_vel = xr.where(
1067
- self.ds[self.var_names["u"]]
1068
- .isel({self.dim_names["time"]: 0, self.dim_names["depth"]: 0})
1069
- .isnull(),
1070
- 0,
1071
- 1,
1072
- )
980
+ zeta = self.ds[self.var_names["zeta"]]
981
+ u = self.ds[self.var_names["u"]]
1073
982
 
983
+ # Select time=0 if time dimension exists, otherwise use data as-is
984
+ if self.dim_names["time"] in zeta.dims:
985
+ zeta_ref = zeta.isel({self.dim_names["time"]: 0})
986
+ else:
987
+ zeta_ref = zeta
988
+
989
+ if self.dim_names["time"] in u.dims:
990
+ u_ref = u.isel({self.dim_names["time"]: 0})
991
+ else:
992
+ u_ref = u
993
+
994
+ # Also handle depth for velocity
995
+ if self.dim_names["depth"] in u_ref.dims:
996
+ u_ref = u_ref.isel({self.dim_names["depth"]: 0})
997
+
998
+ # Create masks
999
+ mask = xr.where(zeta_ref.isnull(), 0, 1)
1000
+ mask_vel = xr.where(u_ref.isnull(), 0, 1)
1001
+
1002
+ # Save to dataset
1074
1003
  self.ds["mask"] = mask
1075
1004
  self.ds["mask_vel"] = mask_vel
1076
1005
 
1077
1006
 
1007
+ @dataclass(kw_only=True)
1008
+ class GLORYSDefaultDataset(GLORYSDataset):
1009
+ """A GLORYS dataset that is loaded from the Copernicus Marine Data Store."""
1010
+
1011
+ dataset_name: ClassVar[str] = "cmems_mod_glo_phy_my_0.083deg_P1D-m"
1012
+ """The GLORYS dataset-id for requests to the Copernicus Marine Toolkit"""
1013
+ _tk_module: ModuleType | None = None
1014
+ """The dynamically imported Copernicus Marine module."""
1015
+
1016
+ def __post_init__(self) -> None:
1017
+ """Configure attributes to ensure use of the correct upstream data-source."""
1018
+ self.read_zarr = True
1019
+ self.use_dask = True
1020
+ self.filename = self.dataset_name
1021
+ self.ds_loader_fn = self._load_from_copernicus
1022
+
1023
+ super().__post_init__()
1024
+
1025
+ def _check_auth(self, package_name: str) -> None:
1026
+ """Check the local credential hierarchy for auth credentials.
1027
+
1028
+ Raises
1029
+ ------
1030
+ RuntimeError
1031
+ If auth credentials cannot be found.
1032
+ """
1033
+ if self._tk_module and not self._tk_module.login(check_credentials_valid=True):
1034
+ msg = f"Authenticate with `{package_name} login` to retrieve GLORYS data."
1035
+ raise RuntimeError(msg)
1036
+
1037
+ def _load_copernicus(self) -> ModuleType:
1038
+ """Dynamically load the optional Copernicus Marine Toolkit dependency.
1039
+
1040
+ Raises
1041
+ ------
1042
+ RuntimeError
1043
+ - If the toolkit module is not available or cannot be imported.
1044
+ - If auth credentials cannot be found.
1045
+ """
1046
+ package_name = "copernicusmarine"
1047
+ if self._tk_module:
1048
+ self._check_auth(package_name)
1049
+ return self._tk_module
1050
+
1051
+ spec = importlib.util.find_spec(package_name)
1052
+ if not spec:
1053
+ msg = get_pkg_error_msg("cloud-based GLORYS data", package_name, "stream")
1054
+ raise RuntimeError(msg)
1055
+
1056
+ try:
1057
+ self._tk_module = importlib.import_module(package_name)
1058
+ except ImportError as e:
1059
+ msg = f"Package `{package_name}` was found but could not be loaded."
1060
+ raise RuntimeError(msg) from e
1061
+
1062
+ self._check_auth(package_name)
1063
+ return self._tk_module
1064
+
1065
+ def _load_from_copernicus(self) -> xr.Dataset:
1066
+ """Load a GLORYS dataset supporting streaming.
1067
+
1068
+ Returns
1069
+ -------
1070
+ xr.Dataset
1071
+ The streaming dataset
1072
+ """
1073
+ copernicusmarine = self._load_copernicus()
1074
+
1075
+ # ds = copernicusmarine.download_functions.download_zarr.open_dataset_from_arco_series(
1076
+ # dataset_url="https://s3.waw3-1.cloudferro.com/mdl-arco-geo-025/arco/GLOBAL_MULTIYEAR_PHY_001_030/cmems_mod_glo_phy_my_0.083deg_P1D-m_202311/geoChunked.zarr",
1077
+ # variables=["thetao", "so", "uo", "vo", "zos"],
1078
+ # geographical_parameters=copernicusmarine.download_functions.subset_parameters.GeographicalParameters(),
1079
+ # temporal_parameters=copernicusmarine.download_functions.subset_parameters.TemporalParameters(
1080
+ # start_datetime=self.start_time, end_datetime=self.end_time
1081
+ # ),
1082
+ # depth_parameters=copernicusmarine.download_functions.subset_parameters.DepthParameters(),
1083
+ # coordinates_selection_method="outside",
1084
+ # optimum_dask_chunking={
1085
+ # "time": 1,
1086
+ # "depth": -1,
1087
+ # "latitude": -1,
1088
+ # "longitude": -1,
1089
+ # },
1090
+ # )
1091
+
1092
+ ds = copernicusmarine.open_dataset(
1093
+ self.dataset_name,
1094
+ start_datetime=self.start_time,
1095
+ end_datetime=self.end_time,
1096
+ service="arco-geo-series",
1097
+ coordinates_selection_method="outside",
1098
+ chunk_size_limit=-1,
1099
+ )
1100
+ chunks = get_dask_chunks(self.dim_names)
1101
+ ds = ds.chunk(chunks)
1102
+
1103
+ return ds
1104
+
1105
+
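The new `GLORYSDefaultDataset` streams GLORYS directly from the Copernicus Marine Data Store rather than reading local files. A hedged usage sketch, assuming the optional `copernicusmarine` package is installed and `copernicusmarine login` has already been run; whether further constructor arguments (for example `filename`, which `__post_init__` overrides with the dataset id) must be supplied is not visible in this diff:

    from datetime import datetime
    from roms_tools.setup.datasets import GLORYSDefaultDataset

    # Opens cmems_mod_glo_phy_my_0.083deg_P1D-m via the ARCO zarr service;
    # read_zarr, use_dask and filename are forced in __post_init__ (see above).
    glorys = GLORYSDefaultDataset(
        start_time=datetime(2012, 1, 1),
        end_time=datetime(2012, 1, 5),
    )
    print(glorys.ds)  # chunked xarray.Dataset covering the requested window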
1078
1106
  @dataclass(kw_only=True)
1079
1107
  class UnifiedDataset(Dataset):
1080
1108
  """Represents unified BGC data on original grid.
@@ -1199,7 +1227,7 @@ class UnifiedBGCDataset(UnifiedDataset):
1199
1227
  }
1200
1228
  )
1201
1229
 
1202
- climatology: bool | None = True
1230
+ climatology: bool = True
1203
1231
 
1204
1232
 
1205
1233
  @dataclass(kw_only=True)
@@ -1221,7 +1249,7 @@ class UnifiedBGCSurfaceDataset(UnifiedDataset):
1221
1249
  }
1222
1250
  )
1223
1251
 
1224
- climatology: bool | None = True
1252
+ climatology: bool = True
1225
1253
 
1226
1254
 
1227
1255
  @dataclass(kw_only=True)
@@ -1336,9 +1364,9 @@ class CESMBGCDataset(CESMDataset):
1336
1364
  }
1337
1365
  )
1338
1366
 
1339
- climatology: bool | None = False
1367
+ climatology: bool = False
1340
1368
 
1341
- def post_process(self):
1369
+ def post_process(self) -> None:
1342
1370
  """
1343
1371
  Processes and converts CESM data values as follows:
1344
1372
  - Convert depth values from cm to m.
@@ -1407,9 +1435,9 @@ class CESMBGCSurfaceForcingDataset(CESMDataset):
1407
1435
  }
1408
1436
  )
1409
1437
 
1410
- climatology: bool | None = False
1438
+ climatology: bool = False
1411
1439
 
1412
- def post_process(self):
1440
+ def post_process(self) -> None:
1413
1441
  """Perform post-processing on the dataset to remove specific variables.
1414
1442
 
1415
1443
  This method checks if the variable "z_t" exists in the dataset. If it does,
@@ -1456,9 +1484,9 @@ class ERA5Dataset(Dataset):
1456
1484
  }
1457
1485
  )
1458
1486
 
1459
- climatology: bool | None = False
1487
+ climatology: bool = False
1460
1488
 
1461
- def post_process(self):
1489
+ def post_process(self) -> None:
1462
1490
  """
1463
1491
  Processes and converts ERA5 data values as follows:
1464
1492
  - Convert radiation values from J/m^2 to W/m^2.
@@ -1546,15 +1574,11 @@ class ERA5ARCODataset(ERA5Dataset):
1546
1574
  }
1547
1575
  )
1548
1576
 
1549
- def __post_init__(self):
1577
+ def __post_init__(self) -> None:
1550
1578
  self.read_zarr = True
1551
- if not _has_gcsfs():
1552
- raise RuntimeError(
1553
- "To use cloud-based ERA5 data, GCSFS is required but not installed. Install it with:\n"
1554
- " • `pip install roms-tools[stream]` or\n"
1555
- " • `conda install gcsfs`\n"
1556
- "Alternatively, install `roms-tools` with conda to include all dependencies."
1557
- )
1579
+ if not has_gcsfs():
1580
+ msg = get_pkg_error_msg("cloud-based ERA5 data", "gcsfs", "stream")
1581
+ raise RuntimeError(msg)
1558
1582
 
1559
1583
  super().__post_init__()
1560
1584
 
@@ -1582,9 +1606,9 @@ class ERA5Correction(Dataset):
1582
1606
  "time": "time",
1583
1607
  }
1584
1608
  )
1585
- climatology: bool | None = True
1609
+ climatology: bool = True
1586
1610
 
1587
- def __post_init__(self):
1611
+ def __post_init__(self) -> None:
1588
1612
  if not self.climatology:
1589
1613
  raise NotImplementedError(
1590
1614
  "Correction data must be a climatology. Set climatology to True."
@@ -1592,32 +1616,31 @@ class ERA5Correction(Dataset):
1592
1616
 
1593
1617
  super().__post_init__()
1594
1618
 
1595
- def choose_subdomain(self, target_coords, straddle: bool):
1596
- """Converts longitude values in the dataset if necessary and selects a subdomain
1597
- based on the specified coordinates.
1619
+ def match_subdomain(self, target_coords: dict[str, Any]) -> None:
1620
+ """
1621
+ Selects a subdomain from the dataset matching the specified coordinates.
1598
1622
 
1599
- This method converts longitude values between different ranges if required and then extracts a subset of the
1600
- dataset according to the given coordinates. It updates the dataset in place to reflect the selected subdomain.
1623
+ This method extracts a subset of the dataset (`self.ds`) based on given latitude
1624
+ and longitude values. If the dataset spans the globe, it concatenates longitudes
1625
+ to ensure seamless wrapping.
1601
1626
 
1602
1627
  Parameters
1603
1628
  ----------
1604
- target_coords : dict
1605
- A dictionary specifying the target coordinates for selecting the subdomain. Keys should correspond to the
1606
- dimension names of the dataset (e.g., latitude and longitude), and values should be the desired ranges or
1607
- specific coordinate values.
1608
- straddle : bool
1609
- If True, assumes that target longitudes are in the range [-180, 180]. If False, assumes longitudes are in the
1610
- range [0, 360]. This parameter determines how longitude values are converted if necessary.
1629
+ target_coords : dict[str, Any]
1630
+ A dictionary containing the target latitude and longitude values to select.
1631
+ Expected keys: "lat" and "lon", each mapped to a DataArray of coordinates.
1611
1632
 
1612
1633
  Raises
1613
1634
  ------
1614
1635
  ValueError
1615
- If the specified subdomain does not fully contain the specified latitude or longitude values. This can occur
1616
- if the dataset does not cover the full range of provided coordinates.
1636
+ If the selected subdomain does not contain all specified latitude or
1637
+ longitude values.
1617
1638
 
1618
1639
  Notes
1619
1640
  -----
1620
- - The dataset (`self.ds`) is updated in place to reflect the chosen subdomain.
1641
+ - The dataset (`self.ds`) is updated in place.
1642
+ - Assumes latitude values in `target_coords["lat"]` are within dataset bounds.
1643
+ - For global datasets, longitude concatenation is applied unconditionally.
1621
1644
  """
1622
1645
  # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is performed)
1623
1646
  subdomain = self.ds.sel({self.dim_names["latitude"]: target_coords["lat"]})
@@ -1731,7 +1754,7 @@ class RiverDataset:
1731
1754
  dim_names: dict[str, str]
1732
1755
  var_names: dict[str, str]
1733
1756
  opt_var_names: dict[str, str] | None = field(default_factory=dict)
1734
- climatology: bool | None = False
1757
+ climatology: bool = False
1735
1758
  ds: xr.Dataset = field(init=False, repr=False)
1736
1759
 
1737
1760
  def __post_init__(self):
@@ -1764,7 +1787,7 @@ class RiverDataset:
1764
1787
  ds : xr.Dataset
1765
1788
  The loaded xarray Dataset containing the forcing data.
1766
1789
  """
1767
- ds = _load_data(
1790
+ ds = load_data(
1768
1791
  self.filename, self.dim_names, use_dask=False, decode_times=False
1769
1792
  )
1770
1793
 
@@ -1916,7 +1939,7 @@ class RiverDataset:
1916
1939
  The dataset with rivers sorted by their volume in descending order.
1917
1940
  If the volume variable is not available, the original dataset is returned.
1918
1941
  """
1919
- if "vol" in self.opt_var_names:
1942
+ if self.opt_var_names is not None and "vol" in self.opt_var_names:
1920
1943
  volume_values = ds[self.opt_var_names["vol"]].values
1921
1944
  if isinstance(volume_values, np.ndarray):
1922
1945
  # Check if all volume values are the same
@@ -2076,7 +2099,7 @@ class DaiRiverDataset(RiverDataset):
2076
2099
  "vol": "vol_stn",
2077
2100
  }
2078
2101
  )
2079
- climatology: bool | None = False
2102
+ climatology: bool = False
2080
2103
 
2081
2104
  def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
2082
2105
  """Adds time information to the dataset based on the climatology flag and
@@ -2655,139 +2678,212 @@ def _check_dataset(
2655
2678
 
2656
2679
 
2657
2680
  def _select_relevant_times(
2658
- ds, time_dim, start_time, end_time=None, climatology=False
2681
+ ds: xr.Dataset,
2682
+ time_dim: str,
2683
+ start_time: datetime,
2684
+ end_time: datetime | None = None,
2685
+ climatology: bool = False,
2686
+ allow_flex_time: bool = False,
2659
2687
  ) -> xr.Dataset:
2660
- """Select a subset of the dataset based on the specified time range.
2688
+ """
2689
+ Select a subset of the dataset based on time constraints.
2690
+
2691
+ This function supports two main use cases:
2692
+
2693
+ 1. **Time range selection (start_time + end_time provided):**
2694
+ - Returns all records strictly between `start_time` and `end_time`.
2695
+ - Ensures at least one record at or before `start_time` and one record at or
2696
+ after `end_time` are included, even if they fall outside the strict range.
2661
2697
 
2662
- This method filters the dataset to include all records between `start_time` and `end_time`.
2663
- Additionally, it ensures that one record at or before `start_time` and one record at or
2664
- after `end_time` are included, even if they fall outside the strict time range.
2698
+ 2. **Initial condition selection (start_time provided, end_time=None):**
2699
+ - Delegates to `_select_initial_time`, which reduces the dataset to exactly one
2700
+ time entry.
2701
+ - If `allow_flex_time=True`, a +24-hour buffer around `start_time` is allowed,
2702
+ and the closest timestamp is chosen.
2703
+ - If `allow_flex_time=False`, requires an exact timestamp match.
2665
2704
 
2666
- If no `end_time` is specified, the method will select the time range of
2667
- [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
2705
+ Additional behavior:
2706
+ - If `climatology=True`, the dataset must contain exactly 12 time steps. If valid,
2707
+ the climatology dataset is returned without further filtering.
2708
+ - If the dataset uses `cftime` datetime objects, these are converted to
2709
+ `np.datetime64` before filtering.
2668
2710
 
2669
2711
  Parameters
2670
2712
  ----------
2671
2713
  ds : xr.Dataset
2672
- The input dataset to be filtered. Must contain a time dimension.
2673
- time_dim: str
2674
- Name of time dimension.
2714
+ The dataset to filter. Must contain a valid time dimension.
2715
+ time_dim : str
2716
+ Name of the time dimension in `ds`.
2675
2717
  start_time : datetime
2676
- The start time for selecting relevant data.
2677
- end_time : Optional[datetime], optional
2678
- The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided.
2679
- climatology : bool
2680
- Indicates whether the dataset is climatological. Defaults to False.
2718
+ Start time for filtering.
2719
+ end_time : datetime or None
2720
+ End time for filtering. If `None`, the function assumes an initial condition
2721
+ use case and selects exactly one timestamp.
2722
+ climatology : bool, optional
2723
+ If True, requires exactly 12 time steps and bypasses normal filtering.
2724
+ Defaults to False.
2725
+ allow_flex_time : bool, optional
2726
+ Whether to allow a +24h search window after `start_time` when `end_time`
2727
+ is None. If False (default), requires an exact match.
2681
2728
 
2682
2729
  Returns
2683
2730
  -------
2684
2731
  xr.Dataset
2685
- A dataset filtered to the specified time range, including the closest entries
2686
- at or before `start_time` and at or after `end_time` if applicable.
2732
+ A filtered dataset containing only the selected time entries.
2687
2733
 
2688
2734
  Raises
2689
2735
  ------
2690
2736
  ValueError
2691
- If no matching times are found between `start_time` and `start_time + 24 hours`.
2737
+ - If `climatology=True` but the dataset does not contain exactly 12 time steps.
2738
+ - If `climatology=False` and the dataset contains integer time values.
2739
+ - If no valid records are found within the requested range or window.
2692
2740
 
2693
2741
  Warns
2694
2742
  -----
2695
2743
  UserWarning
2696
- If the dataset contains exactly 12 time steps but the climatology flag is not set.
2697
- This may indicate that the dataset represents climatology data.
2698
-
2699
- UserWarning
2700
- If no records at or before `start_time` or no records at or after `end_time` are found.
2701
-
2702
- UserWarning
2703
- If the dataset does not contain any time dimension or the time dimension is incorrectly named.
2744
+ - If no records exist at or before `start_time` or at or after `end_time`.
2745
+ - If the specified time dimension does not exist in the dataset.
2704
2746
 
2705
2747
  Notes
2706
2748
  -----
2707
- - If the `climatology` flag is set and `end_time` is not provided, the method will
2708
- interpolate initial conditions from climatology data.
2709
- - If the dataset uses `cftime` datetime objects, these will be converted to standard
2710
- `np.datetime64` objects before filtering.
2749
+ - For initial conditions (end_time=None), see `_select_initial_time` for details
2750
+ on strict vs. flexible selection behavior.
2751
+ - Logs warnings instead of failing hard when boundary records are missing, and
2752
+ defaults to using the earliest or latest available time in such cases.
2711
2753
  """
2712
- if time_dim in ds.variables:
2713
- if climatology:
2714
- if len(ds[time_dim]) != 12:
2715
- raise ValueError(
2716
- f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
2717
- )
2718
- if not end_time:
2719
- # Convert from timedelta64[ns] to fractional days
2720
- ds["time"] = ds["time"] / np.timedelta64(1, "D")
2721
- # Interpolate from climatology for initial conditions
2722
- ds = interpolate_from_climatology(ds, time_dim, start_time)
2723
- else:
2724
- time_type = get_time_type(ds[time_dim])
2725
- if time_type == "int":
2726
- raise ValueError(
2727
- "The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
2728
- )
2729
- if time_type == "cftime":
2730
- ds = ds.assign_coords(
2731
- {time_dim: convert_cftime_to_datetime(ds[time_dim])}
2732
- )
2733
- if end_time:
2734
- end_time = end_time
2735
-
2736
- # Identify records before or at start_time
2737
- before_start = ds[time_dim] <= np.datetime64(start_time)
2738
- if before_start.any():
2739
- closest_before_start = (
2740
- ds[time_dim].where(before_start, drop=True).max()
2741
- )
2742
- else:
2743
- logging.warning("No records found at or before the start_time.")
2744
- closest_before_start = ds[time_dim].min()
2754
+ if time_dim not in ds.variables:
2755
+ logging.warning(
2756
+ f"Dataset does not contain time dimension '{time_dim}'. "
2757
+ "Please check variable naming or dataset structure."
2758
+ )
2759
+ return ds
2745
2760
 
2746
- # Identify records after or at end_time
2747
- after_end = ds[time_dim] >= np.datetime64(end_time)
2748
- if after_end.any():
2749
- closest_after_end = ds[time_dim].where(after_end, drop=True).min()
2750
- else:
2751
- logging.warning("No records found at or after the end_time.")
2752
- closest_after_end = ds[time_dim].max()
2761
+ time_type = get_time_type(ds[time_dim])
2753
2762
 
2754
- # Select records within the time range and add the closest before/after
2755
- within_range = (ds[time_dim] > np.datetime64(start_time)) & (
2756
- ds[time_dim] < np.datetime64(end_time)
2757
- )
2758
- selected_times = ds[time_dim].where(
2759
- within_range
2760
- | (ds[time_dim] == closest_before_start)
2761
- | (ds[time_dim] == closest_after_end),
2762
- drop=True,
2763
- )
2764
- ds = ds.sel({time_dim: selected_times})
2765
- else:
2766
- # Look in time range [start_time, start_time + 24h]
2767
- end_time = start_time + timedelta(days=1)
2768
- times = (np.datetime64(start_time) <= ds[time_dim]) & (
2769
- ds[time_dim] < np.datetime64(end_time)
2770
- )
2771
- if np.all(~times):
2772
- raise ValueError(
2773
- f"The dataset does not contain any time entries between the specified start_time: {start_time} "
2774
- f"and {start_time + timedelta(hours=24)}. "
2775
- "Please ensure the dataset includes time entries for that range."
2776
- )
2763
+ if climatology:
2764
+ if len(ds[time_dim]) != 12:
2765
+ raise ValueError(
2766
+ f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
2767
+ )
2768
+ else:
2769
+ if time_type == "int":
2770
+ raise ValueError(
2771
+ "The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
2772
+ )
2773
+ if time_type == "cftime":
2774
+ ds = ds.assign_coords({time_dim: convert_cftime_to_datetime(ds[time_dim])})
2777
2775
 
2778
- ds = ds.where(times, drop=True)
2779
- if ds.sizes[time_dim] > 1:
2780
- # Pick the time closest to start_time
2781
- ds = ds.isel({time_dim: 0})
2782
- logging.info(
2783
- f"Selected time entry closest to the specified start_time ({start_time}) within the range [{start_time}, {start_time + timedelta(hours=24)}]: {ds[time_dim].values}"
2784
- )
2776
+ if not end_time:
2777
+ # Assume we are looking for exactly one time record for initial conditions
2778
+ return _select_initial_time(
2779
+ ds, time_dim, start_time, climatology, allow_flex_time
2780
+ )
2781
+
2782
+ if climatology:
2783
+ return ds
2784
+
2785
+ # Identify records before or at start_time
2786
+ before_start = ds[time_dim] <= np.datetime64(start_time)
2787
+ if before_start.any():
2788
+ closest_before_start = ds[time_dim].where(before_start, drop=True)[-1]
2785
2789
  else:
2790
+ logging.warning(f"No records found at or before the start_time: {start_time}.")
2791
+ closest_before_start = ds[time_dim][0]
2792
+
2793
+ # Identify records after or at end_time
2794
+ after_end = ds[time_dim] >= np.datetime64(end_time)
2795
+ if after_end.any():
2796
+ closest_after_end = ds[time_dim].where(after_end, drop=True).min()
2797
+ else:
2798
+ logging.warning(f"No records found at or after the end_time: {end_time}.")
2799
+ closest_after_end = ds[time_dim].max()
2800
+
2801
+ # Select records within the time range and add the closest before/after
2802
+ within_range = (ds[time_dim] > np.datetime64(start_time)) & (
2803
+ ds[time_dim] < np.datetime64(end_time)
2804
+ )
2805
+ selected_times = ds[time_dim].where(
2806
+ within_range
2807
+ | (ds[time_dim] == closest_before_start)
2808
+ | (ds[time_dim] == closest_after_end),
2809
+ drop=True,
2810
+ )
2811
+ ds = ds.sel({time_dim: selected_times})
2812
+
2813
+ return ds
2814
+
2815
+
2816
+ def _select_initial_time(
2817
+ ds: xr.Dataset,
2818
+ time_dim: str,
2819
+ ini_time: datetime,
2820
+ climatology: bool,
2821
+ allow_flex_time: bool = False,
2822
+ ) -> xr.Dataset:
2823
+ """Select exactly one initial time from dataset.
2824
+
2825
+ Parameters
2826
+ ----------
2827
+ ds : xr.Dataset
2828
+ The input dataset with a time dimension.
2829
+ time_dim : str
2830
+ Name of the time dimension.
2831
+ ini_time : datetime
2832
+ The desired initial time.
2833
+ allow_flex_time : bool
2834
+ - If True: allow a +24h window and pick the closest available timestamp.
2835
+ - If False (default): require an exact match, otherwise raise ValueError.
2836
+
2837
+ Returns
2838
+ -------
2839
+ xr.Dataset
2840
+ Dataset reduced to exactly one timestamp.
2841
+
2842
+ Raises
2843
+ ------
2844
+ ValueError
2845
+ If no matching time is found (when `allow_flex_time=False`), or no entries are
2846
+ available within the +24h window (when `allow_flex_time=True`).
2847
+ """
2848
+ if climatology:
2849
+ # Convert from timedelta64[ns] to fractional days
2850
+ ds["time"] = ds["time"] / np.timedelta64(1, "D")
2851
+ # Interpolate from climatology for initial conditions
2852
+ return interpolate_from_climatology(ds, time_dim, ini_time)
2853
+
2854
+ if allow_flex_time:
2855
+ # Look in time range [ini_time, ini_time + 24h)
2856
+ end_time = ini_time + timedelta(days=1)
2857
+ times = (np.datetime64(ini_time) <= ds[time_dim]) & (
2858
+ ds[time_dim] < np.datetime64(end_time)
2859
+ )
2860
+
2861
+ if np.all(~times):
2862
+ raise ValueError(
2863
+ f"No time entries found between {ini_time} and {end_time}."
2864
+ )
2865
+
2866
+ ds = ds.where(times, drop=True)
2867
+ if ds.sizes[time_dim] > 1:
2868
+ # Pick the time closest to start_time
2869
+ ds = ds.isel({time_dim: 0})
2870
+
2786
2871
  logging.warning(
2787
- "Dataset does not contain any time information. Please check if the time dimension "
2788
- "is correctly named or if the dataset includes time data."
2872
+ f"Selected time entry closest to the specified start_time in +24 hour range: {ds[time_dim].values}"
2789
2873
  )
2790
2874
 
2875
+ else:
2876
+ # Strict match required
2877
+ if not (ds[time_dim].values == np.datetime64(ini_time)).any():
2878
+ raise ValueError(
2879
+ f"No exact match found for initial time {ini_time}. Consider setting allow_flex_time to True."
2880
+ )
2881
+
2882
+ ds = ds.sel({time_dim: np.datetime64(ini_time)})
2883
+
2884
+ if time_dim not in ds.dims:
2885
+ ds = ds.expand_dims(time_dim)
2886
+
2791
2887
  return ds
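A small self-contained sketch of the strict versus flexible behaviour described above, calling the private helper directly on a toy dataset (illustrative only; within roms-tools this path is reached through `Dataset.select_relevant_times`):

    from datetime import datetime

    import numpy as np
    import xarray as xr

    from roms_tools.setup.datasets import _select_initial_time

    times = np.array(["2012-01-01T06:00", "2012-01-02T06:00"], dtype="datetime64[ns]")
    ds = xr.Dataset({"temp": ("time", [4.0, 5.0])}, coords={"time": times})

    # Flexible: no record exists exactly at midnight, but 06:00 falls inside the
    # +24 h window, so that snapshot is selected.
    ds_flex = _select_initial_time(
        ds, "time", datetime(2012, 1, 1), climatology=False, allow_flex_time=True
    )

    # Strict (default): the same request raises a ValueError because there is no
    # record exactly at 2012-01-01 00:00.
    # _select_initial_time(ds, "time", datetime(2012, 1, 1), climatology=False)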
2792
2888
 
2793
2889
 
@@ -2916,7 +3012,7 @@ def _deduplicate_river_names(
2916
3012
 
2917
3013
  # Count all names
2918
3014
  name_counts = Counter(names)
2919
- seen = defaultdict(int)
3015
+ seen: defaultdict[str, int] = defaultdict(int)
2920
3016
 
2921
3017
  unique_names = []
2922
3018
  for name in names:
@@ -2935,3 +3031,275 @@ def _deduplicate_river_names(
2935
3031
  ds[name_var] = updated_array
2936
3032
 
2937
3033
  return ds
3034
+
3035
+
3036
+ def _concatenate_longitudes(
3037
+ ds: xr.Dataset,
3038
+ dim_names: Mapping[str, str],
3039
+ end: TConcatEndTypes,
3040
+ use_dask: bool = False,
3041
+ ) -> xr.Dataset:
3042
+ """
3043
+ Concatenate longitude dimension to handle global grids that cross
3044
+ the 0/360-degree or -180/180-degree boundary.
3045
+
3046
+ Extends the longitude dimension either lower, upper, or both sides
3047
+ by +/- 360 degrees and duplicates the corresponding variables along
3048
+ that dimension.
3049
+
3050
+ Parameters
3051
+ ----------
3052
+ ds : xr.Dataset
3053
+ Input xarray Dataset to be concatenated.
3054
+ dim_names : Mapping[str, str]
3055
+ Dictionary or mapping containing dimension names. Must include "longitude".
3056
+ end : str
3057
+ Specifies which side(s) to extend:
3058
+ - "lower": extend by subtracting 360 degrees.
3059
+ - "upper": extend by adding 360 degrees.
3060
+ - "both": extend on both sides.
3061
+ use_dask : bool, default False
3062
+ If True, chunk the concatenated longitude dimension using Dask.
3063
+
3064
+ Returns
3065
+ -------
3066
+ xr.Dataset
3067
+ Dataset with longitude dimension extended and data variables duplicated.
3068
+
3069
+ Notes
3070
+ -----
3071
+ Only data variables containing the longitude dimension are concatenated;
3072
+ others are left unchanged.
3073
+ """
3074
+ ds_concat = xr.Dataset()
3075
+
3076
+ lon_name = dim_names["longitude"]
3077
+ lon = ds[lon_name]
3078
+
3079
+ match end:
3080
+ case "lower":
3081
+ lon_concat = xr.concat([lon - 360, lon], dim=lon_name)
3082
+ n_copies = 2
3083
+ case "upper":
3084
+ lon_concat = xr.concat([lon, lon + 360], dim=lon_name)
3085
+ n_copies = 2
3086
+ case "both":
3087
+ lon_concat = xr.concat([lon - 360, lon, lon + 360], dim=lon_name)
3088
+ n_copies = 3
3089
+ case _:
3090
+ raise ValueError(f"Invalid `end` value: {end}")
3091
+
3092
+ for var in ds.variables:
3093
+ if lon_name in ds[var].dims:
3094
+ field = ds[var]
3095
+ field_concat = xr.concat([field] * n_copies, dim=lon_name)
3096
+
3097
+ if use_dask:
3098
+ field_concat = field_concat.chunk({lon_name: -1})
3099
+
3100
+ ds_concat[var] = field_concat
3101
+ else:
3102
+ ds_concat[var] = ds[var]
3103
+
3104
+ ds_concat = ds_concat.assign_coords({lon_name: lon_concat.values})
3105
+
3106
+ return ds_concat
3107
+
3108
+
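A toy illustration of the standalone helper defined above, calling the private function directly (for illustration only):

    import numpy as np
    import xarray as xr

    from roms_tools.setup.datasets import _concatenate_longitudes

    ds = xr.Dataset(
        {"sst": ("longitude", np.array([10.0, 11.0, 12.0, 13.0]))},
        coords={"longitude": [0.0, 90.0, 180.0, 270.0]},
    )
    # end="both" tiles every longitude-dependent variable three times and extends
    # the coordinate by -360 and +360 degrees, so a regional subdomain near the
    # dateline or Greenwich never runs off the end of a global dataset.
    extended = _concatenate_longitudes(ds, {"longitude": "longitude"}, end="both")
    print(extended.longitude.values)  # -360 ... 630 in steps of 90
    print(extended.sst.shape)         # (12,)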
3109
+ def choose_subdomain(
3110
+ ds: xr.Dataset,
3111
+ dim_names: Mapping[str, str],
3112
+ resolution: float,
3113
+ is_global: bool,
3114
+ target_coords: Mapping[str, Any],
3115
+ buffer_points: int = 20,
3116
+ use_dask: bool = False,
3117
+ ) -> xr.Dataset:
3118
+ """
3119
+ Select a subdomain from an xarray Dataset based on target coordinates,
3120
+ with optional buffer points and global longitude handling.
3121
+
3122
+ Parameters
3123
+ ----------
3124
+ ds : xr.Dataset
3125
+ The full xarray Dataset to subset.
3126
+ dim_names : Mapping[str, str]
3127
+ Dictionary mapping logical dimension names to dataset dimension names.
3128
+ Example: {"latitude": "latitude", "longitude": "longitude"}.
3129
+ resolution : float
3130
+ Spatial resolution of the dataset, used to compute buffer margin.
3131
+ is_global : bool
3132
+ Whether the dataset covers global longitude (affects concatenation logic).
3133
+ target_coords : Mapping[str, Any]
3134
+ Dictionary containing target latitude and longitude coordinates.
3135
+ Expected keys: "lat", "lon", and "straddle" (boolean for crossing 180°).
3136
+ buffer_points : int, default 20
3137
+ Number of grid points to extend beyond the target coordinates.
3138
+ use_dask: bool, optional
3139
+ Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is processed eagerly. Defaults to False.
3140
+
3141
+ Returns
3142
+ -------
3143
+ xr.Dataset
3144
+ Subset of the input Dataset covering the requested coordinates plus buffer.
3145
+
3146
+ Raises
3147
+ ------
3148
+ ValueError
3149
+ If the selected latitude or longitude range does not intersect the dataset.
3150
+ """
3151
+ lat_min = target_coords["lat"].min().values
3152
+ lat_max = target_coords["lat"].max().values
3153
+ lon_min = target_coords["lon"].min().values
3154
+ lon_max = target_coords["lon"].max().values
3155
+
3156
+ margin = resolution * buffer_points
3157
+
3158
+ # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is necessary)
3159
+ subdomain = ds.sel(
3160
+ **{
3161
+ dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
3162
+ }
3163
+ )
3164
+ lon = subdomain[dim_names["longitude"]]
3165
+
3166
+ if is_global:
3167
+ concats = []
3168
+ # Concatenate only if necessary
3169
+ if lon_max + margin > lon.max():
3170
+ # See if shifting by +360 degrees helps
3171
+ if (lon_min - margin > (lon + 360).min()) and (
3172
+ lon_max + margin < (lon + 360).max()
3173
+ ):
3174
+ subdomain[dim_names["longitude"]] = lon + 360
3175
+ lon = subdomain[dim_names["longitude"]]
3176
+ else:
3177
+ concats.append("upper")
3178
+ if lon_min - margin < lon.min():
3179
+ # See if shifting by -360 degrees helps
3180
+ if (lon_min - margin > (lon - 360).min()) and (
3181
+ lon_max + margin < (lon - 360).max()
3182
+ ):
3183
+ subdomain[dim_names["longitude"]] = lon - 360
3184
+ lon = subdomain[dim_names["longitude"]]
3185
+ else:
3186
+ concats.append("lower")
3187
+
3188
+ if concats:
3189
+ end = "both" if len(concats) == 2 else concats[0]
3190
+ end = cast(TConcatEndTypes, end)
3191
+ subdomain = _concatenate_longitudes(
3192
+ subdomain, dim_names=dim_names, end=end, use_dask=use_dask
3193
+ )
3194
+ lon = subdomain[dim_names["longitude"]]
3195
+
3196
+ else:
3197
+ # Adjust longitude range if needed to match the expected range
3198
+ if not target_coords["straddle"]:
3199
+ if lon.min() < -180:
3200
+ if lon_max + margin > 0:
3201
+ lon_min -= 360
3202
+ lon_max -= 360
3203
+ elif lon.min() < 0:
3204
+ if lon_max + margin > 180:
3205
+ lon_min -= 360
3206
+ lon_max -= 360
3207
+
3208
+ if target_coords["straddle"]:
3209
+ if lon.max() > 360:
3210
+ if lon_min - margin < 180:
3211
+ lon_min += 360
3212
+ lon_max += 360
3213
+ elif lon.max() > 180:
3214
+ if lon_min - margin < 0:
3215
+ lon_min += 360
3216
+ lon_max += 360
3217
+ # Select the subdomain in longitude direction
3218
+ subdomain = subdomain.sel(
3219
+ **{
3220
+ dim_names["longitude"]: slice(lon_min - margin, lon_max + margin),
3221
+ }
3222
+ )
3223
+ # Check if the selected subdomain has zero dimensions in latitude or longitude
3224
+ if (
3225
+ dim_names["latitude"] not in subdomain
3226
+ or subdomain[dim_names["latitude"]].size == 0
3227
+ ):
3228
+ raise ValueError("Selected latitude range does not intersect with dataset.")
3229
+
3230
+ if (
3231
+ dim_names["longitude"] not in subdomain
3232
+ or subdomain[dim_names["longitude"]].size == 0
3233
+ ):
3234
+ raise ValueError("Selected longitude range does not intersect with dataset.")
3235
+
3236
+ # Adjust longitudes to expected range if needed
3237
+ lon = subdomain[dim_names["longitude"]]
3238
+ if target_coords["straddle"]:
3239
+ subdomain[dim_names["longitude"]] = xr.where(lon > 180, lon - 360, lon)
3240
+ else:
3241
+ subdomain[dim_names["longitude"]] = xr.where(lon < 0, lon + 360, lon)
3242
+
3243
+ return subdomain
3244
+
3245
+
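The `target_coords` mapping consumed by `choose_subdomain` above is normally produced by `get_target_coords(grid)`; a hand-built sketch of the expected structure, using placeholder data (the `straddle` flag is described in the docstring above):

    import numpy as np
    import xarray as xr

    from roms_tools.setup.datasets import choose_subdomain

    # Placeholder source data on a coarse, regular 1-degree global grid.
    src = xr.Dataset(
        {"temp": (("latitude", "longitude"), np.zeros((181, 360)))},
        coords={"latitude": np.arange(-90, 91), "longitude": np.arange(0, 360)},
    )
    target_coords = {
        "lat": xr.DataArray(np.linspace(58.0, 62.0, 20)),
        "lon": xr.DataArray(np.linspace(335.0, 342.0, 20)),
        "straddle": False,
    }
    subset = choose_subdomain(
        ds=src,
        dim_names={"latitude": "latitude", "longitude": "longitude"},
        resolution=1.0,
        is_global=True,
        target_coords=target_coords,
        buffer_points=2,
    )
    # subset now spans roughly 56-64 °N and 333-344 °E, i.e. the target box
    # plus a 2-grid-point margin on each side.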
3246
+ def get_glorys_bounds(
3247
+ grid: Grid,
3248
+ glorys_grid_path: Path | str | None = None,
3249
+ ) -> dict[str, float]:
3250
+ """
3251
+ Compute the latitude/longitude bounds of a GLORYS spatial subset
3252
+ that fully covers the given ROMS grid (with margin for regridding).
3253
+
3254
+ Parameters
3255
+ ----------
3256
+ grid : Grid
3257
+ The grid object.
3258
+ glorys_grid_path : Path or str, optional
3259
+ Path to the GLORYS global grid file. If None, defaults to
3260
+ "<repo_root>/data/grids/GLORYS_global_grid.nc".
3261
+
3262
+ Returns
3263
+ -------
3264
+ dict[str, float]
3265
+ Dictionary containing the bounding box values:
3266
+
3267
+ - `"minimum_latitude"` : float
3268
+ - `"maximum_latitude"` : float
3269
+ - `"minimum_longitude"` : float
3270
+ - `"maximum_longitude"` : float
3271
+
3272
+ Notes
3273
+ -----
3274
+ - The resolution is estimated as the mean of latitude and longitude spacing.
3275
+ """
3276
+ if glorys_grid_path is None:
3277
+ glorys_grid_path = GLORYS_GLOBAL_GRID_PATH
3278
+
3279
+ ds = xr.open_dataset(glorys_grid_path)
3280
+
3281
+ # Estimate grid resolution (mean spacing in degrees)
3282
+ res_lat = ds.latitude.diff("latitude").mean()
3283
+ res_lon = ds.longitude.diff("longitude").mean()
3284
+ resolution = (res_lat + res_lon) / 2
3285
+
3286
+ # Extract target grid coordinates
3287
+ target_coords = get_target_coords(grid)
3288
+
3289
+ # Select subdomain with margin
3290
+ ds_subset = choose_subdomain(
3291
+ ds=ds,
3292
+ dim_names={"latitude": "latitude", "longitude": "longitude"},
3293
+ resolution=resolution,
3294
+ is_global=True,
3295
+ target_coords=target_coords,
3296
+ buffer_points=DEFAULT_NR_BUFFER_POINTS + 1,
3297
+ )
3298
+
3299
+ # Compute bounds
3300
+ return {
3301
+ "minimum_latitude": float(ds_subset.latitude.min()),
3302
+ "maximum_latitude": float(ds_subset.latitude.max()),
3303
+ "minimum_longitude": float(ds_subset.longitude.min()),
3304
+ "maximum_longitude": float(ds_subset.longitude.max()),
3305
+ }
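Finally, a hedged end-to-end sketch of the new `get_glorys_bounds` helper: the Grid arguments follow the documented roms-tools Grid API but are placeholders, and the returned dictionary uses the keys listed in the docstring above (shaped for a Copernicus Marine subset request).

    from roms_tools import Grid
    from roms_tools.setup.datasets import get_glorys_bounds

    # A small illustrative domain south of Iceland (values are placeholders).
    grid = Grid(
        nx=100, ny=100, size_x=500, size_y=500,
        center_lon=-21, center_lat=61, rot=0,
    )
    bounds = get_glorys_bounds(grid)
    # e.g. forward these bounds to a copernicusmarine subset/open_dataset call
    print(bounds["minimum_latitude"], bounds["maximum_latitude"])
    print(bounds["minimum_longitude"], bounds["maximum_longitude"])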