roms-tools 0.20__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,86 +1,18 @@
- import pooch
  import xarray as xr
  from dataclasses import dataclass, field
  import glob
  from datetime import datetime, timedelta
  import numpy as np
- from typing import Dict, Optional, List
+ from typing import Dict, Optional
  import dask
-
- # Create a Pooch object to manage the global topography data
- pup_data = pooch.create(
-     # Use the default cache folder for the operating system
-     path=pooch.os_cache("roms-tools"),
-     base_url="https://github.com/CWorthy-ocean/roms-tools-data/raw/main/",
-     # The registry specifies the files that can be fetched
-     registry={
-         "etopo5.nc": "sha256:23600e422d59bbf7c3666090166a0d468c8ee16092f4f14e32c4e928fbcd627b",
-     },
- )
-
- # Create a Pooch object to manage the test data
- pup_test_data = pooch.create(
-     # Use the default cache folder for the operating system
-     path=pooch.os_cache("roms-tools"),
-     base_url="https://github.com/CWorthy-ocean/roms-tools-test-data/raw/main/",
-     # The registry specifies the files that can be fetched
-     registry={
-         "GLORYS_test_data.nc": "648f88ec29c433bcf65f257c1fb9497bd3d5d3880640186336b10ed54f7129d2",
-         "ERA5_regional_test_data.nc": "bd12ce3b562fbea2a80a3b79ba74c724294043c28dc98ae092ad816d74eac794",
-         "ERA5_global_test_data.nc": "8ed177ab64c02caf509b9fb121cf6713f286cc603b1f302f15f3f4eb0c21dc4f",
-         "TPXO_global_test_data.nc": "457bfe87a7b247ec6e04e3c7d3e741ccf223020c41593f8ae33a14f2b5255e60",
-         "TPXO_regional_test_data.nc": "11739245e2286d9c9d342dce5221e6435d2072b50028bef2e86a30287b3b4032",
-     },
+ import warnings
+ from roms_tools.setup.utils import (
+     assign_dates_to_climatology,
+     interpolate_from_climatology,
+     is_cftime_datetime,
+     convert_cftime_to_datetime,
  )
-
-
- def fetch_topo(topography_source: str) -> xr.Dataset:
-     """
-     Load the global topography data as an xarray Dataset.
-
-     Parameters
-     ----------
-     topography_source : str
-         The source of the topography data to be loaded. Available options:
-         - "ETOPO5"
-
-     Returns
-     -------
-     xr.Dataset
-         The global topography data as an xarray Dataset.
-     """
-     # Mapping from user-specified topography options to corresponding filenames in the registry
-     topo_dict = {"ETOPO5": "etopo5.nc"}
-
-     # Fetch the file using Pooch, downloading if necessary
-     fname = pup_data.fetch(topo_dict[topography_source])
-
-     # Load the dataset using xarray and return it
-     ds = xr.open_dataset(fname)
-     return ds
-
-
- def download_test_data(filename: str) -> str:
-     """
-     Download the test data file.
-
-     Parameters
-     ----------
-     filename : str
-         The name of the test data file to be downloaded. Available options:
-         - "GLORYS_test_data.nc"
-         - "ERA5_regional_test_data.nc"
-         - "ERA5_global_test_data.nc"
-
-     Returns
-     -------
-     str
-         The path to the downloaded test data file.
-     """
-     # Fetch the file using Pooch, downloading if necessary
-     fname = pup_test_data.fetch(filename)
-
-     return fname
+ from roms_tools.setup.download import download_correction_data


  @dataclass(frozen=True, kw_only=True)
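The 0.2.0 module-level pooch registries (pup_data, pup_test_data) and the fetch_topo / download_test_data helpers are gone; 1.0.1 imports its download logic from roms_tools.setup.download instead. For readers unfamiliar with the removed pattern, a minimal self-contained sketch of what pooch.create plus fetch does, reusing the URL and hash from the deleted block:

    import pooch
    import xarray as xr

    # Mirrors the removed pup_data object: files are downloaded once, cached
    # in the OS cache directory, and verified against their SHA-256 hash.
    pup = pooch.create(
        path=pooch.os_cache("roms-tools"),
        base_url="https://github.com/CWorthy-ocean/roms-tools-data/raw/main/",
        registry={
            "etopo5.nc": "sha256:23600e422d59bbf7c3666090166a0d468c8ee16092f4f14e32c4e928fbcd627b",
        },
    )

    fname = pup.fetch("etopo5.nc")  # downloads on first call, reuses the cache afterwards
    ds = xr.open_dataset(fname)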
@@ -97,13 +29,17 @@ class Dataset:
      end_time : Optional[datetime], optional
          The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
          or no filtering is applied if start_time is not provided.
-     var_names : List[str]
-         List of variable names that are required in the dataset.
+     var_names: Dict[str, str]
+         Dictionary of variable names that are required in the dataset.
      dim_names: Dict[str, str], optional
          Dictionary specifying the names of dimensions in the dataset.
+     climatology : bool
+         Indicates whether the dataset is climatological. Defaults to False.

      Attributes
      ----------
+     is_global : bool
+         Indicates whether the dataset covers the entire globe.
      ds : xr.Dataset
          The xarray Dataset containing the forcing data on its original grid.
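The var_names change from List[str] to Dict[str, str] runs through the rest of the diff: keys are the names roms-tools uses internally, values are the variable names in the source file. A small sketch of the difference (variable names borrowed from the GLORYSDataset defaults added later in this diff):

    # 0.2.x: a flat list of source-file variable names
    var_names_old = ["thetao", "so", "zos"]

    # 1.0.1: internal name -> source-file name
    var_names_new = {"temp": "thetao", "salt": "so", "zeta": "zos"}

    # Membership tests therefore move from the list to the dict's values
    assert "thetao" in var_names_new.values()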

@@ -123,7 +59,7 @@ class Dataset:
      filename: str
      start_time: Optional[datetime] = None
      end_time: Optional[datetime] = None
-     var_names: List[str]
+     var_names: Dict[str, str]
      dim_names: Dict[str, str] = field(
          default_factory=lambda: {
              "longitude": "longitude",
@@ -131,28 +67,40 @@ class Dataset:
              "time": "time",
          }
      )
+     climatology: Optional[bool] = False

+     is_global: bool = field(init=False, repr=False)
      ds: xr.Dataset = field(init=False, repr=False)

      def __post_init__(self):
+         """
+         Post-initialization processing:
+         1. Loads the dataset from the specified filename.
+         2. Applies time filtering based on start_time and end_time if provided.
+         3. Selects relevant fields as specified by var_names.
+         4. Ensures latitude values are in ascending order.
+         5. Checks if the dataset covers the entire globe and adjusts if necessary.
+         """
+
          ds = self.load_data()
+         self.check_dataset(ds)

          # Select relevant times
          if "time" in self.dim_names and self.start_time is not None:
+             ds = self.add_time_info(ds)
              ds = self.select_relevant_times(ds)

          # Select relevant fields
          ds = self.select_relevant_fields(ds)

          # Make sure that latitude is ascending
-         diff = np.diff(ds[self.dim_names["latitude"]])
-         if np.all(diff < 0):
-             ds = ds.isel(**{self.dim_names["latitude"]: slice(None, None, -1)})
+         ds = self.ensure_latitude_ascending(ds)

          # Check whether the data covers the entire globe
-         is_global = self.check_if_global(ds)
+         object.__setattr__(self, "is_global", self.check_if_global(ds))

-         if is_global:
+         # If dataset is global concatenate three copies of field along longitude dimension
+         if self.is_global:
              ds = self.concatenate_longitudes(ds)

          object.__setattr__(self, "ds", ds)
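Because Dataset is declared with frozen=True, __post_init__ cannot assign attributes directly; that is why the new is_global, like ds, is written via object.__setattr__. A minimal sketch of the pattern in isolation:

    from dataclasses import dataclass, field

    @dataclass(frozen=True)
    class Example:
        x: int
        doubled: int = field(init=False, repr=False)

        def __post_init__(self):
            # self.doubled = 2 * self.x would raise FrozenInstanceError here
            object.__setattr__(self, "doubled", 2 * self.x)

    assert Example(x=3).doubled == 6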
@@ -208,6 +156,34 @@ class Dataset:

          return ds

+     def check_dataset(self, ds: xr.Dataset) -> None:
+         """
+         Check if the dataset contains the specified variables and dimensions.
+
+         Parameters
+         ----------
+         ds : xr.Dataset
+             The xarray Dataset to check.
+
+         Raises
+         ------
+         ValueError
+             If the dataset does not contain the specified variables or dimensions.
+         """
+         missing_vars = [
+             var for var in self.var_names.values() if var not in ds.data_vars
+         ]
+         if missing_vars:
+             raise ValueError(
+                 f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
+             )
+
+         missing_dims = [dim for dim in self.dim_names.values() if dim not in ds.dims]
+         if missing_dims:
+             raise ValueError(
+                 f"Dataset does not contain all required dimensions. The following dimensions are missing: {missing_dims}"
+             )
+
      def select_relevant_fields(self, ds) -> xr.Dataset:
          """
          Selects and returns a subset of the dataset containing only the variables specified in `self.var_names`.
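The new check_dataset turns a late KeyError into an early, explicit ValueError. A sketch of the behaviour it adds, with a deliberately incomplete toy dataset (names illustrative):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"thetao": ("time", np.zeros(3))})

    var_names = {"temp": "thetao", "salt": "so"}  # "so" is deliberately absent
    missing_vars = [v for v in var_names.values() if v not in ds.data_vars]
    if missing_vars:
        print(f"would raise ValueError: missing {missing_vars}")  # -> ['so']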
@@ -222,26 +198,36 @@ class Dataset:
          xr.Dataset
              A dataset containing only the variables specified in `self.var_names`.

-         Raises
-         ------
-         ValueError
-             If `ds` does not contain all variables listed in `self.var_names`.
-
          """
-         missing_vars = [var for var in self.var_names if var not in ds.data_vars]
-         if missing_vars:
-             raise ValueError(
-                 f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
-             )

          for var in ds.data_vars:
-             if var not in self.var_names:
+             if var not in self.var_names.values():
                  ds = ds.drop_vars(var)

          return ds

-     def select_relevant_times(self, ds) -> xr.Dataset:

+     def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
+         """
+         Placeholder method to be overridden by child classes to add time information to the dataset.
+
+         This method is intended as a placeholder and should be implemented in subclasses
+         to provide specific functionality for adding time-related information to the dataset.
+
+         Parameters
+         ----------
+         ds : xr.Dataset
+             The xarray Dataset to which time information will be added.
+
+         Returns
+         -------
+         xr.Dataset
+             The xarray Dataset with time information added (as implemented by child classes).
+         """
+         return ds
+
+     def select_relevant_times(self, ds) -> xr.Dataset:
          """
          Selects and returns the subset of the dataset corresponding to the specified time range.

@@ -259,22 +245,53 @@ class Dataset:
          xr.Dataset
              A dataset containing only the data points within the specified time range.

+         Raises
+         ------
+         ValueError
+             If no matching times are found or if the number of matching times does not meet expectations.
+
+         Warns
+         -----
+         UserWarning
+             If the dataset contains only 12 time steps but the climatology flag is not set.
+             This may indicate that the dataset represents climatology data.
          """

          time_dim = self.dim_names["time"]
-
-         if not self.end_time:
-             end_time = self.start_time + timedelta(days=1)
+         if time_dim in ds.coords or time_dim in ds.data_vars:
+             if self.climatology:
+                 if not self.end_time:
+                     # Interpolate from climatology for initial conditions
+                     ds = interpolate_from_climatology(
+                         ds, self.dim_names["time"], self.start_time
+                     )
+             else:
+                 if len(ds[time_dim]) == 12:
+                     warnings.warn(
+                         "The dataset contains exactly 12 time steps. This may indicate that it is "
+                         "climatological data. Please verify if climatology is appropriate for your "
+                         "analysis and set the climatology flag to True."
+                     )
+                 if is_cftime_datetime(ds[time_dim]):
+                     ds = ds.assign_coords(
+                         {time_dim: convert_cftime_to_datetime(ds[time_dim])}
+                     )
+                 if not self.end_time:
+                     end_time = self.start_time + timedelta(days=1)
+                 else:
+                     end_time = self.end_time
+
+                 times = (np.datetime64(self.start_time) <= ds[time_dim]) & (
+                     ds[time_dim] < np.datetime64(end_time)
+                 )
+                 ds = ds.where(times, drop=True)
          else:
-             end_time = self.end_time
-
-         times = (np.datetime64(self.start_time) <= ds[time_dim]) & (
-             ds[time_dim] < np.datetime64(end_time)
-         )
-         ds = ds.where(times, drop=True)
-
+             warnings.warn(
+                 "Dataset does not contain any time information. Please check if the time dimension "
+                 "is correctly named or if the dataset includes time data."
+             )
          if not ds.sizes[time_dim]:
-             raise ValueError("No matching times found.")
+             raise ValueError("No matching times found in the dataset.")

          if not self.end_time:
              if ds.sizes[time_dim] != 1:
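For non-climatology data the selection is a half-open window [start_time, end_time), with end_time defaulting to start_time + 1 day. A quick standalone illustration of that predicate (dates illustrative):

    from datetime import datetime, timedelta
    import numpy as np
    import xarray as xr

    time = xr.DataArray(
        np.array(["2011-12-31", "2012-01-01", "2012-01-02"], dtype="datetime64[ns]"),
        dims="time",
    )
    start_time = datetime(2012, 1, 1)
    end_time = start_time + timedelta(days=1)  # default when end_time is omitted

    keep = (np.datetime64(start_time) <= time) & (time < np.datetime64(end_time))
    print(keep.values)  # [False  True False]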
@@ -285,6 +302,27 @@ class Dataset:

          return ds

+     def ensure_latitude_ascending(self, ds: xr.Dataset) -> xr.Dataset:
+         """
+         Ensure that the latitude dimension is in ascending order.
+
+         Parameters
+         ----------
+         ds : xr.Dataset
+             The xarray Dataset to check.
+
+         Returns
+         -------
+         ds : xr.Dataset
+             The xarray Dataset with latitude in ascending order.
+         """
+         # Make sure that latitude is ascending
+         lat_diff = np.diff(ds[self.dim_names["latitude"]])
+         if np.all(lat_diff < 0):
+             ds = ds.isel(**{self.dim_names["latitude"]: slice(None, None, -1)})
+
+         return ds
+
      def check_if_global(self, ds) -> bool:
          """
          Checks if the dataset covers the entire globe in the longitude dimension.
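The inlined latitude check from __post_init__ is now the reusable ensure_latitude_ascending; it only reverses the axis when latitude is strictly descending. In isolation:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"t": ("latitude", [1.0, 2.0, 3.0])},
        coords={"latitude": [30.0, 20.0, 10.0]},  # descending source grid
    )
    if np.all(np.diff(ds["latitude"]) < 0):
        ds = ds.isel(latitude=slice(None, None, -1))
    print(ds["latitude"].values)  # [10. 20. 30.]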
@@ -306,7 +344,7 @@ class Dataset:
          dlon = (
              ds[self.dim_names["longitude"]][0] - ds[self.dim_names["longitude"]][-1]
          ) % 360.0
-         is_global = np.isclose(dlon, dlon_mean, rtol=0.0, atol=1e-3)
+         is_global = np.isclose(dlon, dlon_mean, rtol=0.0, atol=1e-3).item()

          return is_global

@@ -341,7 +379,7 @@ class Dataset:

          ds_concatenated[self.dim_names["longitude"]] = lon_concatenated

-         for var in self.var_names:
+         for var in self.var_names.values():
              if self.dim_names["longitude"] in ds[var].dims:
                  field = ds[var]
                  field_concatenated = xr.concat(
@@ -358,15 +396,15 @@ class Dataset:
          self, latitude_range, longitude_range, margin, straddle, return_subdomain=False
      ):
          """
-         Selects a subdomain from the given xarray Dataset based on latitude and longitude ranges,
-         extending the selection by the specified margin. Handles the conversion of longitude values
-         in the dataset from one range to another.
+         Selects a subdomain from the xarray Dataset based on specified latitude and longitude ranges,
+         extending the selection by a specified margin. Handles longitude conversions to accommodate different
+         longitude ranges.

          Parameters
          ----------
-         latitude_range : tuple
+         latitude_range : tuple of float
              A tuple (lat_min, lat_max) specifying the minimum and maximum latitude values of the subdomain.
-         longitude_range : tuple
+         longitude_range : tuple of float
              A tuple (lon_min, lon_max) specifying the minimum and maximum longitude values of the subdomain.
          margin : float
              Margin in degrees to extend beyond the specified latitude and longitude ranges when selecting the subdomain.
@@ -374,45 +412,53 @@ class Dataset:
              If True, target longitudes are expected in the range [-180, 180].
              If False, target longitudes are expected in the range [0, 360].
          return_subdomain : bool, optional
-             If True, returns the subset of the original dataset. If False, assigns it to self.ds.
-             Default is False.
+             If True, returns the subset of the original dataset as an xarray Dataset. If False, assigns the subset to `self.ds`.
+             Defaults to False.

          Returns
          -------
-         xr.Dataset
-             The subset of the original dataset representing the chosen subdomain, including an extended area
-             to cover one extra grid point beyond the specified ranges if return_subdomain is True.
-             Otherwise, returns None.
+         xr.Dataset or None
+             If `return_subdomain` is True, returns the subset of the original dataset representing the chosen subdomain,
+             including an extended area to cover one extra grid point beyond the specified ranges. If `return_subdomain` is False,
+             returns None as the subset is assigned to `self.ds`.
+
+         Notes
+         -----
+         This method adjusts the longitude range if necessary to ensure it matches the expected range for the dataset.
+         It also handles longitude discontinuities that can occur when converting to different longitude ranges.
+         This is important for avoiding artifacts in the interpolation process.

          Raises
          ------
          ValueError
              If the selected latitude or longitude range does not intersect with the dataset.
          """
+
          lat_min, lat_max = latitude_range
          lon_min, lon_max = longitude_range

-         lon = self.ds[self.dim_names["longitude"]]
-         # Adjust longitude range if needed to match the expected range
-         if not straddle:
-             if lon.min() < -180:
-                 if lon_max + margin > 0:
-                     lon_min -= 360
-                     lon_max -= 360
-             elif lon.min() < 0:
-                 if lon_max + margin > 180:
-                     lon_min -= 360
-                     lon_max -= 360
-
-         if straddle:
-             if lon.max() > 360:
-                 if lon_min - margin < 180:
-                     lon_min += 360
-                     lon_max += 360
-             elif lon.max() > 180:
-                 if lon_min - margin < 0:
-                     lon_min += 360
-                     lon_max += 360
+         if not self.is_global:
+             # Adjust longitude range if needed to match the expected range
+             lon = self.ds[self.dim_names["longitude"]]
+             if not straddle:
+                 if lon.min() < -180:
+                     if lon_max + margin > 0:
+                         lon_min -= 360
+                         lon_max -= 360
+                 elif lon.min() < 0:
+                     if lon_max + margin > 180:
+                         lon_min -= 360
+                         lon_max -= 360
+
+             if straddle:
+                 if lon.max() > 360:
+                     if lon_min - margin < 180:
+                         lon_min += 360
+                         lon_max += 360
+                 elif lon.max() > 180:
+                     if lon_min - margin < 0:
+                         lon_min += 360
+                         lon_max += 360

          # Select the subdomain
          subdomain = self.ds.sel(
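With the new guard, the window arithmetic only runs for regional datasets; global ones have already been concatenated three times along longitude and cover any request. The shift itself just moves the requested window into the source grid's longitude convention; for example, mirroring the straddle branch with illustrative numbers:

    # Requested window in [-180, 180]; regional source grid stored in [0, 360]
    lon_min, lon_max, margin = -20.0, 15.0, 0.5

    lon_grid_max = 359.75  # stands in for lon.max() in the guarded code
    if lon_grid_max > 180 and lon_min - margin < 0:
        lon_min += 360
        lon_max += 360
    print(lon_min, lon_max)  # 340.0 375.0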
@@ -455,3 +501,654 @@ class Dataset:

          if (depth >= 0).all():
              self.ds[self.dim_names["depth"]] = -depth
+
+
+ @dataclass(frozen=True, kw_only=True)
+ class TPXODataset(Dataset):
+     """
+     Represents tidal data on the original grid from the TPXO dataset.
+
+     Parameters
+     ----------
+     filename : str
+         The path to the TPXO dataset file.
+     var_names : Dict[str, str], optional
+         Dictionary of variable names required in the dataset. Defaults to:
+         {
+             "ssh_Re": "h_Re",
+             "ssh_Im": "h_Im",
+             "sal_Re": "sal_Re",
+             "sal_Im": "sal_Im",
+             "u_Re": "u_Re",
+             "u_Im": "u_Im",
+             "v_Re": "v_Re",
+             "v_Im": "v_Im",
+             "depth": "depth"
+         }
+     dim_names : Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset. Defaults to:
+         {"longitude": "ny", "latitude": "nx", "ntides": "nc"}.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The xarray Dataset containing the TPXO tidal model data, loaded from the specified file.
+     reference_date : datetime
+         The reference date for the TPXO data. Default is datetime(1992, 1, 1).
+     """
+
+     filename: str
+     var_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "ssh_Re": "h_Re",
+             "ssh_Im": "h_Im",
+             "sal_Re": "sal_Re",
+             "sal_Im": "sal_Im",
+             "u_Re": "u_Re",
+             "u_Im": "u_Im",
+             "v_Re": "v_Re",
+             "v_Im": "v_Im",
+             "depth": "depth",
+         }
+     )
+     dim_names: Dict[str, str] = field(
+         default_factory=lambda: {"longitude": "ny", "latitude": "nx", "ntides": "nc"}
+     )
+     ds: xr.Dataset = field(init=False, repr=False)
+     reference_date: datetime = datetime(1992, 1, 1)
+
+     def __post_init__(self):
+         # Perform any necessary dataset initialization or modifications here
+         ds = super().load_data()
+
+         # Clean up dataset
+         ds = ds.assign_coords(
+             {
+                 "omega": ds["omega"],
+                 "nx": ds["lon_r"].isel(
+                     ny=0
+                 ),  # lon_r is constant along ny, i.e., is only a function of nx
+                 "ny": ds["lat_r"].isel(
+                     nx=0
+                 ),  # lat_r is constant along nx, i.e., is only a function of ny
+             }
+         )
+         ds = ds.rename({"nx": "longitude", "ny": "latitude"})
+
+         object.__setattr__(
+             self,
+             "dim_names",
+             {
+                 "latitude": "latitude",
+                 "longitude": "longitude",
+                 "ntides": self.dim_names["ntides"],
+             },
+         )
+         # Select relevant fields
+         ds = super().select_relevant_fields(ds)
+
+         # Check whether the data covers the entire globe
+         object.__setattr__(self, "is_global", super().check_if_global(ds))
+
+         # If dataset is global concatenate three copies of field along longitude dimension
+         if self.is_global:
+             ds = super().concatenate_longitudes(ds)
+
+         object.__setattr__(self, "ds", ds)
+
+     def check_number_constituents(self, ntides: int):
+         """
+         Checks if the number of constituents in the dataset is at least `ntides`.
+
+         Parameters
+         ----------
+         ntides : int
+             The required number of tidal constituents.
+
+         Raises
+         ------
+         ValueError
+             If the number of constituents in the dataset is less than `ntides`.
+         """
+         if len(self.ds[self.dim_names["ntides"]]) < ntides:
+             raise ValueError(
+                 f"The dataset contains fewer than {ntides} tidal constituents."
+             )
+
+
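A hedged usage sketch for the new class (the file name is illustrative and the module path roms_tools.setup.datasets is inferred from the imports above):

    from roms_tools.setup.datasets import TPXODataset

    tpxo = TPXODataset(filename="TPXO_global_test_data.nc")
    tpxo.check_number_constituents(ntides=10)  # ValueError if fewer than 10 constituents
    print(tpxo.ds["h_Re"])  # source-grid variable, per the var_names mapping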
+ @dataclass(frozen=True, kw_only=True)
+ class GLORYSDataset(Dataset):
+     """
+     Represents GLORYS data on original grid.
+
+     Parameters
+     ----------
+     filename : str
+         The path to the data files. Can contain wildcards.
+     start_time : Optional[datetime], optional
+         The start time for selecting relevant data. If not provided, the data is not filtered by start time.
+     end_time : Optional[datetime], optional
+         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
+         or no filtering is applied if start_time is not provided.
+     var_names: Dict[str, str], optional
+         Dictionary of variable names that are required in the dataset.
+     dim_names: Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset.
+     climatology : bool
+         Indicates whether the dataset is climatological. Defaults to False.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The xarray Dataset containing the GLORYS data on its original grid.
+     """
+
+     var_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "temp": "thetao",
+             "salt": "so",
+             "u": "uo",
+             "v": "vo",
+             "zeta": "zos",
+         }
+     )
+
+     dim_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "longitude": "longitude",
+             "latitude": "latitude",
+             "depth": "depth",
+             "time": "time",
+         }
+     )
+
+     climatology: Optional[bool] = False
+
+
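GLORYSDataset is the Dataset base class plus defaults, so usage is just construction (file path illustrative; module path inferred):

    from datetime import datetime
    from roms_tools.setup.datasets import GLORYSDataset

    glorys = GLORYSDataset(
        filename="GLORYS_test_data.nc",
        start_time=datetime(2012, 1, 1),
        end_time=datetime(2012, 1, 2),
    )
    print(glorys.ds["thetao"])  # temperature on the source grid, per the var_names mapping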
+ @dataclass(frozen=True, kw_only=True)
+ class CESMDataset(Dataset):
+     """
+     Represents CESM data on original grid.
+
+     Parameters
+     ----------
+     filename : str
+         The path to the data files. Can contain wildcards.
+     start_time : Optional[datetime], optional
+         The start time for selecting relevant data. If not provided, the data is not filtered by start time.
+     end_time : Optional[datetime], optional
+         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
+         or no filtering is applied if start_time is not provided.
+     var_names: Dict[str, str], optional
+         Dictionary of variable names that are required in the dataset.
+     dim_names: Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset.
+     climatology : bool
+         Indicates whether the dataset is climatological. Defaults to True.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The xarray Dataset containing the CESM data on its original grid.
+     """
+
+     # overwrite load_data method from parent class
+     def load_data(self) -> xr.Dataset:
+         """
+         Load dataset from the specified file.
+
+         Returns
+         -------
+         ds : xr.Dataset
+             The loaded xarray Dataset containing the forcing data.
+
+         Raises
+         ------
+         FileNotFoundError
+             If the specified file does not exist.
+         """
+
+         # Check if the file exists
+         matching_files = glob.glob(self.filename)
+         if not matching_files:
+             raise FileNotFoundError(
+                 f"No files found matching the pattern '{self.filename}'."
+             )
+
+         # Load the dataset
+         with dask.config.set(**{"array.slicing.split_large_chunks": False}):
+             # Define the chunk sizes
+             chunks = {
+                 self.dim_names["latitude"]: -1,
+                 self.dim_names["longitude"]: -1,
+             }
+
+             ds = xr.open_mfdataset(
+                 self.filename,
+                 combine="nested",
+                 coords="minimal",
+                 compat="override",
+                 chunks=chunks,
+                 engine="netcdf4",
+             )
+         if "time" not in self.dim_names:
+             if "time" in ds.dims:
+                 self.dim_names["time"] = "time"
+             else:
+                 if "month" in ds.dims:
+                     self.dim_names["time"] = "month"
+                 else:
+                     ds = ds.expand_dims({"time": 1})
+                     self.dim_names["time"] = "time"
+
+         return ds
+
+     def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
+         """
+         Adds time information to the dataset based on the climatology flag and dimension names.
+
+         This method processes the dataset to include time information according to the climatology
+         setting. If the dataset represents climatology data and the time dimension is labeled as
+         "month", it assigns dates to the dataset based on a monthly climatology. Additionally, it
+         handles dimension name updates if necessary.
+
+         Parameters
+         ----------
+         ds : xr.Dataset
+             The input dataset to which time information will be added.
+
+         Returns
+         -------
+         xr.Dataset
+             The dataset with time information added, including adjustments for climatology and
+             dimension names.
+         """
+         time_dim = self.dim_names["time"]
+
+         if self.climatology and time_dim == "month":
+             ds = assign_dates_to_climatology(ds, time_dim)
+             # rename dimension
+             ds = ds.swap_dims({time_dim: "time"})
+             # Update dimension names
+             updated_dim_names = self.dim_names.copy()
+             updated_dim_names["time"] = "time"
+             object.__setattr__(self, "dim_names", updated_dim_names)
+
+         return ds
+
+
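add_time_info's month-to-time swap rests on xarray's swap_dims. assign_dates_to_climatology is internal to roms_tools.setup.utils, so the stand-in below only imitates its effect (attaching a time coordinate along the month dimension) to make the mechanics visible:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"sst": ("month", np.zeros(12))}, coords={"month": np.arange(1, 13)})

    # Stand-in for assign_dates_to_climatology: a time coordinate along "month"
    ds = ds.assign_coords(time=("month", np.arange(12, dtype="float64")))
    ds = ds.swap_dims({"month": "time"})
    print(ds["sst"].dims)  # ('time',)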
+ @dataclass(frozen=True, kw_only=True)
+ class CESMBGCDataset(CESMDataset):
+     """
+     Represents CESM BGC data on original grid.
+
+     Parameters
+     ----------
+     filename : str
+         The path to the data files. Can contain wildcards.
+     start_time : Optional[datetime], optional
+         The start time for selecting relevant data. If not provided, the data is not filtered by start time.
+     end_time : Optional[datetime], optional
+         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
+         or no filtering is applied if start_time is not provided.
+     var_names: Dict[str, str], optional
+         Dictionary of variable names that are required in the dataset.
+     dim_names: Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset.
+     climatology : bool
+         Indicates whether the dataset is climatological. Defaults to True.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The xarray Dataset containing the CESM BGC data on its original grid.
+     """
+
+     var_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "PO4": "PO4",
+             "NO3": "NO3",
+             "SiO3": "SiO3",
+             "NH4": "NH4",
+             "Fe": "Fe",
+             "Lig": "Lig",
+             "O2": "O2",
+             "DIC": "DIC",
+             "DIC_ALT_CO2": "DIC_ALT_CO2",
+             "ALK": "ALK",
+             "ALK_ALT_CO2": "ALK_ALT_CO2",
+             "DOC": "DOC",
+             "DON": "DON",
+             "DOP": "DOP",
+             "DOPr": "DOPr",
+             "DONr": "DONr",
+             "DOCr": "DOCr",
+             "spChl": "spChl",
+             "spC": "spC",
+             "spP": "spP",
+             "spFe": "spFe",
+             "diatChl": "diatChl",
+             "diatC": "diatC",
+             "diatP": "diatP",
+             "diatFe": "diatFe",
+             "diatSi": "diatSi",
+             "diazChl": "diazChl",
+             "diazC": "diazC",
+             "diazP": "diazP",
+             "diazFe": "diazFe",
+             "spCaCO3": "spCaCO3",
+             "zooC": "zooC",
+         }
+     )
+
+     dim_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "longitude": "lon",
+             "latitude": "lat",
+             "depth": "z_t",
+         }
+     )
+
+     climatology: Optional[bool] = True
+
+     def post_process(self):
+         """
+         Processes and converts CESM data values as follows:
+         - Convert depth values from cm to m.
+         """
+
+         if self.dim_names["depth"] == "z_t":
+             # Fill variables that only have data in upper 150m with NaNs below
+             if (
+                 "z_t_150m" in self.ds.dims
+                 and np.equal(self.ds.z_t[:15].values, self.ds.z_t_150m.values).all()
+             ):
+                 for var in self.var_names:
+                     if "z_t_150m" in self.ds[var].dims:
+                         self.ds[var] = self.ds[var].rename({"z_t_150m": "z_t"})
+                         self.ds[var] = self.ds[var].chunk({"z_t": -1})
+             # Convert depth from cm to m
+             ds = self.ds.assign_coords({"depth": self.ds["z_t"] / 100})
+             ds["depth"].attrs["long_name"] = "Depth"
+             ds["depth"].attrs["units"] = "m"
+             ds = ds.swap_dims({"z_t": "depth"})
+             if "z_t" in ds:
+                 ds = ds.drop_vars("z_t")
+             if "z_t_150m" in ds:
+                 ds = ds.drop_vars("z_t_150m")
+             # update dataset
+             object.__setattr__(self, "ds", ds)
+
+             # Update dim_names with "depth": "depth" key-value pair
+             updated_dim_names = self.dim_names.copy()
+             updated_dim_names["depth"] = "depth"
+             object.__setattr__(self, "dim_names", updated_dim_names)
+
+
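The z_t handling in post_process boils down to a unit conversion plus a dimension swap. In isolation, with an illustrative three-level column:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"O2": ("z_t", np.zeros(3))}, coords={"z_t": [500.0, 1500.0, 2500.0]})  # cm

    ds = ds.assign_coords(depth=ds["z_t"] / 100)  # cm -> m
    ds = ds.swap_dims({"z_t": "depth"}).drop_vars("z_t")
    print(ds["depth"].values)  # [ 5. 15. 25.]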
+ @dataclass(frozen=True, kw_only=True)
+ class CESMBGCSurfaceForcingDataset(CESMDataset):
+     """
+     Represents CESM BGC surface forcing data on original grid.
+
+     Parameters
+     ----------
+     filename : str
+         The path to the data files. Can contain wildcards.
+     start_time : Optional[datetime], optional
+         The start time for selecting relevant data. If not provided, the data is not filtered by start time.
+     end_time : Optional[datetime], optional
+         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
+         or no filtering is applied if start_time is not provided.
+     var_names: Dict[str, str], optional
+         Dictionary of variable names that are required in the dataset.
+     dim_names: Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset.
+     climatology : bool
+         Indicates whether the dataset is climatological. Defaults to False.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The xarray Dataset containing the CESM BGC surface forcing data on its original grid.
+     """
+
+     var_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "pco2_air": "pCO2SURF",
+             "pco2_air_alt": "pCO2SURF",
+             "iron": "IRON_FLUX",
+             "dust": "dust_FLUX_IN",
+             "nox": "NOx_FLUX",
+             "nhy": "NHy_FLUX",
+         }
+     )
+
+     dim_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "longitude": "lon",
+             "latitude": "lat",
+         }
+     )
+
+     climatology: Optional[bool] = False
+
+
+ @dataclass(frozen=True, kw_only=True)
+ class ERA5Dataset(Dataset):
+     """
+     Represents ERA5 data on original grid.
+
+     Parameters
+     ----------
+     filename : str
+         The path to the data files. Can contain wildcards.
+     start_time : Optional[datetime], optional
+         The start time for selecting relevant data. If not provided, the data is not filtered by start time.
+     end_time : Optional[datetime], optional
+         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
+         or no filtering is applied if start_time is not provided.
+     var_names: Dict[str, str], optional
+         Dictionary of variable names that are required in the dataset.
+     dim_names: Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset.
+     climatology : bool
+         Indicates whether the dataset is climatological. Defaults to False.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The xarray Dataset containing the ERA5 data on its original grid.
+     """
+
+     var_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "uwnd": "u10",
+             "vwnd": "v10",
+             "swrad": "ssr",
+             "lwrad": "strd",
+             "Tair": "t2m",
+             "d2m": "d2m",
+             "rain": "tp",
+             "mask": "sst",
+         }
+     )
+
+     dim_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "longitude": "longitude",
+             "latitude": "latitude",
+             "time": "time",
+         }
+     )
+
+     climatology: Optional[bool] = False
+
+     def post_process(self):
+         """
+         Processes and converts ERA5 data values as follows:
+         - Convert radiation values from J/m^2 to W/m^2.
+         - Convert rainfall from meters to cm/day.
+         - Convert temperature from Kelvin to Celsius.
+         - Compute relative humidity if not present, convert to absolute humidity.
+         """
+         # Translate radiation to fluxes. ERA5 stores values integrated over 1 hour.
+         # Convert radiation from J/m^2 to W/m^2
+         self.ds[self.var_names["swrad"]] /= 3600
+         self.ds[self.var_names["lwrad"]] /= 3600
+         self.ds[self.var_names["swrad"]].attrs["units"] = "W/m^2"
+         self.ds[self.var_names["lwrad"]].attrs["units"] = "W/m^2"
+         # Convert rainfall from m to cm/day
+         self.ds[self.var_names["rain"]] *= 100 * 24
+
+         # Convert temperature from Kelvin to Celsius
+         self.ds[self.var_names["Tair"]] -= 273.15
+         self.ds[self.var_names["d2m"]] -= 273.15
+         self.ds[self.var_names["Tair"]].attrs["units"] = "degrees C"
+         self.ds[self.var_names["d2m"]].attrs["units"] = "degrees C"
+
+         # Compute relative humidity if not present
+         if "qair" not in self.ds.data_vars:
+             qair = np.exp(
+                 (17.625 * self.ds[self.var_names["d2m"]])
+                 / (243.04 + self.ds[self.var_names["d2m"]])
+             ) / np.exp(
+                 (17.625 * self.ds[self.var_names["Tair"]])
+                 / (243.04 + self.ds[self.var_names["Tair"]])
+             )
+             # Convert relative to absolute humidity
+             patm = 1010.0
+             cff = (
+                 (1.0007 + 3.46e-6 * patm)
+                 * 6.1121
+                 * np.exp(
+                     17.502
+                     * self.ds[self.var_names["Tair"]]
+                     / (240.97 + self.ds[self.var_names["Tair"]])
+                 )
+             )
+             cff = cff * qair
+             self.ds["qair"] = 0.62197 * (cff / (patm - 0.378 * cff))
+             self.ds["qair"].attrs["long_name"] = "Absolute humidity at 2m"
+             self.ds["qair"].attrs["units"] = "kg/kg"
+
+             # Update var_names dictionary
+             var_names = {**self.var_names, "qair": "qair"}
+             object.__setattr__(self, "var_names", var_names)
+
+         if "mask" in self.var_names.keys():
+             mask = xr.where(self.ds[self.var_names["mask"]].isel(time=0).isnull(), 0, 1)
+
+             for var in self.ds.data_vars:
+                 self.ds[var] = xr.where(mask == 1, self.ds[var], np.nan)
+
+
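The humidity block first derives relative humidity from dew point and air temperature (a Magnus-type formula), then converts it to the mass-ratio humidity (kg/kg) stored as qair. A scalar spot-check of the same arithmetic, with illustrative temperatures:

    import numpy as np

    Tair, d2m = 20.0, 15.0  # deg C, after the Kelvin -> Celsius conversion above
    rh = np.exp((17.625 * d2m) / (243.04 + d2m)) / np.exp((17.625 * Tair) / (243.04 + Tair))

    patm = 1010.0  # hPa, fixed surface pressure assumed by the code
    cff = (1.0007 + 3.46e-6 * patm) * 6.1121 * np.exp(17.502 * Tair / (240.97 + Tair))
    cff = cff * rh
    qair = 0.62197 * (cff / (patm - 0.378 * cff))
    print(round(float(qair), 5))  # 0.01061 kg/kg at roughly 73% relative humidity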
+ @dataclass(frozen=True, kw_only=True)
+ class ERA5Correction(Dataset):
+     """
+     Global dataset to correct ERA5 radiation. The dataset contains multiplicative correction factors for the ERA5 shortwave radiation, obtained by comparing the COREv2 climatology to the ERA5 climatology.
+
+     Parameters
+     ----------
+     filename : str, optional
+         The path to the correction files. Defaults to download_correction_data('SSR_correction.nc').
+     var_names: Dict[str, str], optional
+         Dictionary of variable names that are required in the dataset.
+         Defaults to {"swr_corr": "ssr_corr"}.
+     dim_names: Dict[str, str], optional
+         Dictionary specifying the names of dimensions in the dataset.
+         Defaults to {"longitude": "longitude", "latitude": "latitude", "time": "time"}.
+     climatology : bool, optional
+         Indicates if the correction data is a climatology. Defaults to True.
+
+     Attributes
+     ----------
+     ds : xr.Dataset
+         The loaded xarray Dataset containing the correction data.
+     """
+
+     filename: str = field(
+         default_factory=lambda: download_correction_data("SSR_correction.nc")
+     )
+     var_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "swr_corr": "ssr_corr",  # multiplicative correction factor for ERA5 shortwave radiation
+         }
+     )
+     dim_names: Dict[str, str] = field(
+         default_factory=lambda: {
+             "longitude": "longitude",
+             "latitude": "latitude",
+             "time": "time",
+         }
+     )
+     climatology: Optional[bool] = True
+
+     ds: xr.Dataset = field(init=False, repr=False)
+
+     def __post_init__(self):
+
+         if not self.climatology:
+             raise NotImplementedError(
+                 "Correction data must be a climatology. Set climatology to True."
+             )
+
+         super().__post_init__()
+
+     def choose_subdomain(self, coords, straddle: bool):
+         """
+         Converts longitude values in the dataset if necessary and selects a subdomain based on the specified coordinates.
+
+         This method converts longitude values between different ranges if required and then extracts a subset of the
+         dataset according to the given coordinates. It updates the dataset in place to reflect the selected subdomain.
+
+         Parameters
+         ----------
+         coords : dict
+             A dictionary specifying the target coordinates for selecting the subdomain. Keys should correspond to the
+             dimension names of the dataset (e.g., latitude and longitude), and values should be the desired ranges or
+             specific coordinate values.
+         straddle : bool
+             If True, assumes that target longitudes are in the range [-180, 180]. If False, assumes longitudes are in the
+             range [0, 360]. This parameter determines how longitude values are converted if necessary.
+
+         Raises
+         ------
+         ValueError
+             If the specified subdomain does not fully contain the specified latitude or longitude values. This can occur
+             if the dataset does not cover the full range of provided coordinates.
+
+         Notes
+         -----
+         - The dataset (`self.ds`) is updated in place to reflect the chosen subdomain.
+         """
+
+         lon = self.ds[self.dim_names["longitude"]]
+
+         if not self.is_global:
+             if lon.min().values < 0 and not straddle:
+                 # Convert from [-180, 180] to [0, 360]
+                 self.ds[self.dim_names["longitude"]] = xr.where(lon < 0, lon + 360, lon)
+
+             if lon.max().values > 180 and straddle:
+                 # Convert from [0, 360] to [-180, 180]
+                 self.ds[self.dim_names["longitude"]] = xr.where(
+                     lon > 180, lon - 360, lon
+                 )
+
+         # Select the subdomain based on the specified latitude and longitude ranges
+         subdomain = self.ds.sel(**coords)
+
+         # Check if the selected subdomain contains the specified latitude and longitude values
+         if not subdomain[self.dim_names["latitude"]].equals(
+             coords[self.dim_names["latitude"]]
+         ):
+             raise ValueError(
+                 "The correction dataset does not contain all specified latitude values."
+             )
+         if not subdomain[self.dim_names["longitude"]].equals(
+             coords[self.dim_names["longitude"]]
+         ):
+             raise ValueError(
+                 "The correction dataset does not contain all specified longitude values."
+             )
+         object.__setattr__(self, "ds", subdomain)
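
Finally, a hedged end-to-end sketch of the correction workflow (coordinate ranges illustrative; module path inferred from the imports above):

    from roms_tools.setup.datasets import ERA5Correction

    corr = ERA5Correction()  # fetches SSR_correction.nc via download_correction_data

    # Subset to the target grid's coordinates; they must match exactly, or ValueError is raised
    coords = {
        "latitude": corr.ds["latitude"].sel(latitude=slice(10, 20)),
        "longitude": corr.ds["longitude"].sel(longitude=slice(100, 120)),
    }
    corr.choose_subdomain(coords, straddle=False)
    print(corr.ds["ssr_corr"])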