roms-tools 0.20__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
roms_tools/setup/tides.py CHANGED
@@ -3,165 +3,21 @@ import xarray as xr
 import numpy as np
 import yaml
 import importlib.metadata
+from typing import Dict, Union
 
 from dataclasses import dataclass, field, asdict
 from roms_tools.setup.grid import Grid
 from roms_tools.setup.plot import _plot
 from roms_tools.setup.fill import fill_and_interpolate
-from roms_tools.setup.datasets import Dataset
+from roms_tools.setup.datasets import TPXODataset
 from roms_tools.setup.utils import (
     nan_check,
     interpolate_from_rho_to_u,
     interpolate_from_rho_to_v,
 )
-from typing import Dict, List
 import matplotlib.pyplot as plt
 
 
-@dataclass(frozen=True, kw_only=True)
-class TPXO(Dataset):
-    """
-    Represents tidal data on original grid.
-
-    Parameters
-    ----------
-    filename : str
-        The path to the TPXO dataset.
-    var_names : List[str], optional
-        List of variable names that are required in the dataset. Defaults to
-        ["h_Re", "h_Im", "sal_Re", "sal_Im", "u_Re", "u_Im", "v_Re", "v_Im"].
-    dim_names: Dict[str, str], optional
-        Dictionary specifying the names of dimensions in the dataset. Defaults to
-        {"longitude": "ny", "latitude": "nx"}.
-
-    Attributes
-    ----------
-    ds : xr.Dataset
-        The xarray Dataset containing TPXO tidal model data.
-    """
-
-    filename: str
-    var_names: List[str] = field(
-        default_factory=lambda: [
-            "h_Re",
-            "h_Im",
-            "sal_Re",
-            "sal_Im",
-            "u_Re",
-            "u_Im",
-            "v_Re",
-            "v_Im",
-            "depth",
-        ]
-    )
-    dim_names: Dict[str, str] = field(
-        default_factory=lambda: {"longitude": "ny", "latitude": "nx", "ntides": "nc"}
-    )
-    ds: xr.Dataset = field(init=False, repr=False)
-
-    def __post_init__(self):
-        # Perform any necessary dataset initialization or modifications here
-        ds = super().load_data()
-
-        # Clean up dataset
-        ds = ds.assign_coords(
-            {
-                "omega": ds["omega"],
-                "nx": ds["lon_r"].isel(
-                    ny=0
-                ),  # lon_r is constant along ny, i.e., is only a function of nx
-                "ny": ds["lat_r"].isel(
-                    nx=0
-                ),  # lat_r is constant along nx, i.e., is only a function of ny
-            }
-        )
-        ds = ds.rename({"nx": "longitude", "ny": "latitude"})
-
-        object.__setattr__(
-            self,
-            "dim_names",
-            {
-                "latitude": "latitude",
-                "longitude": "longitude",
-                "ntides": self.dim_names["ntides"],
-            },
-        )
-        # Select relevant fields
-        ds = super().select_relevant_fields(ds)
-
-        # Check whether the data covers the entire globe
-        is_global = self.check_if_global(ds)
-
-        if is_global:
-            ds = self.concatenate_longitudes(ds)
-
-        object.__setattr__(self, "ds", ds)
-
-    def check_number_constituents(self, ntides: int):
-        """
-        Checks if the number of constituents in the dataset is at least `ntides`.
-
-        Parameters
-        ----------
-        ntides : int
-            The required number of tidal constituents.
-
-        Raises
-        ------
-        ValueError
-            If the number of constituents in the dataset is less than `ntides`.
-        """
-        if len(self.ds[self.dim_names["ntides"]]) < ntides:
-            raise ValueError(
-                f"The dataset contains fewer than {ntides} tidal constituents."
-            )
-
-    def get_corrected_tides(self, model_reference_date, allan_factor):
-        # Get equilibrium tides
-        tpc = compute_equilibrium_tide(self.ds["longitude"], self.ds["latitude"]).isel(
-            nc=self.ds.nc
-        )
-        # Correct for SAL
-        tsc = allan_factor * (self.ds["sal_Re"] + 1j * self.ds["sal_Im"])
-        tpc = tpc - tsc
-
-        # Elevations and transports
-        thc = self.ds["h_Re"] + 1j * self.ds["h_Im"]
-        tuc = self.ds["u_Re"] + 1j * self.ds["u_Im"]
-        tvc = self.ds["v_Re"] + 1j * self.ds["v_Im"]
-
-        # Apply correction for phases and amplitudes
-        pf, pu, aa = egbert_correction(model_reference_date)
-        pf = pf.isel(nc=self.ds.nc)
-        pu = pu.isel(nc=self.ds.nc)
-        aa = aa.isel(nc=self.ds.nc)
-
-        tpxo_reference_date = datetime(1992, 1, 1)
-        dt = (model_reference_date - tpxo_reference_date).days * 3600 * 24
-
-        thc = pf * thc * np.exp(1j * (self.ds["omega"] * dt + pu + aa))
-        tuc = pf * tuc * np.exp(1j * (self.ds["omega"] * dt + pu + aa))
-        tvc = pf * tvc * np.exp(1j * (self.ds["omega"] * dt + pu + aa))
-        tpc = pf * tpc * np.exp(1j * (self.ds["omega"] * dt + pu + aa))
-
-        tides = {
-            "ssh_Re": thc.real,
-            "ssh_Im": thc.imag,
-            "u_Re": tuc.real,
-            "u_Im": tuc.imag,
-            "v_Re": tvc.real,
-            "v_Im": tvc.imag,
-            "pot_Re": tpc.real,
-            "pot_Im": tpc.imag,
-            "omega": self.ds["omega"],
-        }
-
-        for k in tides.keys():
-            tides[k] = tides[k].rename({"nc": "ntides"})
-
-        return tides
-
-
 @dataclass(frozen=True, kw_only=True)
 class TidalForcing:
     """
@@ -171,16 +27,16 @@ class TidalForcing:
     ----------
     grid : Grid
         The grid object representing the ROMS grid associated with the tidal forcing data.
-    filename: str
-        The path to the native tidal dataset.
+    source : Dict[str, Union[str, None]]
+        Dictionary specifying the source of the tidal data:
+        - "name" (str): Name of the data source (e.g., "TPXO").
+        - "path" (str): Path to the tidal data file. Can contain wildcards.
     ntides : int, optional
         Number of constituents to consider. Maximum number is 14. Default is 10.
-    model_reference_date : datetime, optional
-        The reference date for the ROMS simulation. Default is datetime(2000, 1, 1).
-    source : str, optional
-        The source of the tidal data. Default is "TPXO".
     allan_factor : float, optional
         The Allan factor used in tidal model computation. Default is 2.0.
+    model_reference_date : datetime, optional
+        The reference date for the ROMS simulation. Default is datetime(2000, 1, 1).
 
     Attributes
     ----------
@@ -189,27 +45,31 @@ class TidalForcing:
 
     Examples
     --------
-    >>> grid = Grid(...)
-    >>> tidal_forcing = TidalForcing(grid)
-    >>> print(tidal_forcing.ds)
+    >>> tidal_forcing = TidalForcing(
+    ...     grid=grid, source={"name": "TPXO", "path": "tpxo_data.nc"}
+    ... )
     """
 
     grid: Grid
-    filename: str
+    source: Dict[str, Union[str, None]]
     ntides: int = 10
-    model_reference_date: datetime = datetime(2000, 1, 1)
-    source: str = "TPXO"
     allan_factor: float = 2.0
+    model_reference_date: datetime = datetime(2000, 1, 1)
+
     ds: xr.Dataset = field(init=False, repr=False)
 
     def __post_init__(self):
-        if self.source == "TPXO":
-            data = TPXO(filename=self.filename)
+        if "name" not in self.source.keys():
+            raise ValueError("`source` must include a 'name'.")
+        if "path" not in self.source.keys():
+            raise ValueError("`source` must include a 'path'.")
+        if self.source["name"] == "TPXO":
+            data = TPXODataset(filename=self.source["path"])
         else:
-            raise ValueError('Only "TPXO" is a valid option for source.')
+            raise ValueError('Only "TPXO" is a valid option for source["name"].')
 
         data.check_number_constituents(self.ntides)
-        # operate on longitudes between -180 and 180 unless ROMS domain lies at least 5 degrees in lontitude away from Greenwich meridian
+        # operate on longitudes between -180 and 180 unless ROMS domain lies at least 5 degrees in longitude away from Greenwich meridian
         lon = self.grid.ds.lon_rho
        lat = self.grid.ds.lat_rho
         angle = self.grid.ds.angle
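
For orientation, a minimal usage sketch of the new dict-based `source` argument and its validation, following the updated docstring example (the `Grid(...)` construction is elided exactly as in the removed docstring, and "tpxo_data.nc" is a placeholder path):

from datetime import datetime
from roms_tools.setup.grid import Grid
from roms_tools.setup.tides import TidalForcing

grid = Grid(...)  # build a ROMS grid as before

# "name" and "path" are now both required keys of `source`.
tidal_forcing = TidalForcing(
    grid=grid,
    source={"name": "TPXO", "path": "tpxo_data.nc"},
    ntides=10,
    model_reference_date=datetime(2000, 1, 1),
)

# Omitting either key now fails fast, e.g.:
# TidalForcing(grid=grid, source={"name": "TPXO"})
# -> ValueError: `source` must include a 'path'.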
@@ -220,14 +80,9 @@ class TidalForcing:
             lon = xr.where(lon < 0, lon + 360, lon)
             straddle = False
 
-        # The following consists of two steps:
-        # Step 1: Choose subdomain of forcing data including safety margin for interpolation, and Step 2: Convert to the proper longitude range.
-        # We perform these two steps for two reasons:
-        # A) Since the horizontal dimensions consist of a single chunk, selecting a subdomain before interpolation is a lot more performant.
-        # B) Step 1 is necessary to avoid discontinuous longitudes that could be introduced by Step 2. Specifically, discontinuous longitudes
-        # can lead to artifacts in the interpolation process. Specifically, if the data is not global and has a gap,
-        # discontinuous longitudes could result in values that appear to come from a distant location instead of producing NaNs.
-        # These NaNs are important as they can be identified and handled appropriately by the nan_check function.
+        # Restrict data to relevant subdomain to achieve better performance and to avoid discontinuous longitudes introduced by converting
+        # to a different longitude range (+- 360 degrees). Discontinuous longitudes can lead to artifacts in the interpolation process that
+        # would not be detected by the nan_check function.
         data.choose_subdomain(
             latitude_range=[lat.min().values, lat.max().values],
             longitude_range=[lon.min().values, lon.max().values],
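
To illustrate the discontinuity the rewritten comment refers to (a toy sketch with made-up values, not package code): a domain straddling the Greenwich meridian is contiguous in [-180, 180) but jumps by roughly 360 degrees when expressed in [0, 360), and interpolating across such a jump can silently pull values from the far side of the dataset instead of producing the NaNs that `nan_check` relies on.

import numpy as np

lon = np.array([-5.0, -2.5, 0.0, 2.5])         # contiguous in [-180, 180)
lon_0_360 = np.where(lon < 0, lon + 360, lon)  # [355. , 357.5, 0. , 2.5]: discontinuous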
@@ -235,7 +90,7 @@ class TidalForcing:
             straddle=straddle,
         )
 
-        tides = data.get_corrected_tides(self.model_reference_date, self.allan_factor)
+        tides = self._get_corrected_tides(data)
 
         # select desired number of constituents
         for k in tides.keys():
@@ -326,7 +181,7 @@ class TidalForcing:
 
         ds.attrs["roms_tools_version"] = roms_tools_version
 
-        ds.attrs["source"] = self.source
+        ds.attrs["source"] = self.source["name"]
         ds.attrs["model_reference_date"] = str(self.model_reference_date)
         ds.attrs["allan_factor"] = self.allan_factor
 
@@ -430,10 +285,9 @@ class TidalForcing:
         # Extract tidal forcing data
         tidal_forcing_data = {
             "TidalForcing": {
-                "filename": self.filename,
+                "source": self.source,
                 "ntides": self.ntides,
                 "model_reference_date": self.model_reference_date.isoformat(),
-                "source": self.source,
                 "allan_factor": self.allan_factor,
             }
         }
@@ -494,6 +348,54 @@ class TidalForcing:
         # Create and return an instance of TidalForcing
         return cls(grid=grid, **tidal_forcing_params)
 
+    def _get_corrected_tides(self, data):
+
+        # Get equilibrium tides
+        tpc = compute_equilibrium_tide(
+            data.ds[data.dim_names["longitude"]], data.ds[data.dim_names["latitude"]]
+        )
+        tpc = tpc.isel(**{data.dim_names["ntides"]: data.ds[data.dim_names["ntides"]]})
+        # Correct for SAL
+        tsc = self.allan_factor * (
+            data.ds[data.var_names["sal_Re"]] + 1j * data.ds[data.var_names["sal_Im"]]
+        )
+        tpc = tpc - tsc
+
+        # Elevations and transports
+        thc = data.ds[data.var_names["ssh_Re"]] + 1j * data.ds[data.var_names["ssh_Im"]]
+        tuc = data.ds[data.var_names["u_Re"]] + 1j * data.ds[data.var_names["u_Im"]]
+        tvc = data.ds[data.var_names["v_Re"]] + 1j * data.ds[data.var_names["v_Im"]]
+
+        # Apply correction for phases and amplitudes
+        pf, pu, aa = egbert_correction(self.model_reference_date)
+        pf = pf.isel(**{data.dim_names["ntides"]: data.ds[data.dim_names["ntides"]]})
+        pu = pu.isel(**{data.dim_names["ntides"]: data.ds[data.dim_names["ntides"]]})
+        aa = aa.isel(**{data.dim_names["ntides"]: data.ds[data.dim_names["ntides"]]})
+
+        dt = (self.model_reference_date - data.reference_date).days * 3600 * 24
+
+        thc = pf * thc * np.exp(1j * (data.ds["omega"] * dt + pu + aa))
+        tuc = pf * tuc * np.exp(1j * (data.ds["omega"] * dt + pu + aa))
+        tvc = pf * tvc * np.exp(1j * (data.ds["omega"] * dt + pu + aa))
+        tpc = pf * tpc * np.exp(1j * (data.ds["omega"] * dt + pu + aa))
+
+        tides = {
+            "ssh_Re": thc.real,
+            "ssh_Im": thc.imag,
+            "u_Re": tuc.real,
+            "u_Im": tuc.imag,
+            "v_Re": tvc.real,
+            "v_Im": tvc.imag,
+            "pot_Re": tpc.real,
+            "pot_Im": tpc.imag,
+            "omega": data.ds["omega"],
+        }
+
+        for k in tides.keys():
+            tides[k] = tides[k].rename({data.dim_names["ntides"]: "ntides"})
+
+        return tides
+
 
 def modified_julian_days(year, month, day, hour=0):
     """
@@ -3,7 +3,7 @@ import numpy as np
 import gcm_filters
 from scipy.interpolate import RegularGridInterpolator
 from scipy.ndimage import label
-from roms_tools.setup.datasets import fetch_topo
+from roms_tools.setup.download import fetch_topo
 from roms_tools.setup.utils import interpolate_from_rho_to_u, interpolate_from_rho_to_v
 import warnings
 from itertools import count
roms_tools/setup/utils.py CHANGED
@@ -1,4 +1,8 @@
 import xarray as xr
+import numpy as np
+from typing import Union
+import pandas as pd
+import cftime
 
 
 def nan_check(field, mask) -> None:
@@ -160,3 +164,189 @@ def extrapolate_deepest_to_bottom(field: xr.DataArray, dim: str) -> xr.DataArray
     )
 
     return field_interpolated
+
+
+def assign_dates_to_climatology(ds: xr.Dataset, time_dim: str) -> xr.Dataset:
+    """
+    Assigns climatology dates to the dataset's time dimension.
+
+    This function updates the dataset's time coordinates to reflect climatological dates.
+    It defines fixed day increments for each month and assigns these to the specified time dimension.
+    The increments represent the cumulative days at mid-month for each month.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset to which climatological dates will be assigned.
+    time_dim : str
+        The name of the time dimension in the dataset that will be updated with climatological dates.
+
+    Returns
+    -------
+    xr.Dataset
+        The updated xarray Dataset with climatological dates assigned to the specified time dimension.
+    """
+    # Define the days in each month and convert to timedelta
+    increments = [15, 30, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30]
+    days = np.cumsum(increments)
+    timedelta_ns = np.array(days, dtype="timedelta64[D]").astype("timedelta64[ns]")
+    time = xr.DataArray(timedelta_ns, dims=[time_dim])
+    ds = ds.assign_coords({"time": time})
+    return ds
+
+
+def interpolate_from_climatology(
+    field: Union[xr.DataArray, xr.Dataset],
+    time_dim_name: str,
+    time: Union[xr.DataArray, pd.DatetimeIndex],
+) -> Union[xr.DataArray, xr.Dataset]:
+    """
+    Interpolates the given field temporally based on the specified time points.
+
+    If `field` is an xarray.Dataset, this function applies the interpolation to all data variables in the dataset.
+
+    Parameters
+    ----------
+    field : xarray.DataArray or xarray.Dataset
+        The field data to be interpolated. Can be a single DataArray or a Dataset.
+    time_dim_name : str
+        The name of the dimension in `field` that represents time.
+    time : xarray.DataArray or pandas.DatetimeIndex
+        The target time points for interpolation.
+
+    Returns
+    -------
+    xarray.DataArray or xarray.Dataset
+        The field values interpolated to the specified time points. The type matches the input type.
+    """
+
+    def interpolate_single_field(data_array: xr.DataArray) -> xr.DataArray:
+
+        if isinstance(time, xr.DataArray):
+            # Extract day of year from xarray.DataArray
+            day_of_year = time.dt.dayofyear
+        else:
+            if np.size(time) == 1:
+                day_of_year = time.timetuple().tm_yday
+            else:
+                day_of_year = np.array([t.timetuple().tm_yday for t in time])
+
+        data_array[time_dim_name] = data_array[time_dim_name].dt.days
+
+        # Concatenate across the beginning and end of the year
+        time_concat = xr.concat(
+            [
+                data_array[time_dim_name][-1] - 365.25,
+                data_array[time_dim_name],
+                365.25 + data_array[time_dim_name][0],
+            ],
+            dim=time_dim_name,
+        )
+        data_array_concat = xr.concat(
+            [
+                data_array.isel(**{time_dim_name: -1}),
+                data_array,
+                data_array.isel(**{time_dim_name: 0}),
+            ],
+            dim=time_dim_name,
+        )
+        data_array_concat[time_dim_name] = time_concat
+
+        # Interpolate to specified times
+        data_array_interpolated = data_array_concat.interp(
+            **{time_dim_name: day_of_year}, method="linear"
+        )
+
+        if np.size(time) == 1:
+            data_array_interpolated = data_array_interpolated.expand_dims(
+                {time_dim_name: 1}
+            )
+        return data_array_interpolated
+
+    if isinstance(field, xr.DataArray):
+        return interpolate_single_field(field)
+    elif isinstance(field, xr.Dataset):
+        interpolated_data_vars = {
+            var: interpolate_single_field(data_array)
+            for var, data_array in field.data_vars.items()
+        }
+        return xr.Dataset(interpolated_data_vars, attrs=field.attrs)
+    else:
+        raise TypeError("Input 'field' must be an xarray.DataArray or xarray.Dataset.")
+
+
+def is_cftime_datetime(data_array: xr.DataArray) -> bool:
+    """
+    Checks if the xarray DataArray contains cftime datetime objects.
+
+    Parameters
+    ----------
+    data_array : xr.DataArray
+        The xarray DataArray to be checked for cftime datetime objects.
+
+    Returns
+    -------
+    bool
+        True if the DataArray contains cftime datetime objects, False otherwise.
+
+    Raises
+    ------
+    TypeError
+        If the values in the DataArray are not of type numpy.ndarray or list.
+    """
+    # List of cftime datetime types
+    cftime_types = (
+        cftime.DatetimeNoLeap,
+        cftime.DatetimeJulian,
+        cftime.DatetimeGregorian,
+    )
+
+    # Check if any of the coordinate values are of cftime type
+    if isinstance(data_array.values, (np.ndarray, list)):
+        # Check the dtype of the array; numpy datetime64 indicates it's not cftime
+        if data_array.values.dtype == "datetime64[ns]":
+            return False
+
+        # Check if any of the values in the array are instances of cftime types
+        return any(isinstance(value, cftime_types) for value in data_array.values)
+
+    # Handle unexpected types
+    raise TypeError("DataArray values must be of type numpy.ndarray or list.")
+
+
+def convert_cftime_to_datetime(data_array: np.ndarray) -> np.ndarray:
+    """
+    Converts cftime datetime objects to numpy datetime64 objects in a numpy ndarray.
+
+    Parameters
+    ----------
+    data_array : np.ndarray
+        The numpy ndarray containing cftime datetime objects to be converted.
+
+    Returns
+    -------
+    np.ndarray
+        The ndarray with cftime datetimes converted to numpy datetime64 objects.
+
+    Notes
+    -----
+    This function is intended to be used with numpy ndarrays. If you need to convert
+    cftime datetime objects in an xarray.DataArray, please use the appropriate function
+    to handle xarray.DataArray conversions.
+    """
+    # List of cftime datetime types
+    cftime_types = (
+        cftime.DatetimeNoLeap,
+        cftime.DatetimeJulian,
+        cftime.DatetimeGregorian,
+    )
+
+    # Define a conversion function for cftime to numpy datetime64
+    def convert_datetime(dt):
+        if isinstance(dt, cftime_types):
+            # Convert to ISO format and then to nanosecond precision
+            return np.datetime64(dt.isoformat(), "ns")
+        return np.datetime64(dt, "ns")
+
+    return np.vectorize(convert_datetime)(data_array)
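
A minimal usage sketch of the two climatology helpers added above (the toy 12-month dataset, dimension names, and target dates are illustrative, not from the package):

import numpy as np
import pandas as pd
import xarray as xr

from roms_tools.setup.utils import (
    assign_dates_to_climatology,
    interpolate_from_climatology,
)

# Toy monthly climatology: 12 time slices of a 4x4 field.
ds = xr.Dataset(
    {"sst": (("time", "eta_rho", "xi_rho"), np.random.default_rng(0).random((12, 4, 4)))}
)

# Attach mid-month timedeltas (day 15, 45, 74, ...) as the time coordinate.
ds = assign_dates_to_climatology(ds, "time")

# Interpolate to arbitrary dates; the helper pads the climatology cyclically
# (December reappears before January and vice versa), so dates near the year
# boundary are interpolated rather than extrapolated.
targets = pd.DatetimeIndex(["2012-01-05", "2012-07-01", "2012-12-28"])
sst_at_dates = interpolate_from_climatology(ds["sst"], "time", targets)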