PyPI - eo-tides - Versions diffs - 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

eo-tides 0.1.1py3-none-any.whl → 0.3.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

eo_tides/__init__.py +7 -4
eo_tides/eo.py +184 -161
eo_tides/model.py +350 -366
eo_tides/stats.py +74 -36
eo_tides/utils.py +453 -1
eo_tides/validation.py +5 -5
{eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/METADATA +20 -10
eo_tides-0.3.0.dist-info/RECORD +11 -0
{eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/WHEEL +1 -1
eo_tides-0.1.1.dist-info/RECORD +0 -11
{eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/LICENSE +0 -0
{eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/top_level.txt +0 -0

eo_tides/model.py CHANGED Viewed

@@ -2,14 +2,14 @@
 from __future__ import annotations
 import os
-import pathlib
 import textwrap
-import warnings
 from concurrent.futures import ProcessPoolExecutor
 from concurrent.futures.process import BrokenProcessPool
 from functools import partial
 from typing import TYPE_CHECKING
+import psutil
 # Only import if running type checking
 if TYPE_CHECKING:
     import xarray as xr
@@ -19,349 +19,9 @@ import numpy as np
 import pandas as pd
 import pyproj
 import pyTMD
-from colorama import Style, init
-from pyTMD.io.model import load_database, model
 from tqdm import tqdm
-from .utils import idw
-def _set_directory(directory):
-    """
-    Set tide modelling files directory. If no custom
-    path is provided, try global environmental variable
-    instead.
-    """
-    if directory is None:
-        if "EO_TIDES_TIDE_MODELS" in os.environ:
-            directory = os.environ["EO_TIDES_TIDE_MODELS"]
-        else:
-            raise Exception(
-                "No tide model directory provided via `directory`, and/or no "
-                "`EO_TIDES_TIDE_MODELS` environment variable found. "
-                "Please provide a valid path to your tide model directory."
-            )
-    # Verify path exists
-    directory = pathlib.Path(directory).expanduser()
-    if not directory.exists():
-        raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
-    else:
-        return directory
-def list_models(
-    directory: str | os.PathLike | None = None,
-    show_available: bool = True,
-    show_supported: bool = True,
-    raise_error: bool = False,
-) -> tuple[list[str], list[str]]:
-    """
-    List all tide models available for tide modelling, and
-    all models supported by `eo-tides` and `pyTMD`.
-    This function scans the specified tide model directory
-    and returns a list of models that are available in the
-    directory as well as the full list of all supported models.
-    For instructions on setting up tide models, see:
-    <https://geoscienceaustralia.github.io/eo-tides/setup/>
-    Parameters
-    ----------
-    directory : str, optional
-        The directory containing tide model data files. If no path is
-        provided, this will default to the environment variable
-        `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
-        Tide modelling files should be stored in sub-folders for each
-        model that match the structure required by `pyTMD`
-        (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
-    show_available : bool, optional
-        Whether to print a list of locally available models.
-    show_supported : bool, optional
-        Whether to print a list of all supported models, in
-        addition to models available locally.
-    raise_error : bool, optional
-        If True, raise an error if no available models are found.
-        If False, raise a warning.
-    Returns
-    -------
-    available_models : list of str
-        A list of all tide models available within `directory`.
-    supported_models : list of str
-        A list of all tide models supported by `eo-tides`.
-    """
-    init()  # Initialize colorama
-    # Set tide modelling files directory. If no custom path is
-    # provided, try global environment variable.
-    directory = _set_directory(directory)
-    # Get full list of supported models from pyTMD database
-    model_database = load_database()["elevation"]
-    supported_models = list(model_database.keys())
-    # Extract expected model paths
-    expected_paths = {}
-    for m in supported_models:
-        model_file = model_database[m]["model_file"]
-        model_file = model_file[0] if isinstance(model_file, list) else model_file
-        expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
-    # Define column widths
-    status_width = 4  # Width for emoji
-    name_width = max(len(name) for name in supported_models)
-    path_width = max(len(path) for path in expected_paths.values())
-    # Print list of supported models, marking available and
-    # unavailable models and appending available to list
-    if show_available or show_supported:
-        total_width = min(status_width + name_width + path_width + 6, 80)
-        print("─" * total_width)
-        print(f"{'󠀠🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
-        print("─" * total_width)
-    available_models = []
-    for m in supported_models:
-        try:
-            model_file = model(directory=directory).elevation(m=m)
-            available_models.append(m)
-            if show_available:
-                # Mark available models with a green tick
-                status = "✅"
-                print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
-        except FileNotFoundError:
-            if show_supported:
-                # Mark unavailable models with a red cross
-                status = "❌"
-                print(
-                    f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
-                )
-    if show_available or show_supported:
-        print("─" * total_width)
-        # Print summary
-        print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
-        print(f"Available models: {len(available_models)}/{len(supported_models)}")
-    # Raise error or warning if no models are available
-    if not available_models:
-        warning_msg = textwrap.dedent(
-            f"""
-            No valid tide models are available in `{directory}`.
-            Are you sure you have provided the correct `directory` path, or set the
-            `EO_TIDES_TIDE_MODELS` environment variable to point to the location of your
-            tide model directory?
-            """
-        ).strip()
-        if raise_error:
-            raise Exception(warning_msg)
-        else:
-            warnings.warn(warning_msg, UserWarning)
-    # Return list of available and supported models
-    return available_models, supported_models
-def _model_tides(
-    model,
-    x,
-    y,
-    time,
-    directory,
-    crs,
-    crop,
-    method,
-    extrapolate,
-    cutoff,
-    output_units,
-    mode,
-):
-    """Worker function applied in parallel by `model_tides`. Handles the
-    extraction of tide modelling constituents and tide modelling using
-    `pyTMD`.
-    """
-    # Obtain model details
-    pytmd_model = pyTMD.io.model(directory).elevation(model)
-    # Convert x, y to latitude/longitude
-    transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
-    lon, lat = transformer.transform(x.flatten(), y.flatten())
-    # Convert datetime
-    timescale = pyTMD.time.timescale().from_datetime(time.flatten())
-    # Calculate bounds for cropping
-    buffer = 1  # one degree on either side
-    bounds = [
-        lon.min() - buffer,
-        lon.max() + buffer,
-        lat.min() - buffer,
-        lat.max() + buffer,
-    ]
-    try:
-        # Read tidal constants and interpolate to grid points
-        if pytmd_model.format in ("OTIS", "ATLAS-compact", "TMD3"):
-            amp, ph, D, c = pyTMD.io.OTIS.extract_constants(
-                lon,
-                lat,
-                pytmd_model.grid_file,
-                pytmd_model.model_file,
-                pytmd_model.projection,
-                type=pytmd_model.type,
-                grid=pytmd_model.file_format,
-                crop=crop,
-                bounds=bounds,
-                method=method,
-                extrapolate=extrapolate,
-                cutoff=cutoff,
-            )
-            # Use delta time at 2000.0 to match TMD outputs
-            deltat = np.zeros((len(timescale)), dtype=np.float64)
-        elif pytmd_model.format in ("ATLAS-netcdf",):
-            amp, ph, D, c = pyTMD.io.ATLAS.extract_constants(
-                lon,
-                lat,
-                pytmd_model.grid_file,
-                pytmd_model.model_file,
-                type=pytmd_model.type,
-                crop=crop,
-                bounds=bounds,
-                method=method,
-                extrapolate=extrapolate,
-                cutoff=cutoff,
-                scale=pytmd_model.scale,
-                compressed=pytmd_model.compressed,
-            )
-            # Use delta time at 2000.0 to match TMD outputs
-            deltat = np.zeros((len(timescale)), dtype=np.float64)
-        elif pytmd_model.format in ("GOT-ascii", "GOT-netcdf"):
-            amp, ph, c = pyTMD.io.GOT.extract_constants(
-                lon,
-                lat,
-                pytmd_model.model_file,
-                grid=pytmd_model.file_format,
-                crop=crop,
-                bounds=bounds,
-                method=method,
-                extrapolate=extrapolate,
-                cutoff=cutoff,
-                scale=pytmd_model.scale,
-                compressed=pytmd_model.compressed,
-            )
-            # Delta time (TT - UT1)
-            deltat = timescale.tt_ut1
-        elif pytmd_model.format in ("FES-ascii", "FES-netcdf"):
-            amp, ph = pyTMD.io.FES.extract_constants(
-                lon,
-                lat,
-                pytmd_model.model_file,
-                type=pytmd_model.type,
-                version=pytmd_model.version,
-                crop=crop,
-                bounds=bounds,
-                method=method,
-                extrapolate=extrapolate,
-                cutoff=cutoff,
-                scale=pytmd_model.scale,
-                compressed=pytmd_model.compressed,
-            )
-            # Available model constituents
-            c = pytmd_model.constituents
-            # Delta time (TT - UT1)
-            deltat = timescale.tt_ut1
-        else:
-            raise Exception(
-                f"Unsupported model format ({pytmd_model.format}). This may be due to an incompatible version of `pyTMD`."
-            )
-    # Raise error if constituent files no not cover analysis extent
-    except IndexError:
-        error_msg = textwrap.dedent(
-            f"""
-            The {model} tide model constituent files do not cover the requested analysis extent.
-            This can occur if you are using clipped model files to improve run times.
-            Consider using model files that cover your entire analysis area, or set `crop=False`
-            to reduce the extent of tide model constituent files that is loaded.
-            """
-        ).strip()
-        raise Exception(error_msg)
-    # Calculate complex phase in radians for Euler's
-    cph = -1j * ph * np.pi / 180.0
-    # Calculate constituent oscillation
-    hc = amp * np.exp(cph)
-    # Determine the number of points and times to process. If in
-    # "one-to-many" mode, these counts are used to repeat our extracted
-    # constituents and timesteps so we can extract tides for all
-    # combinations of our input times and tide modelling points.
-    # If in "one-to-one" mode, we avoid this step by setting counts to 1
-    # (e.g. "repeat 1 times")
-    points_repeat = len(x) if mode == "one-to-many" else 1
-    time_repeat = len(time) if mode == "one-to-many" else 1
-    # If in "one-to-many" mode, repeat constituents to length of time
-    # and number of input coords before passing to `predict_tide_drift`
-    t, hc, deltat = (
-        np.tile(timescale.tide, points_repeat),
-        hc.repeat(time_repeat, axis=0),
-        np.tile(deltat, points_repeat),
-    )
-    # Predict tidal elevations at time and infer minor corrections
-    npts = len(t)
-    tide = np.ma.zeros((npts), fill_value=np.nan)
-    tide.mask = np.any(hc.mask, axis=1)
-    # Predict tides
-    tide.data[:] = pyTMD.predict.drift(t, hc, c, deltat=deltat, corrections=pytmd_model.corrections)
-    minor = pyTMD.predict.infer_minor(
-        t,
-        hc,
-        c,
-        deltat=deltat,
-        corrections=pytmd_model.corrections,
-        minor=pytmd_model.minor,
-    )
-    tide.data[:] += minor.data[:]
-    # Replace invalid values with fill value
-    tide.data[tide.mask] = tide.fill_value
-    # Convert data to pandas.DataFrame, and set index to our input
-    # time/x/y values
-    tide_df = pd.DataFrame({
-        "time": np.tile(time, points_repeat),
-        "x": np.repeat(x, time_repeat),
-        "y": np.repeat(y, time_repeat),
-        "tide_model": model,
-        "tide_height": tide,
-    }).set_index(["time", "x", "y"])
-    # Optionally convert outputs to integer units (can save memory)
-    if output_units == "m":
-        tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
-    elif output_units == "cm":
-        tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
-    elif output_units == "mm":
-        tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
-    return tide_df
+from .utils import DatetimeLike, _set_directory, _standardise_time, idw, list_models
 def _ensemble_model(
@@ -529,20 +189,195 @@ def _ensemble_model(
     return pd.concat(ensemble_list)
+def _parallel_splits(
+    total_points: int,
+    model_count: int,
+    parallel_max: int | None = None,
+    min_points_per_split: int = 1000,
+) -> int:
+    """
+    Calculate the number of parallel splits to use for data
+    processing, based on available CPUs and the amount of data.
+    Parameters
+    ----------
+    total_points : int
+        Total number of data points to process.
+    model_count : int
+        Number of models that will be run in parallel. Values
+        below 1 are treated as 1.
+    parallel_max : int, optional
+        Maximum number of parallel processes to use. If None, this
+        is read from the `CPU_GUARANTEE` environment variable when
+        it holds a valid number, otherwise from the CPU core count.
+    min_points_per_split : int, default=1000
+        Minimum number of points that should be processed in each
+        split. Values below 1 are treated as 1.
+    Returns
+    -------
+    int
+        Number of splits to apply to the input points (at least 1).
+    """
+    # Get available CPUs. First see if `CPU_GUARANTEE` exists in
+    # environment (if running in JupyterHub); if not use psutil
+    # followed by standard CPU count
+    if parallel_max is None:
+        env_value = os.environ.get("CPU_GUARANTEE")
+        if env_value:
+            # The variable may hold a fractional value (e.g. "3.5"),
+            # so parse via float. A malformed value should not crash
+            # tide modelling; fall back to the detected CPU count.
+            try:
+                parallel_max = int(float(env_value))
+            except (ValueError, OverflowError):
+                parallel_max = None
+        if parallel_max is None:
+            parallel_max = int(psutil.cpu_count(logical=False) or os.cpu_count() or 1)
+    # Guard the divisors so zero/negative inputs cannot raise
+    # `ZeroDivisionError` or produce negative split counts
+    points_per_split = max(1, min_points_per_split)
+    models = max(1, model_count)
+    # Calculate optimal number of splits based on constraints
+    splits_by_size = total_points / points_per_split
+    splits_by_cpu = parallel_max / models
+    optimal_splits = min(splits_by_size, splits_by_cpu)
+    # Convert to integer and ensure at least 1 split
+    return int(max(1, optimal_splits))
+def _model_tides(
+    model,
+    x,
+    y,
+    time,
+    directory,
+    crs,
+    crop,
+    method,
+    extrapolate,
+    cutoff,
+    output_units,
+    mode,
+):
+    """Worker function applied in parallel by `model_tides`. Handles the
+    extraction of tide modelling constituents and tide modelling using
+    `pyTMD`.
+    Parameters
+    ----------
+    model :
+        Name of the tide model, passed to
+        `pyTMD.io.model(directory).elevation`.
+    x, y :
+        Arrays of coordinates in `crs`; flattened and reprojected
+        to EPSG:4326 before constituents are extracted.
+    time :
+        Array of times; flattened and converted to a `pyTMD`
+        timescale.
+    directory :
+        Tide model directory handed to `pyTMD.io.model`.
+    crs :
+        Coordinate reference system of `x` and `y`.
+    crop, method, extrapolate, cutoff :
+        Forwarded unchanged to `extract_constants`.
+    output_units :
+        "m" casts tide heights to float32; "cm" and "mm" scale by
+        100 and 1000 respectively and cast to int16. Any other
+        value leaves the heights unconverted.
+    mode :
+        "one-to-many" repeats constituents and times so that every
+        point is modelled at every time; any other value models
+        inputs element-wise (repeat count of 1).
+    Returns
+    -------
+    pandas.DataFrame
+        Indexed by "time", "x" and "y", with "tide_model" and
+        "tide_height" columns.
+    Raises
+    ------
+    Exception
+        If `extract_constants` raises `IndexError`; re-raised with a
+        message that the constituent files do not cover the
+        analysis extent.
+    """
+    # Obtain model details
+    pytmd_model = pyTMD.io.model(directory).elevation(model)
+    # Reproject x, y to latitude/longitude
+    transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
+    lon, lat = transformer.transform(x.flatten(), y.flatten())
+    # Convert datetime
+    timescale = pyTMD.time.timescale().from_datetime(time.flatten())
+    try:
+        # Read tidal constants and interpolate to grid points
+        amp, ph, c = pytmd_model.extract_constants(
+            lon,
+            lat,
+            type=pytmd_model.type,
+            crop=crop,
+            method=method,
+            extrapolate=extrapolate,
+            cutoff=cutoff,
+            append_node=False,
+        )
+        # TODO: Return constituents
+    # Raise error if constituent files do not cover analysis extent.
+    # `from None` hides the low-level IndexError traceback so users
+    # only see the actionable message below.
+    except IndexError:
+        error_msg = f"""
+        The {model} tide model constituent files do not cover the analysis extent
+        ({min(lon):.2f}, {max(lon):.2f}, {min(lat):.2f}, {max(lat):.2f}).
+        This can occur if you are using clipped model files to improve run times.
+        Consider using model files that cover your entire analysis area, or set `crop=False`
+        to reduce the extent of tide model constituent files that is loaded.
+        """
+        raise Exception(textwrap.dedent(error_msg).strip()) from None
+    # Calculate complex phase in radians for Euler's
+    cph = -1j * ph * np.pi / 180.0
+    # Calculate constituent oscillation
+    hc = amp * np.exp(cph)
+    # Compute delta times based on model
+    if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
+        # Use delta time at 2000.0 to match TMD outputs
+        deltat = np.zeros_like(timescale.tt_ut1)
+    else:
+        # Use interpolated delta times
+        deltat = timescale.tt_ut1
+    # In "one-to-many" mode, extracted tidal constituents and timesteps
+    # are repeated/multiplied out to match the number of input points and
+    # timesteps, enabling the modeling of tides across all combinations
+    # of input times and points. In "one-to-one" mode, no repetition is
+    # needed, so each repeat count is set to 1.
+    points_repeat = len(x) if mode == "one-to-many" else 1
+    time_repeat = len(time) if mode == "one-to-many" else 1
+    # Times are tiled (t0, t1, ..., t0, t1, ...) while constituents are
+    # repeated per point (p0, p0, ..., p1, p1, ...), so row i of `hc`
+    # lines up with entry i of `t` and `deltat`
+    t, hc, deltat = (
+        np.tile(timescale.tide, points_repeat),
+        hc.repeat(time_repeat, axis=0),
+        np.tile(deltat, points_repeat),
+    )
+    # Create arrays to hold outputs; a row is masked if any of its
+    # constituents is masked
+    tide = np.ma.zeros((len(t)), fill_value=np.nan)
+    tide.mask = np.any(hc.mask, axis=1)
+    # Predict tidal elevations at time and infer minor corrections
+    tide.data[:] = pyTMD.predict.drift(
+        t,
+        hc,
+        c,
+        deltat=deltat,
+        corrections=pytmd_model.corrections,
+    )
+    minor = pyTMD.predict.infer_minor(
+        t,
+        hc,
+        c,
+        deltat=deltat,
+        corrections=pytmd_model.corrections,
+        minor=pytmd_model.minor,
+    )
+    tide.data[:] += minor.data[:]
+    # Replace invalid values with fill value
+    tide.data[tide.mask] = tide.fill_value
+    # Convert data to pandas.DataFrame, and set index to our input
+    # time/x/y values (same tile/repeat ordering as used for `t`/`hc`)
+    tide_df = pd.DataFrame({
+        "time": np.tile(time, points_repeat),
+        "x": np.repeat(x, time_repeat),
+        "y": np.repeat(y, time_repeat),
+        "tide_model": model,
+        "tide_height": tide,
+    }).set_index(["time", "x", "y"])
+    # Optionally convert outputs to integer units (can save memory)
+    # NOTE(review): the int16 casts are applied after masked rows were
+    # set to NaN above - confirm how NaN rows should be represented
+    # in the "cm"/"mm" outputs.
+    if output_units == "m":
+        tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
+    elif output_units == "cm":
+        tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
+    elif output_units == "mm":
+        tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
+    return tide_df
 def model_tides(
     x: float | list[float] | xr.DataArray,
     y: float | list[float] | xr.DataArray,
-    time: np.ndarray | pd.DatetimeIndex,
+    time: DatetimeLike,
     model: str | list[str] = "EOT20",
     directory: str | os.PathLike | None = None,
     crs: str = "EPSG:4326",
     crop: bool = True,
-    method: str = "spline",
+    method: str = "linear",
     extrapolate: bool = True,
     cutoff: float | None = None,
     mode: str = "one-to-many",
     parallel: bool = True,
-    parallel_splits: int = 5,
+    parallel_splits: int | str = "auto",
+    parallel_max: int | None = None,
     output_units: str = "m",
     output_format: str = "long",
     ensemble_models: list[str] | None = None,
@@ -578,10 +413,11 @@ def model_tides(
         the location at which to model tides. By default these
         coordinates should be lat/lon; use "crs" if they
         are in a custom coordinate reference system.
-    time : Numpy datetime array or pandas.DatetimeIndex
-        An array containing `datetime64[ns]` values or a
-        `pandas.DatetimeIndex` providing the times at which to
-        model tides in UTC time.
+    time : DatetimeLike
+        Times at which to model tide heights (in UTC). Accepts
+        any format that can be converted by `pandas.to_datetime()`;
+        e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
+        datetime.datetime and strings (e.g. "2020-01-01 23:00").
     model : str or list of str, optional
         The tide model (or models) to use to model tides.
         Defaults to "EOT20"; for a full list of available/supported
@@ -602,11 +438,11 @@ def model_tides(
         1 degree buffer around all input points. Defaults to True.
     method : str, optional
         Method used to interpolate tidal constituents
-        from model files. Options include:
+        from model files. Defaults to "linear"; options include:
-        - "spline": scipy bivariate spline interpolation (default)
-        - "bilinear": quick bilinear interpolation
         - "linear", "nearest": scipy regular grid interpolations
+        - "spline": scipy bivariate spline interpolation
+        - "bilinear": quick bilinear interpolation
     extrapolate : bool, optional
         Whether to extrapolate tides for x and y coordinates outside of
         the valid tide modelling domain using nearest-neighbor.
@@ -632,12 +468,16 @@ def model_tides(
         parallel. Optionally, tide modelling can also be run in parallel
         across input x and y coordinates (see "parallel_splits" below).
         Default is True.
-    parallel_splits : int, optional
+    parallel_splits : str or int, optional
         Whether to split the input x and y coordinates into smaller,
         evenly-sized chunks that are processed in parallel. This can
         provide a large performance boost when processing large numbers
-        of coordinates. The default is 5 chunks, which will split
-        coordinates into 5 parallelised chunks.
+        of coordinates. The default is "auto", which will automatically
+        attempt to determine optimal splits based on available CPUs,
+        the number of input points, and the number of models.
+    parallel_max : int, optional
+        Maximum number of processes to run in parallel. The default of
+        None will automatically determine this from your available CPUs.
     output_units : str, optional
         Whether to return modelled tides in floating point metre units,
         or integer centimetre units (i.e. scaled by 100) or integer
@@ -674,9 +514,10 @@ def model_tides(
     models_requested = list(np.atleast_1d(model))
     x = np.atleast_1d(x)
     y = np.atleast_1d(y)
-    time = np.atleast_1d(time)
+    time = _standardise_time(time)
     # Validate input arguments
+    assert time is not None, "Times for modelling tides muyst be provided via `time`."
     assert method in ("bilinear", "spline", "linear", "nearest")
     assert output_units in (
         "m",
@@ -695,10 +536,6 @@ def model_tides(
             "you intended to model multiple timesteps at each point."
         )
-    # If time passed as a single Timestamp, convert to datetime64
-    if isinstance(time, pd.Timestamp):
-        time = time.to_datetime64()
     # Set tide modelling files directory. If no custom path is
     # provided, try global environment variable.
     directory = _set_directory(directory)
@@ -770,13 +607,28 @@ def model_tides(
         mode=mode,
     )
-    # Ensure requested parallel splits is not smaller than number of points
-    parallel_splits = min(parallel_splits, len(x))
+    # If automatic parallel splits, calculate optimal value
+    # based on available parallelisation, number of points
+    # and number of models
+    if parallel_splits == "auto":
+        parallel_splits = _parallel_splits(
+            total_points=len(x),
+            model_count=len(models_to_process),
+            parallel_max=parallel_max,
+        )
+    # Verify that parallel splits are not larger than number of points
+    assert isinstance(parallel_splits, int)
+    if parallel_splits > len(x):
+        raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
     # Parallelise if either multiple models or multiple splits requested
     if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
-        with ProcessPoolExecutor() as executor:
-            print(f"Modelling tides using {', '.join(models_to_process)} in parallel")
+        with ProcessPoolExecutor(max_workers=parallel_max) as executor:
+            print(
+                f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})"
+            )
             # Optionally split lon/lat points into `splits_n` chunks
             # that will be applied in parallel
@@ -824,7 +676,7 @@ def model_tides(
         model_outputs = []
         for model_i in models_to_process:
-            print(f"Modelling tides using {model_i}")
+            print(f"Modelling tides with {model_i}")
             tide_df = iter_func(model_i, x, y, time)
             model_outputs.append(tide_df)
@@ -854,3 +706,135 @@ def model_tides(
             tide_df = tide_df.reindex(output_indices)
     return tide_df
+def model_phases(
+    x: float | list[float] | xr.DataArray,
+    y: float | list[float] | xr.DataArray,
+    time: DatetimeLike,
+    model: str | list[str] = "EOT20",
+    directory: str | os.PathLike | None = None,
+    time_offset: str = "15 min",
+    return_tides: bool = False,
+    **model_tides_kwargs,
+) -> pd.DataFrame:
+    """
+    Model tide phases (low-flow, high-flow, high-ebb, low-ebb)
+    at multiple coordinates and/or timesteps using one
+    or more ocean tide models.
+    Ebb and flow phases are calculated by running the
+    `eo_tides.model.model_tides` function twice, once for
+    the requested timesteps, and again after subtracting a
+    small time offset (by default, 15 minutes). If tides
+    increased over this period, they are assigned as "flow";
+    if they decreased, they are assigned as "ebb".
+    Tides are considered "high" if equal or greater than 0
+    metres tide height, otherwise "low".
+    This function supports all parameters that are supported
+    by `model_tides`.
+    Parameters
+    ----------
+    x, y : float or list of float
+        One or more x and y coordinates used to define
+        the location at which to model tide phases. By default
+        these coordinates should be lat/lon; use "crs" if they
+        are in a custom coordinate reference system.
+    time : DatetimeLike
+        Times at which to model tide phases (in UTC). Accepts
+        any format that can be converted by `pandas.to_datetime()`;
+        e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
+        datetime.datetime and strings (e.g. "2020-01-01 23:00").
+    model : str or list of str, optional
+        The tide model (or models) to use to compute tide phases.
+        Defaults to "EOT20"; for a full list of available/supported
+        models, run `eo_tides.model.list_models`.
+    directory : str, optional
+        The directory containing tide model data files. If no path is
+        provided, this will default to the environment variable
+        `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
+        Tide modelling files should be stored in sub-folders for each
+        model that match the structure required by `pyTMD`
+        (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
+    time_offset : str, optional
+        The time offset/delta used to generate a time series of
+        offset tide heights required for phase calculation. Defaults
+        to "15 min"; can be any string passed to `pandas.Timedelta`.
+    return_tides : bool, optional
+        Whether to return intermediate modelled tide heights as a
+        "tide_height" column in the output dataframe. Defaults to False.
+    **model_tides_kwargs :
+        Optional parameters passed to the `eo_tides.model.model_tides`
+        function. Important parameters include `output_format` (e.g.
+        whether to return results in wide or long format), `mode`
+        ("one-to-many" or "one-to-one"), `crop` (whether to crop tide
+        model constituent files on-the-fly to improve performance) etc.
+    Returns
+    -------
+    pandas.DataFrame
+        A dataframe containing modelled tide phases.
+    """
+    # Pop output format and mode for special handling: tides are always
+    # modelled in long format here (wide conversion happens after the
+    # phases are computed), and `mode` is needed again for re-indexing
+    output_format = model_tides_kwargs.pop("output_format", "long")
+    mode = model_tides_kwargs.pop("mode", "one-to-many")
+    # Standardise times up front so that the offset below can be
+    # subtracted from any supported input (strings, lists, Timestamps)
+    time = _standardise_time(time)
+    # Model tides. `mode` must be forwarded explicitly because it was
+    # popped above; otherwise "one-to-one" requests would silently be
+    # modelled as "one-to-many"
+    tide_df = model_tides(
+        x=x,
+        y=y,
+        time=time,
+        model=model,
+        directory=directory,
+        mode=mode,
+        **model_tides_kwargs,
+    )
+    # Model tides for a time `time_offset` prior to each requested
+    # time. This allows us to compare tide heights to see if they
+    # are rising or falling.
+    offset = pd.Timedelta(time_offset).to_timedelta64()
+    pre_df = model_tides(
+        x=x,
+        y=y,
+        time=time - offset,
+        model=model,
+        directory=directory,
+        mode=mode,
+        **model_tides_kwargs,
+    )
+    # Compare tides computed for each timestep. If the previous tide
+    # was higher than the current tide, the tide is 'ebbing'. If the
+    # previous tide was lower, the tide is 'flowing'
+    ebb_flow = (tide_df.tide_height < pre_df.tide_height.values).replace({True: "ebb", False: "flow"})
+    # If tides are greater than or equal to 0, then "high", otherwise "low"
+    high_low = (tide_df.tide_height >= 0).replace({True: "high", False: "low"})
+    # Combine into one string and add to data
+    tide_df["tide_phase"] = high_low.astype(str) + "-" + ebb_flow.astype(str)
+    # Optionally convert to a wide format dataframe with a tide model in
+    # each dataframe column
+    if output_format == "wide":
+        # Pivot into wide format with each time model as a column
+        print("Converting to a wide format dataframe")
+        tide_df = tide_df.pivot(columns="tide_model")
+        # If in 'one-to-one' mode, reindex using our input time/x/y
+        # values to ensure the output is sorted the same as our inputs.
+        # Coordinates are coerced to 1D arrays so scalar inputs work.
+        if mode == "one-to-one":
+            output_indices = pd.MultiIndex.from_arrays(
+                [time, np.atleast_1d(x), np.atleast_1d(y)],
+                names=["time", "x", "y"],
+            )
+            tide_df = tide_df.reindex(output_indices)
+        # Optionally drop tide heights, leaving one phase column per model
+        if not return_tides:
+            return tide_df["tide_phase"]
+        return tide_df
+    # Optionally drop tide heights
+    if not return_tides:
+        return tide_df.drop("tide_height", axis=1)
+    return tide_df

eo-tides 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

eo-tides 0.1.1py3-none-any.whl → 0.3.0py3-none-any.whl