eo-tides 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/model.py CHANGED
@@ -2,14 +2,14 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import os
5
- import pathlib
6
5
  import textwrap
7
- import warnings
8
6
  from concurrent.futures import ProcessPoolExecutor
9
7
  from concurrent.futures.process import BrokenProcessPool
10
8
  from functools import partial
11
9
  from typing import TYPE_CHECKING
12
10
 
11
+ import psutil
12
+
13
13
  # Only import if running type checking
14
14
  if TYPE_CHECKING:
15
15
  import xarray as xr
@@ -19,349 +19,9 @@ import numpy as np
19
19
  import pandas as pd
20
20
  import pyproj
21
21
  import pyTMD
22
- from colorama import Style, init
23
- from pyTMD.io.model import load_database, model
24
22
  from tqdm import tqdm
25
23
 
26
- from .utils import idw
27
-
28
-
29
- def _set_directory(directory):
30
- """
31
- Set tide modelling files directory. If no custom
32
- path is provided, try global environmental variable
33
- instead.
34
- """
35
- if directory is None:
36
- if "EO_TIDES_TIDE_MODELS" in os.environ:
37
- directory = os.environ["EO_TIDES_TIDE_MODELS"]
38
- else:
39
- raise Exception(
40
- "No tide model directory provided via `directory`, and/or no "
41
- "`EO_TIDES_TIDE_MODELS` environment variable found. "
42
- "Please provide a valid path to your tide model directory."
43
- )
44
-
45
- # Verify path exists
46
- directory = pathlib.Path(directory).expanduser()
47
- if not directory.exists():
48
- raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
49
- else:
50
- return directory
51
-
52
-
53
- def list_models(
54
- directory: str | os.PathLike | None = None,
55
- show_available: bool = True,
56
- show_supported: bool = True,
57
- raise_error: bool = False,
58
- ) -> tuple[list[str], list[str]]:
59
- """
60
- List all tide models available for tide modelling, and
61
- all models supported by `eo-tides` and `pyTMD`.
62
-
63
- This function scans the specified tide model directory
64
- and returns a list of models that are available in the
65
- directory as well as the full list of all supported models.
66
-
67
- For instructions on setting up tide models, see:
68
- <https://geoscienceaustralia.github.io/eo-tides/setup/>
69
-
70
- Parameters
71
- ----------
72
- directory : str, optional
73
- The directory containing tide model data files. If no path is
74
- provided, this will default to the environment variable
75
- `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
76
- Tide modelling files should be stored in sub-folders for each
77
- model that match the structure required by `pyTMD`
78
- (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
79
- show_available : bool, optional
80
- Whether to print a list of locally available models.
81
- show_supported : bool, optional
82
- Whether to print a list of all supported models, in
83
- addition to models available locally.
84
- raise_error : bool, optional
85
- If True, raise an error if no available models are found.
86
- If False, raise a warning.
87
-
88
- Returns
89
- -------
90
- available_models : list of str
91
- A list of all tide models available within `directory`.
92
- supported_models : list of str
93
- A list of all tide models supported by `eo-tides`.
94
- """
95
- init() # Initialize colorama
96
-
97
- # Set tide modelling files directory. If no custom path is
98
- # provided, try global environment variable.
99
- directory = _set_directory(directory)
100
-
101
- # Get full list of supported models from pyTMD database
102
- model_database = load_database()["elevation"]
103
- supported_models = list(model_database.keys())
104
-
105
- # Extract expected model paths
106
- expected_paths = {}
107
- for m in supported_models:
108
- model_file = model_database[m]["model_file"]
109
- model_file = model_file[0] if isinstance(model_file, list) else model_file
110
- expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
111
-
112
- # Define column widths
113
- status_width = 4 # Width for emoji
114
- name_width = max(len(name) for name in supported_models)
115
- path_width = max(len(path) for path in expected_paths.values())
116
-
117
- # Print list of supported models, marking available and
118
- # unavailable models and appending available to list
119
- if show_available or show_supported:
120
- total_width = min(status_width + name_width + path_width + 6, 80)
121
- print("─" * total_width)
122
- print(f"{'󠀠🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
123
- print("─" * total_width)
124
-
125
- available_models = []
126
- for m in supported_models:
127
- try:
128
- model_file = model(directory=directory).elevation(m=m)
129
- available_models.append(m)
130
-
131
- if show_available:
132
- # Mark available models with a green tick
133
- status = "✅"
134
- print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
135
- except FileNotFoundError:
136
- if show_supported:
137
- # Mark unavailable models with a red cross
138
- status = "❌"
139
- print(
140
- f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
141
- )
142
-
143
- if show_available or show_supported:
144
- print("─" * total_width)
145
-
146
- # Print summary
147
- print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
148
- print(f"Available models: {len(available_models)}/{len(supported_models)}")
149
-
150
- # Raise error or warning if no models are available
151
- if not available_models:
152
- warning_msg = textwrap.dedent(
153
- f"""
154
- No valid tide models are available in `{directory}`.
155
- Are you sure you have provided the correct `directory` path, or set the
156
- `EO_TIDES_TIDE_MODELS` environment variable to point to the location of your
157
- tide model directory?
158
- """
159
- ).strip()
160
-
161
- if raise_error:
162
- raise Exception(warning_msg)
163
- else:
164
- warnings.warn(warning_msg, UserWarning)
165
-
166
- # Return list of available and supported models
167
- return available_models, supported_models
168
-
169
-
170
- def _model_tides(
171
- model,
172
- x,
173
- y,
174
- time,
175
- directory,
176
- crs,
177
- crop,
178
- method,
179
- extrapolate,
180
- cutoff,
181
- output_units,
182
- mode,
183
- ):
184
- """Worker function applied in parallel by `model_tides`. Handles the
185
- extraction of tide modelling constituents and tide modelling using
186
- `pyTMD`.
187
- """
188
- # Obtain model details
189
- pytmd_model = pyTMD.io.model(directory).elevation(model)
190
-
191
- # Convert x, y to latitude/longitude
192
- transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
193
- lon, lat = transformer.transform(x.flatten(), y.flatten())
194
-
195
- # Convert datetime
196
- timescale = pyTMD.time.timescale().from_datetime(time.flatten())
197
-
198
- # Calculate bounds for cropping
199
- buffer = 1 # one degree on either side
200
- bounds = [
201
- lon.min() - buffer,
202
- lon.max() + buffer,
203
- lat.min() - buffer,
204
- lat.max() + buffer,
205
- ]
206
-
207
- try:
208
- # Read tidal constants and interpolate to grid points
209
- if pytmd_model.format in ("OTIS", "ATLAS-compact", "TMD3"):
210
- amp, ph, D, c = pyTMD.io.OTIS.extract_constants(
211
- lon,
212
- lat,
213
- pytmd_model.grid_file,
214
- pytmd_model.model_file,
215
- pytmd_model.projection,
216
- type=pytmd_model.type,
217
- grid=pytmd_model.file_format,
218
- crop=crop,
219
- bounds=bounds,
220
- method=method,
221
- extrapolate=extrapolate,
222
- cutoff=cutoff,
223
- )
224
-
225
- # Use delta time at 2000.0 to match TMD outputs
226
- deltat = np.zeros((len(timescale)), dtype=np.float64)
227
-
228
- elif pytmd_model.format in ("ATLAS-netcdf",):
229
- amp, ph, D, c = pyTMD.io.ATLAS.extract_constants(
230
- lon,
231
- lat,
232
- pytmd_model.grid_file,
233
- pytmd_model.model_file,
234
- type=pytmd_model.type,
235
- crop=crop,
236
- bounds=bounds,
237
- method=method,
238
- extrapolate=extrapolate,
239
- cutoff=cutoff,
240
- scale=pytmd_model.scale,
241
- compressed=pytmd_model.compressed,
242
- )
243
-
244
- # Use delta time at 2000.0 to match TMD outputs
245
- deltat = np.zeros((len(timescale)), dtype=np.float64)
246
-
247
- elif pytmd_model.format in ("GOT-ascii", "GOT-netcdf"):
248
- amp, ph, c = pyTMD.io.GOT.extract_constants(
249
- lon,
250
- lat,
251
- pytmd_model.model_file,
252
- grid=pytmd_model.file_format,
253
- crop=crop,
254
- bounds=bounds,
255
- method=method,
256
- extrapolate=extrapolate,
257
- cutoff=cutoff,
258
- scale=pytmd_model.scale,
259
- compressed=pytmd_model.compressed,
260
- )
261
-
262
- # Delta time (TT - UT1)
263
- deltat = timescale.tt_ut1
264
-
265
- elif pytmd_model.format in ("FES-ascii", "FES-netcdf"):
266
- amp, ph = pyTMD.io.FES.extract_constants(
267
- lon,
268
- lat,
269
- pytmd_model.model_file,
270
- type=pytmd_model.type,
271
- version=pytmd_model.version,
272
- crop=crop,
273
- bounds=bounds,
274
- method=method,
275
- extrapolate=extrapolate,
276
- cutoff=cutoff,
277
- scale=pytmd_model.scale,
278
- compressed=pytmd_model.compressed,
279
- )
280
-
281
- # Available model constituents
282
- c = pytmd_model.constituents
283
-
284
- # Delta time (TT - UT1)
285
- deltat = timescale.tt_ut1
286
- else:
287
- raise Exception(
288
- f"Unsupported model format ({pytmd_model.format}). This may be due to an incompatible version of `pyTMD`."
289
- )
290
-
291
- # Raise error if constituent files do not cover analysis extent
292
- except IndexError:
293
- error_msg = textwrap.dedent(
294
- f"""
295
- The {model} tide model constituent files do not cover the requested analysis extent.
296
- This can occur if you are using clipped model files to improve run times.
297
- Consider using model files that cover your entire analysis area, or set `crop=False`
298
- to reduce the extent of tide model constituent files that is loaded.
299
- """
300
- ).strip()
301
- raise Exception(error_msg)
302
-
303
- # Calculate complex phase in radians for Euler's
304
- cph = -1j * ph * np.pi / 180.0
305
-
306
- # Calculate constituent oscillation
307
- hc = amp * np.exp(cph)
308
-
309
- # Determine the number of points and times to process. If in
310
- # "one-to-many" mode, these counts are used to repeat our extracted
311
- # constituents and timesteps so we can extract tides for all
312
- # combinations of our input times and tide modelling points.
313
- # If in "one-to-one" mode, we avoid this step by setting counts to 1
314
- # (e.g. "repeat 1 times")
315
- points_repeat = len(x) if mode == "one-to-many" else 1
316
- time_repeat = len(time) if mode == "one-to-many" else 1
317
-
318
- # If in "one-to-many" mode, repeat constituents to length of time
319
- # and number of input coords before passing to `predict_tide_drift`
320
- t, hc, deltat = (
321
- np.tile(timescale.tide, points_repeat),
322
- hc.repeat(time_repeat, axis=0),
323
- np.tile(deltat, points_repeat),
324
- )
325
-
326
- # Predict tidal elevations at time and infer minor corrections
327
- npts = len(t)
328
- tide = np.ma.zeros((npts), fill_value=np.nan)
329
- tide.mask = np.any(hc.mask, axis=1)
330
-
331
- # Predict tides
332
- tide.data[:] = pyTMD.predict.drift(t, hc, c, deltat=deltat, corrections=pytmd_model.corrections)
333
- minor = pyTMD.predict.infer_minor(
334
- t,
335
- hc,
336
- c,
337
- deltat=deltat,
338
- corrections=pytmd_model.corrections,
339
- minor=pytmd_model.minor,
340
- )
341
- tide.data[:] += minor.data[:]
342
-
343
- # Replace invalid values with fill value
344
- tide.data[tide.mask] = tide.fill_value
345
-
346
- # Convert data to pandas.DataFrame, and set index to our input
347
- # time/x/y values
348
- tide_df = pd.DataFrame({
349
- "time": np.tile(time, points_repeat),
350
- "x": np.repeat(x, time_repeat),
351
- "y": np.repeat(y, time_repeat),
352
- "tide_model": model,
353
- "tide_height": tide,
354
- }).set_index(["time", "x", "y"])
355
-
356
- # Optionally convert outputs to integer units (can save memory)
357
- if output_units == "m":
358
- tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
359
- elif output_units == "cm":
360
- tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
361
- elif output_units == "mm":
362
- tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
363
-
364
- return tide_df
24
+ from .utils import DatetimeLike, _set_directory, _standardise_time, idw, list_models
365
25
 
366
26
 
367
27
  def _ensemble_model(
@@ -529,20 +189,195 @@ def _ensemble_model(
529
189
  return pd.concat(ensemble_list)
530
190
 
531
191
 
192
+ def _parallel_splits(
193
+ total_points: int,
194
+ model_count: int,
195
+ parallel_max: int | None = None,
196
+ min_points_per_split: int = 1000,
197
+ ) -> int:
198
+ """
199
+ Calculates the optimal number of parallel splits for data
200
+ processing based on system resources and processing constraints.
201
+
202
+ Parameters:
203
+ -----------
204
+ total_points : int
205
+ Total number of data points to process
206
+ model_count : int
207
+ Number of models that will be run in parallel
208
+ parallel_max : int, optional
209
+ Maximum number of parallel processes to use. If None, uses CPU core count
210
+ min_points_per_split : int, default=1000
211
+ Minimum number of points that should be processed in each split
212
+ """
213
+ # Get available CPUs. First see if `CPU_GUARANTEE` exists in
214
+ # environment (if running in JupyterHub); if not use psutil
215
+ # followed by standard CPU count
216
+ if parallel_max is None:
217
+ # Take the first valid output
218
+ raw_value = os.environ.get("CPU_GUARANTEE") or psutil.cpu_count(logical=False) or os.cpu_count() or 1
219
+
220
+ # Convert to integer
221
+ if isinstance(raw_value, str):
222
+ parallel_max = int(float(raw_value))
223
+ else:
224
+ parallel_max = int(raw_value)
225
+
226
+ # Calculate optimal number of splits based on constraints
227
+ splits_by_size = total_points / min_points_per_split
228
+ splits_by_cpu = parallel_max / model_count
229
+ optimal_splits = min(splits_by_size, splits_by_cpu)
230
+
231
+ # Convert to integer and ensure at least 1 split
232
+ final_split_count = int(max(1, optimal_splits))
233
+ return final_split_count
234
+
235
+
236
+ def _model_tides(
237
+ model,
238
+ x,
239
+ y,
240
+ time,
241
+ directory,
242
+ crs,
243
+ crop,
244
+ method,
245
+ extrapolate,
246
+ cutoff,
247
+ output_units,
248
+ mode,
249
+ ):
250
+ """Worker function applied in parallel by `model_tides`. Handles the
251
+ extraction of tide modelling constituents and tide modelling using
252
+ `pyTMD`.
253
+ """
254
+ # Obtain model details
255
+ pytmd_model = pyTMD.io.model(directory).elevation(model)
256
+
257
+ # Reproject x, y to latitude/longitude
258
+ transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
259
+ lon, lat = transformer.transform(x.flatten(), y.flatten())
260
+
261
+ # Convert datetime
262
+ timescale = pyTMD.time.timescale().from_datetime(time.flatten())
263
+
264
+ try:
265
+ # Read tidal constants and interpolate to grid points
266
+ amp, ph, c = pytmd_model.extract_constants(
267
+ lon,
268
+ lat,
269
+ type=pytmd_model.type,
270
+ crop=crop,
271
+ method=method,
272
+ extrapolate=extrapolate,
273
+ cutoff=cutoff,
274
+ append_node=False,
275
+ # append_node=True,
276
+ )
277
+
278
+ # TODO: Return constituents
279
+ # print(amp.shape, ph.shape, c)
280
+ # print(pd.DataFrame({"amplitude": amp}))
281
+
282
+ # Raise error if constituent files do not cover analysis extent
283
+ except IndexError:
284
+ error_msg = f"""
285
+ The {model} tide model constituent files do not cover the analysis extent
286
+ ({min(lon):.2f}, {max(lon):.2f}, {min(lat):.2f}, {max(lat):.2f}).
287
+ This can occur if you are using clipped model files to improve run times.
288
+ Consider using model files that cover your entire analysis area, or set `crop=False`
289
+ to reduce the extent of tide model constituent files that is loaded.
290
+ """
291
+ raise Exception(textwrap.dedent(error_msg).strip()) from None
292
+
293
+ # Calculate complex phase in radians for Euler's
294
+ cph = -1j * ph * np.pi / 180.0
295
+
296
+ # Calculate constituent oscillation
297
+ hc = amp * np.exp(cph)
298
+
299
+ # Compute delta times based on model
300
+ if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
301
+ # Use delta time at 2000.0 to match TMD outputs
302
+ deltat = np.zeros_like(timescale.tt_ut1)
303
+ else:
304
+ # Use interpolated delta times
305
+ deltat = timescale.tt_ut1
306
+
307
+ # In "one-to-many" mode, extracted tidal constituents and timesteps
308
+ # are repeated/multiplied out to match the number of input points and
309
+ # timesteps, enabling the modeling of tides across all combinations
310
+ # of input times and points. In "one-to-one" mode, no repetition is
311
+ # needed, so each repeat count is set to 1.
312
+ points_repeat = len(x) if mode == "one-to-many" else 1
313
+ time_repeat = len(time) if mode == "one-to-many" else 1
314
+ t, hc, deltat = (
315
+ np.tile(timescale.tide, points_repeat),
316
+ hc.repeat(time_repeat, axis=0),
317
+ np.tile(deltat, points_repeat),
318
+ )
319
+
320
+ # Create arrays to hold outputs
321
+ tide = np.ma.zeros((len(t)), fill_value=np.nan)
322
+ tide.mask = np.any(hc.mask, axis=1)
323
+
324
+ # Predict tidal elevations at time and infer minor corrections
325
+ tide.data[:] = pyTMD.predict.drift(
326
+ t,
327
+ hc,
328
+ c,
329
+ deltat=deltat,
330
+ corrections=pytmd_model.corrections,
331
+ )
332
+ minor = pyTMD.predict.infer_minor(
333
+ t,
334
+ hc,
335
+ c,
336
+ deltat=deltat,
337
+ corrections=pytmd_model.corrections,
338
+ minor=pytmd_model.minor,
339
+ )
340
+ tide.data[:] += minor.data[:]
341
+
342
+ # Replace invalid values with fill value
343
+ tide.data[tide.mask] = tide.fill_value
344
+
345
+ # Convert data to pandas.DataFrame, and set index to our input
346
+ # time/x/y values
347
+ tide_df = pd.DataFrame({
348
+ "time": np.tile(time, points_repeat),
349
+ "x": np.repeat(x, time_repeat),
350
+ "y": np.repeat(y, time_repeat),
351
+ "tide_model": model,
352
+ "tide_height": tide,
353
+ }).set_index(["time", "x", "y"])
354
+
355
+ # Optionally convert outputs to integer units (can save memory)
356
+ if output_units == "m":
357
+ tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
358
+ elif output_units == "cm":
359
+ tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
360
+ elif output_units == "mm":
361
+ tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
362
+
363
+ return tide_df
364
+
365
+
532
366
  def model_tides(
533
367
  x: float | list[float] | xr.DataArray,
534
368
  y: float | list[float] | xr.DataArray,
535
- time: np.ndarray | pd.DatetimeIndex,
369
+ time: DatetimeLike,
536
370
  model: str | list[str] = "EOT20",
537
371
  directory: str | os.PathLike | None = None,
538
372
  crs: str = "EPSG:4326",
539
373
  crop: bool = True,
540
- method: str = "spline",
374
+ method: str = "linear",
541
375
  extrapolate: bool = True,
542
376
  cutoff: float | None = None,
543
377
  mode: str = "one-to-many",
544
378
  parallel: bool = True,
545
- parallel_splits: int = 5,
379
+ parallel_splits: int | str = "auto",
380
+ parallel_max: int | None = None,
546
381
  output_units: str = "m",
547
382
  output_format: str = "long",
548
383
  ensemble_models: list[str] | None = None,
@@ -578,10 +413,11 @@ def model_tides(
578
413
  the location at which to model tides. By default these
579
414
  coordinates should be lat/lon; use "crs" if they
580
415
  are in a custom coordinate reference system.
581
- time : Numpy datetime array or pandas.DatetimeIndex
582
- An array containing `datetime64[ns]` values or a
583
- `pandas.DatetimeIndex` providing the times at which to
584
- model tides in UTC time.
416
+ time : DatetimeLike
417
+ Times at which to model tide heights (in UTC). Accepts
418
+ any format that can be converted by `pandas.to_datetime()`;
419
+ e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
420
+ datetime.datetime and strings (e.g. "2020-01-01 23:00").
585
421
  model : str or list of str, optional
586
422
  The tide model (or models) to use to model tides.
587
423
  Defaults to "EOT20"; for a full list of available/supported
@@ -602,11 +438,11 @@ def model_tides(
602
438
  1 degree buffer around all input points. Defaults to True.
603
439
  method : str, optional
604
440
  Method used to interpolate tidal constituents
605
- from model files. Options include:
441
+ from model files. Defaults to "linear"; options include:
606
442
 
607
- - "spline": scipy bivariate spline interpolation (default)
608
- - "bilinear": quick bilinear interpolation
609
443
  - "linear", "nearest": scipy regular grid interpolations
444
+ - "spline": scipy bivariate spline interpolation
445
+ - "bilinear": quick bilinear interpolation
610
446
  extrapolate : bool, optional
611
447
  Whether to extrapolate tides for x and y coordinates outside of
612
448
  the valid tide modelling domain using nearest-neighbor.
@@ -632,12 +468,16 @@ def model_tides(
632
468
  parallel. Optionally, tide modelling can also be run in parallel
633
469
  across input x and y coordinates (see "parallel_splits" below).
634
470
  Default is True.
635
- parallel_splits : int, optional
471
+ parallel_splits : str or int, optional
636
472
  Whether to split the input x and y coordinates into smaller,
637
473
  evenly-sized chunks that are processed in parallel. This can
638
474
  provide a large performance boost when processing large numbers
639
- of coordinates. The default is 5 chunks, which will split
640
- coordinates into 5 parallelised chunks.
475
+ of coordinates. The default is "auto", which will automatically
476
+ attempt to determine optimal splits based on available CPUs,
477
+ the number of input points, and the number of models.
478
+ parallel_max : int, optional
479
+ Maximum number of processes to run in parallel. The default of
480
+ None will automatically determine this from your available CPUs.
641
481
  output_units : str, optional
642
482
  Whether to return modelled tides in floating point metre units,
643
483
  or integer centimetre units (i.e. scaled by 100) or integer
@@ -674,9 +514,10 @@ def model_tides(
674
514
  models_requested = list(np.atleast_1d(model))
675
515
  x = np.atleast_1d(x)
676
516
  y = np.atleast_1d(y)
677
- time = np.atleast_1d(time)
517
+ time = _standardise_time(time)
678
518
 
679
519
  # Validate input arguments
520
+ assert time is not None, "Times for modelling tides muyst be provided via `time`."
680
521
  assert method in ("bilinear", "spline", "linear", "nearest")
681
522
  assert output_units in (
682
523
  "m",
@@ -695,10 +536,6 @@ def model_tides(
695
536
  "you intended to model multiple timesteps at each point."
696
537
  )
697
538
 
698
- # If time passed as a single Timestamp, convert to datetime64
699
- if isinstance(time, pd.Timestamp):
700
- time = time.to_datetime64()
701
-
702
539
  # Set tide modelling files directory. If no custom path is
703
540
  # provided, try global environment variable.
704
541
  directory = _set_directory(directory)
@@ -770,13 +607,28 @@ def model_tides(
770
607
  mode=mode,
771
608
  )
772
609
 
773
- # Ensure requested parallel splits is not smaller than number of points
774
- parallel_splits = min(parallel_splits, len(x))
610
+ # If automatic parallel splits, calculate optimal value
611
+ # based on available parallelisation, number of points
612
+ # and number of models
613
+ if parallel_splits == "auto":
614
+ parallel_splits = _parallel_splits(
615
+ total_points=len(x),
616
+ model_count=len(models_to_process),
617
+ parallel_max=parallel_max,
618
+ )
619
+
620
+ # Verify that parallel splits are not larger than number of points
621
+ assert isinstance(parallel_splits, int)
622
+ if parallel_splits > len(x):
623
+ raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
775
624
 
776
625
  # Parallelise if either multiple models or multiple splits requested
626
+
777
627
  if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
778
- with ProcessPoolExecutor() as executor:
779
- print(f"Modelling tides using {', '.join(models_to_process)} in parallel")
628
+ with ProcessPoolExecutor(max_workers=parallel_max) as executor:
629
+ print(
630
+ f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})"
631
+ )
780
632
 
781
633
  # Optionally split lon/lat points into `splits_n` chunks
782
634
  # that will be applied in parallel
@@ -824,7 +676,7 @@ def model_tides(
824
676
  model_outputs = []
825
677
 
826
678
  for model_i in models_to_process:
827
- print(f"Modelling tides using {model_i}")
679
+ print(f"Modelling tides with {model_i}")
828
680
  tide_df = iter_func(model_i, x, y, time)
829
681
  model_outputs.append(tide_df)
830
682
 
@@ -854,3 +706,135 @@ def model_tides(
854
706
  tide_df = tide_df.reindex(output_indices)
855
707
 
856
708
  return tide_df
709
+
710
+
711
+ def model_phases(
712
+ x: float | list[float] | xr.DataArray,
713
+ y: float | list[float] | xr.DataArray,
714
+ time: DatetimeLike,
715
+ model: str | list[str] = "EOT20",
716
+ directory: str | os.PathLike | None = None,
717
+ time_offset: str = "15 min",
718
+ return_tides: bool = False,
719
+ **model_tides_kwargs,
720
+ ) -> pd.DataFrame:
721
+ """
722
+ Model tide phases (low-flow, high-flow, high-ebb, low-ebb)
723
+ at multiple coordinates and/or timesteps using one
724
+ or more ocean tide models.
725
+
726
+ Ebb and flow phases are calculated by running the
727
+ `eo_tides.model.model_tides` function twice, once for
728
+ the requested timesteps, and again after subtracting a
729
+ small time offset (by default, 15 minutes). If tides
730
+ increased over this period, they are assigned as "flow";
731
+ if they decreased, they are assigned as "ebb".
732
+ Tides are considered "high" if equal or greater than 0
733
+ metres tide height, otherwise "low".
734
+
735
+ This function supports all parameters that are supported
736
+ by `model_tides`.
737
+
738
+ Parameters
739
+ ----------
740
+ x, y : float or list of float
741
+ One or more x and y coordinates used to define
742
+ the location at which to model tide phases. By default
743
+ these coordinates should be lat/lon; use "crs" if they
744
+ are in a custom coordinate reference system.
745
+ time : DatetimeLike
746
+ Times at which to model tide phases (in UTC). Accepts
747
+ any format that can be converted by `pandas.to_datetime()`;
748
+ e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
749
+ datetime.datetime and strings (e.g. "2020-01-01 23:00").
750
+ model : str or list of str, optional
751
+ The tide model (or models) to use to compute tide phases.
752
+ Defaults to "EOT20"; for a full list of available/supported
753
+ models, run `eo_tides.model.list_models`.
754
+ directory : str, optional
755
+ The directory containing tide model data files. If no path is
756
+ provided, this will default to the environment variable
757
+ `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
758
+ Tide modelling files should be stored in sub-folders for each
759
+ model that match the structure required by `pyTMD`
760
+ (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
761
+ time_offset: str, optional
762
+ The time offset/delta used to generate a time series of
763
+ offset tide heights required for phase calculation. Defaults
764
+ to "15 min"; can be any string passed to `pandas.Timedelta`.
765
+ return_tides: bool, optional
766
+ Whether to return intermediate modelled tide heights as a
767
+ "tide_height" column in the output dataframe. Defaults to False.
768
+ **model_tides_kwargs :
769
+ Optional parameters passed to the `eo_tides.model.model_tides`
770
+ function. Important parameters include `output_format` (e.g.
771
+ whether to return results in wide or long format), `crop`
772
+ (whether to crop tide model constituent files on-the-fly to
773
+ improve performance) etc.
774
+
775
+ Returns
776
+ -------
777
+ pandas.DataFrame
778
+ A dataframe containing modelled tide phases.
779
+
780
+ """
781
+
782
+ # Pop output format and mode for special handling
783
+ output_format = model_tides_kwargs.pop("output_format", "long")
784
+ mode = model_tides_kwargs.pop("mode", "one-to-many")
785
+
786
+ # Model tides
787
+ tide_df = model_tides(
788
+ x=x,
789
+ y=y,
790
+ time=time,
791
+ model=model,
792
+ directory=directory,
793
+ **model_tides_kwargs,
794
+ )
795
+
796
+ # Model tides for a time `time_offset` (15 minutes by default) prior to each previously
797
+ # modelled satellite acquisition time. This allows us to compare
798
+ # tide heights to see if they are rising or falling.
799
+ pre_df = model_tides(
800
+ x=x,
801
+ y=y,
802
+ time=time - pd.Timedelta(time_offset),
803
+ model=model,
804
+ directory=directory,
805
+ **model_tides_kwargs,
806
+ )
807
+
808
+ # Compare tides computed for each timestep. If the previous tide
809
+ # was higher than the current tide, the tide is 'ebbing'. If the
810
+ # previous tide was lower, the tide is 'flowing'
811
+ ebb_flow = (tide_df.tide_height < pre_df.tide_height.values).replace({True: "ebb", False: "flow"})
812
+
813
+ # If tides are greater than 0, then "high", otherwise "low"
814
+ high_low = (tide_df.tide_height >= 0).replace({True: "high", False: "low"})
815
+
816
+ # Combine into one string and add to data
817
+ tide_df["tide_phase"] = high_low.astype(str) + "-" + ebb_flow.astype(str)
818
+
819
+ # Optionally convert to a wide format dataframe with a tide model in
820
+ # each dataframe column
821
+ if output_format == "wide":
822
+ # Pivot into wide format with each tide model as a column
823
+ print("Converting to a wide format dataframe")
824
+ tide_df = tide_df.pivot(columns="tide_model")
825
+
826
+ # If in 'one-to-one' mode, reindex using our input time/x/y
827
+ # values to ensure the output is sorted the same as our inputs
828
+ if mode == "one-to-one":
829
+ output_indices = pd.MultiIndex.from_arrays([time, x, y], names=["time", "x", "y"])
830
+ tide_df = tide_df.reindex(output_indices)
831
+
832
+ # Optionally drop tides
833
+ if not return_tides:
834
+ return tide_df.drop("tide_height", axis=1)["tide_phase"]
835
+
836
+ # Optionally drop tide heights
837
+ if not return_tides:
838
+ return tide_df.drop("tide_height", axis=1)
839
+
840
+ return tide_df