eo-tides 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/model.py CHANGED
@@ -21,172 +21,7 @@ import pyproj
21
21
  import pyTMD
22
22
  from tqdm import tqdm
23
23
 
24
- from .utils import DatetimeLike, _set_directory, _standardise_time, idw, list_models
25
-
26
-
27
- def _ensemble_model(
28
- tide_df,
29
- crs,
30
- ensemble_models,
31
- ensemble_func=None,
32
- ensemble_top_n=3,
33
- ranking_points="https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/derivative/dea_intertidal/supplementary/rankings_ensemble_2017-2019.geojson",
34
- ranking_valid_perc=0.02,
35
- **idw_kwargs,
36
- ):
37
- """Combine multiple tide models into a single locally optimised
38
- ensemble tide model using external model ranking data (e.g.
39
- satellite altimetry or NDWI-tide correlations along the coastline)
40
- to inform the selection of the best local models.
41
-
42
- This function performs the following steps:
43
- 1. Takes a dataframe of tide heights from multiple tide models, as
44
- produced by `eo_tides.model.model_tides`
45
- 1. Loads model ranking points from a GeoJSON file, filters them
46
- based on the valid data percentage, and retains relevant columns
47
- 2. Interpolates the model rankings into the "x" and "y" coordinates
48
- of the original dataframe using Inverse Weighted Interpolation (IDW)
49
- 3. Uses rankings to combine multiple tide models into a single
50
- optimised ensemble model (by default, by taking the mean of the
51
- top 3 ranked models)
52
- 4. Returns a new dataFrame with the combined ensemble model predictions
53
-
54
- Parameters
55
- ----------
56
- tide_df : pandas.DataFrame
57
- DataFrame produced by `eo_tides.model.model_tides`, containing
58
- tide model predictions with columns:
59
- `["time", "x", "y", "tide_height", "tide_model"]`.
60
- crs : string
61
- Coordinate reference system for the "x" and "y" coordinates in
62
- `tide_df`. Used to ensure that interpolations are performed
63
- in the correct CRS.
64
- ensemble_models : list
65
- A list of models to include in the ensemble modelling process.
66
- All values must exist as columns with the prefix "rank_" in
67
- `ranking_points`.
68
- ensemble_func : dict, optional
69
- By default, a simple ensemble model will be calculated by taking
70
- the mean of the `ensemble_top_n` tide models at each location.
71
- However, a dictionary containing more complex ensemble
72
- calculations can also be provided. Dictionary keys are used
73
- to name output ensemble models; functions should take a column
74
- named "rank" and convert it to a weighting, e.g.:
75
- `ensemble_func = {"ensemble-custom": lambda x: x["rank"] <= 3}`
76
- ensemble_top_n : int, optional
77
- If `ensemble_func` is None, this sets the number of top models
78
- to include in the mean ensemble calculation. Defaults to 3.
79
- ranking_points : str, optional
80
- Path to the GeoJSON file containing model ranking points. This
81
- dataset should include columns containing rankings for each tide
82
- model, named with the prefix "rank_". e.g. "rank_EOT20".
83
- Low values should represent high rankings (e.g. 1 = top ranked).
84
- ranking_valid_perc : float, optional
85
- Minimum percentage of valid data required to include a model
86
- rank point in the analysis, as defined in a column named
87
- "valid_perc". Defaults to 0.02.
88
- **idw_kwargs
89
- Optional keyword arguments to pass to the `idw` function used
90
- for interpolation. Useful values include `k` (number of nearest
91
- neighbours to use in interpolation), `max_dist` (maximum
92
- distance to nearest neighbours), and `k_min` (minimum number of
93
- neighbours required after `max_dist` is applied).
94
-
95
- Returns
96
- -------
97
- pandas.DataFrame
98
- DataFrame containing the ensemble model predictions, matching
99
- the format of the input `tide_df` (e.g. columns `["time", "x",
100
- "y", "tide_height", "tide_model"]`. By default the 'tide_model'
101
- column will be labeled "ensemble" for the combined model
102
- predictions (but if a custom dictionary of ensemble functions is
103
- provided via `ensemble_func`, each ensemble will be named using
104
- the provided dictionary keys).
105
-
106
- """
107
- # Extract x and y coords from dataframe
108
- x = tide_df.index.get_level_values(level="x")
109
- y = tide_df.index.get_level_values(level="y")
110
-
111
- # Load model ranks points and reproject to same CRS as x and y
112
- model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
113
- model_ranks_gdf = (
114
- gpd.read_file(ranking_points)
115
- .to_crs(crs)
116
- .query(f"valid_perc > {ranking_valid_perc}")
117
- .dropna()[model_ranking_cols + ["geometry"]]
118
- )
119
-
120
- # Use points to interpolate model rankings into requested x and y
121
- id_kwargs_str = "" if idw_kwargs == {} else idw_kwargs
122
- print(f"Interpolating model rankings using IDW interpolation {id_kwargs_str}")
123
- ensemble_ranks_df = (
124
- # Run IDW interpolation on subset of ranking columns
125
- pd.DataFrame(
126
- idw(
127
- input_z=model_ranks_gdf[model_ranking_cols],
128
- input_x=model_ranks_gdf.geometry.x,
129
- input_y=model_ranks_gdf.geometry.y,
130
- output_x=x,
131
- output_y=y,
132
- **idw_kwargs,
133
- ),
134
- columns=model_ranking_cols,
135
- )
136
- .assign(x=x, y=y)
137
- # Drop any duplicates then melt columns into long format
138
- .drop_duplicates()
139
- .melt(id_vars=["x", "y"], var_name="tide_model", value_name="rank")
140
- # Remore "rank_" prefix to get plain model names
141
- .replace({"^rank_": ""}, regex=True)
142
- # Set index columns and rank across groups
143
- .set_index(["tide_model", "x", "y"])
144
- .groupby(["x", "y"])
145
- .rank()
146
- )
147
-
148
- # If no custom ensemble funcs are provided, use a default ensemble
149
- # calculation that takes the mean of the top N tide models
150
- if ensemble_func is None:
151
- ensemble_func = {"ensemble": lambda x: x["rank"] <= ensemble_top_n}
152
-
153
- # Create output list to hold computed ensemble model outputs
154
- ensemble_list = []
155
-
156
- # Loop through all provided ensemble generation functions
157
- for ensemble_n, ensemble_f in ensemble_func.items():
158
- print(f"Combining models into single {ensemble_n} model")
159
-
160
- # Join ranks to input tide data, compute weightings and group
161
- grouped = (
162
- # Add tide model as an index so we can join with model ranks
163
- tide_df.set_index("tide_model", append=True)
164
- .join(ensemble_ranks_df)
165
- # Add temp columns containing weightings and weighted values
166
- .assign(
167
- weights=ensemble_f, # use custom func to compute weights
168
- weighted=lambda i: i.tide_height * i.weights,
169
- )
170
- # Groupby is specified in a weird order here as this seems
171
- # to be the easiest way to preserve correct index sorting
172
- .groupby(["x", "y", "time"])
173
- )
174
-
175
- # Use weightings to combine multiple models into single ensemble
176
- ensemble_df = (
177
- # Calculate weighted mean and convert back to dataframe
178
- grouped.weighted.sum()
179
- .div(grouped.weights.sum())
180
- .to_frame("tide_height")
181
- # Label ensemble model and ensure indexes are in expected order
182
- .assign(tide_model=ensemble_n)
183
- .reorder_levels(["time", "x", "y"], axis=0)
184
- )
185
-
186
- ensemble_list.append(ensemble_df)
187
-
188
- # Combine all ensemble models and return as a single dataframe
189
- return pd.concat(ensemble_list)
24
+ from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw
190
25
 
191
26
 
192
27
  def _parallel_splits(
@@ -240,12 +75,13 @@ def _model_tides(
240
75
  time,
241
76
  directory,
242
77
  crs,
243
- crop,
78
+ mode,
79
+ output_units,
244
80
  method,
245
81
  extrapolate,
246
82
  cutoff,
247
- output_units,
248
- mode,
83
+ crop,
84
+ crop_buffer,
249
85
  ):
250
86
  """Worker function applied in parallel by `model_tides`. Handles the
251
87
  extraction of tide modelling constituents and tide modelling using
@@ -268,14 +104,15 @@ def _model_tides(
268
104
  lat,
269
105
  type=pytmd_model.type,
270
106
  crop=crop,
107
+ buffer=crop_buffer,
271
108
  method=method,
272
109
  extrapolate=extrapolate,
273
110
  cutoff=cutoff,
274
111
  append_node=False,
275
- # append_node=True,
276
112
  )
277
113
 
278
114
  # TODO: Return constituents
115
+ # print(model, amp.shape)
279
116
  # print(amp.shape, ph.shape, c)
280
117
  # print(pd.DataFrame({"amplitude": amp}))
281
118
 
@@ -363,6 +200,189 @@ def _model_tides(
363
200
  return tide_df
364
201
 
365
202
 
203
+ def ensemble_tides(
204
+ tide_df,
205
+ crs,
206
+ ensemble_models,
207
+ ensemble_func=None,
208
+ ensemble_top_n=3,
209
+ ranking_points="https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/derivative/dea_intertidal/supplementary/rankings_ensemble_2017-2019.fgb",
210
+ ranking_valid_perc=0.02,
211
+ **idw_kwargs,
212
+ ):
213
+ """Combine multiple tide models into a single locally optimised
214
+ ensemble tide model using external model ranking data (e.g.
215
+ satellite altimetry or NDWI-tide correlations along the coastline)
216
+ to inform the selection of the best local models.
217
+
218
+ This function performs the following steps:
219
+
220
+ 1. Takes a dataframe of tide heights from multiple tide models, as
221
+ produced by `eo_tides.model.model_tides`
222
+ 2. Loads model ranking points from an external file, filters them
223
+ based on the valid data percentage, and retains relevant columns
224
+ 3. Interpolates the model rankings into the coordinates of the
225
+ original dataframe using Inverse Distance Weighted interpolation (IDW)
226
+ 4. Uses rankings to combine multiple tide models into a single
227
+ optimised ensemble model (by default, by taking the mean of the
228
+ top 3 ranked models)
229
+ 5. Returns a new dataframe with the combined ensemble model predictions
230
+
231
+ Parameters
232
+ ----------
233
+ tide_df : pandas.DataFrame
234
+ DataFrame produced by `eo_tides.model.model_tides`, containing
235
+ tide model predictions in long format with columns:
236
+ `["time", "x", "y", "tide_height", "tide_model"]`.
237
+ crs : string
238
+ Coordinate reference system for the "x" and "y" coordinates in
239
+ `tide_df`. Used to ensure that interpolations are performed
240
+ in the correct CRS.
241
+ ensemble_models : list
242
+ A list of models to include in the ensemble modelling process.
243
+ All values must exist as columns with the prefix "rank_" in
244
+ `ranking_points`.
245
+ ensemble_func : dict, optional
246
+ By default, a simple ensemble model will be calculated by taking
247
+ the mean of the `ensemble_top_n` tide models at each location.
248
+ However, a dictionary containing more complex ensemble
249
+ calculations can also be provided. Dictionary keys are used
250
+ to name output ensemble models; functions should take a column
251
+ named "rank" and convert it to a weighting, e.g.:
252
+ `ensemble_func = {"ensemble-custom": lambda x: x["rank"] <= 3}`
253
+ ensemble_top_n : int, optional
254
+ If `ensemble_func` is None, this sets the number of top models
255
+ to include in the mean ensemble calculation. Defaults to 3.
256
+ ranking_points : str, optional
257
+ Path to the file containing model ranking points. This dataset
258
+ should include columns containing rankings for each tide
259
+ model, named with the prefix "rank_". e.g. "rank_EOT20".
260
+ Low values should represent high rankings (e.g. 1 = top ranked).
261
+ The default value points to an example file covering Australia.
262
+ ranking_valid_perc : float, optional
263
+ Minimum percentage of valid data required to include a model
264
+ rank point in the analysis, as defined in a column named
265
+ "valid_perc". Defaults to 0.02.
266
+ **idw_kwargs
267
+ Optional keyword arguments to pass to the `idw` function used
268
+ for interpolation. Useful values include `k` (number of nearest
269
+ neighbours to use in interpolation), `max_dist` (maximum
270
+ distance to nearest neighbours), and `k_min` (minimum number of
271
+ neighbours required after `max_dist` is applied).
272
+
273
+ Returns
274
+ -------
275
+ pandas.DataFrame
276
+ DataFrame containing the ensemble model predictions, matching
277
+ the format of the input `tide_df` (e.g. columns `["time", "x",
278
+ "y", "tide_height", "tide_model"]`). By default the 'tide_model'
279
+ column will be labeled "ensemble" for the combined model
280
+ predictions (but if a custom dictionary of ensemble functions is
281
+ provided via `ensemble_func`, each ensemble will be named using
282
+ the provided dictionary keys).
283
+
284
+ """
285
+ # Raise error if `tide_df` provided in wide format
286
+ if "tide_model" not in tide_df:
287
+ raise Exception(
288
+ "`tide_df` does not contain the expected 'tide_model' and "
289
+ "'tide_height' columns. Ensure that tides were modelled in "
290
+ "long format (i.e. `output_format='long'` in `model_tides`)."
291
+ )
292
+
293
+ # Extract x and y coords from dataframe
294
+ x = tide_df.index.get_level_values(level="x")
295
+ y = tide_df.index.get_level_values(level="y")
296
+
297
+ # Load model ranks points and reproject to same CRS as x and y
298
+ model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
299
+ try:
300
+ model_ranks_gdf = (
301
+ gpd.read_file(ranking_points, engine="pyogrio")
302
+ .to_crs(crs)
303
+ .query(f"valid_perc > {ranking_valid_perc}")
304
+ .dropna(how="all")[model_ranking_cols + ["geometry"]]
305
+ )
306
+ except KeyError:
307
+ error_msg = f"""
308
+ Not all of the expected "rank_" columns {model_ranking_cols} were
309
+ found in the columns of the ranking points file ({ranking_points}).
310
+ Consider passing a custom list of models using `ensemble_models`.
311
+ """
312
+ raise Exception(textwrap.dedent(error_msg).strip()) from None
313
+
314
+ # Use points to interpolate model rankings into requested x and y
315
+ id_kwargs_str = "" if idw_kwargs == {} else idw_kwargs
316
+ print(f"Interpolating model rankings using IDW interpolation {id_kwargs_str}")
317
+ ensemble_ranks_df = (
318
+ # Run IDW interpolation on subset of ranking columns
319
+ pd.DataFrame(
320
+ idw(
321
+ input_z=model_ranks_gdf[model_ranking_cols],
322
+ input_x=model_ranks_gdf.geometry.x,
323
+ input_y=model_ranks_gdf.geometry.y,
324
+ output_x=x,
325
+ output_y=y,
326
+ **idw_kwargs,
327
+ ),
328
+ columns=model_ranking_cols,
329
+ )
330
+ .assign(x=x, y=y)
331
+ # Drop any duplicates then melt columns into long format
332
+ .drop_duplicates()
333
+ .melt(id_vars=["x", "y"], var_name="tide_model", value_name="rank")
334
+ # Remove "rank_" prefix to get plain model names
335
+ .replace({"^rank_": ""}, regex=True)
336
+ # Set index columns and rank across groups
337
+ .set_index(["tide_model", "x", "y"])
338
+ .groupby(["x", "y"])
339
+ .rank()
340
+ )
341
+
342
+ # If no custom ensemble funcs are provided, use a default ensemble
343
+ # calculation that takes the mean of the top N tide models
344
+ if ensemble_func is None:
345
+ ensemble_func = {"ensemble": lambda x: x["rank"] <= ensemble_top_n}
346
+
347
+ # Create output list to hold computed ensemble model outputs
348
+ ensemble_list = []
349
+
350
+ # Loop through all provided ensemble generation functions
351
+ for ensemble_n, ensemble_f in ensemble_func.items():
352
+ print(f"Combining models into single {ensemble_n} model")
353
+
354
+ # Join ranks to input tide data, compute weightings and group
355
+ grouped = (
356
+ # Add tide model as an index so we can join with model ranks
357
+ tide_df.set_index("tide_model", append=True)
358
+ .join(ensemble_ranks_df)
359
+ # Add temp columns containing weightings and weighted values
360
+ .assign(
361
+ weights=ensemble_f, # use custom func to compute weights
362
+ weighted=lambda i: i.tide_height * i.weights,
363
+ )
364
+ # Groupby is specified in a weird order here as this seems
365
+ # to be the easiest way to preserve correct index sorting
366
+ .groupby(["x", "y", "time"])
367
+ )
368
+
369
+ # Use weightings to combine multiple models into single ensemble
370
+ ensemble_df = (
371
+ # Calculate weighted mean and convert back to dataframe
372
+ grouped.weighted.sum()
373
+ .div(grouped.weights.sum())
374
+ .to_frame("tide_height")
375
+ # Label ensemble model and ensure indexes are in expected order
376
+ .assign(tide_model=ensemble_n)
377
+ .reorder_levels(["time", "x", "y"], axis=0)
378
+ )
379
+
380
+ ensemble_list.append(ensemble_df)
381
+
382
+ # Combine all ensemble models and return as a single dataframe
383
+ return pd.concat(ensemble_list)
384
+
385
+
366
386
  def model_tides(
367
387
  x: float | list[float] | xr.DataArray,
368
388
  y: float | list[float] | xr.DataArray,
@@ -370,16 +390,17 @@ def model_tides(
370
390
  model: str | list[str] = "EOT20",
371
391
  directory: str | os.PathLike | None = None,
372
392
  crs: str = "EPSG:4326",
373
- crop: bool = True,
393
+ mode: str = "one-to-many",
394
+ output_format: str = "long",
395
+ output_units: str = "m",
374
396
  method: str = "linear",
375
397
  extrapolate: bool = True,
376
398
  cutoff: float | None = None,
377
- mode: str = "one-to-many",
399
+ crop: bool = True,
400
+ crop_buffer: float | None = 5,
378
401
  parallel: bool = True,
379
402
  parallel_splits: int | str = "auto",
380
403
  parallel_max: int | None = None,
381
- output_units: str = "m",
382
- output_format: str = "long",
383
404
  ensemble_models: list[str] | None = None,
384
405
  **ensemble_kwargs,
385
406
  ) -> pd.DataFrame:
@@ -418,10 +439,12 @@ def model_tides(
418
439
  any format that can be converted by `pandas.to_datetime()`;
419
440
  e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
420
441
  datetime.datetime and strings (e.g. "2020-01-01 23:00").
442
+ For example: `time=pd.date_range(start="2000", end="2001", freq="5h")`
421
443
  model : str or list of str, optional
422
- The tide model (or models) to use to model tides.
423
- Defaults to "EOT20"; for a full list of available/supported
424
- models, run `eo_tides.model.list_models`.
444
+ The tide model (or list of models) to use to model tides.
445
+ Defaults to "EOT20"; specify "all" to use all models available
446
+ in `directory`. For a full list of available and supported models,
447
+ run `eo_tides.model.list_models`.
425
448
  directory : str, optional
426
449
  The directory containing tide model data files. If no path is
427
450
  provided, this will default to the environment variable
@@ -432,10 +455,29 @@ def model_tides(
432
455
  crs : str, optional
433
456
  Input coordinate reference system for x and y coordinates.
434
457
  Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
435
- crop : bool, optional
436
- Whether to crop tide model constituent files on-the-fly to
437
- improve performance. Cropping will be performed based on a
438
- 1 degree buffer around all input points. Defaults to True.
458
+ mode : str, optional
459
+ The analysis mode to use for tide modelling. Supports two options:
460
+
461
+ - "one-to-many": Models tides for every timestep in "time" at
462
+ every input x and y coordinate point. This is useful if you
463
+ want to model tides for a specific list of timesteps across
464
+ multiple spatial points (e.g. for the same set of satellite
465
+ acquisition times at various locations across your study area).
466
+ - "one-to-one": Model tides using a unique timestep for each
467
+ set of x and y coordinates. In this mode, the number of x and
468
+ y points must equal the number of timesteps provided in "time".
469
+ output_format : str, optional
470
+ Whether to return the output dataframe in long format (with
471
+ results stacked vertically along "tide_model" and "tide_height"
472
+ columns), or wide format (with a column for each tide model).
473
+ Defaults to "long".
474
+ output_units : str, optional
475
+ Whether to return modelled tides in floating point metre units,
476
+ or integer centimetre units (i.e. scaled by 100) or integer
477
+ millimetre units (i.e. scaled by 1000). Returning outputs in
478
+ integer units can be useful for reducing memory usage.
479
+ Defaults to "m" for metres; set to "cm" for centimetres or "mm"
480
+ for millimetres.
439
481
  method : str, optional
440
482
  Method used to interpolate tidal constituents
441
483
  from model files. Defaults to "linear"; options include:
@@ -444,30 +486,27 @@ def model_tides(
444
486
  - "spline": scipy bivariate spline interpolation
445
487
  - "bilinear": quick bilinear interpolation
446
488
  extrapolate : bool, optional
447
- Whether to extrapolate tides for x and y coordinates outside of
489
+ Whether to extrapolate tides into x and y coordinates outside of
448
490
  the valid tide modelling domain using nearest-neighbor.
449
491
  cutoff : float, optional
450
492
  Extrapolation cutoff in kilometers. The default is None, which
451
493
  will extrapolate for all points regardless of distance from the
452
494
  valid tide modelling domain.
453
- mode : str, optional
454
- The analysis mode to use for tide modelling. Supports two options:
455
-
456
- - "one-to-many": Models tides for every timestep in "time" at
457
- every input x and y coordinate point. This is useful if you
458
- want to model tides for a specific list of timesteps across
459
- multiple spatial points (e.g. for the same set of satellite
460
- acquisition times at various locations across your study area).
461
- - "one-to-one": Model tides using a unique timestep for each
462
- set of x and y coordinates. In this mode, the number of x and
463
- y points must equal the number of timesteps provided in "time".
464
-
495
+ crop : bool, optional
496
+ Whether to crop tide model constituent files on-the-fly to
497
+ improve performance. Defaults to True; use `crop_buffer`
498
+ to customise the buffer distance used to crop the files.
499
+ crop_buffer : int or float, optional
500
+ The buffer distance in degrees used to crop tide model
501
+ constituent files around the modelling area. Defaults to 5,
502
+ which will crop constituents using a five degree buffer on either
503
+ side of the analysis extent.
465
504
  parallel : bool, optional
466
- Whether to parallelise tide modelling using `concurrent.futures`.
467
- If multiple tide models are requested, these will be run in
468
- parallel. Optionally, tide modelling can also be run in parallel
469
- across input x and y coordinates (see "parallel_splits" below).
470
- Default is True.
505
+ Whether to parallelise tide modelling. If multiple tide models are
506
+ requested, these will be run in parallel using `concurrent.futures`.
507
+ If enough workers are available, the analysis will also be split
508
+ into spatial chunks for additional parallelisation (see "parallel_splits"
509
+ below). Default is True.
471
510
  parallel_splits : str or int, optional
472
511
  Whether to split the input x and y coordinates into smaller,
473
512
  evenly-sized chunks that are processed in parallel. This can
@@ -478,23 +517,12 @@ def model_tides(
478
517
  parallel_max : int, optional
479
518
  Maximum number of processes to run in parallel. The default of
480
519
  None will automatically determine this from your available CPUs.
481
- output_units : str, optional
482
- Whether to return modelled tides in floating point metre units,
483
- or integer centimetre units (i.e. scaled by 100) or integer
484
- millimetre units (i.e. scaled by 1000. Returning outputs in
485
- integer units can be useful for reducing memory usage.
486
- Defaults to "m" for metres; set to "cm" for centimetres or "mm"
487
- for millimetres.
488
- output_format : str, optional
489
- Whether to return the output dataframe in long format (with
490
- results stacked vertically along "tide_model" and "tide_height"
491
- columns), or wide format (with a column for each tide model).
492
- Defaults to "long".
493
520
  ensemble_models : list of str, optional
494
521
  An optional list of models used to generate the ensemble tide
495
522
  model if "ensemble" tide modelling is requested. Defaults to
496
- ["FES2014", "TPXO9-atlas-v5", "EOT20", "HAMTIDE11", "GOT4.10",
497
- "FES2012", "TPXO8-atlas-v1"].
523
+ `["EOT20", "FES2012", "FES2014_extrapolated", "FES2022_extrapolated",
524
+ "GOT4.10", "GOT5.5_extrapolated", "GOT5.6_extrapolated",
525
+ "TPXO10-atlas-v2-nc", "TPXO8-atlas-nc", "TPXO9-atlas-v5-nc"]`.
498
526
  **ensemble_kwargs :
499
527
  Keyword arguments used to customise the generation of optional
500
528
  ensemble tide models if "ensemble" modelling are requested.
@@ -511,13 +539,12 @@ def model_tides(
511
539
 
512
540
  """
513
541
  # Turn inputs into arrays for consistent handling
514
- models_requested = list(np.atleast_1d(model))
515
542
  x = np.atleast_1d(x)
516
543
  y = np.atleast_1d(y)
517
544
  time = _standardise_time(time)
518
545
 
519
546
  # Validate input arguments
520
- assert time is not None, "Times for modelling tides muyst be provided via `time`."
547
+ assert time is not None, "Times for modelling tides must be provided via `time`."
521
548
  assert method in ("bilinear", "spline", "linear", "nearest")
522
549
  assert output_units in (
523
550
  "m",
@@ -528,6 +555,8 @@ def model_tides(
528
555
  "long",
529
556
  "wide",
530
557
  ), "Output format must be either 'long' or 'wide'."
558
+ assert np.issubdtype(x.dtype, np.number), "`x` must contain only valid numeric values, and must not be None."
559
+ assert np.issubdtype(y.dtype, np.number), "`y` must contain only valid numeric values, and must not be None.."
531
560
  assert len(x) == len(y), "x and y must be the same length."
532
561
  if mode == "one-to-one":
533
562
  assert len(x) == len(time), (
@@ -540,58 +569,12 @@ def model_tides(
540
569
  # provided, try global environment variable.
541
570
  directory = _set_directory(directory)
542
571
 
543
- # Get full list of supported models from pyTMD database;
544
- # add ensemble option to list of models
545
- available_models, valid_models = list_models(
546
- directory, show_available=False, show_supported=False, raise_error=True
572
+ # Standardise model list, handling "all" and "ensemble" functionality
573
+ models_to_process, models_requested, ensemble_models = _standardise_models(
574
+ model=model,
575
+ directory=directory,
576
+ ensemble_models=ensemble_models,
547
577
  )
548
- # TODO: This is hacky, find a better way. Perhaps a kwarg that
549
- # turns ensemble functionality on, and checks that supplied
550
- # models match models expected for ensemble?
551
- available_models = available_models + ["ensemble"]
552
- valid_models = valid_models + ["ensemble"]
553
-
554
- # Error if any models are not supported
555
- if not all(m in valid_models for m in models_requested):
556
- error_text = (
557
- f"One or more of the requested models are not valid:\n"
558
- f"{models_requested}\n\n"
559
- "The following models are supported:\n"
560
- f"{valid_models}"
561
- )
562
- raise ValueError(error_text)
563
-
564
- # Error if any models are not available in `directory`
565
- if not all(m in available_models for m in models_requested):
566
- error_text = (
567
- f"One or more of the requested models are valid, but not available in `{directory}`:\n"
568
- f"{models_requested}\n\n"
569
- f"The following models are available in `{directory}`:\n"
570
- f"{available_models}"
571
- )
572
- raise ValueError(error_text)
573
-
574
- # If ensemble modelling is requested, use a custom list of models
575
- # for subsequent processing
576
- if "ensemble" in models_requested:
577
- print("Running ensemble tide modelling")
578
- models_to_process = (
579
- ensemble_models
580
- if ensemble_models is not None
581
- else [
582
- "FES2014",
583
- "TPXO9-atlas-v5",
584
- "EOT20",
585
- "HAMTIDE11",
586
- "GOT4.10",
587
- "FES2012",
588
- "TPXO8-atlas-v1",
589
- ]
590
- )
591
-
592
- # Otherwise, models to process are the same as those requested
593
- else:
594
- models_to_process = models_requested
595
578
 
596
579
  # Update tide modelling func to add default keyword arguments that
597
580
  # are used for every iteration during parallel processing
@@ -599,12 +582,13 @@ def model_tides(
599
582
  _model_tides,
600
583
  directory=directory,
601
584
  crs=crs,
602
- crop=crop,
585
+ mode=mode,
586
+ output_units=output_units,
603
587
  method=method,
604
588
  extrapolate=extrapolate,
605
589
  cutoff=np.inf if cutoff is None else cutoff,
606
- output_units=output_units,
607
- mode=mode,
590
+ crop=crop,
591
+ crop_buffer=crop_buffer,
608
592
  )
609
593
 
610
594
  # If automatic parallel splits, calculate optimal value
@@ -623,7 +607,6 @@ def model_tides(
623
607
  raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
624
608
 
625
609
  # Parallelise if either multiple models or multiple splits requested
626
-
627
610
  if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
628
611
  with ProcessPoolExecutor(max_workers=parallel_max) as executor:
629
612
  print(
@@ -685,7 +668,7 @@ def model_tides(
685
668
 
686
669
  # Optionally compute ensemble model and add to dataframe
687
670
  if "ensemble" in models_requested:
688
- ensemble_df = _ensemble_model(tide_df, crs, models_to_process, **ensemble_kwargs)
671
+ ensemble_df = ensemble_tides(tide_df, crs, ensemble_models, **ensemble_kwargs)
689
672
 
690
673
  # Update requested models with any custom ensemble models, then
691
674
  # filter the dataframe to keep only models originally requested
@@ -747,10 +730,12 @@ def model_phases(
747
730
  any format that can be converted by `pandas.to_datetime()`;
748
731
  e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
749
732
  datetime.datetime and strings (e.g. "2020-01-01 23:00").
733
+ For example: `time=pd.date_range(start="2000", end="2001", freq="5h")`
750
734
  model : str or list of str, optional
751
- The tide model (or models) to use to compute tide phases.
752
- Defaults to "EOT20"; for a full list of available/supported
753
- models, run `eo_tides.model.list_models`.
735
+ The tide model (or list of models) to use to compute tide phases.
736
+ Defaults to "EOT20"; specify "all" to use all models available
737
+ in `directory`. For a full list of available and supported models,
738
+ run `eo_tides.model.list_models`.
754
739
  directory : str, optional
755
740
  The directory containing tide model data files. If no path is
756
741
  provided, this will default to the environment variable