eo-tides 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/model.py ADDED
@@ -0,0 +1,1104 @@
1
+ import os
2
+ import pathlib
3
+ from concurrent.futures import ProcessPoolExecutor
4
+ from functools import partial
5
+
6
+ import geopandas as gpd
7
+ import numpy as np
8
+ import odc.geo.xr
9
+ import pandas as pd
10
+ import pyproj
11
+ import pyTMD
12
+ from tqdm import tqdm
13
+
14
+ from eo_tides.utils import idw
15
+
16
+
17
def _model_tides(
    model,
    x,
    y,
    time,
    directory,
    crs,
    crop,
    method,
    extrapolate,
    cutoff,
    output_units,
    mode,
):
    """Worker function applied in parallel by `model_tides`. Handles the
    extraction of tide modelling constituents and tide modelling using
    `pyTMD`.

    Parameters
    ----------
    model : str
        Name of the tide model, passed to
        `pyTMD.io.model(directory).elevation(model)`.
    x, y : numpy.ndarray
        Coordinates of the points to model, expressed in `crs`.
    time : numpy.ndarray
        Times at which to model tides; converted with
        `pyTMD.time.timescale().from_datetime`.
    directory : path-like
        Directory containing the tide model files.
    crs : str
        Coordinate reference system of `x` and `y`; coordinates are
        transformed to "EPSG:4326" before constituents are extracted.
    crop, method, extrapolate, cutoff
        Forwarded unchanged to the relevant `pyTMD`
        `extract_constants` function. `crop` uses a bounding box of
        the input points buffered by one degree.
    output_units : str
        "m" returns float32 metres; "cm" and "mm" scale by 100 and
        1000 respectively and cast to int16.
    mode : str
        If "one-to-many", every time is modelled at every point;
        any other value models one time per point.

    Returns
    -------
    pandas.DataFrame
        Dataframe indexed by `["time", "x", "y"]` with columns
        "tide_model" and "tide_m".

    Raises
    ------
    ValueError
        If the model's `pyTMD` format is not one of the formats
        handled below.

    """
    # Obtain the pyTMD model definition (file locations, format etc)
    pytmd_model = pyTMD.io.model(directory).elevation(model)

    # Convert x, y to latitude/longitude
    transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
    lon, lat = transformer.transform(x.flatten(), y.flatten())

    # Convert datetime
    timescale = pyTMD.time.timescale().from_datetime(time.flatten())

    # Calculate bounds for cropping
    buffer = 1  # one degree on either side
    bounds = [
        lon.min() - buffer,
        lon.max() + buffer,
        lat.min() - buffer,
        lat.max() + buffer,
    ]

    # Read tidal constants and interpolate to grid points
    if pytmd_model.format in ("OTIS", "ATLAS-compact", "TMD3"):
        amp, ph, D, c = pyTMD.io.OTIS.extract_constants(
            lon,
            lat,
            pytmd_model.grid_file,
            pytmd_model.model_file,
            pytmd_model.projection,
            type=pytmd_model.type,
            grid=pytmd_model.file_format,
            crop=crop,
            bounds=bounds,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
        )

        # Use delta time at 2000.0 to match TMD outputs
        deltat = np.zeros((len(timescale)), dtype=np.float64)

    elif pytmd_model.format in ("ATLAS-netcdf",):
        amp, ph, D, c = pyTMD.io.ATLAS.extract_constants(
            lon,
            lat,
            pytmd_model.grid_file,
            pytmd_model.model_file,
            type=pytmd_model.type,
            crop=crop,
            bounds=bounds,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            scale=pytmd_model.scale,
            compressed=pytmd_model.compressed,
        )

        # Use delta time at 2000.0 to match TMD outputs
        deltat = np.zeros((len(timescale)), dtype=np.float64)

    elif pytmd_model.format in ("GOT-ascii", "GOT-netcdf"):
        amp, ph, c = pyTMD.io.GOT.extract_constants(
            lon,
            lat,
            pytmd_model.model_file,
            grid=pytmd_model.type,
            crop=crop,
            bounds=bounds,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            scale=pytmd_model.scale,
            compressed=pytmd_model.compressed,
        )

        # Delta time (TT - UT1)
        deltat = timescale.tt_ut1

    elif pytmd_model.format in ("FES-ascii", "FES-netcdf"):
        amp, ph = pyTMD.io.FES.extract_constants(
            lon,
            lat,
            pytmd_model.model_file,
            type=pytmd_model.type,
            version=pytmd_model.version,
            crop=crop,
            bounds=bounds,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            scale=pytmd_model.scale,
            compressed=pytmd_model.compressed,
        )

        # Available model constituents
        c = pytmd_model.constituents

        # Delta time (TT - UT1)
        deltat = timescale.tt_ut1

    else:
        # Without this branch an unrecognised format would only fail
        # further down with an unbound `amp`/`ph` variable
        raise ValueError(
            f"Unsupported tide model format '{pytmd_model.format}' for model '{model}'."
        )

    # Calculate complex phase in radians for Euler's
    cph = -1j * ph * np.pi / 180.0

    # Calculate constituent oscillation
    hc = amp * np.exp(cph)

    # Determine the number of points and times to process. If in
    # "one-to-many" mode, these counts are used to repeat our extracted
    # constituents and timesteps so we can extract tides for all
    # combinations of our input times and tide modelling points.
    # If in "one-to-one" mode, we avoid this step by setting counts to 1
    # (e.g. "repeat 1 times")
    points_repeat = len(x) if mode == "one-to-many" else 1
    time_repeat = len(time) if mode == "one-to-many" else 1

    # If in "one-to-many" mode, repeat constituents to length of time
    # and number of input coords before passing to `pyTMD.predict.drift`
    t, hc, deltat = (
        np.tile(timescale.tide, points_repeat),
        hc.repeat(time_repeat, axis=0),
        np.tile(deltat, points_repeat),
    )

    # Create a masked output array; a point is masked if any of its
    # constituents are masked
    npts = len(t)
    tide = np.ma.zeros((npts), fill_value=np.nan)
    tide.mask = np.any(hc.mask, axis=1)

    # Predict tidal elevations at time and infer minor corrections
    tide.data[:] = pyTMD.predict.drift(t, hc, c, deltat=deltat, corrections=pytmd_model.corrections)
    minor = pyTMD.predict.infer_minor(
        t,
        hc,
        c,
        deltat=deltat,
        corrections=pytmd_model.corrections,
        minor=pytmd_model.minor,
    )
    tide.data[:] += minor.data[:]

    # Replace invalid values with fill value
    tide.data[tide.mask] = tide.fill_value

    # Convert data to pandas.DataFrame, and set index to our input
    # time/x/y values
    tide_df = pd.DataFrame({
        "time": np.tile(time, points_repeat),
        "x": np.repeat(x, time_repeat),
        "y": np.repeat(y, time_repeat),
        "tide_model": model,
        "tide_m": tide,
    }).set_index(["time", "x", "y"])

    # Optionally convert outputs to integer units (can save memory)
    if output_units == "m":
        tide_df["tide_m"] = tide_df.tide_m.astype(np.float32)
    elif output_units == "cm":
        tide_df["tide_m"] = (tide_df.tide_m * 100).astype(np.int16)
    elif output_units == "mm":
        tide_df["tide_m"] = (tide_df.tide_m * 1000).astype(np.int16)

    return tide_df
222
+
223
+
224
def _ensemble_model(
    x,
    y,
    crs,
    tide_df,
    ensemble_models,
    ensemble_func=None,
    ensemble_top_n=3,
    ranking_points="https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/derivative/dea_intertidal/supplementary/rankings_ensemble_2017-2019.geojson",
    ranking_valid_perc=0.02,
    **idw_kwargs,
):
    """Build a locally optimised ensemble tide model from several
    individual tide models, using external model ranking points (e.g.
    satellite altimetry or NDWI-tide correlations along the coastline)
    to decide which models perform best at each location.

    The function works in four stages:

    1. Read the model ranking points, reproject them to `crs`, keep
       only points above the `ranking_valid_perc` threshold, and
       retain the relevant ranking columns
    2. Interpolate the rankings onto the requested x and y
       coordinates using Inverse Distance Weighted (IDW) interpolation
    3. Convert the rankings into weights and use them to combine the
       individual tide models (by default, the mean of the top 3
       ranked models)
    4. Return the combined predictions as a DataFrame

    Parameters
    ----------
    x : array-like
        x-coordinates at which ensemble predictions are required.
    y : array-like
        y-coordinates at which ensemble predictions are required.
    crs : string
        Coordinate reference system of `x` and `y`. Ranking points
        are reprojected to this CRS before interpolation.
    tide_df : pandas.DataFrame
        Tide model predictions, containing
        `["time", "x", "y", "tide_m", "tide_model"]`.
    ensemble_models : list
        Models to include in the ensemble. Each must have a matching
        column prefixed with "rank_" in `ranking_points`.
    ensemble_func : dict, optional
        By default, a simple ensemble is calculated as the mean of
        the `ensemble_top_n` tide models at each location. A
        dictionary of custom ensemble calculations can be supplied
        instead. Keys name the output ensemble models; values are
        functions that take a dataframe with a "rank" column and
        return a weighting, e.g.:
        `ensemble_func = {"ensemble-custom": lambda x: x["rank"] <= 3}`
    ensemble_top_n : int, optional
        Number of top ranked models averaged when `ensemble_func` is
        None. Defaults to 3.
    ranking_points : str, optional
        Path to the GeoJSON file of model ranking points. It should
        contain one ranking column per tide model, named with the
        prefix "rank_" (e.g. "rank_FES2014"). Low values should
        represent high rankings (e.g. 1 = top ranked).
    ranking_valid_perc : float, optional
        Ranking points are kept only if their "valid_perc" column
        exceeds this value. Defaults to 0.02.
    **idw_kwargs
        Optional keyword arguments passed to the `idw` function used
        for interpolation. Useful values include `k` (number of
        nearest neighbours), `max_dist` (maximum distance to nearest
        neighbours), and `k_min` (minimum number of neighbours
        required after `max_dist` is applied).

    Returns
    -------
    pandas.DataFrame
        Ensemble predictions in the same format as the input
        `tide_df` (e.g. columns `["time", "x", "y", "tide_m",
        "tide_model"]`). The "tide_model" column is labelled
        "ensemble" by default; if `ensemble_func` is provided, each
        ensemble is labelled with its dictionary key.

    """
    # Read ranking points, reproject to the CRS of x and y, and keep
    # only sufficiently valid points and the columns we need
    rank_columns = [f"rank_{m}" for m in ensemble_models]
    ranking_gdf = gpd.read_file(ranking_points).to_crs(crs)
    ranking_gdf = ranking_gdf.query(f"valid_perc > {ranking_valid_perc}")
    model_ranks_gdf = ranking_gdf.dropna()[rank_columns + ["geometry"]]

    # Interpolate the point rankings onto the requested coordinates
    id_kwargs_str = idw_kwargs if idw_kwargs else ""
    print(f"Interpolating model rankings using IDW interpolation {id_kwargs_str}")
    interpolated = idw(
        input_z=model_ranks_gdf[rank_columns],
        input_x=model_ranks_gdf.geometry.x,
        input_y=model_ranks_gdf.geometry.y,
        output_x=x,
        output_y=y,
        **idw_kwargs,
    )
    wide_ranks = pd.DataFrame(interpolated, columns=rank_columns).assign(x=x, y=y)

    # Drop duplicated coordinates, then reshape to one row per model
    long_ranks = wide_ranks.drop_duplicates().melt(
        id_vars=["x", "y"],
        var_name="tide_model",
        value_name="rank",
    )

    # Remove the "rank_" prefix to recover plain model names
    long_ranks = long_ranks.replace({"^rank_": ""}, regex=True)

    # Re-rank the interpolated values within each coordinate pair
    ensemble_ranks_df = long_ranks.set_index(["tide_model", "x", "y"]).groupby(["x", "y"]).rank()

    # Fall back to the default "mean of the top N models" ensemble
    if ensemble_func is None:
        ensemble_func = {"ensemble": lambda x: x["rank"] <= ensemble_top_n}

    # Tide data keyed by model so it can be joined to the rankings
    tides_by_model = tide_df.set_index("tide_model", append=True)

    ensemble_list = []
    for ensemble_n, ensemble_f in ensemble_func.items():
        print(f"Combining models into single {ensemble_n} model")

        # Attach ranks, then derive weights and weighted tide heights
        ranked = tides_by_model.join(ensemble_ranks_df)
        weighted_df = ranked.assign(weights=ensemble_f).assign(
            weighted=lambda i: i.tide_m * i.weights,
        )

        # Grouping in this order keeps the expected index sorting
        grouped = weighted_df.groupby(["x", "y", "time"])

        # Weighted mean of all models at each coordinate and time
        weighted_mean = grouped.weighted.sum().div(grouped.weights.sum())

        # Label the ensemble and restore the expected index order
        ensemble_df = (
            weighted_mean.to_frame("tide_m")
            .assign(tide_model=ensemble_n)
            .reorder_levels(["time", "x", "y"], axis=0)
        )
        ensemble_list.append(ensemble_df)

    # Stack all ensemble models into a single dataframe
    return pd.concat(ensemble_list)
387
+
388
+
389
def model_tides(
    x,
    y,
    time,
    model="FES2014",
    directory=None,
    crs="EPSG:4326",
    crop=True,
    method="spline",
    extrapolate=True,
    cutoff=None,
    mode="one-to-many",
    parallel=True,
    parallel_splits=5,
    output_units="m",
    output_format="long",
    ensemble_models=None,
    **ensemble_kwargs,
):
    """Compute tides at multiple points and times using tidal harmonics.

    This function supports all tidal models supported by `pyTMD`,
    including FES Finite Element Solution models, TPXO TOPEX/POSEIDON
    models, EOT Empirical Ocean Tide models, GOT Global Ocean Tide
    models, and HAMTIDE Hamburg direct data Assimilation Methods for
    Tides models.

    This function requires access to tide model data files.
    These should be placed in a folder with subfolders matching
    the formats specified by `pyTMD`:
    <https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories>

    For FES2014 (<https://www.aviso.altimetry.fr/es/data/products/auxiliary-products/global-tide-fes/description-fes2014.html>):

    - `{directory}/fes2014/ocean_tide/`

    For FES2022 (<https://www.aviso.altimetry.fr/en/data/products/auxiliary-products/global-tide-fes.html>):

    - `{directory}/fes2022b/ocean_tide/`

    For TPXO8-atlas (<https://www.tpxo.net/tpxo-products-and-registration>):

    - `{directory}/tpxo8_atlas/`

    For TPXO9-atlas-v5 (<https://www.tpxo.net/tpxo-products-and-registration>):

    - `{directory}/TPXO9_atlas_v5/`

    For EOT20 (<https://www.seanoe.org/data/00683/79489/>):

    - `{directory}/EOT20/ocean_tides/`

    For GOT4.10c (<https://earth.gsfc.nasa.gov/geo/data/ocean-tide-models>):

    - `{directory}/GOT4.10c/grids_oceantide_netcdf/`

    For HAMTIDE (<https://www.cen.uni-hamburg.de/en/icdc/data/ocean/hamtide.html>):

    - `{directory}/hamtide/`

    This function is a modification of the `pyTMD` package's
    `compute_tide_corrections` function. For more info:
    <https://pytmd.readthedocs.io/en/stable/user_guide/compute_tide_corrections.html>

    Parameters
    ----------
    x, y : float or list of floats
        One or more x and y coordinates used to define
        the location at which to model tides. By default these
        coordinates should be lat/lon; use "crs" if they
        are in a custom coordinate reference system.
    time : A datetime array or pandas.DatetimeIndex
        An array containing `datetime64[ns]` values or a
        `pandas.DatetimeIndex` providing the times at which to
        model tides in UTC time.
    model : string or list of strings, optional
        The tide model (or models) used to model tides. Options
        include:

        - "FES2014" (pre-configured on DEA Sandbox)
        - "FES2022"
        - "TPXO9-atlas-v5"
        - "TPXO8-atlas"
        - "EOT20"
        - "HAMTIDE11"
        - "GOT4.10"
        - "ensemble" (advanced ensemble tide model functionality;
          combining multiple models based on external model rankings)
    directory : string, optional
        The directory containing tide model data files. If no path is
        provided, this will default to the environment variable
        `EO_TIDES_TIDE_MODELS` if set, otherwise "/var/share/tide_models".
        Tide modelling files should be stored in sub-folders for each
        model that match the structure provided by `pyTMD`.

        For example:

        - `{directory}/fes2014/ocean_tide/`
        - `{directory}/tpxo8_atlas/`
        - `{directory}/TPXO9_atlas_v5/`
    crs : str, optional
        Input coordinate reference system for x and y coordinates.
        Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
    crop : bool, optional
        Whether to crop tide model constituent files on-the-fly to
        improve performance. Cropping will be performed based on a
        1 degree buffer around all input points. Defaults to True.
    method : string, optional
        Method used to interpolate tidal constituents
        from model files. Options include:

        - "spline": scipy bivariate spline interpolation (default)
        - "bilinear": quick bilinear interpolation
        - "linear", "nearest": scipy regular grid interpolations
    extrapolate : bool, optional
        Whether to extrapolate tides for x and y coordinates outside of
        the valid tide modelling domain using nearest-neighbor.
    cutoff : int or float, optional
        Extrapolation cutoff in kilometers. The default is None, which
        will extrapolate for all points regardless of distance from the
        valid tide modelling domain.
    mode : string, optional
        The analysis mode to use for tide modelling. Supports two options:

        - "one-to-many": Models tides for every timestep in "time" at
          every input x and y coordinate point. This is useful if you
          want to model tides for a specific list of timesteps across
          multiple spatial points (e.g. for the same set of satellite
          acquisition times at various locations across your study area).
        - "one-to-one": Model tides using a different timestep for each
          x and y coordinate point. In this mode, the number of x and
          y points must equal the number of timesteps provided in "time".
    parallel : boolean, optional
        Whether to parallelise tide modelling using `concurrent.futures`.
        If multiple tide models are requested, these will be run in
        parallel. Optionally, tide modelling can also be run in parallel
        across input x and y coordinates (see "parallel_splits" below).
        Default is True.
    parallel_splits : int, optional
        Whether to split the input x and y coordinates into smaller,
        evenly-sized chunks that are processed in parallel. This can
        provide a large performance boost when processing large numbers
        of coordinates. The default is 5 chunks, which will split
        coordinates into 5 parallelised chunks. Values are limited to
        between 1 and the number of input points.
    output_units : str, optional
        Whether to return modelled tides in floating point metre units,
        or integer centimetre units (i.e. scaled by 100) or integer
        millimetre units (i.e. scaled by 1000). Returning outputs in
        integer units can be useful for reducing memory usage.
        Defaults to "m" for metres; set to "cm" for centimetres or "mm"
        for millimetres.
    output_format : str, optional
        Whether to return the output dataframe in long format (with
        results stacked vertically along "tide_model" and "tide_m"
        columns), or wide format (with a column for each tide model).
        Defaults to "long".
    ensemble_models : list, optional
        An optional list of models used to generate the ensemble tide
        model if "ensemble" tide modelling is requested. Defaults to
        ["FES2014", "TPXO9-atlas-v5", "EOT20", "HAMTIDE11", "GOT4.10",
        "FES2012", "TPXO8-atlas-v1"]. Any other models requested via
        `model` alongside "ensemble" are modelled and returned as
        well, but do not contribute to the ensemble.
    **ensemble_kwargs :
        Keyword arguments used to customise the generation of optional
        ensemble tide models if "ensemble" modelling is requested.
        These are passed to the underlying `_ensemble_model` function.
        Useful parameters include `ranking_points` (path to model
        rankings data), `k` (for controlling how model rankings are
        interpolated), and `ensemble_top_n` (how many top models to use
        in the ensemble calculation).

    Returns
    -------
    pandas.DataFrame
        A dataframe containing modelled tide heights.

    Raises
    ------
    FileNotFoundError
        If the tide model directory does not exist.
    ValueError
        If `mode` is not supported, or if any requested model is not
        in the list of supported models.
    AssertionError
        If `method`, `output_units`, `output_format` or the lengths
        of `x`, `y` and `time` fail validation.

    """
    # Set tide modelling files directory. If no custom path is provided,
    # first try global environmental var, then "/var/share/tide_models"
    if directory is None:
        directory = os.environ.get("EO_TIDES_TIDE_MODELS", "/var/share/tide_models")

    # Verify path exists
    directory = pathlib.Path(directory).expanduser()
    if not directory.exists():
        raise FileNotFoundError("Invalid tide directory")

    # If time passed as a single Timestamp, convert to datetime64
    if isinstance(time, pd.Timestamp):
        time = time.to_datetime64()

    # Turn inputs into arrays for consistent handling
    models_requested = np.atleast_1d(model)
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    time = np.atleast_1d(time)

    # Validate input arguments
    assert method in (
        "bilinear",
        "spline",
        "linear",
        "nearest",
    ), "Method must be one of 'bilinear', 'spline', 'linear' or 'nearest'."
    assert output_units in (
        "m",
        "cm",
        "mm",
    ), "Output units must be either 'm', 'cm', or 'mm'."
    assert output_format in (
        "long",
        "wide",
    ), "Output format must be either 'long' or 'wide'."
    assert len(x) == len(y), "x and y must be the same length."

    # Reject unknown modes up front; previously these caused an
    # unbound variable error in the parallel code path, or were
    # silently treated as "one-to-one" when run in series
    if mode not in ("one-to-many", "one-to-one"):
        raise ValueError(
            f"Unsupported mode '{mode}'; must be either 'one-to-many' or 'one-to-one'."
        )
    if mode == "one-to-one":
        assert len(x) == len(time), (
            "The number of supplied x and y points and times must be "
            "identical in 'one-to-one' mode. Use 'one-to-many' mode if "
            "you intended to model multiple timesteps at each point."
        )

    # Verify that all provided models are supported
    valid_models = [
        # Standard built-in pyTMD models
        "EOT20",
        "FES2014",
        "FES2022",
        "GOT4.10",
        "HAMTIDE11",
        "TPXO8-atlas",  # binary version, not suitable for clipping
        "TPXO9-atlas-v5",
        # Non-standard models, defined internally
        "FES2012",
        "FES2014_extrapolated",
        "FES2022_extrapolated",
        "GOT5.6",
        "GOT5.6_extrapolated",
        "TPXO8-atlas-v1",  # netCDF version
        # Advanced ensemble model functionality
        "ensemble",
    ]
    if not all(m in valid_models for m in models_requested):
        raise ValueError(
            f"One or more of the models requested {models_requested} is "
            f"not valid. The following models are currently supported: "
            f"{valid_models}",
        )

    # If ensemble modelling is requested, process the ensemble member
    # models plus any other requested models that are not members
    # (otherwise those models would silently go missing from the output)
    ensemble_requested = "ensemble" in models_requested
    if ensemble_requested:
        print("Running ensemble tide modelling")
        ensemble_members = (
            list(ensemble_models)
            if ensemble_models is not None
            else [
                "FES2014",
                "TPXO9-atlas-v5",
                "EOT20",
                "HAMTIDE11",
                "GOT4.10",
                "FES2012",
                "TPXO8-atlas-v1",
            ]
        )
        extra_models = [
            str(m) for m in models_requested if m != "ensemble" and m not in ensemble_members
        ]
        models_to_process = ensemble_members + extra_models

    # Otherwise, models to process are the same as those requested
    else:
        models_to_process = models_requested

    # Update tide modelling func to add default keyword arguments that
    # are used for every iteration during parallel processing
    iter_func = partial(
        _model_tides,
        directory=directory,
        crs=crs,
        crop=crop,
        method=method,
        extrapolate=extrapolate,
        cutoff=np.inf if cutoff is None else cutoff,
        output_units=output_units,
        mode=mode,
    )

    # Ensure requested parallel splits is at least one, and no larger
    # than the number of points
    parallel_splits = max(1, min(parallel_splits, len(x)))

    # Parallelise if either multiple models or multiple splits requested
    if parallel and (len(models_to_process) > 1 or parallel_splits > 1):
        with ProcessPoolExecutor() as executor:
            print(f"Modelling tides using {', '.join(models_to_process)} in parallel")

            # Optionally split lon/lat points into `parallel_splits`
            # chunks that will be applied in parallel
            x_split = np.array_split(x, parallel_splits)
            y_split = np.array_split(y, parallel_splits)

            # In "one-to-many" mode, pass the entire set of timesteps
            # to each chunk. In "one-to-one" mode, split up timesteps
            # into the same chunks as the points.
            if mode == "one-to-many":
                time_split = [time] * parallel_splits
            else:
                time_split = np.array_split(time, parallel_splits)

            # Get every combination of models and point chunks, and
            # extract as iterables that can be passed to `executor.map()`
            jobs = [
                (m, x_chunk, y_chunk, time_chunk)
                for m in models_to_process
                for x_chunk, y_chunk, time_chunk in zip(x_split, y_split, time_split)
            ]
            model_iters, x_iters, y_iters, time_iters = zip(*jobs)

            # Apply func in parallel, iterating through each input param
            model_outputs = list(
                tqdm(
                    executor.map(iter_func, model_iters, x_iters, y_iters, time_iters),
                    total=len(model_iters),
                ),
            )

    # Model tides in series if parallelisation is off
    else:
        model_outputs = []

        for model_i in models_to_process:
            print(f"Modelling tides using {model_i}")
            tide_df = iter_func(model_i, x, y, time)
            model_outputs.append(tide_df)

    # Combine outputs into a single dataframe
    tide_df = pd.concat(model_outputs, axis=0)

    # Optionally compute ensemble model and add to dataframe
    if ensemble_requested:
        # Only ensemble member models should contribute to the ensemble
        members_df = tide_df[tide_df.tide_model.isin(ensemble_members)]
        ensemble_df = _ensemble_model(x, y, crs, members_df, ensemble_members, **ensemble_kwargs)

        # Update requested models with any custom ensemble models, then
        # filter the dataframe to keep only models originally requested
        models_requested = np.union1d(models_requested, ensemble_df.tide_model.unique())
        tide_df = pd.concat([tide_df, ensemble_df]).query("tide_model in @models_requested")

    # Optionally convert to a wide format dataframe with a tide model in
    # each dataframe column
    if output_format == "wide":
        # Pivot into wide format with each tide model as a column
        print("Converting to a wide format dataframe")
        tide_df = tide_df.pivot(columns="tide_model", values="tide_m")

        # If in 'one-to-one' mode, reindex using our input time/x/y
        # values to ensure the output is sorted the same as our inputs
        if mode == "one-to-one":
            output_indices = pd.MultiIndex.from_arrays([time, x, y], names=["time", "x", "y"])
            tide_df = tide_df.reindex(output_indices)

    return tide_df
746
+
747
+
748
def _pixel_tides_resample(
    tides_lowres,
    ds,
    resample_method="bilinear",
    dask_chunks="auto",
    dask_compute=True,
):
    """Reproject low resolution tides modelled by `pixel_tides` onto
    the geobox (e.g. spatial resolution and extent) of the original
    higher resolution satellite dataset.

    Parameters
    ----------
    tides_lowres : xarray.DataArray
        The low resolution tide modelling data array to be resampled.
    ds : xarray.Dataset
        The dataset whose geobox is used as the template for the
        resampling operation. This is typically the same satellite
        dataset originally passed to `pixel_tides`.
    resample_method : string, optional
        The resampling method to use. Defaults to "bilinear"; valid
        options include "nearest", "cubic", "min", "max", "average" etc.
    dask_chunks : str or tuple, optional
        Custom Dask chunking for the final resampling step. The
        default of "auto" uses the x/y chunks of `ds` if they exist,
        otherwise x/y chunks that cover the entire extent of the
        dataset. For custom chunks, provide a tuple in the form
        `(y, x)`, e.g. `(2048, 2048)`.
    dask_compute : bool, optional
        Whether to compute results of the resampling step using Dask.
        If False, `tides_highres` is returned as a Dask array.

    Returns
    -------
    tides_highres, tides_lowres : tuple of xr.DataArrays
        The high resolution array of tide heights matching the
        spatial resolution and extent of `ds`, followed by the
        unchanged `tides_lowres` input.

    """
    # Names of the spatial dimensions of the template dataset
    y_dim, x_dim = ds.odc.spatial_dims
    spatial_dims = (y_dim, x_dim)

    # Chunk the low-res array for Dask: no chunking along the spatial
    # dims, and one chunk per step along every other dim
    chunk_spec = {}
    for dim in tides_lowres.dims:
        chunk_spec[dim] = None if dim in spatial_dims else 1
    tides_lowres_dask = tides_lowres.chunk(chunk_spec)

    # Resolve "auto" chunking: reuse the x/y chunks of `ds` when both
    # exist, otherwise use a single chunk spanning the whole geobox so
    # we don't end up with hundreds of tiny x and y chunks due to the
    # small size of `tides_lowres` (possible odc.geo bug?)
    if dask_chunks == "auto":
        ds_chunks = ds.chunks
        if ds_chunks is not None and y_dim in ds_chunks and x_dim in ds_chunks:
            dask_chunks = (ds_chunks[y_dim], ds_chunks[x_dim])
        else:
            dask_chunks = ds.odc.geobox.shape

    # Reproject into the GeoBox of `ds` using odc.geo and Dask
    reprojected = tides_lowres_dask.odc.reproject(
        how=ds.odc.geobox,
        chunks=dask_chunks,
        resampling=resample_method,
    )
    tides_highres = reprojected.rename("tide_m")

    # Optionally process and load into memory with Dask
    if dask_compute:
        tides_highres.load()

    return tides_highres, tides_lowres
822
+
823
+
824
def pixel_tides(
    ds,
    times=None,
    resample=True,
    calculate_quantiles=None,
    resolution=None,
    buffer=None,
    resample_method="bilinear",
    model="FES2014",
    dask_chunks="auto",
    dask_compute=True,
    **model_tides_kwargs,
):
    """Obtain tide heights for each pixel in a dataset by modelling
    tides into a low-resolution grid surrounding the dataset,
    then (optionally) spatially resample this low-res data back
    into the original higher resolution dataset extent and resolution.

    Parameters
    ----------
    ds : xarray.Dataset
        A dataset whose geobox (`ds.odc.geobox`) will be used to define
        the spatial extent of the low resolution tide modelling grid.
    times : pandas.DatetimeIndex or list of pandas.Timestamps, optional
        By default, the function will model tides using the times
        contained in the `time` dimension of `ds`. Alternatively, this
        param can be used to model tides for a custom set of times
        instead. For example:
        `times=pd.date_range(start="2000", end="2001", freq="5h")`
        A single `pandas.Timestamp` is also accepted.
    resample : bool, optional
        Whether to resample low resolution tides back into `ds`'s original
        higher resolution grid. Set this to `False` if you do not want
        low resolution tides to be re-projected back to higher resolution.
    calculate_quantiles : list or np.array, optional
        Rather than returning all individual tides, low-resolution tides
        can be first aggregated using a quantile calculation by passing in
        a list or array of quantiles to compute. For example, this could
        be used to calculate the min/max tide across all times:
        `calculate_quantiles=[0.0, 1.0]`.
    resolution : int or float, optional
        The desired resolution of the low-resolution grid used for tide
        modelling. The default None will create a 5000 m resolution grid
        if `ds` has a projected CRS (i.e. metre units), or a 0.05 degree
        resolution grid if `ds` has a geographic CRS (e.g. degree units).
        Note: higher resolutions do not necessarily provide better
        tide modelling performance, as results will be limited by the
        resolution of the underlying global tide model (e.g. 1/16th
        degree / ~5 km resolution grid for FES2014).
    buffer : int or float, optional
        The amount by which to buffer the higher resolution grid extent
        when creating the new low resolution grid. This buffering is
        important as it ensures that pixel-based tides are seamless
        across dataset boundaries. This buffer will eventually be clipped
        away when the low-resolution data is re-projected back to the
        resolution and extent of the higher resolution dataset. To
        ensure that at least two pixels occur outside of the dataset
        bounds, the default None applies a 12000 m buffer if `ds` has a
        projected CRS (i.e. metre units), or a 0.12 degree buffer if
        `ds` has a geographic CRS (e.g. degree units).
    resample_method : string, optional
        If resampling is requested (see `resample` above), use this
        resampling method when converting from low resolution to high
        resolution pixels. Defaults to "bilinear"; valid options include
        "nearest", "cubic", "min", "max", "average" etc.
    model : string or list of strings
        The tide model or a list of models used to model tides, as
        supported by the `pyTMD` Python package. Options include:
        - "FES2014" (default; pre-configured on DEA Sandbox)
        - "FES2022"
        - "TPXO8-atlas"
        - "TPXO9-atlas-v5"
        - "EOT20"
        - "HAMTIDE11"
        - "GOT4.10"
    dask_chunks : str or tuple, optional
        Can be used to configure custom Dask chunking for the final
        resampling step. The default of "auto" will automatically set
        x/y chunks to match those in `ds` if they exist, otherwise will
        set x/y chunks that cover the entire extent of the dataset.
        For custom chunks, provide a tuple in the form `(y, x)`, e.g.
        `(2048, 2048)`.
    dask_compute : bool, optional
        Whether to compute results of the resampling step using Dask.
        If False, this will return `tides_highres` as a Dask array.
    **model_tides_kwargs :
        Optional parameters passed to the `eo_tides.model.model_tides`
        function. Important parameters include "directory" (used to
        specify the location of input tide modelling files) and "cutoff"
        (used to extrapolate modelled tides away from the coast; if not
        specified here, cutoff defaults to `np.inf`).

    Returns
    -------
    If `resample` is False:

        tides_lowres : xr.DataArray
            A low resolution data array giving either tide heights every
            timestep in `ds` (if `times` is None), tide heights at every
            time in `times` (if `times` is not None), or tide height quantiles
            for every quantile provided by `calculate_quantiles`.

    If `resample` is True:

        tides_highres, tides_lowres : tuple of xr.DataArrays
            In addition to `tides_lowres` (see above), a high resolution
            array of tide heights will be generated that matches the
            exact spatial resolution and extent of `ds`. This will contain
            either tide heights every timestep in `ds` (if `times` is None),
            tide heights at every time in `times` (if `times` is not None),
            or tide height quantiles for every quantile provided by
            `calculate_quantiles`.

    Raises
    ------
    ValueError
        If `ds` has no "time" dimension and `times` is not supplied; if
        `resolution` looks like it was given in the wrong units for the
        CRS of `ds`; or if `resolution` is finer than the pixel
        resolution of `ds`.

    """
    # Fail early if there is no source of times at all: neither a
    # "time" dimension on `ds` nor custom `times`
    if "time" not in ds.dims and times is None:
        raise ValueError(
            "`ds` does not contain a 'time' dimension. Times are required "
            "for modelling tides: please pass in a set of custom tides "
            "using the `times` parameter. For example: "
            "`times=pd.date_range(start='2000', end='2001', freq='5h')`",
        )

    # Use times from `ds` directly unless custom times are provided, in
    # which case convert them to a consistent pandas.DatetimeIndex format
    if times is None:
        time_coords = ds.coords["time"]
    elif isinstance(times, list):
        time_coords = pd.DatetimeIndex(times)
    elif isinstance(times, pd.Timestamp):
        time_coords = pd.DatetimeIndex([times])
    else:
        time_coords = times

    # Set defaults passed to `model_tides`
    model_tides_kwargs.setdefault("cutoff", np.inf)

    # Standardise model into a list for easy handling
    model = [model] if isinstance(model, str) else model

    # Determine spatial dimensions
    y_dim, x_dim = ds.odc.spatial_dims

    # Determine resolution and buffer, using different defaults for
    # geographic (i.e. degrees) and projected (i.e. metres) CRSs:
    crs_units = ds.odc.geobox.crs.units[0][0:6]
    if ds.odc.geobox.crs.geographic:
        if resolution is None:
            resolution = 0.05
        elif resolution > 360:
            raise ValueError(
                f"A resolution of greater than 360 was "
                f"provided, but `ds` has a geographic CRS "
                f"in {crs_units} units. Did you accidently "
                f"provide a resolution in projected "
                f"(i.e. metre) units?",
            )
        if buffer is None:
            buffer = 0.12
    else:
        if resolution is None:
            resolution = 5000
        elif resolution < 1:
            raise ValueError(
                f"A resolution of less than 1 was provided, "
                f"but `ds` has a projected CRS in "
                f"{crs_units} units. Did you accidently "
                f"provide a resolution in geographic "
                f"(degree) units?",
            )
        if buffer is None:
            buffer = 12000

    # Raise error if resolution is less than dataset resolution. Compare
    # against the magnitude of the pixel size so the guard still works
    # if the x axis of the grid is flipped (negative x resolution)
    dataset_res = abs(ds.odc.geobox.resolution.x)
    if resolution < dataset_res:
        raise ValueError(
            f"The resolution of the low-resolution tide "
            f"modelling grid ({resolution:.2f}) is less "
            f"than `ds`'s pixel resolution ({dataset_res:.2f}). "
            f"This can cause extremely slow tide modelling "
            f"performance. Please select provide a resolution "
            f"greater than {dataset_res:.2f} using "
            f"`pixel_tides`'s 'resolution' parameter.",
        )

    # Imported here, next to its only use, so that the argument
    # validation above runs before any geobox machinery is needed
    from odc.geo.geobox import GeoBox

    # Create a new reduced resolution tide modelling grid after
    # first buffering the grid
    print(f"Creating reduced resolution {resolution} x {resolution} {crs_units} tide modelling array")
    buffered_geobox = ds.odc.geobox.buffered(buffer)
    rescaled_geobox = GeoBox.from_bbox(bbox=buffered_geobox.boundingbox, resolution=resolution)
    rescaled_ds = odc.geo.xr.xr_zeros(rescaled_geobox)

    # Flatten grid to 1D, then add time dimension
    flattened_ds = rescaled_ds.stack(z=(x_dim, y_dim))
    flattened_ds = flattened_ds.expand_dims(dim={"time": time_coords.values})

    # Model tides in parallel, returning a pandas.DataFrame
    tide_df = model_tides(
        x=flattened_ds[x_dim],
        y=flattened_ds[y_dim],
        time=flattened_ds.time,
        crs=f"EPSG:{ds.odc.geobox.crs.epsg}",
        model=model,
        **model_tides_kwargs,
    )

    # Convert our pandas.DataFrame tide modelling outputs to xarray
    tides_lowres = (
        # Rename x and y dataframe indexes to match x and y xarray dims
        tide_df.rename_axis(["time", x_dim, y_dim])
        # Add tide model column to dataframe indexes so we can convert
        # our dataframe to a multidimensional xarray
        .set_index("tide_model", append=True)
        # Convert to xarray and select our tide modelling xr.DataArray
        .to_xarray()
        .tide_m
        # Re-index and transpose into our input coordinates and dim order
        .reindex_like(rescaled_ds)
        .transpose("tide_model", "time", y_dim, x_dim)
    )

    # Optionally calculate and return quantiles rather than raw data.
    # Set dtype to dtype of the input data as quantile always returns
    # float64 (memory intensive)
    if calculate_quantiles is not None:
        print("Computing tide quantiles")
        tides_lowres = tides_lowres.quantile(q=calculate_quantiles, dim="time").astype(tides_lowres.dtype)

    # If only one tidal model exists, squeeze out "tide_model" dim
    if len(tides_lowres.tide_model) == 1:
        tides_lowres = tides_lowres.squeeze("tide_model")

    # Ensure CRS is present before we apply any resampling
    tides_lowres = tides_lowres.odc.assign_crs(ds.odc.geobox.crs)

    # Reproject into original high resolution grid
    if resample:
        print("Reprojecting tides into original array")
        tides_highres, tides_lowres = _pixel_tides_resample(
            tides_lowres,
            ds,
            resample_method,
            dask_chunks,
            dask_compute,
        )
        return tides_highres, tides_lowres

    print("Returning low resolution tide array")
    return tides_lowres
1101
+
1102
+
1103
# Script entry-point guard: running this module directly is a no-op (the
# body is just `pass`). The pragma excludes the guard from coverage reports.
if __name__ == "__main__":  # pragma: no cover
    pass