eo-tides 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eo_tides/__init__.py +46 -0
- eo_tides/eo.py +519 -0
- eo_tides/model.py +221 -539
- eo_tides/stats.py +257 -6
- eo_tides/validation.py +3 -3
- eo_tides-0.0.21.dist-info/LICENSE +201 -0
- {eo_tides-0.0.19.dist-info → eo_tides-0.0.21.dist-info}/METADATA +36 -18
- eo_tides-0.0.21.dist-info/RECORD +11 -0
- eo_tides-0.0.19.dist-info/LICENSE +0 -21
- eo_tides-0.0.19.dist-info/RECORD +0 -10
- {eo_tides-0.0.19.dist-info → eo_tides-0.0.21.dist-info}/WHEEL +0 -0
- {eo_tides-0.0.19.dist-info → eo_tides-0.0.21.dist-info}/top_level.txt +0 -0
eo_tides/model.py
CHANGED
@@ -1,79 +1,165 @@
|
|
1
|
+
# Used to postpone evaluation of type annotations
|
2
|
+
from __future__ import annotations
|
3
|
+
|
1
4
|
import os
|
2
5
|
import pathlib
|
6
|
+
import warnings
|
3
7
|
from concurrent.futures import ProcessPoolExecutor
|
4
8
|
from functools import partial
|
9
|
+
from typing import TYPE_CHECKING
|
10
|
+
|
11
|
+
# Only import if running type checking
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
import xarray as xr
|
5
14
|
|
6
15
|
import geopandas as gpd
|
7
16
|
import numpy as np
|
8
|
-
import odc.geo.xr
|
9
17
|
import pandas as pd
|
10
18
|
import pyproj
|
11
19
|
import pyTMD
|
20
|
+
from colorama import Style, init
|
12
21
|
from pyTMD.io.model import load_database, model
|
13
22
|
from tqdm import tqdm
|
14
23
|
|
15
|
-
from
|
24
|
+
from .utils import idw
|
25
|
+
|
26
|
+
|
27
|
+
def _set_directory(directory):
|
28
|
+
"""
|
29
|
+
Set tide modelling files directory. If no custom
|
30
|
+
path is provided, try global environment variable
|
31
|
+
instead.
|
32
|
+
"""
|
33
|
+
if directory is None:
|
34
|
+
if "EO_TIDES_TIDE_MODELS" in os.environ:
|
35
|
+
directory = os.environ["EO_TIDES_TIDE_MODELS"]
|
36
|
+
else:
|
37
|
+
raise Exception(
|
38
|
+
"No tide model directory provided via `directory`, and/or no "
|
39
|
+
"`EO_TIDES_TIDE_MODELS` environment variable found. "
|
40
|
+
"Please provide a valid path to your tide model directory."
|
41
|
+
)
|
42
|
+
|
43
|
+
# Verify path exists
|
44
|
+
directory = pathlib.Path(directory).expanduser()
|
45
|
+
if not directory.exists():
|
46
|
+
raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
|
47
|
+
else:
|
48
|
+
return directory
|
16
49
|
|
17
50
|
|
18
|
-
def
|
51
|
+
def list_models(
|
52
|
+
directory: str | os.PathLike | None = None,
|
53
|
+
show_available: bool = True,
|
54
|
+
show_supported: bool = True,
|
55
|
+
raise_error: bool = False,
|
56
|
+
) -> tuple[list[str], list[str]]:
|
19
57
|
"""
|
20
|
-
|
21
|
-
|
58
|
+
List all tide models available for tide modelling, and
|
59
|
+
all models supported by `eo-tides` and `pyTMD`.
|
22
60
|
|
23
61
|
This function scans the specified tide model directory
|
24
|
-
|
25
|
-
|
26
|
-
directory as well as the full list of supported models.
|
62
|
+
and returns a list of models that are available in the
|
63
|
+
directory as well as the full list of all supported models.
|
27
64
|
|
28
65
|
For instructions on setting up tide models, see:
|
29
66
|
<https://geoscienceaustralia.github.io/eo-tides/setup/>
|
30
67
|
|
31
68
|
Parameters
|
32
69
|
----------
|
33
|
-
directory : str
|
34
|
-
|
70
|
+
directory : str, optional
|
71
|
+
The directory containing tide model data files. If no path is
|
72
|
+
provided, this will default to the environment variable
|
73
|
+
`EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
|
74
|
+
Tide modelling files should be stored in sub-folders for each
|
75
|
+
model that match the structure required by `pyTMD`
|
76
|
+
(<https://geoscienceaustralia.github.io/eo-tides/setup/>).
|
77
|
+
show_available : bool, optional
|
78
|
+
Whether to print a list of locally available models.
|
79
|
+
show_supported : bool, optional
|
80
|
+
Whether to print a list of all supported models, in
|
81
|
+
addition to models available locally.
|
82
|
+
raise_error : bool, optional
|
83
|
+
If True, raise an error if no available models are found.
|
84
|
+
If False, raise a warning.
|
35
85
|
|
36
86
|
Returns
|
37
87
|
-------
|
38
|
-
|
39
|
-
A list of all
|
40
|
-
|
88
|
+
available_models : list of str
|
89
|
+
A list of all tide models available within `directory`.
|
90
|
+
supported_models : list of str
|
91
|
+
A list of all tide models supported by `eo-tides`.
|
41
92
|
"""
|
42
|
-
#
|
93
|
+
init() # Initialize colorama
|
43
94
|
|
44
|
-
# Set tide modelling files directory. If no custom path is
|
45
|
-
#
|
46
|
-
|
47
|
-
if "EO_TIDES_TIDE_MODELS" in os.environ:
|
48
|
-
directory = os.environ["EO_TIDES_TIDE_MODELS"]
|
49
|
-
else:
|
50
|
-
directory = "/var/share/tide_models"
|
95
|
+
# Set tide modelling files directory. If no custom path is
|
96
|
+
# provided, try global environment variable.
|
97
|
+
directory = _set_directory(directory)
|
51
98
|
|
52
|
-
#
|
53
|
-
|
54
|
-
|
55
|
-
|
99
|
+
# Get full list of supported models from pyTMD database
|
100
|
+
model_database = load_database()["elevation"]
|
101
|
+
supported_models = list(model_database.keys())
|
102
|
+
|
103
|
+
# Extract expected model paths
|
104
|
+
expected_paths = {}
|
105
|
+
for m in supported_models:
|
106
|
+
model_file = model_database[m]["model_file"]
|
107
|
+
model_file = model_file[0] if isinstance(model_file, list) else model_file
|
108
|
+
expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
|
56
109
|
|
57
|
-
#
|
58
|
-
|
110
|
+
# Define column widths
|
111
|
+
status_width = 4 # Width for emoji
|
112
|
+
name_width = max(len(name) for name in supported_models)
|
113
|
+
path_width = max(len(path) for path in expected_paths.values())
|
59
114
|
|
60
115
|
# Print list of supported models, marking available and
|
61
116
|
# unavailable models and appending available to list
|
62
|
-
|
63
|
-
|
117
|
+
if show_available or show_supported:
|
118
|
+
total_width = min(status_width + name_width + path_width + 6, 80)
|
119
|
+
print("─" * total_width)
|
120
|
+
print(f"{'🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
|
121
|
+
print("─" * total_width)
|
122
|
+
|
123
|
+
available_models = []
|
64
124
|
for m in supported_models:
|
65
125
|
try:
|
66
|
-
model(directory=directory).elevation(m=m)
|
67
|
-
|
68
|
-
|
69
|
-
|
126
|
+
model_file = model(directory=directory).elevation(m=m)
|
127
|
+
available_models.append(m)
|
128
|
+
|
129
|
+
if show_available:
|
130
|
+
# Mark available models with a green tick
|
131
|
+
status = "✅"
|
132
|
+
print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
|
70
133
|
except:
|
71
134
|
if show_supported:
|
72
135
|
# Mark unavailable models with a red cross
|
73
|
-
|
136
|
+
status = "❌"
|
137
|
+
print(
|
138
|
+
f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
|
139
|
+
)
|
74
140
|
|
75
|
-
|
76
|
-
|
141
|
+
if show_available or show_supported:
|
142
|
+
print("─" * total_width)
|
143
|
+
|
144
|
+
# Print summary
|
145
|
+
print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
|
146
|
+
print(f"Available models: {len(available_models)}/{len(supported_models)}")
|
147
|
+
|
148
|
+
# Raise error or warning if no models are available
|
149
|
+
if not available_models:
|
150
|
+
warning_text = (
|
151
|
+
f"No valid tide models are available in `{directory}`. "
|
152
|
+
"Are you sure you have provided the correct `directory` path, "
|
153
|
+
"or set the `EO_TIDES_TIDE_MODELS` environment variable "
|
154
|
+
"to point to the location of your tide model directory?"
|
155
|
+
)
|
156
|
+
if raise_error:
|
157
|
+
raise Exception(warning_text)
|
158
|
+
else:
|
159
|
+
warnings.warn(warning_text, UserWarning)
|
160
|
+
|
161
|
+
# Return list of available and supported models
|
162
|
+
return available_models, supported_models
|
77
163
|
|
78
164
|
|
79
165
|
def _model_tides(
|
@@ -94,34 +180,7 @@ def _model_tides(
|
|
94
180
|
extraction of tide modelling constituents and tide modelling using
|
95
181
|
`pyTMD`.
|
96
182
|
"""
|
97
|
-
#
|
98
|
-
# import pyTMD.io
|
99
|
-
# import pyTMD.io.model
|
100
|
-
# import pyTMD.predict
|
101
|
-
# import pyTMD.spatial
|
102
|
-
# import pyTMD.time
|
103
|
-
# import pyTMD.utilities
|
104
|
-
|
105
|
-
# Get parameters for tide model; use custom definition file for
|
106
|
-
# FES2012 (leave this as an undocumented feature for now)
|
107
|
-
# if model == "FES2012":
|
108
|
-
# pytmd_model = pyTMD.io.model(directory).from_file(
|
109
|
-
# directory / "model_FES2012.def"
|
110
|
-
# )
|
111
|
-
# elif model == "TPXO8-atlas-v1":
|
112
|
-
# pytmd_model = pyTMD.io.model(directory).from_file(directory / "model_TPXO8.def")
|
113
|
-
# else:
|
114
|
-
# pytmd_model = pyTMD.io.model(
|
115
|
-
# directory, format="netcdf", compressed=False
|
116
|
-
# ).elevation(model)
|
117
|
-
|
118
|
-
# if model in NONSTANDARD_MODELS:
|
119
|
-
# model_params = NONSTANDARD_MODELS[model]
|
120
|
-
# model_params_bytes = io.BytesIO(json.dumps(model_params).encode("utf-8"))
|
121
|
-
# pytmd_model = pyTMD.io.model(directory).from_file(definition_file=model_params_bytes)
|
122
|
-
|
123
|
-
# else:
|
124
|
-
|
183
|
+
# Obtain model details
|
125
184
|
pytmd_model = pyTMD.io.model(directory).elevation(model)
|
126
185
|
|
127
186
|
# Convert x, y to latitude/longitude
|
@@ -284,10 +343,8 @@ def _model_tides(
|
|
284
343
|
|
285
344
|
|
286
345
|
def _ensemble_model(
|
287
|
-
x,
|
288
|
-
y,
|
289
|
-
crs,
|
290
346
|
tide_df,
|
347
|
+
crs,
|
291
348
|
ensemble_models,
|
292
349
|
ensemble_func=None,
|
293
350
|
ensemble_top_n=3,
|
@@ -301,29 +358,27 @@ def _ensemble_model(
|
|
301
358
|
to inform the selection of the best local models.
|
302
359
|
|
303
360
|
This function performs the following steps:
|
361
|
+
1. Takes a dataframe of tide heights from multiple tide models, as
|
362
|
+
produced by `eo_tides.model.model_tides`
|
304
363
|
1. Loads model ranking points from a GeoJSON file, filters them
|
305
364
|
based on the valid data percentage, and retains relevant columns
|
306
|
-
2. Interpolates the model rankings into the
|
307
|
-
|
365
|
+
2. Interpolates the model rankings into the "x" and "y" coordinates
|
366
|
+
of the original dataframe using Inverse Distance Weighted (IDW) interpolation
|
308
367
|
3. Uses rankings to combine multiple tide models into a single
|
309
368
|
optimised ensemble model (by default, by taking the mean of the
|
310
369
|
top 3 ranked models)
|
311
|
-
4. Returns a
|
370
|
+
4. Returns a new DataFrame with the combined ensemble model predictions
|
312
371
|
|
313
372
|
Parameters
|
314
373
|
----------
|
315
|
-
x : array-like
|
316
|
-
Array of x-coordinates where the ensemble model predictions are
|
317
|
-
required.
|
318
|
-
y : array-like
|
319
|
-
Array of y-coordinates where the ensemble model predictions are
|
320
|
-
required.
|
321
|
-
crs : string
|
322
|
-
Input coordinate reference system for x and y coordinates. Used
|
323
|
-
to ensure that interpolations are performed in the correct CRS.
|
324
374
|
tide_df : pandas.DataFrame
|
325
|
-
DataFrame
|
375
|
+
DataFrame produced by `eo_tides.model.model_tides`, containing
|
376
|
+
tide model predictions with columns:
|
326
377
|
`["time", "x", "y", "tide_height", "tide_model"]`.
|
378
|
+
crs : string
|
379
|
+
Coordinate reference system for the "x" and "y" coordinates in
|
380
|
+
`tide_df`. Used to ensure that interpolations are performed
|
381
|
+
in the correct CRS.
|
327
382
|
ensemble_models : list
|
328
383
|
A list of models to include in the ensemble modelling process.
|
329
384
|
All values must exist as columns with the prefix "rank_" in
|
@@ -342,7 +397,7 @@ def _ensemble_model(
|
|
342
397
|
ranking_points : str, optional
|
343
398
|
Path to the GeoJSON file containing model ranking points. This
|
344
399
|
dataset should include columns containing rankings for each tide
|
345
|
-
model, named with the prefix "rank_". e.g. "
|
400
|
+
model, named with the prefix "rank_". e.g. "rank_EOT20".
|
346
401
|
Low values should represent high rankings (e.g. 1 = top ranked).
|
347
402
|
ranking_valid_perc : float, optional
|
348
403
|
Minimum percentage of valid data required to include a model
|
@@ -367,6 +422,10 @@ def _ensemble_model(
|
|
367
422
|
the provided dictionary keys).
|
368
423
|
|
369
424
|
"""
|
425
|
+
# Extract x and y coords from dataframe
|
426
|
+
x = tide_df.index.get_level_values(level="x")
|
427
|
+
y = tide_df.index.get_level_values(level="y")
|
428
|
+
|
370
429
|
# Load model ranks points and reproject to same CRS as x and y
|
371
430
|
model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
|
372
431
|
model_ranks_gdf = (
|
@@ -449,36 +508,36 @@ def _ensemble_model(
|
|
449
508
|
|
450
509
|
|
451
510
|
def model_tides(
|
452
|
-
x,
|
453
|
-
y,
|
454
|
-
time,
|
455
|
-
model="
|
456
|
-
directory=None,
|
457
|
-
crs="EPSG:4326",
|
458
|
-
crop=True,
|
459
|
-
method="spline",
|
460
|
-
extrapolate=True,
|
461
|
-
cutoff=None,
|
462
|
-
mode="one-to-many",
|
463
|
-
parallel=True,
|
464
|
-
parallel_splits=5,
|
465
|
-
output_units="m",
|
466
|
-
output_format="long",
|
467
|
-
ensemble_models=None,
|
511
|
+
x: float | list[float] | xr.DataArray,
|
512
|
+
y: float | list[float] | xr.DataArray,
|
513
|
+
time: np.ndarray | pd.DatetimeIndex,
|
514
|
+
model: str | list[str] = "EOT20",
|
515
|
+
directory: str | os.PathLike | None = None,
|
516
|
+
crs: str = "EPSG:4326",
|
517
|
+
crop: bool = True,
|
518
|
+
method: str = "spline",
|
519
|
+
extrapolate: bool = True,
|
520
|
+
cutoff: float | None = None,
|
521
|
+
mode: str = "one-to-many",
|
522
|
+
parallel: bool = True,
|
523
|
+
parallel_splits: int = 5,
|
524
|
+
output_units: str = "m",
|
525
|
+
output_format: str = "long",
|
526
|
+
ensemble_models: list[str] | None = None,
|
468
527
|
**ensemble_kwargs,
|
469
|
-
):
|
528
|
+
) -> pd.DataFrame:
|
470
529
|
"""
|
471
|
-
|
472
|
-
|
530
|
+
Model tide heights at multiple coordinates and/or timesteps
|
531
|
+
using one or more ocean tide models.
|
473
532
|
|
474
533
|
This function is parallelised to improve performance, and
|
475
534
|
supports all tidal models supported by `pyTMD`, including:
|
476
535
|
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
536
|
+
- Empirical Ocean Tide model (EOT20)
|
537
|
+
- Finite Element Solution tide models (FES2022, FES2014, FES2012)
|
538
|
+
- TOPEX/POSEIDON global tide models (TPXO10, TPXO9, TPXO8)
|
539
|
+
- Global Ocean Tide models (GOT5.6, GOT5.5, GOT4.10, GOT4.8, GOT4.7)
|
540
|
+
- Hamburg direct data Assimilation Methods for Tides models (HAMTIDE11)
|
482
541
|
|
483
542
|
This function requires access to tide model data files.
|
484
543
|
These should be placed in a folder with subfolders matching
|
@@ -487,52 +546,39 @@ def model_tides(
|
|
487
546
|
<https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories>
|
488
547
|
|
489
548
|
This function is a modification of the `pyTMD` package's
|
490
|
-
`
|
491
|
-
<https://pytmd.readthedocs.io/en/
|
549
|
+
`compute_tidal_elevations` function. For more info:
|
550
|
+
<https://pytmd.readthedocs.io/en/latest/api_reference/compute_tidal_elevations.html>
|
492
551
|
|
493
552
|
Parameters
|
494
553
|
----------
|
495
|
-
x, y : float or list of
|
554
|
+
x, y : float or list of float
|
496
555
|
One or more x and y coordinates used to define
|
497
556
|
the location at which to model tides. By default these
|
498
557
|
coordinates should be lat/lon; use "crs" if they
|
499
558
|
are in a custom coordinate reference system.
|
500
|
-
time :
|
559
|
+
time : Numpy datetime array or pandas.DatetimeIndex
|
501
560
|
An array containing `datetime64[ns]` values or a
|
502
561
|
`pandas.DatetimeIndex` providing the times at which to
|
503
562
|
model tides in UTC time.
|
504
|
-
model :
|
505
|
-
The tide model
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
- "TPXO9-atlas-v5"
|
510
|
-
- "TPXO8-atlas"
|
511
|
-
- "EOT20"
|
512
|
-
- "HAMTIDE11"
|
513
|
-
- "GOT4.10"
|
514
|
-
- "ensemble" (advanced ensemble tide model functionality;
|
515
|
-
combining multiple models based on external model rankings)
|
516
|
-
directory : string, optional
|
563
|
+
model : str or list of str, optional
|
564
|
+
The tide model (or models) to use to model tides.
|
565
|
+
Defaults to "EOT20"; for a full list of available/supported
|
566
|
+
models, run `eo_tides.model.list_models`.
|
567
|
+
directory : str, optional
|
517
568
|
The directory containing tide model data files. If no path is
|
518
569
|
provided, this will default to the environment variable
|
519
|
-
`EO_TIDES_TIDE_MODELS` if set,
|
570
|
+
`EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
|
520
571
|
Tide modelling files should be stored in sub-folders for each
|
521
|
-
model that match the structure
|
522
|
-
|
523
|
-
For example:
|
524
|
-
|
525
|
-
- `{directory}/fes2014/ocean_tide/`
|
526
|
-
- `{directory}/tpxo8_atlas/`
|
527
|
-
- `{directory}/TPXO9_atlas_v5/`
|
572
|
+
model that match the structure required by `pyTMD`
|
573
|
+
(<https://geoscienceaustralia.github.io/eo-tides/setup/>).
|
528
574
|
crs : str, optional
|
529
575
|
Input coordinate reference system for x and y coordinates.
|
530
576
|
Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
|
531
|
-
crop : bool optional
|
577
|
+
crop : bool, optional
|
532
578
|
Whether to crop tide model constituent files on-the-fly to
|
533
579
|
improve performance. Cropping will be performed based on a
|
534
580
|
1 degree buffer around all input points. Defaults to True.
|
535
|
-
method :
|
581
|
+
method : str, optional
|
536
582
|
Method used to interpolate tidal constituents
|
537
583
|
from model files. Options include:
|
538
584
|
|
@@ -542,11 +588,11 @@ def model_tides(
|
|
542
588
|
extrapolate : bool, optional
|
543
589
|
Whether to extrapolate tides for x and y coordinates outside of
|
544
590
|
the valid tide modelling domain using nearest-neighbor.
|
545
|
-
cutoff :
|
591
|
+
cutoff : float, optional
|
546
592
|
Extrapolation cutoff in kilometers. The default is None, which
|
547
593
|
will extrapolate for all points regardless of distance from the
|
548
594
|
valid tide modelling domain.
|
549
|
-
mode :
|
595
|
+
mode : str, optional
|
550
596
|
The analysis mode to use for tide modelling. Supports two options:
|
551
597
|
|
552
598
|
- "one-to-many": Models tides for every timestep in "time" at
|
@@ -558,7 +604,7 @@ def model_tides(
|
|
558
604
|
set of x and y coordinates. In this mode, the number of x and
|
559
605
|
y points must equal the number of timesteps provided in "time".
|
560
606
|
|
561
|
-
parallel :
|
607
|
+
parallel : bool, optional
|
562
608
|
Whether to parallelise tide modelling using `concurrent.futures`.
|
563
609
|
If multiple tide models are requested, these will be run in
|
564
610
|
parallel. Optionally, tide modelling can also be run in parallel
|
@@ -582,7 +628,7 @@ def model_tides(
|
|
582
628
|
results stacked vertically along "tide_model" and "tide_height"
|
583
629
|
columns), or wide format (with a column for each tide model).
|
584
630
|
Defaults to "long".
|
585
|
-
ensemble_models : list, optional
|
631
|
+
ensemble_models : list of str, optional
|
586
632
|
An optional list of models used to generate the ensemble tide
|
587
633
|
model if "ensemble" tide modelling is requested. Defaults to
|
588
634
|
["FES2014", "TPXO9-atlas-v5", "EOT20", "HAMTIDE11", "GOT4.10",
|
@@ -602,25 +648,8 @@ def model_tides(
|
|
602
648
|
A dataframe containing modelled tide heights.
|
603
649
|
|
604
650
|
"""
|
605
|
-
# Set tide modelling files directory. If no custom path is provided,
|
606
|
-
# first try global environmental var, then "/var/share/tide_models"
|
607
|
-
if directory is None:
|
608
|
-
if "EO_TIDES_TIDE_MODELS" in os.environ:
|
609
|
-
directory = os.environ["EO_TIDES_TIDE_MODELS"]
|
610
|
-
else:
|
611
|
-
directory = "/var/share/tide_models"
|
612
|
-
|
613
|
-
# Verify path exists
|
614
|
-
directory = pathlib.Path(directory).expanduser()
|
615
|
-
if not directory.exists():
|
616
|
-
raise FileNotFoundError("Invalid tide directory")
|
617
|
-
|
618
|
-
# If time passed as a single Timestamp, convert to datetime64
|
619
|
-
if isinstance(time, pd.Timestamp):
|
620
|
-
time = time.to_datetime64()
|
621
|
-
|
622
651
|
# Turn inputs into arrays for consistent handling
|
623
|
-
models_requested = np.atleast_1d(model)
|
652
|
+
models_requested = list(np.atleast_1d(model))
|
624
653
|
x = np.atleast_1d(x)
|
625
654
|
y = np.atleast_1d(y)
|
626
655
|
time = np.atleast_1d(time)
|
@@ -644,32 +673,44 @@ def model_tides(
|
|
644
673
|
"you intended to model multiple timesteps at each point."
|
645
674
|
)
|
646
675
|
|
647
|
-
#
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
676
|
+
# If time passed as a single Timestamp, convert to datetime64
|
677
|
+
if isinstance(time, pd.Timestamp):
|
678
|
+
time = time.to_datetime64()
|
679
|
+
|
680
|
+
# Set tide modelling files directory. If no custom path is
|
681
|
+
# provided, try global environment variable.
|
682
|
+
directory = _set_directory(directory)
|
683
|
+
|
684
|
+
# Get full list of supported models from pyTMD database;
|
685
|
+
# add ensemble option to list of models
|
686
|
+
available_models, valid_models = list_models(
|
687
|
+
directory, show_available=False, show_supported=False, raise_error=True
|
688
|
+
)
|
689
|
+
# TODO: This is hacky, find a better way. Perhaps a kwarg that
|
690
|
+
# turns ensemble functionality on, and checks that supplied
|
691
|
+
# models match models expected for ensemble?
|
692
|
+
available_models = available_models + ["ensemble"]
|
693
|
+
valid_models = valid_models + ["ensemble"]
|
694
|
+
|
695
|
+
# Error if any models are not supported
|
667
696
|
if not all(m in valid_models for m in models_requested):
|
668
|
-
|
669
|
-
f"One or more of the models
|
670
|
-
f"
|
671
|
-
|
697
|
+
error_text = (
|
698
|
+
f"One or more of the requested models are not valid:\n"
|
699
|
+
f"{models_requested}\n\n"
|
700
|
+
"The following models are supported:\n"
|
701
|
+
f"{valid_models}"
|
672
702
|
)
|
703
|
+
raise ValueError(error_text)
|
704
|
+
|
705
|
+
# Error if any models are not available in `directory`
|
706
|
+
if not all(m in available_models for m in models_requested):
|
707
|
+
error_text = (
|
708
|
+
f"One or more of the requested models are valid, but not available in `{directory}`:\n"
|
709
|
+
f"{models_requested}\n\n"
|
710
|
+
f"The following models are available in `{directory}`:\n"
|
711
|
+
f"{available_models}"
|
712
|
+
)
|
713
|
+
raise ValueError(error_text)
|
673
714
|
|
674
715
|
# If ensemble modelling is requested, use a custom list of models
|
675
716
|
# for subsequent processing
|
@@ -763,11 +804,11 @@ def model_tides(
|
|
763
804
|
|
764
805
|
# Optionally compute ensemble model and add to dataframe
|
765
806
|
if "ensemble" in models_requested:
|
766
|
-
ensemble_df = _ensemble_model(
|
807
|
+
ensemble_df = _ensemble_model(tide_df, crs, models_to_process, **ensemble_kwargs)
|
767
808
|
|
768
809
|
# Update requested models with any custom ensemble models, then
|
769
810
|
# filter the dataframe to keep only models originally requested
|
770
|
-
models_requested = np.union1d(models_requested, ensemble_df.tide_model.unique())
|
811
|
+
models_requested = list(np.union1d(models_requested, ensemble_df.tide_model.unique()))
|
771
812
|
tide_df = pd.concat([tide_df, ensemble_df]).query("tide_model in @models_requested")
|
772
813
|
|
773
814
|
# Optionally convert to a wide format dataframe with a tide model in
|
@@ -784,362 +825,3 @@ def model_tides(
|
|
784
825
|
tide_df = tide_df.reindex(output_indices)
|
785
826
|
|
786
827
|
return tide_df
|
787
|
-
|
788
|
-
|
789
|
-
def _pixel_tides_resample(
|
790
|
-
tides_lowres,
|
791
|
-
ds,
|
792
|
-
resample_method="bilinear",
|
793
|
-
dask_chunks="auto",
|
794
|
-
dask_compute=True,
|
795
|
-
):
|
796
|
-
"""Resamples low resolution tides modelled by `pixel_tides` into the
|
797
|
-
geobox (e.g. spatial resolution and extent) of the original higher
|
798
|
-
resolution satellite dataset.
|
799
|
-
|
800
|
-
Parameters
|
801
|
-
----------
|
802
|
-
tides_lowres : xarray.DataArray
|
803
|
-
The low resolution tide modelling data array to be resampled.
|
804
|
-
ds : xarray.Dataset
|
805
|
-
The dataset whose geobox will be used as the template for the
|
806
|
-
resampling operation. This is typically the same satellite
|
807
|
-
dataset originally passed to `pixel_tides`.
|
808
|
-
resample_method : string, optional
|
809
|
-
The resampling method to use. Defaults to "bilinear"; valid
|
810
|
-
options include "nearest", "cubic", "min", "max", "average" etc.
|
811
|
-
dask_chunks : str or tuple, optional
|
812
|
-
Can be used to configure custom Dask chunking for the final
|
813
|
-
resampling step. The default of "auto" will automatically set
|
814
|
-
x/y chunks to match those in `ds` if they exist, otherwise will
|
815
|
-
set x/y chunks that cover the entire extent of the dataset.
|
816
|
-
For custom chunks, provide a tuple in the form `(y, x)`, e.g.
|
817
|
-
`(2048, 2048)`.
|
818
|
-
dask_compute : bool, optional
|
819
|
-
Whether to compute results of the resampling step using Dask.
|
820
|
-
If False, this will return `tides_highres` as a Dask array.
|
821
|
-
|
822
|
-
Returns
|
823
|
-
-------
|
824
|
-
tides_highres, tides_lowres : tuple of xr.DataArrays
|
825
|
-
In addition to `tides_lowres` (see above), a high resolution
|
826
|
-
array of tide heights will be generated matching the
|
827
|
-
exact spatial resolution and extent of `ds`.
|
828
|
-
|
829
|
-
"""
|
830
|
-
# Determine spatial dimensions
|
831
|
-
y_dim, x_dim = ds.odc.spatial_dims
|
832
|
-
|
833
|
-
# Convert array to Dask, using no chunking along y and x dims,
|
834
|
-
# and a single chunk for each timestep/quantile and tide model
|
835
|
-
tides_lowres_dask = tides_lowres.chunk({d: None if d in [y_dim, x_dim] else 1 for d in tides_lowres.dims})
|
836
|
-
|
837
|
-
# Automatically set Dask chunks for reprojection if set to "auto".
|
838
|
-
# This will either use x/y chunks if they exist in `ds`, else
|
839
|
-
# will cover the entire x and y dims) so we don't end up with
|
840
|
-
# hundreds of tiny x and y chunks due to the small size of
|
841
|
-
# `tides_lowres` (possible odc.geo bug?)
|
842
|
-
if dask_chunks == "auto":
|
843
|
-
if ds.chunks is not None:
|
844
|
-
if (y_dim in ds.chunks) & (x_dim in ds.chunks):
|
845
|
-
dask_chunks = (ds.chunks[y_dim], ds.chunks[x_dim])
|
846
|
-
else:
|
847
|
-
dask_chunks = ds.odc.geobox.shape
|
848
|
-
else:
|
849
|
-
dask_chunks = ds.odc.geobox.shape
|
850
|
-
|
851
|
-
# Reproject into the GeoBox of `ds` using odc.geo and Dask
|
852
|
-
tides_highres = tides_lowres_dask.odc.reproject(
|
853
|
-
how=ds.odc.geobox,
|
854
|
-
chunks=dask_chunks,
|
855
|
-
resampling=resample_method,
|
856
|
-
).rename("tide_height")
|
857
|
-
|
858
|
-
# Optionally process and load into memory with Dask
|
859
|
-
if dask_compute:
|
860
|
-
tides_highres.load()
|
861
|
-
|
862
|
-
return tides_highres, tides_lowres
|
863
|
-
|
864
|
-
|
865
|
-
def pixel_tides(
|
866
|
-
ds,
|
867
|
-
times=None,
|
868
|
-
resample=True,
|
869
|
-
calculate_quantiles=None,
|
870
|
-
resolution=None,
|
871
|
-
buffer=None,
|
872
|
-
resample_method="bilinear",
|
873
|
-
model="FES2014",
|
874
|
-
dask_chunks="auto",
|
875
|
-
dask_compute=True,
|
876
|
-
**model_tides_kwargs,
|
877
|
-
):
|
878
|
-
"""Obtain tide heights for each pixel in a dataset by modelling
|
879
|
-
tides into a low-resolution grid surrounding the dataset,
|
880
|
-
then (optionally) spatially resample this low-res data back
|
881
|
-
into the original higher resolution dataset extent and resolution.
|
882
|
-
|
883
|
-
Parameters
|
884
|
-
----------
|
885
|
-
ds : xarray.Dataset
|
886
|
-
A dataset whose geobox (`ds.odc.geobox`) will be used to define
|
887
|
-
the spatial extent of the low resolution tide modelling grid.
|
888
|
-
times : pandas.DatetimeIndex or list of pandas.Timestamps, optional
|
889
|
-
By default, the function will model tides using the times
|
890
|
-
contained in the `time` dimension of `ds`. Alternatively, this
|
891
|
-
param can be used to model tides for a custom set of times
|
892
|
-
instead. For example:
|
893
|
-
`times=pd.date_range(start="2000", end="2001", freq="5h")`
|
894
|
-
resample : bool, optional
|
895
|
-
Whether to resample low resolution tides back into `ds`'s original
|
896
|
-
higher resolution grid. Set this to `False` if you do not want
|
897
|
-
low resolution tides to be re-projected back to higher resolution.
|
898
|
-
calculate_quantiles : list or np.array, optional
|
899
|
-
Rather than returning all individual tides, low-resolution tides
|
900
|
-
can be first aggregated using a quantile calculation by passing in
|
901
|
-
a list or array of quantiles to compute. For example, this could
|
902
|
-
be used to calculate the min/max tide across all times:
|
903
|
-
`calculate_quantiles=[0.0, 1.0]`.
|
904
|
-
resolution : int, optional
|
905
|
-
The desired resolution of the low-resolution grid used for tide
|
906
|
-
modelling. The default None will create a 5000 m resolution grid
|
907
|
-
if `ds` has a projected CRS (i.e. metre units), or a 0.05 degree
|
908
|
-
resolution grid if `ds` has a geographic CRS (e.g. degree units).
|
909
|
-
Note: higher resolutions do not necessarily provide better
|
910
|
-
tide modelling performance, as results will be limited by the
|
911
|
-
resolution of the underlying global tide model (e.g. 1/16th
|
912
|
-
degree / ~5 km resolution grid for FES2014).
|
913
|
-
buffer : int, optional
|
914
|
-
The amount by which to buffer the higher resolution grid extent
|
915
|
-
when creating the new low resolution grid. This buffering is
|
916
|
-
important as it ensures that pixel-based tides are seamless
|
917
|
-
across dataset boundaries. This buffer will eventually be clipped
|
918
|
-
away when the low-resolution data is re-projected back to the
|
919
|
-
resolution and extent of the higher resolution dataset. To
|
920
|
-
ensure that at least two pixels occur outside of the dataset
|
921
|
-
bounds, the default None applies a 12000 m buffer if `ds` has a
|
922
|
-
projected CRS (i.e. metre units), or a 0.12 degree buffer if
|
923
|
-
`ds` has a geographic CRS (e.g. degree units).
|
924
|
-
resample_method : string, optional
|
925
|
-
If resampling is requested (see `resample` above), use this
|
926
|
-
resampling method when converting from low resolution to high
|
927
|
-
resolution pixels. Defaults to "bilinear"; valid options include
|
928
|
-
"nearest", "cubic", "min", "max", "average" etc.
|
929
|
-
model : string or list of strings
|
930
|
-
The tide model or a list of models used to model tides, as
|
931
|
-
supported by the `pyTMD` Python package. Options include:
|
932
|
-
- "FES2014" (default; pre-configured on DEA Sandbox)
|
933
|
-
- "FES2022"
|
934
|
-
- "TPXO8-atlas"
|
935
|
-
- "TPXO9-atlas-v5"
|
936
|
-
- "EOT20"
|
937
|
-
- "HAMTIDE11"
|
938
|
-
- "GOT4.10"
|
939
|
-
dask_chunks : str or tuple, optional
|
940
|
-
Can be used to configure custom Dask chunking for the final
|
941
|
-
resampling step. The default of "auto" will automatically set
|
942
|
-
x/y chunks to match those in `ds` if they exist, otherwise will
|
943
|
-
set x/y chunks that cover the entire extent of the dataset.
|
944
|
-
For custom chunks, provide a tuple in the form `(y, x)`, e.g.
|
945
|
-
`(2048, 2048)`.
|
946
|
-
dask_compute : bool, optional
|
947
|
-
Whether to compute results of the resampling step using Dask.
|
948
|
-
If False, this will return `tides_highres` as a Dask array.
|
949
|
-
**model_tides_kwargs :
|
950
|
-
Optional parameters passed to the `dea_tools.coastal.model_tides`
|
951
|
-
function. Important parameters include "directory" (used to
|
952
|
-
specify the location of input tide modelling files) and "cutoff"
|
953
|
-
(used to extrapolate modelled tides away from the coast; if not
|
954
|
-
specified here, cutoff defaults to `np.inf`).
|
955
|
-
|
956
|
-
Returns
|
957
|
-
-------
|
958
|
-
If `resample` is False:
|
959
|
-
|
960
|
-
tides_lowres : xr.DataArray
|
961
|
-
A low resolution data array giving either tide heights every
|
962
|
-
timestep in `ds` (if `times` is None), tide heights at every
|
963
|
-
time in `times` (if `times` is not None), or tide height quantiles
|
964
|
-
for every quantile provided by `calculate_quantiles`.
|
965
|
-
|
966
|
-
If `resample` is True:
|
967
|
-
|
968
|
-
tides_highres, tides_lowres : tuple of xr.DataArrays
|
969
|
-
In addition to `tides_lowres` (see above), a high resolution
|
970
|
-
array of tide heights will be generated that matches the
|
971
|
-
exact spatial resolution and extent of `ds`. This will contain
|
972
|
-
either tide heights every timestep in `ds` (if `times` is None),
|
973
|
-
tide heights at every time in `times` (if `times` is not None),
|
974
|
-
or tide height quantiles for every quantile provided by
|
975
|
-
`calculate_quantiles`.
|
976
|
-
|
977
|
-
"""
|
978
|
-
from odc.geo.geobox import GeoBox
|
979
|
-
|
980
|
-
# First test if no time dimension and nothing passed to `times`
|
981
|
-
if ("time" not in ds.dims) & (times is None):
|
982
|
-
raise ValueError(
|
983
|
-
"`ds` does not contain a 'time' dimension. Times are required "
|
984
|
-
"for modelling tides: please pass in a set of custom tides "
|
985
|
-
"using the `times` parameter. For example: "
|
986
|
-
"`times=pd.date_range(start='2000', end='2001', freq='5h')`",
|
987
|
-
)
|
988
|
-
|
989
|
-
# If custom times are provided, convert them to a consistent
|
990
|
-
# pandas.DatetimeIndex format
|
991
|
-
if times is not None:
|
992
|
-
if isinstance(times, list):
|
993
|
-
time_coords = pd.DatetimeIndex(times)
|
994
|
-
elif isinstance(times, pd.Timestamp):
|
995
|
-
time_coords = pd.DatetimeIndex([times])
|
996
|
-
else:
|
997
|
-
time_coords = times
|
998
|
-
|
999
|
-
# Otherwise, use times from `ds` directly
|
1000
|
-
else:
|
1001
|
-
time_coords = ds.coords["time"]
|
1002
|
-
|
1003
|
-
# Set defaults passed to `model_tides`
|
1004
|
-
model_tides_kwargs.setdefault("cutoff", np.inf)
|
1005
|
-
|
1006
|
-
# Standardise model into a list for easy handling
|
1007
|
-
model = [model] if isinstance(model, str) else model
|
1008
|
-
|
1009
|
-
# Test if no time dimension and nothing passed to `times`
|
1010
|
-
if ("time" not in ds.dims) & (times is None):
|
1011
|
-
raise ValueError(
|
1012
|
-
"`ds` does not contain a 'time' dimension. Times are required "
|
1013
|
-
"for modelling tides: please pass in a set of custom tides "
|
1014
|
-
"using the `times` parameter. For example: "
|
1015
|
-
"`times=pd.date_range(start='2000', end='2001', freq='5h')`",
|
1016
|
-
)
|
1017
|
-
|
1018
|
-
# If custom times are provided, convert them to a consistent
|
1019
|
-
# pandas.DatetimeIndex format
|
1020
|
-
if times is not None:
|
1021
|
-
if isinstance(times, list):
|
1022
|
-
time_coords = pd.DatetimeIndex(times)
|
1023
|
-
elif isinstance(times, pd.Timestamp):
|
1024
|
-
time_coords = pd.DatetimeIndex([times])
|
1025
|
-
else:
|
1026
|
-
time_coords = times
|
1027
|
-
|
1028
|
-
# Otherwise, use times from `ds` directly
|
1029
|
-
else:
|
1030
|
-
time_coords = ds.coords["time"]
|
1031
|
-
|
1032
|
-
# Determine spatial dimensions
|
1033
|
-
y_dim, x_dim = ds.odc.spatial_dims
|
1034
|
-
|
1035
|
-
# Determine resolution and buffer, using different defaults for
|
1036
|
-
# geographic (i.e. degrees) and projected (i.e. metres) CRSs:
|
1037
|
-
crs_units = ds.odc.geobox.crs.units[0][0:6]
|
1038
|
-
if ds.odc.geobox.crs.geographic:
|
1039
|
-
if resolution is None:
|
1040
|
-
resolution = 0.05
|
1041
|
-
elif resolution > 360:
|
1042
|
-
raise ValueError(
|
1043
|
-
f"A resolution of greater than 360 was "
|
1044
|
-
f"provided, but `ds` has a geographic CRS "
|
1045
|
-
f"in {crs_units} units. Did you accidently "
|
1046
|
-
f"provide a resolution in projected "
|
1047
|
-
f"(i.e. metre) units?",
|
1048
|
-
)
|
1049
|
-
if buffer is None:
|
1050
|
-
buffer = 0.12
|
1051
|
-
else:
|
1052
|
-
if resolution is None:
|
1053
|
-
resolution = 5000
|
1054
|
-
elif resolution < 1:
|
1055
|
-
raise ValueError(
|
1056
|
-
f"A resolution of less than 1 was provided, "
|
1057
|
-
f"but `ds` has a projected CRS in "
|
1058
|
-
f"{crs_units} units. Did you accidently "
|
1059
|
-
f"provide a resolution in geographic "
|
1060
|
-
f"(degree) units?",
|
1061
|
-
)
|
1062
|
-
if buffer is None:
|
1063
|
-
buffer = 12000
|
1064
|
-
|
1065
|
-
# Raise error if resolution is less than dataset resolution
|
1066
|
-
dataset_res = ds.odc.geobox.resolution.x
|
1067
|
-
if resolution < dataset_res:
|
1068
|
-
raise ValueError(
|
1069
|
-
f"The resolution of the low-resolution tide "
|
1070
|
-
f"modelling grid ({resolution:.2f}) is less "
|
1071
|
-
f"than `ds`'s pixel resolution ({dataset_res:.2f}). "
|
1072
|
-
f"This can cause extremely slow tide modelling "
|
1073
|
-
f"performance. Please select provide a resolution "
|
1074
|
-
f"greater than {dataset_res:.2f} using "
|
1075
|
-
f"`pixel_tides`'s 'resolution' parameter.",
|
1076
|
-
)
|
1077
|
-
|
1078
|
-
# Create a new reduced resolution tide modelling grid after
|
1079
|
-
# first buffering the grid
|
1080
|
-
print(f"Creating reduced resolution {resolution} x {resolution} {crs_units} tide modelling array")
|
1081
|
-
buffered_geobox = ds.odc.geobox.buffered(buffer)
|
1082
|
-
rescaled_geobox = GeoBox.from_bbox(bbox=buffered_geobox.boundingbox, resolution=resolution)
|
1083
|
-
rescaled_ds = odc.geo.xr.xr_zeros(rescaled_geobox)
|
1084
|
-
|
1085
|
-
# Flatten grid to 1D, then add time dimension
|
1086
|
-
flattened_ds = rescaled_ds.stack(z=(x_dim, y_dim))
|
1087
|
-
flattened_ds = flattened_ds.expand_dims(dim={"time": time_coords.values})
|
1088
|
-
|
1089
|
-
# Model tides in parallel, returning a pandas.DataFrame
|
1090
|
-
tide_df = model_tides(
|
1091
|
-
x=flattened_ds[x_dim],
|
1092
|
-
y=flattened_ds[y_dim],
|
1093
|
-
time=flattened_ds.time,
|
1094
|
-
crs=f"EPSG:{ds.odc.geobox.crs.epsg}",
|
1095
|
-
model=model,
|
1096
|
-
**model_tides_kwargs,
|
1097
|
-
)
|
1098
|
-
|
1099
|
-
# Convert our pandas.DataFrame tide modelling outputs to xarray
|
1100
|
-
tides_lowres = (
|
1101
|
-
# Rename x and y dataframe indexes to match x and y xarray dims
|
1102
|
-
tide_df.rename_axis(["time", x_dim, y_dim])
|
1103
|
-
# Add tide model column to dataframe indexes so we can convert
|
1104
|
-
# our dataframe to a multidimensional xarray
|
1105
|
-
.set_index("tide_model", append=True)
|
1106
|
-
# Convert to xarray and select our tide modelling xr.DataArray
|
1107
|
-
.to_xarray()
|
1108
|
-
.tide_height
|
1109
|
-
# Re-index and transpose into our input coordinates and dim order
|
1110
|
-
.reindex_like(rescaled_ds)
|
1111
|
-
.transpose("tide_model", "time", y_dim, x_dim)
|
1112
|
-
)
|
1113
|
-
|
1114
|
-
# Optionally calculate and return quantiles rather than raw data.
|
1115
|
-
# Set dtype to dtype of the input data as quantile always returns
|
1116
|
-
# float64 (memory intensive)
|
1117
|
-
if calculate_quantiles is not None:
|
1118
|
-
print("Computing tide quantiles")
|
1119
|
-
tides_lowres = tides_lowres.quantile(q=calculate_quantiles, dim="time").astype(tides_lowres.dtype)
|
1120
|
-
|
1121
|
-
# If only one tidal model exists, squeeze out "tide_model" dim
|
1122
|
-
if len(tides_lowres.tide_model) == 1:
|
1123
|
-
tides_lowres = tides_lowres.squeeze("tide_model")
|
1124
|
-
|
1125
|
-
# Ensure CRS is present before we apply any resampling
|
1126
|
-
tides_lowres = tides_lowres.odc.assign_crs(ds.odc.geobox.crs)
|
1127
|
-
|
1128
|
-
# Reproject into original high resolution grid
|
1129
|
-
if resample:
|
1130
|
-
print("Reprojecting tides into original array")
|
1131
|
-
tides_highres, tides_lowres = _pixel_tides_resample(
|
1132
|
-
tides_lowres,
|
1133
|
-
ds,
|
1134
|
-
resample_method,
|
1135
|
-
dask_chunks,
|
1136
|
-
dask_compute,
|
1137
|
-
)
|
1138
|
-
return tides_highres, tides_lowres
|
1139
|
-
|
1140
|
-
print("Returning low resolution tide array")
|
1141
|
-
return tides_lowres
|
1142
|
-
|
1143
|
-
|
1144
|
-
if __name__ == "__main__": # pragma: no cover - script entry point is intentionally a no-op
|
1145
|
-
pass
|