eo-tides 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eo_tides/__init__.py +50 -0
- eo_tides/eo.py +532 -0
- eo_tides/model.py +825 -0
- eo_tides/stats.py +581 -0
- eo_tides/utils.py +705 -0
- eo_tides/validation.py +334 -0
- eo_tides-0.5.0.dist-info/LICENSE +201 -0
- eo_tides-0.5.0.dist-info/METADATA +118 -0
- eo_tides-0.5.0.dist-info/RECORD +11 -0
- eo_tides-0.5.0.dist-info/WHEEL +5 -0
- eo_tides-0.5.0.dist-info/top_level.txt +1 -0
eo_tides/stats.py
ADDED
@@ -0,0 +1,581 @@
|
|
1
|
+
# Used to postpone evaluation of type annotations
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
import os
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
import matplotlib.pyplot as plt
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
import xarray as xr
|
11
|
+
|
12
|
+
# Only import if running type checking
|
13
|
+
if TYPE_CHECKING:
|
14
|
+
from odc.geo.geobox import GeoBox
|
15
|
+
|
16
|
+
from .eo import _pixel_tides_resample, _resample_chunks, _standardise_inputs, pixel_tides, tag_tides
|
17
|
+
from .utils import DatetimeLike
|
18
|
+
|
19
|
+
|
20
|
+
def _tide_statistics(obs_tides, all_tides, min_max_q=(0.0, 1.0), dim="time"):
    """
    Derive tide summary statistics and observation-bias metrics by
    reducing observed and modelled tides along `dim`.

    Parameters
    ----------
    obs_tides : array-like with `mean` and `quantile` methods
        Tide heights at the observed timesteps (the callers in this
        module pass the output of `tag_tides` / `pixel_tides`).
    all_tides : array-like with `mean` and `quantile` methods
        Tide heights at every modelled timestep.
    min_max_q : tuple, optional
        Quantiles used as the "lowest" and "highest" tide. The first
        entry is taken as the low bound and the last entry as the
        high bound. Defaults to `(0.0, 1.0)`.
    dim : str, optional
        Name of the dimension to reduce over. Defaults to "time".

    Returns
    -------
    xarray.Dataset
        Merged dataset with variables `mot`, `mat`, `hot`, `hat`,
        `lot`, `lat`, `otr`, `tr`, `spread`, `offset_low` and
        `offset_high`.
    """

    def _low_high(tides):
        # Evaluate the requested quantiles once as float32, then pick
        # the first (low) and last (high) entries, dropping the
        # leftover "quantile" coordinate
        bounds = tides.quantile(q=min_max_q, dim=dim).astype("float32")
        return bounds.isel(quantile=0, drop=True), bounds.isel(quantile=-1, drop=True)

    # Mean observed and mean modelled tide heights
    observed_mean = obs_tides.mean(dim=dim)
    modelled_mean = all_tides.mean(dim=dim)

    # Lowest/highest tides for the observed and modelled series
    observed_low, observed_high = _low_high(obs_tides)
    modelled_low, modelled_high = _low_high(all_tides)

    # Tide ranges spanned by each series
    observed_range = observed_high - observed_low
    modelled_range = modelled_high - modelled_low

    # Bishop-Taylor et al. 2018 metrics, expressed as proportions of
    # the modelled tide range
    named_stats = {
        "mot": observed_mean,
        "mat": modelled_mean,
        "hot": observed_high,
        "hat": modelled_high,
        "lot": observed_low,
        "lat": modelled_low,
        "otr": observed_range,
        "tr": modelled_range,
        "spread": observed_range / modelled_range,
        "offset_low": (observed_low - modelled_low) / modelled_range,
        "offset_high": (modelled_high - observed_high) / modelled_range,
    }

    # Name each statistic and combine everything into one dataset
    return xr.merge(
        [values.rename(name) for name, values in named_stats.items()],
        compat="override",
    )
|
65
|
+
|
66
|
+
|
67
|
+
def _stats_plain_english(mot, mat, hot, hat, lot, lat, otr, tr, spread, offset_low, offset_high):
    """
    Print a plain-English summary of tide statistics to stdout.

    Parameters
    ----------
    mot, mat : float
        Mean observed and mean modelled tide heights (printed in m).
    hot, hat : float
        Highest observed and highest modelled tide heights.
    lot, lat : float
        Lowest observed and lowest modelled tide heights.
    otr, tr : float
        Observed and modelled tide ranges.
    spread : float
        Proportion of the modelled tide range that was observed;
        printed as a percentage.
    offset_low, offset_high : float
        Proportions of the lowest/highest modelled tides that were
        never observed; printed as percentages and, multiplied by
        `tr`, as heights.

    Returns
    -------
    None
    """
    # Direction of the mean observed tide relative to the mean modelled tide
    observed_is_higher = mot > mat
    mean_diff = "higher" if observed_is_higher else "lower"
    mean_diff_icon = "⬆️" if observed_is_higher else "⬇️"

    # Traffic-light icons: green when most of the range was observed,
    # yellow for moderate bias, red otherwise
    spread_icon = "🟢" if spread >= 0.9 else "🟡" if spread > 0.7 else "🔴"
    low_tide_icon = "🟢" if offset_low <= 0.1 else "🟡" if offset_low < 0.2 else "🔴"
    high_tide_icon = "🟢" if offset_high <= 0.1 else "🟡" if offset_high < 0.2 else "🔴"

    # Print summary
    print(f"\n\n🌊 Modelled astronomical tide range: {tr:.2f} m ({lat:.2f} to {hat:.2f} m).")
    print(f"🛰️ Observed tide range: {otr:.2f} m ({lot:.2f} to {hot:.2f} m).\n")
    print(f"{spread_icon} {spread:.0%} of the modelled astronomical tide range was observed at this location.")
    print(
        f"{high_tide_icon} The highest {offset_high:.0%} ({offset_high * tr:.2f} m) of the tide range was never observed."
    )
    print(
        f"{low_tide_icon} The lowest {offset_low:.0%} ({offset_low * tr:.2f} m) of the tide range was never observed.\n"
    )
    print(f"🌊 Mean modelled astronomical tide height: {mat:.2f} m.")
    print(f"🛰️ Mean observed tide height: {mot:.2f} m.")

    # The direction is already conveyed by "higher"/"lower", so report
    # the magnitude only; a signed value would read "-0.25 m lower"
    print(
        f"{mean_diff_icon} The mean observed tide height was {abs(mot - mat):.2f} m {mean_diff} than the mean modelled astronomical tide height."
    )
|
90
|
+
|
91
|
+
|
92
|
+
def _stats_figure(
    all_tides_da, obs_tides_da, hot, hat, lot, lat, spread, offset_low, offset_high, plot_var, point_col=None
):
    """
    Plot tide bias statistics as a figure, including both
    satellite observations and all modelled tides.

    Parameters
    ----------
    all_tides_da : array with a `.plot` accessor and "time" coordinate
        Modelled tide heights for every modelled timestep; drawn as
        a semi-transparent series.
    obs_tides_da : array with a `.plot` accessor and "time" coordinate
        Tide heights at the observed timesteps; drawn as markers only.
    hot, hat, lot, lat : float
        Highest/lowest observed and modelled tide heights, drawn as
        dotted horizontal lines.
    spread, offset_low, offset_high : float
        Proportions shown as percentages in the text annotations.
    plot_var : array or None
        If provided, observations are grouped by its values and each
        group is drawn with a different marker. If None, all
        observations are drawn as circles.
    point_col : str, optional
        Colour of the observation markers. When None, grouped points
        use the plotting defaults and ungrouped points are black.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the plot.
    """

    # Create plot and add all modelled tides
    fig, ax = plt.subplots(figsize=(10, 6))
    all_tides_da.plot(ax=ax, alpha=0.4, label="Modelled tides")

    # Loop through custom variable values if provided
    if plot_var is not None:
        # Marker styles, reused cyclically if there are more groups
        # than markers
        markers = [
            "o",
            "^",
            "s",
            "D",
            "v",
            "<",
            ">",
            "p",
            "*",
            "h",
            "H",
            "+",
            "x",
            "d",
            "|",
            "_",
        ]

        # Sort values to allow correct grouping
        obs_tides_da = obs_tides_da.sortby("time")
        plot_var = plot_var.sortby("time")

        # Iterate and plot each group
        for i, (label, group) in enumerate(obs_tides_da.groupby(plot_var)):
            group.plot.line(
                ax=ax,
                linewidth=0.0,
                color=point_col,
                marker=markers[i % len(markers)],
                label=label,
                markeredgecolor="black",
                markeredgewidth=0.6,
            )

    # Otherwise, plot all data at once
    else:
        obs_tides_da.plot.line(
            ax=ax,
            marker="o",
            linewidth=0.0,
            color="black" if point_col is None else point_col,
            markeredgecolor="black",
            markeredgewidth=0.6,
            label="Satellite observations",
        )

    # Add legend and remove title
    ax.legend(
        loc="upper center",
        bbox_to_anchor=(0.5, 1.04),
        ncol=20,
        borderaxespad=0,
        frameon=False,
    )
    ax.set_title("")

    # Add horizontal lines for spread/offsets
    for height in (lot, hot, lat, hat):
        ax.axhline(height, color="black", linestyle=":", linewidth=1)

    # Add text annotations for spread/offsets. Each label is anchored at
    # the final modelled timestep and vertically centred on the middle
    # of the band it describes
    last_time = all_tides_da.time.max()
    ax.annotate(
        f" High tide\n offset ({offset_high:.0%})",
        xy=(last_time, np.mean([hat, hot])),
        va="center",
    )
    ax.annotate(
        f" Spread\n ({spread:.0%})",
        xy=(last_time, np.mean([lot, hot])),
        va="center",
    )
    ax.annotate(
        f" Low tide\n offset ({offset_low:.0%})",
        xy=(last_time, np.mean([lat, lot])),
        va="center",
    )

    # Remove top right axes and add labels
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.set_ylabel("Tide height (m)")
    ax.set_xlabel("")
    ax.margins(x=0.015)

    return fig
|
194
|
+
|
195
|
+
|
196
|
+
def tide_stats(
    data: xr.Dataset | xr.DataArray | GeoBox,
    time: DatetimeLike | None = None,
    model: str | list[str] = "EOT20",
    directory: str | os.PathLike | None = None,
    tidepost_lat: float | None = None,
    tidepost_lon: float | None = None,
    plain_english: bool = True,
    plot: bool = True,
    plot_var: str | None = None,
    point_col: str | None = None,
    modelled_freq: str = "3h",
    min_max_q: tuple = (0.0, 1.0),
    round_stats: int = 3,
    **tag_tides_kwargs,
) -> pd.Series | pd.DataFrame:
    """
    Take a multi-dimensional dataset and generate tide statistics
    and satellite-observed tide bias metrics, calculated based on
    every timestep in the satellite data and the geographic centroid
    of the imagery.

    By comparing the subset of tides observed by satellites
    against the full astronomical tidal range, we can evaluate
    whether the tides observed by satellites are biased
    (e.g. fail to observe either the highest or lowest tides) due
    to tide aliasing interactions with sun-synchronous satellite
    overpasses.

    For more information about the tidal statistics computed by this
    function, refer to Figure 8 in Bishop-Taylor et al. 2018:
    <https://www.sciencedirect.com/science/article/pii/S0272771418308783#fig8>

    Parameters
    ----------
    data : xarray.Dataset or xarray.DataArray or odc.geo.geobox.GeoBox
        A multi-dimensional dataset or GeoBox pixel grid that will
        be used to calculate tide statistics. If `data` is an
        xarray object, it should include a "time" dimension.
        If no "time" dimension exists or if `data` is a GeoBox,
        then times must be passed using the `time` parameter.
    time : DatetimeLike, optional
        By default, tides will be modelled using times from the
        "time" dimension of `data`. Alternatively, this param can
        be used to provide a custom set of times. Accepts any format
        that can be converted by `pandas.to_datetime()`. For example:
        `time=pd.date_range(start="2000", end="2001", freq="5h")`
    model : str or list of str, optional
        The tide model (or list of models) to use to model tides.
        If a list is provided, the resulting statistics will be
        returned as a `pandas.DataFrame`; otherwise a `pandas.Series`.
        Defaults to "EOT20"; specify "all" to use all models available
        in `directory`. For a full list of available and supported
        models, run `eo_tides.utils.list_models`.
    directory : str, optional
        The directory containing tide model data files. If no path is
        provided, this will default to the environment variable
        `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
        Tide modelling files should be stored in sub-folders for each
        model that match the structure required by `pyTMD`
        (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
    tidepost_lat, tidepost_lon : float or int, optional
        Optional coordinates used to model tides. The default is None,
        which uses the centroid of the dataset as the tide modelling
        location. Both values must be supplied for them to be used;
        if either is None, the centroid is used for both. A value of
        0 (equator or prime meridian) is a valid coordinate.
    plain_english : bool, optional
        An optional boolean indicating whether to print a plain english
        version of the tidal statistics to the screen. Defaults to True;
        only supported when a single tide model is passed to `model`.
    plot : bool, optional
        An optional boolean indicating whether to plot how satellite-
        observed tide heights compare against the full tidal range.
        Defaults to True; only supported when a single tide model is
        passed to `model`.
    plot_var : str, optional
        Optional name of a coordinate, dimension or variable in the array
        that will be used to plot observations with unique symbols.
        Defaults to None, which will plot all observations as circles.
    point_col : str, optional
        Colour used to plot points on the graph. Defaults to None which
        will automatically select colours.
    modelled_freq : str, optional
        An optional string giving the frequency at which to model tides
        when computing the full modelled tidal range. Defaults to '3h',
        which computes a tide height for every three hours across the
        temporal extent of `data`.
    min_max_q : tuple, optional
        Quantiles used to calculate max and min observed and modelled
        astronomical tides. By default `(0.0, 1.0)` which is equivalent
        to minimum and maximum; to use a softer threshold that is more
        robust to outliers, use e.g. `(0.1, 0.9)`.
    round_stats : int, optional
        The number of decimal places used to round the output statistics.
        Defaults to 3.
    **tag_tides_kwargs :
        Optional parameters passed to the `eo_tides.eo.tag_tides`
        function that is used to model tides for each observed and
        modelled timestep.

    Returns
    -------
    stats_df : pandas.Series or pandas.DataFrame
        A pandas object containing the following statistics:

        - `y`: latitude used for modelling tide heights
        - `x`: longitude used for modelling tide heights
        - `mot`: mean tide height observed by the satellite (metres)
        - `mat`: mean modelled astronomical tide height (metres)
        - `lot`: minimum tide height observed by the satellite (metres)
        - `lat`: minimum tide height from modelled astronomical tidal range (metres)
        - `hot`: maximum tide height observed by the satellite (metres)
        - `hat`: maximum tide height from modelled astronomical tidal range (metres)
        - `otr`: tidal range observed by the satellite (metres)
        - `tr`: modelled astronomical tide range (metres)
        - `spread`: proportion of the full modelled tidal range observed by the satellite
        - `offset_low`: proportion of the lowest tides never observed by the satellite
        - `offset_high`: proportion of the highest tides never observed by the satellite
    """

    # Standardise data inputs, time and models
    gbox, obs_times = _standardise_inputs(data, time)

    # Generate range of times covering entire period of satellite record
    assert obs_times is not None
    all_times = pd.date_range(
        start=obs_times.min().item(),
        end=obs_times.max().item(),
        freq=modelled_freq,
    )

    # If custom tide modelling locations are not provided, use the
    # dataset centroid. Compare against None explicitly: 0.0 is a
    # valid latitude/longitude and must not trigger the fallback
    if tidepost_lat is None or tidepost_lon is None:
        tidepost_lon, tidepost_lat = gbox.geographic_extent.centroid.coords[0]

    # Model tides for observed timesteps
    obs_tides_da = tag_tides(
        gbox,
        time=obs_times,
        model=model,
        directory=directory,
        tidepost_lat=tidepost_lat,  # type: ignore
        tidepost_lon=tidepost_lon,  # type: ignore
        **tag_tides_kwargs,
    )

    # Model tides for all modelled timesteps
    all_tides_da = tag_tides(
        gbox,
        time=all_times,
        model=model,
        directory=directory,
        tidepost_lat=tidepost_lat,  # type: ignore
        tidepost_lon=tidepost_lon,  # type: ignore
        **tag_tides_kwargs,
    )

    # Calculate statistics
    stats_ds = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)

    # Convert to pandas and add tide post coordinates
    stats_df = stats_ds.to_pandas().astype("float32")
    stats_df["x"] = tidepost_lon
    stats_df["y"] = tidepost_lat

    # Convert coordinates to index if dataframe
    if isinstance(stats_df, pd.DataFrame):
        stats_df = stats_df.set_index(["x", "y"], append=True)

    # If a series, print and plot summaries
    else:
        if plain_english:
            _stats_plain_english(
                mot=stats_df.mot,
                mat=stats_df.mat,
                hot=stats_df.hot,
                hat=stats_df.hat,
                lot=stats_df.lot,
                lat=stats_df.lat,
                otr=stats_df.otr,
                tr=stats_df.tr,
                spread=stats_df.spread,
                offset_low=stats_df.offset_low,
                offset_high=stats_df.offset_high,
            )

        if plot:
            _stats_figure(
                all_tides_da=all_tides_da,
                obs_tides_da=obs_tides_da,
                hot=stats_df.hot,
                hat=stats_df.hat,
                lot=stats_df.lot,
                lat=stats_df.lat,
                spread=stats_df.spread,
                offset_low=stats_df.offset_low,
                offset_high=stats_df.offset_high,
                plot_var=data[plot_var] if plot_var else None,
                point_col=point_col,
            )

    # Return in Pandas format
    return stats_df.round(round_stats)
|
399
|
+
|
400
|
+
|
401
|
+
def pixel_stats(
    data: xr.Dataset | xr.DataArray | GeoBox,
    time: DatetimeLike | None = None,
    model: str | list[str] = "EOT20",
    directory: str | os.PathLike | None = None,
    resample: bool = True,
    modelled_freq: str = "3h",
    min_max_q: tuple[float, float] = (0.0, 1.0),
    resample_method: str = "bilinear",
    dask_chunks: tuple[float, float] | None = None,
    dask_compute: bool = True,
    extrapolate: bool = True,
    cutoff: float = 10,
    **pixel_tides_kwargs,
) -> xr.Dataset:
    """
    Generate spatial tide statistics and satellite-observed tide
    bias metrics for a multi-dimensional dataset, using every
    timestep in the satellite data and modelling tides across the
    spatial extent of the imagery.

    Satellites only ever sample a subset of the full astronomical
    tidal range. Comparing that subset against the full modelled
    range shows whether the observations are biased (e.g. never
    capture the highest or lowest tides) because of tide aliasing
    interactions with sun-synchronous satellite overpasses.

    Unlike `tide_stats`, which summarises a single location, this
    function produces a two-dimensional output for each statistic.

    The statistics are described in Figure 8 of Bishop-Taylor et
    al. 2018:
    <https://www.sciencedirect.com/science/article/pii/S0272771418308783#fig8>

    Parameters
    ----------
    data : xarray.Dataset or xarray.DataArray or odc.geo.geobox.GeoBox
        Multi-dimensional dataset or GeoBox pixel grid used to
        calculate spatial tide statistics. An xarray input should
        include a "time" dimension; if it does not, or if `data`
        is a GeoBox, times must be supplied via `time`.
    time : DatetimeLike, optional
        Custom times to model instead of the "time" dimension of
        `data`. Accepts any format that `pandas.to_datetime()` can
        convert, for example:
        `time=pd.date_range(start="2000", end="2001", freq="5h")`
    model : str or list of str, optional
        Tide model (or list of models) used to model tides. A list
        adds a "tide_model" dimension to the `xarray.Dataset`
        output. Defaults to "EOT20"; pass "all" to use every model
        available in `directory`. Run `eo_tides.utils.list_models`
        for the full list of available and supported models.
    directory : str, optional
        Directory containing tide model data files. When omitted,
        the `EO_TIDES_TIDE_MODELS` environment variable is used if
        set; otherwise an error is raised. Files should be stored
        in per-model sub-folders matching the structure required by
        `pyTMD`
        (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
    resample : bool, optional
        Whether to resample the statistics back into the original
        higher resolution grid of `data`. Use `False` to return the
        lower-resolution statistics (useful for assessing tide
        biases across large spatial extents).
    modelled_freq : str, optional
        Frequency at which tides are modelled when computing the
        full modelled tidal range. Defaults to '3h', i.e. one tide
        height every three hours across the temporal extent of
        `data`.
    min_max_q : tuple, optional
        Quantiles used for the minimum and maximum observed and
        modelled astronomical tides. The default `(0.0, 1.0)` is
        the true minimum and maximum; a softer threshold that is
        more robust to outliers is e.g. `(0.1, 0.9)`.
    resample_method : str, optional
        Resampling method used when going from low resolution to
        high resolution pixels (only relevant when `resample` is
        True). Defaults to "bilinear"; valid options include
        "nearest", "cubic", "min", "max", "average" etc.
    dask_chunks : tuple of float, optional
        Custom Dask chunking for the final resampling step, given
        as `(y, x)`, e.g. `(2048, 2048)`. By default chunks match
        the y/x chunks of `data` if they exist; otherwise they
        cover the entire y/x extent of the dataset.
    dask_compute : bool, optional
        Whether to compute the results of the resampling step using
        Dask. If False, `stats_ds` is returned as a Dask-enabled
        array.
    extrapolate : bool, optional
        Whether to extrapolate tides into x and y coordinates
        outside of the valid tide modelling domain using
        nearest-neighbor. Defaults to True.
    cutoff : float, optional
        Extrapolation cutoff in kilometers. Defaults to 10 km to
        avoid producing tide statistics too far inland.
    **pixel_tides_kwargs :
        Optional parameters passed to the `eo_tides.eo.pixel_tides`
        function.

    Returns
    -------
    stats_ds : xarray.Dataset
        An `xarray.Dataset` containing the following statistics as two-dimensional data variables:

        - `mot`: mean tide height observed by the satellite (metres)
        - `mat`: mean modelled astronomical tide height (metres)
        - `lot`: minimum tide height observed by the satellite (metres)
        - `lat`: minimum tide height from modelled astronomical tidal range (metres)
        - `hot`: maximum tide height observed by the satellite (metres)
        - `hat`: maximum tide height from modelled astronomical tidal range (metres)
        - `otr`: tidal range observed by the satellite (metres)
        - `tr`: modelled astronomical tide range (metres)
        - `spread`: proportion of the full modelled tidal range observed by the satellite
        - `offset_low`: proportion of the lowest tides never observed by the satellite
        - `offset_high`: proportion of the highest tides never observed by the satellite

    """

    # Normalise the inputs: pixel grid + observation times, resampling
    # chunks, and a list of models even when one name was given
    gbox, obs_times = _standardise_inputs(data, time)
    dask_chunks = _resample_chunks(data, dask_chunks)
    if isinstance(model, str):
        model = [model]

    # Regularly spaced timesteps spanning the first to the last observation
    assert obs_times is not None
    first_obs = obs_times.min().item()
    last_obs = obs_times.max().item()
    all_times = pd.date_range(start=first_obs, end=last_obs, freq=modelled_freq)

    # Model low resolution tides twice with identical settings: first
    # for the observed timesteps, then for every modelled timestep
    obs_tides_da, all_tides_da = [
        pixel_tides(
            gbox,
            time=timesteps,
            model=model,
            directory=directory,
            resample=False,
            extrapolate=extrapolate,
            cutoff=cutoff,
            **pixel_tides_kwargs,
        )
        for timesteps in (obs_times, all_times)
    ]

    # Compute the statistics, then attach the CRS so that the result
    # can be reprojected
    stats_lowres = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)
    stats_lowres = stats_lowres.odc.assign_crs(crs=gbox.crs)

    # Without resampling, hand back the low resolution statistics as-is
    if not resample:
        print("Returning low resolution statistics array")
        return stats_lowres

    # Otherwise reproject statistics into the original high resolution grid
    print("Reprojecting statistics into original resolution")
    return _pixel_tides_resample(
        stats_lowres,
        gbox,
        resample_method,
        dask_chunks,
        dask_compute,
        None,
    )
|