eo-tides 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eo_tides/__init__.py +2 -1
- eo_tides/eo.py +23 -12
- eo_tides/model.py +259 -274
- eo_tides/stats.py +261 -272
- eo_tides/utils.py +112 -3
- eo_tides/validation.py +23 -21
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/METADATA +23 -22
- eo_tides-0.4.0.dist-info/RECORD +11 -0
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/WHEEL +1 -1
- eo_tides-0.3.0.dist-info/RECORD +0 -11
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/LICENSE +0 -0
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/top_level.txt +0 -0
eo_tides/stats.py
CHANGED
@@ -8,44 +8,101 @@ import matplotlib.pyplot as plt
|
|
8
8
|
import numpy as np
|
9
9
|
import pandas as pd
|
10
10
|
import xarray as xr
|
11
|
-
from scipy import stats
|
12
11
|
|
13
12
|
# Only import if running type checking
|
14
13
|
if TYPE_CHECKING:
|
15
|
-
import xarray as xr
|
16
14
|
from odc.geo.geobox import GeoBox
|
17
15
|
|
18
|
-
from .eo import _standardise_inputs, pixel_tides, tag_tides
|
19
|
-
from .model import model_tides
|
16
|
+
from .eo import _pixel_tides_resample, _resample_chunks, _standardise_inputs, pixel_tides, tag_tides
|
20
17
|
from .utils import DatetimeLike
|
21
18
|
|
22
19
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
20
|
+
def _tide_statistics(obs_tides, all_tides, min_max_q=(0.0, 1.0), dim="time"):
    """
    Compute tide statistics and satellite-observed tide bias metrics
    (Bishop-Taylor et al. 2018) by comparing observed tides against
    the full set of modelled tides.

    Parameters
    ----------
    obs_tides : xarray.DataArray
        Tide heights modelled at the satellite observation times.
    all_tides : xarray.DataArray
        Tide heights modelled at regular intervals across the full
        period of interest.
    min_max_q : tuple, optional
        Quantiles used as the "lowest" and "highest" tide; the first
        and last entries are used. Defaults to `(0.0, 1.0)`, i.e. the
        minimum and maximum.
    dim : str, optional
        Name of the dimension to reduce over. Defaults to "time".

    Returns
    -------
    stats_ds : xarray.Dataset
        Dataset containing the variables `mot`, `mat`, `hot`, `hat`,
        `lot`, `lat`, `otr`, `tr`, `spread`, `offset_low` and
        `offset_high`.
    """

    def _low_high(tides):
        # First and last requested quantiles act as the low/high tide;
        # `drop=True` removes the leftover scalar "quantile" coordinate
        quantiles = tides.quantile(q=min_max_q, dim=dim).astype("float32")
        return quantiles.isel(quantile=0, drop=True), quantiles.isel(quantile=-1, drop=True)

    # Lowest/highest observed tides, and lowest/highest modelled tides
    lot, hot = _low_high(obs_tides)
    lat, hat = _low_high(all_tides)

    # Observed and modelled tidal ranges
    otr = hot - lot
    tr = hat - lat

    # NOTE: offsets are signed; they will be negative wherever the
    # observed extreme lies outside the modelled extreme
    statistics = {
        "mot": obs_tides.mean(dim=dim),
        "mat": all_tides.mean(dim=dim),
        "hot": hot,
        "hat": hat,
        "lot": lot,
        "lat": lat,
        "otr": otr,
        "tr": tr,
        "spread": otr / tr,
        "offset_low": (lot - lat) / tr,
        "offset_high": (hat - hot) / tr,
    }

    # Name each array after its statistic and combine into one dataset
    return xr.merge(
        [array.rename(name) for name, array in statistics.items()],
        compat="override",
    )
|
65
|
+
|
66
|
+
|
67
|
+
def _stats_plain_english(mot, mat, hot, hat, lot, lat, otr, tr, spread, offset_low, offset_high):
    """
    Print a plain-English summary of tide statistics and
    satellite-observed tide biases for a single location.

    Parameters
    ----------
    mot, mat : float
        Mean observed and mean modelled astronomical tide height (m).
    hot, hat : float
        Highest observed and highest modelled astronomical tide (m).
    lot, lat : float
        Lowest observed and lowest modelled astronomical tide (m).
    otr, tr : float
        Observed and modelled astronomical tide range (m).
    spread : float
        Proportion of the modelled tide range that was observed.
    offset_low, offset_high : float
        Proportion of the lowest/highest part of the modelled tide
        range that was never observed.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    # Direction of the observed mean relative to the modelled mean
    observed_is_higher = mot > mat
    mean_diff = "higher" if observed_is_higher else "lower"
    mean_diff_icon = "⬆️" if observed_is_higher else "⬇️"

    # Traffic-light icon for spread: larger is better
    if spread >= 0.9:
        spread_icon = "🟢"
    elif spread > 0.7:
        spread_icon = "🟡"
    else:
        spread_icon = "🔴"

    # Traffic-light icon for offsets: smaller is better
    def _offset_icon(offset):
        if offset <= 0.1:
            return "🟢"
        if offset < 0.2:
            return "🟡"
        return "🔴"

    low_tide_icon = _offset_icon(offset_low)
    high_tide_icon = _offset_icon(offset_high)

    # Print summary
    print(f"\n\n🌊 Modelled astronomical tide range: {tr:.2f} m ({lat:.2f} to {hat:.2f} m).")
    print(f"🛰️ Observed tide range: {otr:.2f} m ({lot:.2f} to {hot:.2f} m).\n")
    print(f"{spread_icon} {spread:.0%} of the modelled astronomical tide range was observed at this location.")
    print(
        f"{high_tide_icon} The highest {offset_high:.0%} ({offset_high * tr:.2f} m) of the tide range was never observed."
    )
    print(
        f"{low_tide_icon} The lowest {offset_low:.0%} ({offset_low * tr:.2f} m) of the tide range was never observed.\n"
    )
    print(f"🌊 Mean modelled astronomical tide height: {mat:.2f} m.")
    print(f"🛰️ Mean observed tide height: {mot:.2f} m.")
    # Report the magnitude only: the direction is already carried by
    # "higher"/"lower", so a signed value would read as "-0.50 m lower"
    print(
        f"{mean_diff_icon} The mean observed tide height was {abs(mot - mat):.2f} m {mean_diff} than the mean modelled astronomical tide height."
    )
|
90
|
+
|
91
|
+
|
92
|
+
def _stats_figure(
|
93
|
+
all_tides_da, obs_tides_da, hot, hat, lot, lat, spread, offset_low, offset_high, plot_var, point_col=None
|
37
94
|
):
|
38
95
|
"""
|
39
96
|
Plot tide bias statistics as a figure, including both
|
40
97
|
satellite observations and all modelled tides.
|
41
98
|
"""
|
42
99
|
|
43
|
-
# Create plot and add all
|
100
|
+
# Create plot and add all modelled tides
|
44
101
|
fig, ax = plt.subplots(figsize=(10, 6))
|
45
|
-
|
102
|
+
all_tides_da.plot(ax=ax, alpha=0.4, label="Modelled tides")
|
46
103
|
|
47
|
-
#
|
48
|
-
if
|
104
|
+
# Loop through custom variable values if provided
|
105
|
+
if plot_var is not None:
|
49
106
|
# Create a list of marker styles
|
50
107
|
markers = [
|
51
108
|
"o",
|
@@ -65,23 +122,32 @@ def _plot_biases(
|
|
65
122
|
"|",
|
66
123
|
"_",
|
67
124
|
]
|
68
|
-
|
69
|
-
|
125
|
+
|
126
|
+
# Sort values to allow correct grouping
|
127
|
+
obs_tides_da = obs_tides_da.sortby("time")
|
128
|
+
plot_var = plot_var.sortby("time")
|
129
|
+
|
130
|
+
# Iterate and plot each group
|
131
|
+
for i, (label, group) in enumerate(obs_tides_da.groupby(plot_var)):
|
132
|
+
group.plot.line(
|
70
133
|
ax=ax,
|
71
134
|
linewidth=0.0,
|
72
|
-
color=
|
135
|
+
color=point_col,
|
73
136
|
marker=markers[i % len(markers)],
|
74
|
-
|
75
|
-
|
137
|
+
label=label,
|
138
|
+
markeredgecolor="black",
|
139
|
+
markeredgewidth=0.6,
|
76
140
|
)
|
141
|
+
|
77
142
|
# Otherwise, plot all data at once
|
78
143
|
else:
|
79
144
|
obs_tides_da.plot.line(
|
80
145
|
ax=ax,
|
81
146
|
marker="o",
|
82
147
|
linewidth=0.0,
|
83
|
-
color="black",
|
84
|
-
|
148
|
+
color="black" if point_col is None else point_col,
|
149
|
+
markeredgecolor="black",
|
150
|
+
markeredgewidth=0.6,
|
85
151
|
label="Satellite observations",
|
86
152
|
)
|
87
153
|
|
@@ -95,15 +161,6 @@ def _plot_biases(
|
|
95
161
|
)
|
96
162
|
ax.set_title("")
|
97
163
|
|
98
|
-
# Add linear regression line
|
99
|
-
if obs_linreg is not None:
|
100
|
-
ax.plot(
|
101
|
-
obs_tides_da.time.isel(time=[0, -1]),
|
102
|
-
obs_linreg.intercept + obs_linreg.slope * obs_x[[0, -1]],
|
103
|
-
"r",
|
104
|
-
label="fitted line",
|
105
|
-
)
|
106
|
-
|
107
164
|
# Add horizontal lines for spread/offsets
|
108
165
|
ax.axhline(lot, color="black", linestyle=":", linewidth=1)
|
109
166
|
ax.axhline(hot, color="black", linestyle=":", linewidth=1)
|
@@ -113,17 +170,17 @@ def _plot_biases(
|
|
113
170
|
# Add text annotations for spread/offsets
|
114
171
|
ax.annotate(
|
115
172
|
f" High tide\n offset ({offset_high:.0%})",
|
116
|
-
xy=(
|
173
|
+
xy=(all_tides_da.time.max(), np.mean([hat, hot])),
|
117
174
|
va="center",
|
118
175
|
)
|
119
176
|
ax.annotate(
|
120
177
|
f" Spread\n ({spread:.0%})",
|
121
|
-
xy=(
|
178
|
+
xy=(all_tides_da.time.max(), np.mean([lot, hot])),
|
122
179
|
va="center",
|
123
180
|
)
|
124
181
|
ax.annotate(
|
125
182
|
f" Low tide\n offset ({offset_low:.0%})",
|
126
|
-
xy=(
|
183
|
+
xy=(all_tides_da.time.max(), np.mean([lat, lot])),
|
127
184
|
)
|
128
185
|
|
129
186
|
# Remove top right axes and add labels
|
@@ -145,23 +202,25 @@ def tide_stats(
|
|
145
202
|
tidepost_lon: float | None = None,
|
146
203
|
plain_english: bool = True,
|
147
204
|
plot: bool = True,
|
148
|
-
|
205
|
+
plot_var: str | None = None,
|
206
|
+
point_col: str | None = None,
|
149
207
|
modelled_freq: str = "3h",
|
150
|
-
linear_reg: bool = False,
|
151
208
|
min_max_q: tuple = (0.0, 1.0),
|
152
209
|
round_stats: int = 3,
|
153
|
-
**
|
210
|
+
**tag_tides_kwargs,
|
154
211
|
) -> pd.Series:
|
155
212
|
"""
|
156
213
|
Takes a multi-dimensional dataset and generates tide statistics
|
157
214
|
and satellite-observed tide bias metrics, calculated based on
|
158
|
-
every timestep in the
|
215
|
+
every timestep in the satellite data and the geographic centroid
|
159
216
|
of the imagery.
|
160
217
|
|
161
218
|
By comparing the subset of tides observed by satellites
|
162
219
|
against the full astronomical tidal range, we can evaluate
|
163
220
|
whether the tides observed by satellites are biased
|
164
|
-
(e.g. fail to observe either the highest or lowest tides)
|
221
|
+
(e.g. fail to observe either the highest or lowest tides) due
|
222
|
+
to tide aliasing interactions with sun-synchronous satellite
|
223
|
+
overpasses.
|
165
224
|
|
166
225
|
For more information about the tidal statistics computed by this
|
167
226
|
function, refer to Figure 8 in Bishop-Taylor et al. 2018:
|
@@ -181,10 +240,13 @@ def tide_stats(
|
|
181
240
|
be used to provide a custom set of times. Accepts any format
|
182
241
|
that can be converted by `pandas.to_datetime()`. For example:
|
183
242
|
`time=pd.date_range(start="2000", end="2001", freq="5h")`
|
184
|
-
model : str, optional
|
185
|
-
The tide model to use to model tides.
|
186
|
-
|
187
|
-
`
|
243
|
+
model : str or list of str, optional
|
244
|
+
The tide model (or list of models) to use to model tides.
|
245
|
+
If a list is provided, the resulting statistics will be
|
246
|
+
returned as a `pandas.DataFrame`; otherwise a `pandas.Series`.
|
247
|
+
Defaults to "EOT20"; specify "all" to use all models available
|
248
|
+
in `directory`. For a full list of available and supported
|
249
|
+
models, run `eo_tides.model.list_models`.
|
188
250
|
directory : str, optional
|
189
251
|
The directory containing tide model data files. If no path is
|
190
252
|
provided, this will default to the environment variable
|
@@ -198,26 +260,25 @@ def tide_stats(
|
|
198
260
|
location.
|
199
261
|
plain_english : bool, optional
|
200
262
|
An optional boolean indicating whether to print a plain english
|
201
|
-
version of the tidal statistics to the screen. Defaults to True
|
263
|
+
version of the tidal statistics to the screen. Defaults to True;
|
264
|
+
only supported when a single tide model is passed to `model`.
|
202
265
|
plot : bool, optional
|
203
266
|
An optional boolean indicating whether to plot how satellite-
|
204
267
|
observed tide heights compare against the full tidal range.
|
205
|
-
Defaults to True
|
206
|
-
|
268
|
+
Defaults to True; only supported when a single tide model is
|
269
|
+
passed to `model`.
|
270
|
+
plot_var : str, optional
|
207
271
|
Optional name of a coordinate, dimension or variable in the array
|
208
272
|
that will be used to plot observations with unique symbols.
|
209
273
|
Defaults to None, which will plot all observations as circles.
|
274
|
+
point_col : str, optional
|
275
|
+
Colour used to plot points on the graph. Defaults to None which
|
276
|
+
will automatically select colours.
|
210
277
|
modelled_freq : str, optional
|
211
278
|
An optional string giving the frequency at which to model tides
|
212
279
|
when computing the full modelled tidal range. Defaults to '3h',
|
213
280
|
which computes a tide height for every three hours across the
|
214
281
|
temporal extent of `data`.
|
215
|
-
linear_reg: bool, optional
|
216
|
-
Whether to return linear regression statistics that assess
|
217
|
-
whether satellite-observed tides show any decreasing or
|
218
|
-
increasing trends over time. This may indicate whether your
|
219
|
-
satellite data may produce misleading trends based on uneven
|
220
|
-
sampling of the local tide regime.
|
221
282
|
min_max_q : tuple, optional
|
222
283
|
Quantiles used to calculate max and min observed and modelled
|
223
284
|
astronomical tides. By default `(0.0, 1.0)` which is equivalent
|
@@ -226,17 +287,15 @@ def tide_stats(
|
|
226
287
|
round_stats : int, optional
|
227
288
|
The number of decimal places used to round the output statistics.
|
228
289
|
Defaults to 3.
|
229
|
-
**
|
230
|
-
Optional parameters passed to the `eo_tides.
|
231
|
-
function
|
232
|
-
|
233
|
-
`np.inf`), `crop` (whether to crop tide model constituent files
|
234
|
-
on-the-fly to improve performance) etc.
|
290
|
+
**tag_tides_kwargs :
|
291
|
+
Optional parameters passed to the `eo_tides.eo.tag_tides`
|
292
|
+
function that is used to model tides for each observed and
|
293
|
+
modelled timestep.
|
235
294
|
|
236
295
|
Returns
|
237
296
|
-------
|
238
|
-
stats_df : pandas.Series
|
239
|
-
A
|
297
|
+
stats_df : pandas.Series or pandas.DataFrame
|
298
|
+
A pandas object containing the following statistics:
|
240
299
|
|
241
300
|
- `y`: latitude used for modelling tide heights
|
242
301
|
- `x`: longitude used for modelling tide heights
|
@@ -251,158 +310,92 @@ def tide_stats(
|
|
251
310
|
- `spread`: proportion of the full modelled tidal range observed by the satellite
|
252
311
|
- `offset_low`: proportion of the lowest tides never observed by the satellite
|
253
312
|
- `offset_high`: proportion of the highest tides never observed by the satellite
|
254
|
-
|
255
|
-
If `linear_reg = True`, the output will also contain:
|
256
|
-
|
257
|
-
- `observed_slope`: slope of any relationship between observed tide heights and time
|
258
|
-
- `observed_pval`: significance/p-value of any relationship between observed tide heights and time
|
259
313
|
"""
|
314
|
+
|
260
315
|
# Standardise data inputs, time and models
|
261
|
-
gbox,
|
316
|
+
gbox, obs_times = _standardise_inputs(data, time)
|
262
317
|
|
263
|
-
#
|
264
|
-
|
265
|
-
|
318
|
+
# Generate range of times covering entire period of satellite record
|
319
|
+
assert obs_times is not None
|
320
|
+
all_times = pd.date_range(
|
321
|
+
start=obs_times.min().item(),
|
322
|
+
end=obs_times.max().item(),
|
323
|
+
freq=modelled_freq,
|
324
|
+
)
|
266
325
|
|
267
326
|
# If custom tide modelling locations are not provided, use the
|
268
327
|
# dataset centroid
|
269
328
|
if not tidepost_lat or not tidepost_lon:
|
270
329
|
tidepost_lon, tidepost_lat = gbox.geographic_extent.centroid.coords[0]
|
271
330
|
|
272
|
-
# Model tides for
|
273
|
-
assert time_coords is not None
|
331
|
+
# Model tides for observed timesteps
|
274
332
|
obs_tides_da = tag_tides(
|
275
333
|
gbox,
|
276
|
-
time=
|
334
|
+
time=obs_times,
|
277
335
|
model=model,
|
278
336
|
directory=directory,
|
279
337
|
tidepost_lat=tidepost_lat, # type: ignore
|
280
338
|
tidepost_lon=tidepost_lon, # type: ignore
|
281
|
-
|
282
|
-
**model_tides_kwargs,
|
339
|
+
**tag_tides_kwargs,
|
283
340
|
)
|
284
|
-
if isinstance(data, (xr.Dataset, xr.DataArray)):
|
285
|
-
obs_tides_da = obs_tides_da.reindex_like(data)
|
286
341
|
|
287
|
-
#
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
freq=modelled_freq,
|
292
|
-
)
|
293
|
-
|
294
|
-
# Model tides for each timestep
|
295
|
-
all_tides_df = model_tides(
|
296
|
-
x=tidepost_lon, # type: ignore
|
297
|
-
y=tidepost_lat, # type: ignore
|
298
|
-
time=all_timerange,
|
342
|
+
# Model tides for all modelled timesteps
|
343
|
+
all_tides_da = tag_tides(
|
344
|
+
gbox,
|
345
|
+
time=all_times,
|
299
346
|
model=model,
|
300
347
|
directory=directory,
|
301
|
-
|
302
|
-
|
348
|
+
tidepost_lat=tidepost_lat, # type: ignore
|
349
|
+
tidepost_lon=tidepost_lon, # type: ignore
|
350
|
+
**tag_tides_kwargs,
|
303
351
|
)
|
304
352
|
|
305
|
-
#
|
306
|
-
|
307
|
-
all_mean = all_tides_df.tide_height.mean()
|
308
|
-
obs_min, obs_max = obs_tides_da.quantile(min_max_q).values
|
309
|
-
all_min, all_max = all_tides_df.tide_height.quantile(min_max_q).values
|
353
|
+
# Calculate statistics
|
354
|
+
stats_ds = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)
|
310
355
|
|
311
|
-
#
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
# Calculate Bishop-Taylor et al. 2018 tidal metrics
|
316
|
-
spread = obs_range / all_range
|
317
|
-
low_tide_offset_m = abs(all_min - obs_min)
|
318
|
-
high_tide_offset_m = abs(all_max - obs_max)
|
319
|
-
low_tide_offset = low_tide_offset_m / all_range
|
320
|
-
high_tide_offset = high_tide_offset_m / all_range
|
321
|
-
|
322
|
-
# Plain text descriptors
|
323
|
-
mean_diff = "higher" if obs_mean > all_mean else "lower"
|
324
|
-
mean_diff_icon = "⬆️" if obs_mean > all_mean else "⬇️"
|
325
|
-
spread_icon = "🟢" if spread >= 0.9 else "🟡" if 0.7 < spread <= 0.9 else "🔴"
|
326
|
-
low_tide_icon = "🟢" if low_tide_offset <= 0.1 else "🟡" if 0.1 <= low_tide_offset < 0.2 else "🔴"
|
327
|
-
high_tide_icon = "🟢" if high_tide_offset <= 0.1 else "🟡" if 0.1 <= high_tide_offset < 0.2 else "🔴"
|
356
|
+
# Convert to pandas and add tide post coordinates
|
357
|
+
stats_df = stats_ds.to_pandas().astype("float32")
|
358
|
+
stats_df["x"] = tidepost_lon
|
359
|
+
stats_df["y"] = tidepost_lat
|
328
360
|
|
329
|
-
#
|
330
|
-
|
331
|
-
|
332
|
-
)
|
333
|
-
obs_y = obs_tides_da.values.astype(np.float32)
|
334
|
-
|
335
|
-
# Compute linear regression
|
336
|
-
obs_linreg = stats.linregress(x=obs_x, y=obs_y)
|
361
|
+
# Convert coordinates to index if dataframe
|
362
|
+
if isinstance(stats_df, pd.DataFrame):
|
363
|
+
stats_df = stats_df.set_index(["x", "y"], append=True)
|
337
364
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
365
|
+
# If a series, print and plot summaries
|
366
|
+
else:
|
367
|
+
if plain_english:
|
368
|
+
_stats_plain_english(
|
369
|
+
mot=stats_df.mot,
|
370
|
+
mat=stats_df.mat,
|
371
|
+
hot=stats_df.hot,
|
372
|
+
hat=stats_df.hat,
|
373
|
+
lot=stats_df.lot,
|
374
|
+
lat=stats_df.lat,
|
375
|
+
otr=stats_df.otr,
|
376
|
+
tr=stats_df.tr,
|
377
|
+
spread=stats_df.spread,
|
378
|
+
offset_low=stats_df.offset_low,
|
379
|
+
offset_high=stats_df.offset_high,
|
380
|
+
)
|
353
381
|
|
354
|
-
if
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
lot=obs_min,
|
369
|
-
hat=all_max,
|
370
|
-
hot=obs_max,
|
371
|
-
offset_low=low_tide_offset,
|
372
|
-
offset_high=high_tide_offset,
|
373
|
-
spread=spread,
|
374
|
-
plot_col=data[plot_col] if plot_col else None,
|
375
|
-
obs_linreg=obs_linreg if linear_reg else None,
|
376
|
-
obs_x=obs_x,
|
377
|
-
all_timerange=all_timerange,
|
378
|
-
)
|
382
|
+
if plot:
|
383
|
+
_stats_figure(
|
384
|
+
all_tides_da=all_tides_da,
|
385
|
+
obs_tides_da=obs_tides_da,
|
386
|
+
hot=stats_df.hot,
|
387
|
+
hat=stats_df.hat,
|
388
|
+
lot=stats_df.lot,
|
389
|
+
lat=stats_df.lat,
|
390
|
+
spread=stats_df.spread,
|
391
|
+
offset_low=stats_df.offset_low,
|
392
|
+
offset_high=stats_df.offset_high,
|
393
|
+
plot_var=data[plot_var] if plot_var else None,
|
394
|
+
point_col=point_col,
|
395
|
+
)
|
379
396
|
|
380
|
-
#
|
381
|
-
|
382
|
-
"y": tidepost_lat,
|
383
|
-
"x": tidepost_lon,
|
384
|
-
"mot": obs_mean,
|
385
|
-
"mat": all_mean,
|
386
|
-
"lot": obs_min,
|
387
|
-
"lat": all_min,
|
388
|
-
"hot": obs_max,
|
389
|
-
"hat": all_max,
|
390
|
-
"otr": obs_range,
|
391
|
-
"tr": all_range,
|
392
|
-
"spread": spread,
|
393
|
-
"offset_low": low_tide_offset,
|
394
|
-
"offset_high": high_tide_offset,
|
395
|
-
}
|
396
|
-
|
397
|
-
if linear_reg:
|
398
|
-
output_stats.update({
|
399
|
-
"observed_slope": obs_linreg.slope,
|
400
|
-
"observed_pval": obs_linreg.pvalue,
|
401
|
-
})
|
402
|
-
|
403
|
-
# Return pandas data
|
404
|
-
stats_df = pd.Series(output_stats).round(round_stats)
|
405
|
-
return stats_df
|
397
|
+
# Return in Pandas format
|
398
|
+
return stats_df.round(round_stats)
|
406
399
|
|
407
400
|
|
408
401
|
def pixel_stats(
|
@@ -410,26 +403,31 @@ def pixel_stats(
|
|
410
403
|
time: DatetimeLike | None = None,
|
411
404
|
model: str | list[str] = "EOT20",
|
412
405
|
directory: str | os.PathLike | None = None,
|
413
|
-
resample: bool =
|
406
|
+
resample: bool = True,
|
414
407
|
modelled_freq: str = "3h",
|
415
408
|
min_max_q: tuple[float, float] = (0.0, 1.0),
|
409
|
+
resample_method: str = "bilinear",
|
410
|
+
dask_chunks: tuple[float, float] | None = None,
|
411
|
+
dask_compute: bool = True,
|
416
412
|
extrapolate: bool = True,
|
417
413
|
cutoff: float = 10,
|
418
414
|
**pixel_tides_kwargs,
|
419
415
|
) -> xr.Dataset:
|
420
416
|
"""
|
421
|
-
Takes a multi-dimensional dataset and generate
|
417
|
+
Takes a multi-dimensional dataset and generates spatial
|
422
418
|
tide statistics and satellite-observed tide bias metrics,
|
423
|
-
calculated based on every timestep in the
|
419
|
+
calculated based on every timestep in the satellite data and
|
424
420
|
modelled into the spatial extent of the imagery.
|
425
421
|
|
426
422
|
By comparing the subset of tides observed by satellites
|
427
423
|
against the full astronomical tidal range, we can evaluate
|
428
424
|
whether the tides observed by satellites are biased
|
429
|
-
(e.g. fail to observe either the highest or lowest tides)
|
425
|
+
(e.g. fail to observe either the highest or lowest tides)
|
426
|
+
due to tide aliasing interactions with sun-synchronous satellite
|
427
|
+
overpasses.
|
430
428
|
|
431
429
|
Compared to `tide_stats`, this function models tide metrics
|
432
|
-
spatially to produce a two-dimensional output.
|
430
|
+
spatially to produce a two-dimensional output for each statistic.
|
433
431
|
|
434
432
|
For more information about the tidal statistics computed by this
|
435
433
|
function, refer to Figure 8 in Bishop-Taylor et al. 2018:
|
@@ -439,7 +437,7 @@ def pixel_stats(
|
|
439
437
|
----------
|
440
438
|
data : xarray.Dataset or xarray.DataArray or odc.geo.geobox.GeoBox
|
441
439
|
A multi-dimensional dataset or GeoBox pixel grid that will
|
442
|
-
be used to calculate
|
440
|
+
be used to calculate spatial tide statistics. If `data`
|
443
441
|
is an xarray object, it should include a "time" dimension.
|
444
442
|
If no "time" dimension exists or if `data` is a GeoBox,
|
445
443
|
then times must be passed using the `time` parameter.
|
@@ -450,10 +448,12 @@ def pixel_stats(
|
|
450
448
|
that can be converted by `pandas.to_datetime()`. For example:
|
451
449
|
`time=pd.date_range(start="2000", end="2001", freq="5h")`
|
452
450
|
model : str or list of str, optional
|
453
|
-
The tide model (or models) to use to model tides.
|
454
|
-
provided, a new "tide_model" dimension will be
|
455
|
-
Defaults to "EOT20";
|
456
|
-
models
|
451
|
+
The tide model (or list of models) to use to model tides.
|
452
|
+
If a list is provided, a new "tide_model" dimension will be
|
453
|
+
added to the `xarray.Dataset` output. Defaults to "EOT20";
|
454
|
+
specify "all" to use all models available in `directory`.
|
455
|
+
For a full list of available and supported models, run
|
456
|
+
`eo_tides.model.list_models`.
|
457
457
|
directory : str, optional
|
458
458
|
The directory containing tide model data files. If no path is
|
459
459
|
provided, this will default to the environment variable
|
@@ -463,9 +463,9 @@ def pixel_stats(
|
|
463
463
|
(<https://geoscienceaustralia.github.io/eo-tides/setup/>).
|
464
464
|
resample : bool, optional
|
465
465
|
Whether to resample tide statistics back into `data`'s original
|
466
|
-
higher resolution grid.
|
467
|
-
lower-resolution statistics
|
468
|
-
|
466
|
+
higher resolution grid. Set this to `False` if you want to return
|
467
|
+
lower-resolution tide statistics (which can be useful for
|
468
|
+
assessing tide biases across large spatial extents).
|
469
469
|
modelled_freq : str, optional
|
470
470
|
An optional string giving the frequency at which to model tides
|
471
471
|
when computing the full modelled tidal range. Defaults to '3h',
|
@@ -474,10 +474,25 @@ def pixel_stats(
|
|
474
474
|
min_max_q : tuple, optional
|
475
475
|
Quantiles used to calculate max and min observed and modelled
|
476
476
|
astronomical tides. By default `(0.0, 1.0)` which is equivalent
|
477
|
-
to minimum and maximum;
|
478
|
-
robust to outliers
|
477
|
+
to minimum and maximum; for a softer threshold that is more
|
478
|
+
robust to outliers use e.g. `(0.1, 0.9)`.
|
479
|
+
resample_method : str, optional
|
480
|
+
If resampling is requested (see `resample` above), use this
|
481
|
+
resampling method when resampling from low resolution to high
|
482
|
+
resolution pixels. Defaults to "bilinear"; valid options include
|
483
|
+
"nearest", "cubic", "min", "max", "average" etc.
|
484
|
+
dask_chunks : tuple of float, optional
|
485
|
+
Can be used to configure custom Dask chunking for the final
|
486
|
+
resampling step. By default, chunks will be automatically set
|
487
|
+
to match y/x chunks from `data` if they exist; otherwise chunks
|
488
|
+
will be chosen to cover the entire y/x extent of the dataset.
|
489
|
+
For custom chunks, provide a tuple in the form `(y, x)`, e.g.
|
490
|
+
`(2048, 2048)`.
|
491
|
+
dask_compute : bool, optional
|
492
|
+
Whether to compute results of the resampling step using Dask.
|
493
|
+
If False, `stats_ds` will be returned as a Dask-enabled array.
|
479
494
|
extrapolate : bool, optional
|
480
|
-
Whether to extrapolate tides
|
495
|
+
Whether to extrapolate tides into x and y coordinates outside of
|
481
496
|
the valid tide modelling domain using nearest-neighbor. Defaults
|
482
497
|
to True.
|
483
498
|
cutoff : float, optional
|
@@ -492,6 +507,8 @@ def pixel_stats(
|
|
492
507
|
stats_ds : xarray.Dataset
|
493
508
|
An `xarray.Dataset` containing the following statistics as two-dimensional data variables:
|
494
509
|
|
510
|
+
- `mot`: mean tide height observed by the satellite (metres)
|
511
|
+
- `mat`: mean modelled astronomical tide height (metres)
|
495
512
|
- `lot`: minimum tide height observed by the satellite (metres)
|
496
513
|
- `lat`: minimum tide height from modelled astronomical tidal range (metres)
|
497
514
|
- `hot`: maximum tide height observed by the satellite (metres)
|
@@ -503,90 +520,62 @@ def pixel_stats(
|
|
503
520
|
- `offset_high`: proportion of the highest tides never observed by the satellite
|
504
521
|
|
505
522
|
"""
|
523
|
+
|
506
524
|
# Standardise data inputs, time and models
|
507
|
-
gbox,
|
525
|
+
gbox, obs_times = _standardise_inputs(data, time)
|
526
|
+
dask_chunks = _resample_chunks(data, dask_chunks)
|
508
527
|
model = [model] if isinstance(model, str) else model
|
509
528
|
|
510
|
-
#
|
511
|
-
assert
|
512
|
-
|
529
|
+
# Generate range of times covering entire period of satellite record
|
530
|
+
assert obs_times is not None
|
531
|
+
all_times = pd.date_range(
|
532
|
+
start=obs_times.min().item(),
|
533
|
+
end=obs_times.max().item(),
|
534
|
+
freq=modelled_freq,
|
535
|
+
)
|
536
|
+
|
537
|
+
# Model tides for observed timesteps
|
538
|
+
obs_tides_da = pixel_tides(
|
513
539
|
gbox,
|
514
|
-
time=
|
515
|
-
resample=False,
|
540
|
+
time=obs_times,
|
516
541
|
model=model,
|
517
542
|
directory=directory,
|
518
|
-
|
543
|
+
resample=False,
|
519
544
|
extrapolate=extrapolate,
|
520
545
|
cutoff=cutoff,
|
521
546
|
**pixel_tides_kwargs,
|
522
547
|
)
|
523
548
|
|
524
|
-
#
|
525
|
-
|
526
|
-
start=time_coords.min().item(),
|
527
|
-
end=time_coords.max().item(),
|
528
|
-
freq=modelled_freq,
|
529
|
-
)
|
530
|
-
|
531
|
-
# Model all tides
|
532
|
-
all_tides = pixel_tides(
|
549
|
+
# Model tides for all modelled timesteps
|
550
|
+
all_tides_da = pixel_tides(
|
533
551
|
gbox,
|
534
|
-
time=
|
552
|
+
time=all_times,
|
535
553
|
model=model,
|
536
554
|
directory=directory,
|
537
|
-
calculate_quantiles=min_max_q,
|
538
555
|
resample=False,
|
539
556
|
extrapolate=extrapolate,
|
540
557
|
cutoff=cutoff,
|
541
558
|
**pixel_tides_kwargs,
|
542
559
|
)
|
543
560
|
|
544
|
-
#
|
545
|
-
|
546
|
-
# mot = obs_tides.mean(dim="time")
|
547
|
-
# mat = all_tides.mean(dim="time")
|
548
|
-
|
549
|
-
# Calculate min and max tides
|
550
|
-
lot = obs_tides.isel(quantile=0)
|
551
|
-
hot = obs_tides.isel(quantile=-1)
|
552
|
-
lat = all_tides.isel(quantile=0)
|
553
|
-
hat = all_tides.isel(quantile=-1)
|
561
|
+
# Calculate statistics
|
562
|
+
stats_lowres = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)
|
554
563
|
|
555
|
-
#
|
556
|
-
|
557
|
-
tr = hat - lat
|
564
|
+
# Assign CRS and geobox to allow reprojection
|
565
|
+
stats_lowres = stats_lowres.odc.assign_crs(crs=gbox.crs)
|
558
566
|
|
559
|
-
#
|
560
|
-
spread = otr / tr
|
561
|
-
offset_low_m = abs(lat - lot)
|
562
|
-
offset_high_m = abs(hat - hot)
|
563
|
-
offset_low = offset_low_m / tr
|
564
|
-
offset_high = offset_high_m / tr
|
565
|
-
|
566
|
-
# Combine into a single dataset
|
567
|
-
stats_ds = (
|
568
|
-
xr.merge(
|
569
|
-
[
|
570
|
-
# mot.rename("mot"),
|
571
|
-
# mat.rename("mat"),
|
572
|
-
hot.rename("hot"),
|
573
|
-
hat.rename("hat"),
|
574
|
-
lot.rename("lot"),
|
575
|
-
lat.rename("lat"),
|
576
|
-
otr.rename("otr"),
|
577
|
-
tr.rename("tr"),
|
578
|
-
spread.rename("spread"),
|
579
|
-
offset_low.rename("offset_low"),
|
580
|
-
offset_high.rename("offset_high"),
|
581
|
-
],
|
582
|
-
compat="override",
|
583
|
-
)
|
584
|
-
.drop_vars("quantile")
|
585
|
-
.odc.assign_crs(crs=gbox.crs)
|
586
|
-
)
|
587
|
-
|
588
|
-
# Optionally resample into the original pixel grid of `data`
|
567
|
+
# Reproject statistics into original high resolution grid
|
589
568
|
if resample:
|
590
|
-
|
569
|
+
print("Reprojecting statistics into original resolution")
|
570
|
+
stats_highres = _pixel_tides_resample(
|
571
|
+
stats_lowres,
|
572
|
+
gbox,
|
573
|
+
resample_method,
|
574
|
+
dask_chunks,
|
575
|
+
dask_compute,
|
576
|
+
None,
|
577
|
+
)
|
578
|
+
return stats_highres
|
591
579
|
|
592
|
-
|
580
|
+
print("Returning low resolution statistics array")
|
581
|
+
return stats_lowres
|