eo-tides 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/stats.py CHANGED
@@ -8,44 +8,101 @@ import matplotlib.pyplot as plt
8
8
  import numpy as np
9
9
  import pandas as pd
10
10
  import xarray as xr
11
- from scipy import stats
12
11
 
13
12
  # Only import if running type checking
14
13
  if TYPE_CHECKING:
15
- import xarray as xr
16
14
  from odc.geo.geobox import GeoBox
17
15
 
18
- from .eo import _standardise_inputs, pixel_tides, tag_tides
19
- from .model import model_tides
16
+ from .eo import _pixel_tides_resample, _resample_chunks, _standardise_inputs, pixel_tides, tag_tides
20
17
  from .utils import DatetimeLike
21
18
 
22
19
 
23
- def _plot_biases(
24
- all_tides_df,
25
- obs_tides_da,
26
- lat,
27
- lot,
28
- hat,
29
- hot,
30
- offset_low,
31
- offset_high,
32
- spread,
33
- plot_col,
34
- obs_linreg,
35
- obs_x,
36
- all_timerange,
20
+ def _tide_statistics(obs_tides, all_tides, min_max_q=(0.0, 1.0), dim="time"):
21
+ # Calculate means of observed and modelled tides
22
+ mot = obs_tides.mean(dim=dim)
23
+ mat = all_tides.mean(dim=dim)
24
+
25
+ # Identify highest and lowest observed tides
26
+ obs_tides_q = obs_tides.quantile(q=min_max_q, dim=dim).astype("float32")
27
+ lot = obs_tides_q.isel(quantile=0, drop=True)
28
+ hot = obs_tides_q.isel(quantile=-1, drop=True)
29
+
30
+ # Identify highest and lowest modelled tides
31
+ all_tides_q = all_tides.quantile(q=min_max_q, dim=dim).astype("float32")
32
+ lat = all_tides_q.isel(quantile=0, drop=True)
33
+ hat = all_tides_q.isel(quantile=-1, drop=True)
34
+
35
+ # Calculate tidal range
36
+ otr = hot - lot
37
+ tr = hat - lat
38
+
39
+ # Calculate Bishop-Taylor et al. 2018 tidal metrics
40
+ spread = otr / tr
41
+ offset_low_m = lot - lat
42
+ offset_high_m = hat - hot
43
+ offset_low = offset_low_m / tr
44
+ offset_high = offset_high_m / tr
45
+
46
+ # Combine into a single dataset
47
+ stats_ds = xr.merge(
48
+ [
49
+ mot.rename("mot"),
50
+ mat.rename("mat"),
51
+ hot.rename("hot"),
52
+ hat.rename("hat"),
53
+ lot.rename("lot"),
54
+ lat.rename("lat"),
55
+ otr.rename("otr"),
56
+ tr.rename("tr"),
57
+ spread.rename("spread"),
58
+ offset_low.rename("offset_low"),
59
+ offset_high.rename("offset_high"),
60
+ ],
61
+ compat="override",
62
+ )
63
+
64
+ return stats_ds
65
+
66
+
67
+ def _stats_plain_english(mot, mat, hot, hat, lot, lat, otr, tr, spread, offset_low, offset_high):
68
+ # Plain text descriptors
69
+ mean_diff = "higher" if mot > mat else "lower"
70
+ mean_diff_icon = "⬆️" if mot > mat else "⬇️"
71
+ spread_icon = "🟢" if spread >= 0.9 else "🟡" if 0.7 < spread <= 0.9 else "🔴"
72
+ low_tide_icon = "🟢" if offset_low <= 0.1 else "🟡" if 0.1 <= offset_low < 0.2 else "🔴"
73
+ high_tide_icon = "🟢" if offset_high <= 0.1 else "🟡" if 0.1 <= offset_high < 0.2 else "🔴"
74
+
75
+ # Print summary
76
+ print(f"\n\n🌊 Modelled astronomical tide range: {tr:.2f} m ({lat:.2f} to {hat:.2f} m).")
77
+ print(f"🛰️ Observed tide range: {otr:.2f} m ({lot:.2f} to {hot:.2f} m).\n")
78
+ print(f"{spread_icon} {spread:.0%} of the modelled astronomical tide range was observed at this location.")
79
+ print(
80
+ f"{high_tide_icon} The highest {offset_high:.0%} ({offset_high * tr:.2f} m) of the tide range was never observed."
81
+ )
82
+ print(
83
+ f"{low_tide_icon} The lowest {offset_low:.0%} ({offset_low * tr:.2f} m) of the tide range was never observed.\n"
84
+ )
85
+ print(f"🌊 Mean modelled astronomical tide height: {mat:.2f} m.")
86
+ print(f"🛰️ Mean observed tide height: {mot:.2f} m.")
87
+ print(
88
+ f"{mean_diff_icon} The mean observed tide height was {mot - mat:.2f} m {mean_diff} than the mean modelled astronomical tide height."
89
+ )
90
+
91
+
92
+ def _stats_figure(
93
+ all_tides_da, obs_tides_da, hot, hat, lot, lat, spread, offset_low, offset_high, plot_var, point_col=None
37
94
  ):
38
95
  """
39
96
  Plot tide bias statistics as a figure, including both
40
97
  satellite observations and all modelled tides.
41
98
  """
42
99
 
43
- # Create plot and add all time and observed tide data
100
+ # Create plot and add all modelled tides
44
101
  fig, ax = plt.subplots(figsize=(10, 6))
45
- all_tides_df.reset_index(["x", "y"]).tide_height.plot(ax=ax, alpha=0.4, label="Modelled tides")
102
+ all_tides_da.plot(ax=ax, alpha=0.4, label="Modelled tides")
46
103
 
47
- # Look through custom column values if provided
48
- if plot_col is not None:
104
+ # Loop through custom variable values if provided
105
+ if plot_var is not None:
49
106
  # Create a list of marker styles
50
107
  markers = [
51
108
  "o",
@@ -65,23 +122,32 @@ def _plot_biases(
65
122
  "|",
66
123
  "_",
67
124
  ]
68
- for i, value in enumerate(np.unique(plot_col)):
69
- obs_tides_da.sel(time=plot_col == value).plot.line(
125
+
126
+ # Sort values to allow correct grouping
127
+ obs_tides_da = obs_tides_da.sortby("time")
128
+ plot_var = plot_var.sortby("time")
129
+
130
+ # Iterate and plot each group
131
+ for i, (label, group) in enumerate(obs_tides_da.groupby(plot_var)):
132
+ group.plot.line(
70
133
  ax=ax,
71
134
  linewidth=0.0,
72
- color="black",
135
+ color=point_col,
73
136
  marker=markers[i % len(markers)],
74
- markersize=4,
75
- label=value,
137
+ label=label,
138
+ markeredgecolor="black",
139
+ markeredgewidth=0.6,
76
140
  )
141
+
77
142
  # Otherwise, plot all data at once
78
143
  else:
79
144
  obs_tides_da.plot.line(
80
145
  ax=ax,
81
146
  marker="o",
82
147
  linewidth=0.0,
83
- color="black",
84
- markersize=3.5,
148
+ color="black" if point_col is None else point_col,
149
+ markeredgecolor="black",
150
+ markeredgewidth=0.6,
85
151
  label="Satellite observations",
86
152
  )
87
153
 
@@ -95,15 +161,6 @@ def _plot_biases(
95
161
  )
96
162
  ax.set_title("")
97
163
 
98
- # Add linear regression line
99
- if obs_linreg is not None:
100
- ax.plot(
101
- obs_tides_da.time.isel(time=[0, -1]),
102
- obs_linreg.intercept + obs_linreg.slope * obs_x[[0, -1]],
103
- "r",
104
- label="fitted line",
105
- )
106
-
107
164
  # Add horizontal lines for spread/offsets
108
165
  ax.axhline(lot, color="black", linestyle=":", linewidth=1)
109
166
  ax.axhline(hot, color="black", linestyle=":", linewidth=1)
@@ -113,17 +170,17 @@ def _plot_biases(
113
170
  # Add text annotations for spread/offsets
114
171
  ax.annotate(
115
172
  f" High tide\n offset ({offset_high:.0%})",
116
- xy=(all_timerange.max(), np.mean([hat, hot])),
173
+ xy=(all_tides_da.time.max(), np.mean([hat, hot])),
117
174
  va="center",
118
175
  )
119
176
  ax.annotate(
120
177
  f" Spread\n ({spread:.0%})",
121
- xy=(all_timerange.max(), np.mean([lot, hot])),
178
+ xy=(all_tides_da.time.max(), np.mean([lot, hot])),
122
179
  va="center",
123
180
  )
124
181
  ax.annotate(
125
182
  f" Low tide\n offset ({offset_low:.0%})",
126
- xy=(all_timerange.max(), np.mean([lat, lot])),
183
+ xy=(all_tides_da.time.max(), np.mean([lat, lot])),
127
184
  )
128
185
 
129
186
  # Remove top right axes and add labels
@@ -145,23 +202,25 @@ def tide_stats(
145
202
  tidepost_lon: float | None = None,
146
203
  plain_english: bool = True,
147
204
  plot: bool = True,
148
- plot_col: str | None = None,
205
+ plot_var: str | None = None,
206
+ point_col: str | None = None,
149
207
  modelled_freq: str = "3h",
150
- linear_reg: bool = False,
151
208
  min_max_q: tuple = (0.0, 1.0),
152
209
  round_stats: int = 3,
153
- **model_tides_kwargs,
210
+ **tag_tides_kwargs,
154
211
  ) -> pd.Series:
155
212
  """
156
213
  Takes a multi-dimensional dataset and generates tide statistics
157
214
  and satellite-observed tide bias metrics, calculated based on
158
- every timestep in the satellte data and the geographic centroid
215
+ every timestep in the satellite data and the geographic centroid
159
216
  of the imagery.
160
217
 
161
218
  By comparing the subset of tides observed by satellites
162
219
  against the full astronomical tidal range, we can evaluate
163
220
  whether the tides observed by satellites are biased
164
- (e.g. fail to observe either the highest or lowest tides).
221
+ (e.g. fail to observe either the highest or lowest tides) due
222
+ to tide aliasing interactions with sun-synchronous satellite
223
+ overpasses.
165
224
 
166
225
  For more information about the tidal statistics computed by this
167
226
  function, refer to Figure 8 in Bishop-Taylor et al. 2018:
@@ -181,10 +240,13 @@ def tide_stats(
181
240
  be used to provide a custom set of times. Accepts any format
182
241
  that can be converted by `pandas.to_datetime()`. For example:
183
242
  `time=pd.date_range(start="2000", end="2001", freq="5h")`
184
- model : str, optional
185
- The tide model to use to model tides. Defaults to "EOT20";
186
- for a full list of available/supported models, run
187
- `eo_tides.model.list_models`.
243
+ model : str or list of str, optional
244
+ The tide model (or list of models) to use to model tides.
245
+ If a list is provided, the resulting statistics will be
246
+ returned as a `pandas.DataFrame`; otherwise a `pandas.Series`.
247
+ Defaults to "EOT20"; specify "all" to use all models available
248
+ in `directory`. For a full list of available and supported
249
+ models, run `eo_tides.model.list_models`.
188
250
  directory : str, optional
189
251
  The directory containing tide model data files. If no path is
190
252
  provided, this will default to the environment variable
@@ -198,26 +260,25 @@ def tide_stats(
198
260
  location.
199
261
  plain_english : bool, optional
200
262
  An optional boolean indicating whether to print a plain English
201
- version of the tidal statistics to the screen. Defaults to True.
263
+ version of the tidal statistics to the screen. Defaults to True;
264
+ only supported when a single tide model is passed to `model`.
202
265
  plot : bool, optional
203
266
  An optional boolean indicating whether to plot how satellite-
204
267
  observed tide heights compare against the full tidal range.
205
- Defaults to True.
206
- plot_col : str, optional
268
+ Defaults to True; only supported when a single tide model is
269
+ passed to `model`.
270
+ plot_var : str, optional
207
271
  Optional name of a coordinate, dimension or variable in the array
208
272
  that will be used to plot observations with unique symbols.
209
273
  Defaults to None, which will plot all observations as circles.
274
+ point_col : str, optional
275
+ Colour used to plot points on the graph. Defaults to None, which plots
276
+ points in black, or in automatically selected colours if `plot_var` is set.
210
277
  modelled_freq : str, optional
211
278
  An optional string giving the frequency at which to model tides
212
279
  when computing the full modelled tidal range. Defaults to '3h',
213
280
  which computes a tide height for every three hours across the
214
281
  temporal extent of `data`.
215
- linear_reg: bool, optional
216
- Whether to return linear regression statistics that assess
217
- whether satellite-observed tides show any decreasing or
218
- increasing trends over time. This may indicate whether your
219
- satellite data may produce misleading trends based on uneven
220
- sampling of the local tide regime.
221
282
  min_max_q : tuple, optional
222
283
  Quantiles used to calculate max and min observed and modelled
223
284
  astronomical tides. By default `(0.0, 1.0)` which is equivalent
@@ -226,17 +287,15 @@ def tide_stats(
226
287
  round_stats : int, optional
227
288
  The number of decimal places used to round the output statistics.
228
289
  Defaults to 3.
229
- **model_tides_kwargs :
230
- Optional parameters passed to the `eo_tides.model.model_tides`
231
- function. Important parameters include `cutoff` (used to
232
- extrapolate modelled tides away from the coast; defaults to
233
- `np.inf`), `crop` (whether to crop tide model constituent files
234
- on-the-fly to improve performance) etc.
290
+ **tag_tides_kwargs :
291
+ Optional parameters passed to the `eo_tides.eo.tag_tides`
292
+ function that is used to model tides for each observed and
293
+ modelled timestep.
235
294
 
236
295
  Returns
237
296
  -------
238
- stats_df : pandas.Series
239
- A `pandas.Series` containing the following statistics:
297
+ stats_df : pandas.Series or pandas.DataFrame
298
+ A pandas object containing the following statistics:
240
299
 
241
300
  - `y`: latitude used for modelling tide heights
242
301
  - `x`: longitude used for modelling tide heights
@@ -251,158 +310,92 @@ def tide_stats(
251
310
  - `spread`: proportion of the full modelled tidal range observed by the satellite
252
311
  - `offset_low`: proportion of the lowest tides never observed by the satellite
253
312
  - `offset_high`: proportion of the highest tides never observed by the satellite
254
-
255
- If `linear_reg = True`, the output will also contain:
256
-
257
- - `observed_slope`: slope of any relationship between observed tide heights and time
258
- - `observed_pval`: significance/p-value of any relationship between observed tide heights and time
259
313
  """
314
+
260
315
  # Standardise data inputs, time and models
261
- gbox, time_coords = _standardise_inputs(data, time)
316
+ gbox, obs_times = _standardise_inputs(data, time)
262
317
 
263
- # Verify that only one tide model is provided
264
- if isinstance(model, list):
265
- raise Exception("Only single tide models are supported by `tide_stats`.")
318
+ # Generate range of times covering entire period of satellite record
319
+ assert obs_times is not None
320
+ all_times = pd.date_range(
321
+ start=obs_times.min().item(),
322
+ end=obs_times.max().item(),
323
+ freq=modelled_freq,
324
+ )
266
325
 
267
326
  # If custom tide modelling locations are not provided, use the
268
327
  # dataset centroid
269
328
  if not tidepost_lat or not tidepost_lon:
270
329
  tidepost_lon, tidepost_lat = gbox.geographic_extent.centroid.coords[0]
271
330
 
272
- # Model tides for each observation in the supplied xarray object
273
- assert time_coords is not None
331
+ # Model tides for observed timesteps
274
332
  obs_tides_da = tag_tides(
275
333
  gbox,
276
- time=time_coords,
334
+ time=obs_times,
277
335
  model=model,
278
336
  directory=directory,
279
337
  tidepost_lat=tidepost_lat, # type: ignore
280
338
  tidepost_lon=tidepost_lon, # type: ignore
281
- return_tideposts=True,
282
- **model_tides_kwargs,
339
+ **tag_tides_kwargs,
283
340
  )
284
- if isinstance(data, (xr.Dataset, xr.DataArray)):
285
- obs_tides_da = obs_tides_da.reindex_like(data)
286
341
 
287
- # Generate range of times covering entire period of satellite record
288
- all_timerange = pd.date_range(
289
- start=time_coords.min().item(),
290
- end=time_coords.max().item(),
291
- freq=modelled_freq,
292
- )
293
-
294
- # Model tides for each timestep
295
- all_tides_df = model_tides(
296
- x=tidepost_lon, # type: ignore
297
- y=tidepost_lat, # type: ignore
298
- time=all_timerange,
342
+ # Model tides for all modelled timesteps
343
+ all_tides_da = tag_tides(
344
+ gbox,
345
+ time=all_times,
299
346
  model=model,
300
347
  directory=directory,
301
- crs="EPSG:4326",
302
- **model_tides_kwargs,
348
+ tidepost_lat=tidepost_lat, # type: ignore
349
+ tidepost_lon=tidepost_lon, # type: ignore
350
+ **tag_tides_kwargs,
303
351
  )
304
352
 
305
- # Get coarse statistics on all and observed tidal ranges
306
- obs_mean = obs_tides_da.mean().item()
307
- all_mean = all_tides_df.tide_height.mean()
308
- obs_min, obs_max = obs_tides_da.quantile(min_max_q).values
309
- all_min, all_max = all_tides_df.tide_height.quantile(min_max_q).values
353
+ # Calculate statistics
354
+ stats_ds = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)
310
355
 
311
- # Calculate tidal range
312
- obs_range = obs_max - obs_min
313
- all_range = all_max - all_min
314
-
315
- # Calculate Bishop-Taylor et al. 2018 tidal metrics
316
- spread = obs_range / all_range
317
- low_tide_offset_m = abs(all_min - obs_min)
318
- high_tide_offset_m = abs(all_max - obs_max)
319
- low_tide_offset = low_tide_offset_m / all_range
320
- high_tide_offset = high_tide_offset_m / all_range
321
-
322
- # Plain text descriptors
323
- mean_diff = "higher" if obs_mean > all_mean else "lower"
324
- mean_diff_icon = "⬆️" if obs_mean > all_mean else "⬇️"
325
- spread_icon = "🟢" if spread >= 0.9 else "🟡" if 0.7 < spread <= 0.9 else "🔴"
326
- low_tide_icon = "🟢" if low_tide_offset <= 0.1 else "🟡" if 0.1 <= low_tide_offset < 0.2 else "🔴"
327
- high_tide_icon = "🟢" if high_tide_offset <= 0.1 else "🟡" if 0.1 <= high_tide_offset < 0.2 else "🔴"
356
+ # Convert to pandas and add tide post coordinates
357
+ stats_df = stats_ds.to_pandas().astype("float32")
358
+ stats_df["x"] = tidepost_lon
359
+ stats_df["y"] = tidepost_lat
328
360
 
329
- # Extract x (time in decimal years) and y (distance) values
330
- obs_x = (
331
- obs_tides_da.time.dt.year + ((obs_tides_da.time.dt.dayofyear - 1) / 365) + ((obs_tides_da.time.dt.hour) / 24)
332
- )
333
- obs_y = obs_tides_da.values.astype(np.float32)
334
-
335
- # Compute linear regression
336
- obs_linreg = stats.linregress(x=obs_x, y=obs_y)
361
+ # Convert coordinates to index if dataframe
362
+ if isinstance(stats_df, pd.DataFrame):
363
+ stats_df = stats_df.set_index(["x", "y"], append=True)
337
364
 
338
- if plain_english:
339
- print(f"\n\n🌊 Modelled astronomical tide range: {all_range:.2f} metres.")
340
- print(f"🛰️ Observed tide range: {obs_range:.2f} metres.\n")
341
- print(f"{spread_icon} {spread:.0%} of the modelled astronomical tide range was observed at this location.")
342
- print(
343
- f"{high_tide_icon} The highest {high_tide_offset:.0%} ({high_tide_offset_m:.2f} metres) of the tide range was never observed."
344
- )
345
- print(
346
- f"{low_tide_icon} The lowest {low_tide_offset:.0%} ({low_tide_offset_m:.2f} metres) of the tide range was never observed.\n"
347
- )
348
- print(f"🌊 Mean modelled astronomical tide height: {all_mean:.2f} metres.")
349
- print(f"🛰️ Mean observed tide height: {obs_mean:.2f} metres.\n")
350
- print(
351
- f"{mean_diff_icon} The mean observed tide height was {obs_mean - all_mean:.2f} metres {mean_diff} than the mean modelled astronomical tide height."
352
- )
365
+ # If a series, print and plot summaries
366
+ else:
367
+ if plain_english:
368
+ _stats_plain_english(
369
+ mot=stats_df.mot,
370
+ mat=stats_df.mat,
371
+ hot=stats_df.hot,
372
+ hat=stats_df.hat,
373
+ lot=stats_df.lot,
374
+ lat=stats_df.lat,
375
+ otr=stats_df.otr,
376
+ tr=stats_df.tr,
377
+ spread=stats_df.spread,
378
+ offset_low=stats_df.offset_low,
379
+ offset_high=stats_df.offset_high,
380
+ )
353
381
 
354
- if linear_reg:
355
- if obs_linreg.pvalue > 0.01:
356
- print("➖ Observed tides showed no significant trends over time.")
357
- else:
358
- obs_slope_desc = "decreasing" if obs_linreg.slope < 0 else "increasing"
359
- print(
360
- f"⚠️ Observed tides showed a significant {obs_slope_desc} trend over time (p={obs_linreg.pvalue:.3f}, {obs_linreg.slope:.2f} metres per year)"
361
- )
362
-
363
- if plot:
364
- _plot_biases(
365
- all_tides_df=all_tides_df,
366
- obs_tides_da=obs_tides_da,
367
- lat=all_min,
368
- lot=obs_min,
369
- hat=all_max,
370
- hot=obs_max,
371
- offset_low=low_tide_offset,
372
- offset_high=high_tide_offset,
373
- spread=spread,
374
- plot_col=data[plot_col] if plot_col else None,
375
- obs_linreg=obs_linreg if linear_reg else None,
376
- obs_x=obs_x,
377
- all_timerange=all_timerange,
378
- )
382
+ if plot:
383
+ _stats_figure(
384
+ all_tides_da=all_tides_da,
385
+ obs_tides_da=obs_tides_da,
386
+ hot=stats_df.hot,
387
+ hat=stats_df.hat,
388
+ lot=stats_df.lot,
389
+ lat=stats_df.lat,
390
+ spread=stats_df.spread,
391
+ offset_low=stats_df.offset_low,
392
+ offset_high=stats_df.offset_high,
393
+ plot_var=data[plot_var] if plot_var else None,
394
+ point_col=point_col,
395
+ )
379
396
 
380
- # Export pandas.Series containing tidal stats
381
- output_stats = {
382
- "y": tidepost_lat,
383
- "x": tidepost_lon,
384
- "mot": obs_mean,
385
- "mat": all_mean,
386
- "lot": obs_min,
387
- "lat": all_min,
388
- "hot": obs_max,
389
- "hat": all_max,
390
- "otr": obs_range,
391
- "tr": all_range,
392
- "spread": spread,
393
- "offset_low": low_tide_offset,
394
- "offset_high": high_tide_offset,
395
- }
396
-
397
- if linear_reg:
398
- output_stats.update({
399
- "observed_slope": obs_linreg.slope,
400
- "observed_pval": obs_linreg.pvalue,
401
- })
402
-
403
- # Return pandas data
404
- stats_df = pd.Series(output_stats).round(round_stats)
405
- return stats_df
397
+ # Return in Pandas format
398
+ return stats_df.round(round_stats)
406
399
 
407
400
 
408
401
  def pixel_stats(
@@ -410,26 +403,31 @@ def pixel_stats(
410
403
  time: DatetimeLike | None = None,
411
404
  model: str | list[str] = "EOT20",
412
405
  directory: str | os.PathLike | None = None,
413
- resample: bool = False,
406
+ resample: bool = True,
414
407
  modelled_freq: str = "3h",
415
408
  min_max_q: tuple[float, float] = (0.0, 1.0),
409
+ resample_method: str = "bilinear",
410
+ dask_chunks: tuple[float, float] | None = None,
411
+ dask_compute: bool = True,
416
412
  extrapolate: bool = True,
417
413
  cutoff: float = 10,
418
414
  **pixel_tides_kwargs,
419
415
  ) -> xr.Dataset:
420
416
  """
421
- Takes a multi-dimensional dataset and generate two-dimensional
417
+ Takes a multi-dimensional dataset and generates spatial
422
418
  tide statistics and satellite-observed tide bias metrics,
423
- calculated based on every timestep in the satellte data and
419
+ calculated based on every timestep in the satellite data and
424
420
  modelled into the spatial extent of the imagery.
425
421
 
426
422
  By comparing the subset of tides observed by satellites
427
423
  against the full astronomical tidal range, we can evaluate
428
424
  whether the tides observed by satellites are biased
429
- (e.g. fail to observe either the highest or lowest tides).
425
+ (e.g. fail to observe either the highest or lowest tides)
426
+ due to tide aliasing interactions with sun-synchronous satellite
427
+ overpasses.
430
428
 
431
429
  Compared to `tide_stats`, this function models tide metrics
432
- spatially to produce a two-dimensional output.
430
+ spatially to produce a two-dimensional output for each statistic.
433
431
 
434
432
  For more information about the tidal statistics computed by this
435
433
  function, refer to Figure 8 in Bishop-Taylor et al. 2018:
@@ -439,7 +437,7 @@ def pixel_stats(
439
437
  ----------
440
438
  data : xarray.Dataset or xarray.DataArray or odc.geo.geobox.GeoBox
441
439
  A multi-dimensional dataset or GeoBox pixel grid that will
442
- be used to calculate 2D tide statistics. If `data`
440
+ be used to calculate spatial tide statistics. If `data`
443
441
  is an xarray object, it should include a "time" dimension.
444
442
  If no "time" dimension exists or if `data` is a GeoBox,
445
443
  then times must be passed using the `time` parameter.
@@ -450,10 +448,12 @@ def pixel_stats(
450
448
  that can be converted by `pandas.to_datetime()`. For example:
451
449
  `time=pd.date_range(start="2000", end="2001", freq="5h")`
452
450
  model : str or list of str, optional
453
- The tide model (or models) to use to model tides. If a list is
454
- provided, a new "tide_model" dimension will be added to `data`.
455
- Defaults to "EOT20"; for a full list of available/supported
456
- models, run `eo_tides.model.list_models`.
451
+ The tide model (or list of models) to use to model tides.
452
+ If a list is provided, a new "tide_model" dimension will be
453
+ added to the `xarray.Dataset` output. Defaults to "EOT20";
454
+ specify "all" to use all models available in `directory`.
455
+ For a full list of available and supported models, run
456
+ `eo_tides.model.list_models`.
457
457
  directory : str, optional
458
458
  The directory containing tide model data files. If no path is
459
459
  provided, this will default to the environment variable
@@ -463,9 +463,9 @@ def pixel_stats(
463
463
  (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
464
464
  resample : bool, optional
465
465
  Whether to resample tide statistics back into `data`'s original
466
- higher resolution grid. Defaults to False, which will return
467
- lower-resolution statistics that are typically sufficient for
468
- most purposes.
466
+ higher resolution grid. Set this to `False` if you want to return
467
+ lower-resolution tide statistics (which can be useful for
468
+ assessing tide biases across large spatial extents).
469
469
  modelled_freq : str, optional
470
470
  An optional string giving the frequency at which to model tides
471
471
  when computing the full modelled tidal range. Defaults to '3h',
@@ -474,10 +474,25 @@ def pixel_stats(
474
474
  min_max_q : tuple, optional
475
475
  Quantiles used to calculate max and min observed and modelled
476
476
  astronomical tides. By default `(0.0, 1.0)` which is equivalent
477
- to minimum and maximum; to use a softer threshold that is more
478
- robust to outliers, use e.g. `(0.1, 0.9)`.
477
+ to minimum and maximum; for a softer threshold that is more
478
+ robust to outliers use e.g. `(0.1, 0.9)`.
479
+ resample_method : str, optional
480
+ If resampling is requested (see `resample` above), use this
481
+ resampling method when resampling from low resolution to high
482
+ resolution pixels. Defaults to "bilinear"; valid options include
483
+ "nearest", "cubic", "min", "max", "average" etc.
484
+ dask_chunks : tuple of float, optional
485
+ Can be used to configure custom Dask chunking for the final
486
+ resampling step. By default, chunks will be automatically set
487
+ to match y/x chunks from `data` if they exist; otherwise chunks
488
+ will be chosen to cover the entire y/x extent of the dataset.
489
+ For custom chunks, provide a tuple in the form `(y, x)`, e.g.
490
+ `(2048, 2048)`.
491
+ dask_compute : bool, optional
492
+ Whether to compute results of the resampling step using Dask.
493
+ If False, `stats_ds` will be returned as a Dask-enabled array.
479
494
  extrapolate : bool, optional
480
- Whether to extrapolate tides for x and y coordinates outside of
495
+ Whether to extrapolate tides into x and y coordinates outside of
481
496
  the valid tide modelling domain using nearest-neighbor. Defaults
482
497
  to True.
483
498
  cutoff : float, optional
@@ -492,6 +507,8 @@ def pixel_stats(
492
507
  stats_ds : xarray.Dataset
493
508
  An `xarray.Dataset` containing the following statistics as two-dimensional data variables:
494
509
 
510
+ - `mot`: mean tide height observed by the satellite (metres)
511
+ - `mat`: mean modelled astronomical tide height (metres)
495
512
  - `lot`: minimum tide height observed by the satellite (metres)
496
513
  - `lat`: minimum tide height from modelled astronomical tidal range (metres)
497
514
  - `hot`: maximum tide height observed by the satellite (metres)
@@ -503,90 +520,62 @@ def pixel_stats(
503
520
  - `offset_high`: proportion of the highest tides never observed by the satellite
504
521
 
505
522
  """
523
+
506
524
  # Standardise data inputs, time and models
507
- gbox, time_coords = _standardise_inputs(data, time)
525
+ gbox, obs_times = _standardise_inputs(data, time)
526
+ dask_chunks = _resample_chunks(data, dask_chunks)
508
527
  model = [model] if isinstance(model, str) else model
509
528
 
510
- # Model observed tides
511
- assert time_coords is not None
512
- obs_tides = pixel_tides(
529
+ # Generate range of times covering entire period of satellite record
530
+ assert obs_times is not None
531
+ all_times = pd.date_range(
532
+ start=obs_times.min().item(),
533
+ end=obs_times.max().item(),
534
+ freq=modelled_freq,
535
+ )
536
+
537
+ # Model tides for observed timesteps
538
+ obs_tides_da = pixel_tides(
513
539
  gbox,
514
- time=time_coords,
515
- resample=False,
540
+ time=obs_times,
516
541
  model=model,
517
542
  directory=directory,
518
- calculate_quantiles=min_max_q,
543
+ resample=False,
519
544
  extrapolate=extrapolate,
520
545
  cutoff=cutoff,
521
546
  **pixel_tides_kwargs,
522
547
  )
523
548
 
524
- # Generate times covering entire period of satellite record
525
- all_timerange = pd.date_range(
526
- start=time_coords.min().item(),
527
- end=time_coords.max().item(),
528
- freq=modelled_freq,
529
- )
530
-
531
- # Model all tides
532
- all_tides = pixel_tides(
549
+ # Model tides for all modelled timesteps
550
+ all_tides_da = pixel_tides(
533
551
  gbox,
534
- time=all_timerange,
552
+ time=all_times,
535
553
  model=model,
536
554
  directory=directory,
537
- calculate_quantiles=min_max_q,
538
555
  resample=False,
539
556
  extrapolate=extrapolate,
540
557
  cutoff=cutoff,
541
558
  **pixel_tides_kwargs,
542
559
  )
543
560
 
544
- # # Calculate means
545
- # TODO: Find way to make this work with `calculate_quantiles`
546
- # mot = obs_tides.mean(dim="time")
547
- # mat = all_tides.mean(dim="time")
548
-
549
- # Calculate min and max tides
550
- lot = obs_tides.isel(quantile=0)
551
- hot = obs_tides.isel(quantile=-1)
552
- lat = all_tides.isel(quantile=0)
553
- hat = all_tides.isel(quantile=-1)
561
+ # Calculate statistics
562
+ stats_lowres = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)
554
563
 
555
- # Calculate tidal range
556
- otr = hot - lot
557
- tr = hat - lat
564
+ # Assign CRS and geobox to allow reprojection
565
+ stats_lowres = stats_lowres.odc.assign_crs(crs=gbox.crs)
558
566
 
559
- # Calculate Bishop-Taylor et al. 2018 tidal metrics
560
- spread = otr / tr
561
- offset_low_m = abs(lat - lot)
562
- offset_high_m = abs(hat - hot)
563
- offset_low = offset_low_m / tr
564
- offset_high = offset_high_m / tr
565
-
566
- # Combine into a single dataset
567
- stats_ds = (
568
- xr.merge(
569
- [
570
- # mot.rename("mot"),
571
- # mat.rename("mat"),
572
- hot.rename("hot"),
573
- hat.rename("hat"),
574
- lot.rename("lot"),
575
- lat.rename("lat"),
576
- otr.rename("otr"),
577
- tr.rename("tr"),
578
- spread.rename("spread"),
579
- offset_low.rename("offset_low"),
580
- offset_high.rename("offset_high"),
581
- ],
582
- compat="override",
583
- )
584
- .drop_vars("quantile")
585
- .odc.assign_crs(crs=gbox.crs)
586
- )
587
-
588
- # Optionally resample into the original pixel grid of `data`
567
+ # Reproject statistics into original high resolution grid
589
568
  if resample:
590
- stats_ds = stats_ds.odc.reproject(how=gbox, resample_method="bilinear")
569
+ print("Reprojecting statistics into original resolution")
570
+ stats_highres = _pixel_tides_resample(
571
+ stats_lowres,
572
+ gbox,
573
+ resample_method,
574
+ dask_chunks,
575
+ dask_compute,
576
+ None,
577
+ )
578
+ return stats_highres
591
579
 
592
- return stats_ds
580
+ print("Returning low resolution statistics array")
581
+ return stats_lowres