eo-tides 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
eo_tides/stats.py CHANGED
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING
6
6
 
7
7
  import matplotlib.pyplot as plt
8
8
  import numpy as np
9
- import odc.geo.xr
10
9
  import pandas as pd
11
10
  import xarray as xr
12
11
  from scipy import stats
@@ -14,9 +13,11 @@ from scipy import stats
14
13
  # Only import if running type checking
15
14
  if TYPE_CHECKING:
16
15
  import xarray as xr
16
+ from odc.geo.geobox import GeoBox
17
17
 
18
- from .eo import pixel_tides, tag_tides
18
+ from .eo import _standardise_inputs, pixel_tides, tag_tides
19
19
  from .model import model_tides
20
+ from .utils import DatetimeLike
20
21
 
21
22
 
22
23
  def _plot_biases(
@@ -136,7 +137,8 @@ def _plot_biases(
136
137
 
137
138
 
138
139
  def tide_stats(
139
- ds: xr.Dataset | xr.DataArray,
140
+ data: xr.Dataset | xr.DataArray | GeoBox,
141
+ time: DatetimeLike | None = None,
140
142
  model: str = "EOT20",
141
143
  directory: str | os.PathLike | None = None,
142
144
  tidepost_lat: float | None = None,
@@ -167,15 +169,23 @@ def tide_stats(
167
169
 
168
170
  Parameters
169
171
  ----------
170
- ds : xarray.Dataset or xarray.DataArray
171
- A multi-dimensional dataset (e.g. "x", "y", "time") used
172
- to calculate tide statistics. This dataset must contain
173
- a "time" dimension.
174
- model : string, optional
172
+ data : xarray.Dataset or xarray.DataArray or odc.geo.geobox.GeoBox
173
+ A multi-dimensional dataset or GeoBox pixel grid that will
174
+ be used to calculate tide statistics. If `data` is an
175
+ xarray object, it should include a "time" dimension.
176
+ If no "time" dimension exists or if `data` is a GeoBox,
177
+ then times must be passed using the `time` parameter.
178
+ time : DatetimeLike, optional
179
+ By default, tides will be modelled using times from the
180
+ "time" dimension of `data`. Alternatively, this param can
181
+ be used to provide a custom set of times. Accepts any format
182
+ that can be converted by `pandas.to_datetime()`. For example:
183
+ `time=pd.date_range(start="2000", end="2001", freq="5h")`
184
+ model : str, optional
175
185
  The tide model to use to model tides. Defaults to "EOT20";
176
186
  for a full list of available/supported models, run
177
187
  `eo_tides.model.list_models`.
178
- directory : string, optional
188
+ directory : str, optional
179
189
  The directory containing tide model data files. If no path is
180
190
  provided, this will default to the environment variable
181
191
  `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
@@ -201,7 +211,7 @@ def tide_stats(
201
211
  An optional string giving the frequency at which to model tides
202
212
  when computing the full modelled tidal range. Defaults to '3h',
203
213
  which computes a tide height for every three hours across the
204
- temporal extent of `ds`.
214
+ temporal extent of `data`.
205
215
  linear_reg: bool, optional
206
216
  Whether to return linear regression statistics that assess
207
217
  whether satellite-observed tides show any decreasing or
@@ -247,6 +257,9 @@ def tide_stats(
247
257
  - `observed_slope`: slope of any relationship between observed tide heights and time
248
258
  - `observed_pval`: significance/p-value of any relationship between observed tide heights and time
249
259
  """
260
+ # Standardise data inputs, time and models
261
+ gbox, time_coords = _standardise_inputs(data, time)
262
+
250
263
  # Verify that only one tide model is provided
251
264
  if isinstance(model, list):
252
265
  raise Exception("Only single tide models are supported by `tide_stats`.")
@@ -254,11 +267,13 @@ def tide_stats(
254
267
  # If custom tide modelling locations are not provided, use the
255
268
  # dataset centroid
256
269
  if not tidepost_lat or not tidepost_lon:
257
- tidepost_lon, tidepost_lat = ds.odc.geobox.geographic_extent.centroid.coords[0]
270
+ tidepost_lon, tidepost_lat = gbox.geographic_extent.centroid.coords[0]
258
271
 
259
272
  # Model tides for each observation in the supplied xarray object
273
+ assert time_coords is not None
260
274
  obs_tides_da = tag_tides(
261
- ds,
275
+ gbox,
276
+ time=time_coords,
262
277
  model=model,
263
278
  directory=directory,
264
279
  tidepost_lat=tidepost_lat, # type: ignore
@@ -266,12 +281,13 @@ def tide_stats(
266
281
  return_tideposts=True,
267
282
  **model_tides_kwargs,
268
283
  )
269
- obs_tides_da = obs_tides_da.reindex_like(ds)
284
+ if isinstance(data, (xr.Dataset, xr.DataArray)):
285
+ obs_tides_da = obs_tides_da.reindex_like(data)
270
286
 
271
287
  # Generate range of times covering entire period of satellite record
272
288
  all_timerange = pd.date_range(
273
- start=obs_tides_da.time.min().item(),
274
- end=obs_tides_da.time.max().item(),
289
+ start=time_coords.min().item(),
290
+ end=time_coords.max().item(),
275
291
  freq=modelled_freq,
276
292
  )
277
293
 
@@ -355,7 +371,7 @@ def tide_stats(
355
371
  offset_low=low_tide_offset,
356
372
  offset_high=high_tide_offset,
357
373
  spread=spread,
358
- plot_col=ds[plot_col] if plot_col else None,
374
+ plot_col=data[plot_col] if plot_col else None,
359
375
  obs_linreg=obs_linreg if linear_reg else None,
360
376
  obs_x=obs_x,
361
377
  all_timerange=all_timerange,
@@ -390,12 +406,13 @@ def tide_stats(
390
406
 
391
407
 
392
408
  def pixel_stats(
393
- ds: xr.Dataset | xr.DataArray,
409
+ data: xr.Dataset | xr.DataArray | GeoBox,
410
+ time: DatetimeLike | None = None,
394
411
  model: str | list[str] = "EOT20",
395
412
  directory: str | os.PathLike | None = None,
396
413
  resample: bool = False,
397
- modelled_freq="3h",
398
- min_max_q=(0.0, 1.0),
414
+ modelled_freq: str = "3h",
415
+ min_max_q: tuple[float, float] = (0.0, 1.0),
399
416
  extrapolate: bool = True,
400
417
  cutoff: float = 10,
401
418
  **pixel_tides_kwargs,
@@ -420,13 +437,21 @@ def pixel_stats(
420
437
 
421
438
  Parameters
422
439
  ----------
423
- ds : xarray.Dataset or xarray.DataArray
424
- A multi-dimensional dataset (e.g. "x", "y", "time") used
425
- to calculate 2D tide statistics. This dataset must contain
426
- a "time" dimension.
440
+ data : xarray.Dataset or xarray.DataArray or odc.geo.geobox.GeoBox
441
+ A multi-dimensional dataset or GeoBox pixel grid that will
442
+ be used to calculate 2D tide statistics. If `data`
443
+ is an xarray object, it should include a "time" dimension.
444
+ If no "time" dimension exists or if `data` is a GeoBox,
445
+ then times must be passed using the `time` parameter.
446
+ time : DatetimeLike, optional
447
+ By default, tides will be modelled using times from the
448
+ "time" dimension of `data`. Alternatively, this param can
449
+ be used to provide a custom set of times. Accepts any format
450
+ that can be converted by `pandas.to_datetime()`. For example:
451
+ `time=pd.date_range(start="2000", end="2001", freq="5h")`
427
452
  model : str or list of str, optional
428
453
  The tide model (or models) to use to model tides. If a list is
429
- provided, a new "tide_model" dimension will be added to `ds`.
454
+ provided, a new "tide_model" dimension will be added to `data`.
430
455
  Defaults to "EOT20"; for a full list of available/supported
431
456
  models, run `eo_tides.model.list_models`.
432
457
  directory : str, optional
@@ -437,7 +462,7 @@ def pixel_stats(
437
462
  model that match the structure required by `pyTMD`
438
463
  (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
439
464
  resample : bool, optional
440
- Whether to resample tide statistics back into `ds`'s original
465
+ Whether to resample tide statistics back into `data`'s original
441
466
  higher resolution grid. Defaults to False, which will return
442
467
  lower-resolution statistics that are typically sufficient for
443
468
  most purposes.
@@ -445,7 +470,7 @@ def pixel_stats(
445
470
  An optional string giving the frequency at which to model tides
446
471
  when computing the full modelled tidal range. Defaults to '3h',
447
472
  which computes a tide height for every three hours across the
448
- temporal extent of `ds`.
473
+ temporal extent of `data`.
449
474
  min_max_q : tuple, optional
450
475
  Quantiles used to calculate max and min observed and modelled
451
476
  astronomical tides. By default `(0.0, 1.0)` which is equivalent
@@ -478,9 +503,15 @@ def pixel_stats(
478
503
  - `offset_high`: proportion of the highest tides never observed by the satellite
479
504
 
480
505
  """
506
+ # Standardise data inputs, time and models
507
+ gbox, time_coords = _standardise_inputs(data, time)
508
+ model = [model] if isinstance(model, str) else model
509
+
481
510
  # Model observed tides
511
+ assert time_coords is not None
482
512
  obs_tides = pixel_tides(
483
- ds,
513
+ gbox,
514
+ time=time_coords,
484
515
  resample=False,
485
516
  model=model,
486
517
  directory=directory,
@@ -492,15 +523,15 @@ def pixel_stats(
492
523
 
493
524
  # Generate times covering entire period of satellite record
494
525
  all_timerange = pd.date_range(
495
- start=ds.time.min().item(),
496
- end=ds.time.max().item(),
526
+ start=time_coords.min().item(),
527
+ end=time_coords.max().item(),
497
528
  freq=modelled_freq,
498
529
  )
499
530
 
500
531
  # Model all tides
501
532
  all_tides = pixel_tides(
502
- ds,
503
- times=all_timerange,
533
+ gbox,
534
+ time=all_timerange,
504
535
  model=model,
505
536
  directory=directory,
506
537
  calculate_quantiles=min_max_q,
@@ -510,6 +541,11 @@ def pixel_stats(
510
541
  **pixel_tides_kwargs,
511
542
  )
512
543
 
544
+ # # Calculate means
545
+ # TODO: Find way to make this work with `calculate_quantiles`
546
+ # mot = obs_tides.mean(dim="time")
547
+ # mat = all_tides.mean(dim="time")
548
+
513
549
  # Calculate min and max tides
514
550
  lot = obs_tides.isel(quantile=0)
515
551
  hot = obs_tides.isel(quantile=-1)
@@ -531,10 +567,12 @@ def pixel_stats(
531
567
  stats_ds = (
532
568
  xr.merge(
533
569
  [
534
- hat.rename("hat"),
570
+ # mot.rename("mot"),
571
+ # mat.rename("mat"),
535
572
  hot.rename("hot"),
536
- lat.rename("lat"),
573
+ hat.rename("hat"),
537
574
  lot.rename("lot"),
575
+ lat.rename("lat"),
538
576
  otr.rename("otr"),
539
577
  tr.rename("tr"),
540
578
  spread.rename("spread"),
@@ -544,11 +582,11 @@ def pixel_stats(
544
582
  compat="override",
545
583
  )
546
584
  .drop_vars("quantile")
547
- .odc.assign_crs(crs=ds.odc.crs)
585
+ .odc.assign_crs(crs=gbox.crs)
548
586
  )
549
587
 
550
- # Optionally resample into the original pixel grid of `ds`
588
+ # Optionally resample into the original pixel grid of `data`
551
589
  if resample:
552
- stats_ds = stats_ds.odc.reproject(how=ds.odc.geobox, resample_method="bilinear")
590
+ stats_ds = stats_ds.odc.reproject(how=gbox, resample_method="bilinear")
553
591
 
554
592
  return stats_ds