eo-tides 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/model.py CHANGED
@@ -1,79 +1,165 @@
1
+ # Used to postpone evaluation of type annotations
2
+ from __future__ import annotations
3
+
1
4
  import os
2
5
  import pathlib
6
+ import warnings
3
7
  from concurrent.futures import ProcessPoolExecutor
4
8
  from functools import partial
9
+ from typing import TYPE_CHECKING
10
+
11
+ # Only import if running type checking
12
+ if TYPE_CHECKING:
13
+ import xarray as xr
5
14
 
6
15
  import geopandas as gpd
7
16
  import numpy as np
8
- import odc.geo.xr
9
17
  import pandas as pd
10
18
  import pyproj
11
19
  import pyTMD
20
+ from colorama import Style, init
12
21
  from pyTMD.io.model import load_database, model
13
22
  from tqdm import tqdm
14
23
 
15
- from eo_tides.utils import idw
24
+ from .utils import idw
25
+
26
+
27
+ def _set_directory(directory):
28
+ """
29
+ Set tide modelling files directory. If no custom
30
+ path is provided, try global environment variable
31
+ instead.
32
+ """
33
+ if directory is None:
34
+ if "EO_TIDES_TIDE_MODELS" in os.environ:
35
+ directory = os.environ["EO_TIDES_TIDE_MODELS"]
36
+ else:
37
+ raise Exception(
38
+ "No tide model directory provided via `directory`, and/or no "
39
+ "`EO_TIDES_TIDE_MODELS` environment variable found. "
40
+ "Please provide a valid path to your tide model directory."
41
+ )
42
+
43
+ # Verify path exists
44
+ directory = pathlib.Path(directory).expanduser()
45
+ if not directory.exists():
46
+ raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
47
+ else:
48
+ return directory
16
49
 
17
50
 
18
- def available_models(directory=None, show_supported=True):
51
+ def list_models(
52
+ directory: str | os.PathLike | None = None,
53
+ show_available: bool = True,
54
+ show_supported: bool = True,
55
+ raise_error: bool = False,
56
+ ) -> tuple[list[str], list[str]]:
19
57
  """
20
- Prints a list of all tide models available for tide
21
- modelling using `eo-tides`.
58
+ List all tide models available for tide modelling, and
59
+ all models supported by `eo-tides` and `pyTMD`.
22
60
 
23
61
  This function scans the specified tide model directory
24
- for tide models supported by the `pyTMD` package, and
25
- prints a list of models that are available in the
26
- directory as well as the full list of supported models.
62
+ and returns a list of models that are available in the
63
+ directory as well as the full list of all supported models.
27
64
 
28
65
  For instructions on setting up tide models, see:
29
66
  <https://geoscienceaustralia.github.io/eo-tides/setup/>
30
67
 
31
68
  Parameters
32
69
  ----------
33
- directory : str
34
- Path to the directory containing tide model files.
70
+ directory : str, optional
71
+ The directory containing tide model data files. If no path is
72
+ provided, this will default to the environment variable
73
+ `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
74
+ Tide modelling files should be stored in sub-folders for each
75
+ model that match the structure required by `pyTMD`
76
+ (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
77
+ show_available : bool, optional
78
+ Whether to print a list of locally available models.
79
+ show_supported : bool, optional
80
+ Whether to print a list of all supported models, in
81
+ addition to models available locally.
82
+ raise_error : bool, optional
83
+ If True, raise an error if no available models are found.
84
+ If False, raise a warning.
35
85
 
36
86
  Returns
37
87
  -------
38
- available_m : list
39
- A list of all available tide models within
40
- `directory`.
88
+ available_models : list of str
89
+ A list of all tide models available within `directory`.
90
+ supported_models : list of str
91
+ A list of all tide models supported by `eo-tides`.
41
92
  """
42
- # TODO: Pull directory code into re-usable function
93
+ init() # Initialize colorama
43
94
 
44
- # Set tide modelling files directory. If no custom path is provided,
45
- # first try global environmental var, then "/var/share/tide_models"
46
- if directory is None:
47
- if "EO_TIDES_TIDE_MODELS" in os.environ:
48
- directory = os.environ["EO_TIDES_TIDE_MODELS"]
49
- else:
50
- directory = "/var/share/tide_models"
95
+ # Set tide modelling files directory. If no custom path is
96
+ # provided, try global environment variable.
97
+ directory = _set_directory(directory)
51
98
 
52
- # Verify path exists
53
- directory = pathlib.Path(directory).expanduser()
54
- if not directory.exists():
55
- raise FileNotFoundError("Invalid tide directory")
99
+ # Get full list of supported models from pyTMD database
100
+ model_database = load_database()["elevation"]
101
+ supported_models = list(model_database.keys())
102
+
103
+ # Extract expected model paths
104
+ expected_paths = {}
105
+ for m in supported_models:
106
+ model_file = model_database[m]["model_file"]
107
+ model_file = model_file[0] if isinstance(model_file, list) else model_file
108
+ expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
56
109
 
57
- # Get full list of supported models from the database
58
- supported_models = load_database()["elevation"].keys()
110
+ # Define column widths
111
+ status_width = 4 # Width for emoji
112
+ name_width = max(len(name) for name in supported_models)
113
+ path_width = max(len(path) for path in expected_paths.values())
59
114
 
60
115
  # Print list of supported models, marking available and
61
116
  # unavailable models and appending available to list
62
- print(f"Tide models available in `{directory}`:")
63
- available_m = []
117
+ if show_available or show_supported:
118
+ total_width = min(status_width + name_width + path_width + 6, 80)
119
+ print("─" * total_width)
120
+ print(f"{'󠀠🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
121
+ print("─" * total_width)
122
+
123
+ available_models = []
64
124
  for m in supported_models:
65
125
  try:
66
- model(directory=directory).elevation(m=m)
67
- # Mark available models with a green tick
68
- print(f" ✅ {m}")
69
- available_m.append(m)
126
+ model_file = model(directory=directory).elevation(m=m)
127
+ available_models.append(m)
128
+
129
+ if show_available:
130
+ # Mark available models with a green tick
131
+ status = "✅"
132
+ print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
70
133
  except:
71
134
  if show_supported:
72
135
  # Mark unavailable models with a red cross
73
- print(f" {m}")
136
+ status = "❌"
137
+ print(
138
+ f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
139
+ )
74
140
 
75
- # Return list of available models
76
- return available_m
141
+ if show_available or show_supported:
142
+ print("─" * total_width)
143
+
144
+ # Print summary
145
+ print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
146
+ print(f"Available models: {len(available_models)}/{len(supported_models)}")
147
+
148
+ # Raise error or warning if no models are available
149
+ if not available_models:
150
+ warning_text = (
151
+ f"No valid tide models are available in `{directory}`. "
152
+ "Are you sure you have provided the correct `directory` path, "
153
+ "or set the `EO_TIDES_TIDE_MODELS` environment variable "
154
+ "to point to the location of your tide model directory?"
155
+ )
156
+ if raise_error:
157
+ raise Exception(warning_text)
158
+ else:
159
+ warnings.warn(warning_text, UserWarning)
160
+
161
+ # Return list of available and supported models
162
+ return available_models, supported_models
77
163
 
78
164
 
79
165
  def _model_tides(
@@ -94,34 +180,7 @@ def _model_tides(
94
180
  extraction of tide modelling constituents and tide modelling using
95
181
  `pyTMD`.
96
182
  """
97
- # import pyTMD.eop
98
- # import pyTMD.io
99
- # import pyTMD.io.model
100
- # import pyTMD.predict
101
- # import pyTMD.spatial
102
- # import pyTMD.time
103
- # import pyTMD.utilities
104
-
105
- # Get parameters for tide model; use custom definition file for
106
- # FES2012 (leave this as an undocumented feature for now)
107
- # if model == "FES2012":
108
- # pytmd_model = pyTMD.io.model(directory).from_file(
109
- # directory / "model_FES2012.def"
110
- # )
111
- # elif model == "TPXO8-atlas-v1":
112
- # pytmd_model = pyTMD.io.model(directory).from_file(directory / "model_TPXO8.def")
113
- # else:
114
- # pytmd_model = pyTMD.io.model(
115
- # directory, format="netcdf", compressed=False
116
- # ).elevation(model)
117
-
118
- # if model in NONSTANDARD_MODELS:
119
- # model_params = NONSTANDARD_MODELS[model]
120
- # model_params_bytes = io.BytesIO(json.dumps(model_params).encode("utf-8"))
121
- # pytmd_model = pyTMD.io.model(directory).from_file(definition_file=model_params_bytes)
122
-
123
- # else:
124
-
183
+ # Obtain model details
125
184
  pytmd_model = pyTMD.io.model(directory).elevation(model)
126
185
 
127
186
  # Convert x, y to latitude/longitude
@@ -284,10 +343,8 @@ def _model_tides(
284
343
 
285
344
 
286
345
  def _ensemble_model(
287
- x,
288
- y,
289
- crs,
290
346
  tide_df,
347
+ crs,
291
348
  ensemble_models,
292
349
  ensemble_func=None,
293
350
  ensemble_top_n=3,
@@ -301,29 +358,27 @@ def _ensemble_model(
301
358
  to inform the selection of the best local models.
302
359
 
303
360
  This function performs the following steps:
361
+ 1. Takes a dataframe of tide heights from multiple tide models, as
362
+ produced by `eo_tides.model.model_tides`
304
363
  1. Loads model ranking points from a GeoJSON file, filters them
305
364
  based on the valid data percentage, and retains relevant columns
306
- 2. Interpolates the model rankings into the requested x and y
307
- coordinates using Inverse Weighted Interpolation (IDW)
365
+ 2. Interpolates the model rankings into the "x" and "y" coordinates
366
+ of the original dataframe using Inverse Distance Weighted (IDW) interpolation
308
367
  3. Uses rankings to combine multiple tide models into a single
309
368
  optimised ensemble model (by default, by taking the mean of the
310
369
  top 3 ranked models)
311
- 4. Returns a DataFrame with the combined ensemble model predictions
370
+ 4. Returns a new DataFrame with the combined ensemble model predictions
312
371
 
313
372
  Parameters
314
373
  ----------
315
- x : array-like
316
- Array of x-coordinates where the ensemble model predictions are
317
- required.
318
- y : array-like
319
- Array of y-coordinates where the ensemble model predictions are
320
- required.
321
- crs : string
322
- Input coordinate reference system for x and y coordinates. Used
323
- to ensure that interpolations are performed in the correct CRS.
324
374
  tide_df : pandas.DataFrame
325
- DataFrame containing tide model predictions with columns
375
+ DataFrame produced by `eo_tides.model.model_tides`, containing
376
+ tide model predictions with columns:
326
377
  `["time", "x", "y", "tide_height", "tide_model"]`.
378
+ crs : string
379
+ Coordinate reference system for the "x" and "y" coordinates in
380
+ `tide_df`. Used to ensure that interpolations are performed
381
+ in the correct CRS.
327
382
  ensemble_models : list
328
383
  A list of models to include in the ensemble modelling process.
329
384
  All values must exist as columns with the prefix "rank_" in
@@ -342,7 +397,7 @@ def _ensemble_model(
342
397
  ranking_points : str, optional
343
398
  Path to the GeoJSON file containing model ranking points. This
344
399
  dataset should include columns containing rankings for each tide
345
- model, named with the prefix "rank_". e.g. "rank_FES2014".
400
+ model, named with the prefix "rank_". e.g. "rank_EOT20".
346
401
  Low values should represent high rankings (e.g. 1 = top ranked).
347
402
  ranking_valid_perc : float, optional
348
403
  Minimum percentage of valid data required to include a model
@@ -367,6 +422,10 @@ def _ensemble_model(
367
422
  the provided dictionary keys).
368
423
 
369
424
  """
425
+ # Extract x and y coords from dataframe
426
+ x = tide_df.index.get_level_values(level="x")
427
+ y = tide_df.index.get_level_values(level="y")
428
+
370
429
  # Load model ranks points and reproject to same CRS as x and y
371
430
  model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
372
431
  model_ranks_gdf = (
@@ -449,36 +508,36 @@ def _ensemble_model(
449
508
 
450
509
 
451
510
  def model_tides(
452
- x,
453
- y,
454
- time,
455
- model="FES2014",
456
- directory=None,
457
- crs="EPSG:4326",
458
- crop=True,
459
- method="spline",
460
- extrapolate=True,
461
- cutoff=None,
462
- mode="one-to-many",
463
- parallel=True,
464
- parallel_splits=5,
465
- output_units="m",
466
- output_format="long",
467
- ensemble_models=None,
511
+ x: float | list[float] | xr.DataArray,
512
+ y: float | list[float] | xr.DataArray,
513
+ time: np.ndarray | pd.DatetimeIndex,
514
+ model: str | list[str] = "EOT20",
515
+ directory: str | os.PathLike | None = None,
516
+ crs: str = "EPSG:4326",
517
+ crop: bool = True,
518
+ method: str = "spline",
519
+ extrapolate: bool = True,
520
+ cutoff: float | None = None,
521
+ mode: str = "one-to-many",
522
+ parallel: bool = True,
523
+ parallel_splits: int = 5,
524
+ output_units: str = "m",
525
+ output_format: str = "long",
526
+ ensemble_models: list[str] | None = None,
468
527
  **ensemble_kwargs,
469
- ):
528
+ ) -> pd.DataFrame:
470
529
  """
471
- Compute tide heights from multiple tide models and for
472
- multiple coordinates and/or timesteps.
530
+ Model tide heights at multiple coordinates and/or timesteps
531
+ using one or more ocean tide models.
473
532
 
474
533
  This function is parallelised to improve performance, and
475
534
  supports all tidal models supported by `pyTMD`, including:
476
535
 
477
- - Empirical Ocean Tide model (`EOT20`)
478
- - Finite Element Solution tide models (`FES2022`, `FES2014`, `FES2012`)
479
- - TOPEX/POSEIDON global tide models (`TPXO10`, `TPXO9`, `TPXO8`)
480
- - Global Ocean Tide models (`GOT5.6`, `GOT5.5`, `GOT4.10`, `GOT4.8`, `GOT4.7`)
481
- - Hamburg direct data Assimilation Methods for Tides models (`HAMTIDE11`)
536
+ - Empirical Ocean Tide model (EOT20)
537
+ - Finite Element Solution tide models (FES2022, FES2014, FES2012)
538
+ - TOPEX/POSEIDON global tide models (TPXO10, TPXO9, TPXO8)
539
+ - Global Ocean Tide models (GOT5.6, GOT5.5, GOT4.10, GOT4.8, GOT4.7)
540
+ - Hamburg direct data Assimilation Methods for Tides models (HAMTIDE11)
482
541
 
483
542
  This function requires access to tide model data files.
484
543
  These should be placed in a folder with subfolders matching
@@ -487,52 +546,39 @@ def model_tides(
487
546
  <https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories>
488
547
 
489
548
  This function is a modification of the `pyTMD` package's
490
- `compute_tide_corrections` function. For more info:
491
- <https://pytmd.readthedocs.io/en/stable/user_guide/compute_tide_corrections.html>
549
+ `compute_tidal_elevations` function. For more info:
550
+ <https://pytmd.readthedocs.io/en/latest/api_reference/compute_tidal_elevations.html>
492
551
 
493
552
  Parameters
494
553
  ----------
495
- x, y : float or list of floats
554
+ x, y : float or list of float
496
555
  One or more x and y coordinates used to define
497
556
  the location at which to model tides. By default these
498
557
  coordinates should be lat/lon; use "crs" if they
499
558
  are in a custom coordinate reference system.
500
- time : A datetime array or pandas.DatetimeIndex
559
+ time : Numpy datetime array or pandas.DatetimeIndex
501
560
  An array containing `datetime64[ns]` values or a
502
561
  `pandas.DatetimeIndex` providing the times at which to
503
562
  model tides in UTC time.
504
- model : string, optional
505
- The tide model used to model tides. Options include:
506
-
507
- - "FES2014" (pre-configured on DEA Sandbox)
508
- - "FES2022"
509
- - "TPXO9-atlas-v5"
510
- - "TPXO8-atlas"
511
- - "EOT20"
512
- - "HAMTIDE11"
513
- - "GOT4.10"
514
- - "ensemble" (advanced ensemble tide model functionality;
515
- combining multiple models based on external model rankings)
516
- directory : string, optional
563
+ model : str or list of str, optional
564
+ The tide model (or models) to use to model tides.
565
+ Defaults to "EOT20"; for a full list of available/supported
566
+ models, run `eo_tides.model.list_models`.
567
+ directory : str, optional
517
568
  The directory containing tide model data files. If no path is
518
569
  provided, this will default to the environment variable
519
- `EO_TIDES_TIDE_MODELS` if set, otherwise "/var/share/tide_models".
570
+ `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
520
571
  Tide modelling files should be stored in sub-folders for each
521
- model that match the structure provided by `pyTMD`.
522
-
523
- For example:
524
-
525
- - `{directory}/fes2014/ocean_tide/`
526
- - `{directory}/tpxo8_atlas/`
527
- - `{directory}/TPXO9_atlas_v5/`
572
+ model that match the structure required by `pyTMD`
573
+ (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
528
574
  crs : str, optional
529
575
  Input coordinate reference system for x and y coordinates.
530
576
  Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
531
- crop : bool optional
577
+ crop : bool, optional
532
578
  Whether to crop tide model constituent files on-the-fly to
533
579
  improve performance. Cropping will be performed based on a
534
580
  1 degree buffer around all input points. Defaults to True.
535
- method : string, optional
581
+ method : str, optional
536
582
  Method used to interpolate tidal constituents
537
583
  from model files. Options include:
538
584
 
@@ -542,11 +588,11 @@ def model_tides(
542
588
  extrapolate : bool, optional
543
589
  Whether to extrapolate tides for x and y coordinates outside of
544
590
  the valid tide modelling domain using nearest-neighbor.
545
- cutoff : int or float, optional
591
+ cutoff : float, optional
546
592
  Extrapolation cutoff in kilometers. The default is None, which
547
593
  will extrapolate for all points regardless of distance from the
548
594
  valid tide modelling domain.
549
- mode : string, optional
595
+ mode : str, optional
550
596
  The analysis mode to use for tide modelling. Supports two options:
551
597
 
552
598
  - "one-to-many": Models tides for every timestep in "time" at
@@ -558,7 +604,7 @@ def model_tides(
558
604
  set of x and y coordinates. In this mode, the number of x and
559
605
  y points must equal the number of timesteps provided in "time".
560
606
 
561
- parallel : boolean, optional
607
+ parallel : bool, optional
562
608
  Whether to parallelise tide modelling using `concurrent.futures`.
563
609
  If multiple tide models are requested, these will be run in
564
610
  parallel. Optionally, tide modelling can also be run in parallel
@@ -582,7 +628,7 @@ def model_tides(
582
628
  results stacked vertically along "tide_model" and "tide_height"
583
629
  columns), or wide format (with a column for each tide model).
584
630
  Defaults to "long".
585
- ensemble_models : list, optional
631
+ ensemble_models : list of str, optional
586
632
  An optional list of models used to generate the ensemble tide
587
633
  model if "ensemble" tide modelling is requested. Defaults to
588
634
  ["FES2014", "TPXO9-atlas-v5", "EOT20", "HAMTIDE11", "GOT4.10",
@@ -602,25 +648,8 @@ def model_tides(
602
648
  A dataframe containing modelled tide heights.
603
649
 
604
650
  """
605
- # Set tide modelling files directory. If no custom path is provided,
606
- # first try global environmental var, then "/var/share/tide_models"
607
- if directory is None:
608
- if "EO_TIDES_TIDE_MODELS" in os.environ:
609
- directory = os.environ["EO_TIDES_TIDE_MODELS"]
610
- else:
611
- directory = "/var/share/tide_models"
612
-
613
- # Verify path exists
614
- directory = pathlib.Path(directory).expanduser()
615
- if not directory.exists():
616
- raise FileNotFoundError("Invalid tide directory")
617
-
618
- # If time passed as a single Timestamp, convert to datetime64
619
- if isinstance(time, pd.Timestamp):
620
- time = time.to_datetime64()
621
-
622
651
  # Turn inputs into arrays for consistent handling
623
- models_requested = np.atleast_1d(model)
652
+ models_requested = list(np.atleast_1d(model))
624
653
  x = np.atleast_1d(x)
625
654
  y = np.atleast_1d(y)
626
655
  time = np.atleast_1d(time)
@@ -644,32 +673,44 @@ def model_tides(
644
673
  "you intended to model multiple timesteps at each point."
645
674
  )
646
675
 
647
- # Verify that all provided models are supported
648
- valid_models = [
649
- # Standard built-in pyTMD models
650
- "EOT20",
651
- "FES2014",
652
- "FES2022",
653
- "GOT4.10",
654
- "HAMTIDE11",
655
- "TPXO8-atlas", # binary version, not suitable for clipping
656
- "TPXO9-atlas-v5",
657
- # Non-standard models, defined internally
658
- "FES2012",
659
- "FES2014_extrapolated",
660
- "FES2022_extrapolated",
661
- "GOT5.6",
662
- "GOT5.6_extrapolated",
663
- "TPXO8-atlas-v1", # netCDF version
664
- # Advanced ensemble model functionality
665
- "ensemble",
666
- ]
676
+ # If time passed as a single Timestamp, convert to datetime64
677
+ if isinstance(time, pd.Timestamp):
678
+ time = time.to_datetime64()
679
+
680
+ # Set tide modelling files directory. If no custom path is
681
+ # provided, try global environment variable.
682
+ directory = _set_directory(directory)
683
+
684
+ # Get full list of supported models from pyTMD database;
685
+ # add ensemble option to list of models
686
+ available_models, valid_models = list_models(
687
+ directory, show_available=False, show_supported=False, raise_error=True
688
+ )
689
+ # TODO: This is hacky, find a better way. Perhaps a kwarg that
690
+ # turns ensemble functionality on, and checks that supplied
691
+ # models match models expected for ensemble?
692
+ available_models = available_models + ["ensemble"]
693
+ valid_models = valid_models + ["ensemble"]
694
+
695
+ # Error if any models are not supported
667
696
  if not all(m in valid_models for m in models_requested):
668
- raise ValueError(
669
- f"One or more of the models requested {models_requested} is "
670
- f"not valid. The following models are currently supported: "
671
- f"{valid_models}",
697
+ error_text = (
698
+ f"One or more of the requested models are not valid:\n"
699
+ f"{models_requested}\n\n"
700
+ "The following models are supported:\n"
701
+ f"{valid_models}"
672
702
  )
703
+ raise ValueError(error_text)
704
+
705
+ # Error if any models are not available in `directory`
706
+ if not all(m in available_models for m in models_requested):
707
+ error_text = (
708
+ f"One or more of the requested models are valid, but not available in `{directory}`:\n"
709
+ f"{models_requested}\n\n"
710
+ f"The following models are available in `{directory}`:\n"
711
+ f"{available_models}"
712
+ )
713
+ raise ValueError(error_text)
673
714
 
674
715
  # If ensemble modelling is requested, use a custom list of models
675
716
  # for subsequent processing
@@ -763,11 +804,11 @@ def model_tides(
763
804
 
764
805
  # Optionally compute ensemble model and add to dataframe
765
806
  if "ensemble" in models_requested:
766
- ensemble_df = _ensemble_model(x, y, crs, tide_df, models_to_process, **ensemble_kwargs)
807
+ ensemble_df = _ensemble_model(tide_df, crs, models_to_process, **ensemble_kwargs)
767
808
 
768
809
  # Update requested models with any custom ensemble models, then
769
810
  # filter the dataframe to keep only models originally requested
770
- models_requested = np.union1d(models_requested, ensemble_df.tide_model.unique())
811
+ models_requested = list(np.union1d(models_requested, ensemble_df.tide_model.unique()))
771
812
  tide_df = pd.concat([tide_df, ensemble_df]).query("tide_model in @models_requested")
772
813
 
773
814
  # Optionally convert to a wide format dataframe with a tide model in
@@ -784,362 +825,3 @@ def model_tides(
784
825
  tide_df = tide_df.reindex(output_indices)
785
826
 
786
827
  return tide_df
787
-
788
-
789
- def _pixel_tides_resample(
790
- tides_lowres,
791
- ds,
792
- resample_method="bilinear",
793
- dask_chunks="auto",
794
- dask_compute=True,
795
- ):
796
- """Resamples low resolution tides modelled by `pixel_tides` into the
797
- geobox (e.g. spatial resolution and extent) of the original higher
798
- resolution satellite dataset.
799
-
800
- Parameters
801
- ----------
802
- tides_lowres : xarray.DataArray
803
- The low resolution tide modelling data array to be resampled.
804
- ds : xarray.Dataset
805
- The dataset whose geobox will be used as the template for the
806
- resampling operation. This is typically the same satellite
807
- dataset originally passed to `pixel_tides`.
808
- resample_method : string, optional
809
- The resampling method to use. Defaults to "bilinear"; valid
810
- options include "nearest", "cubic", "min", "max", "average" etc.
811
- dask_chunks : str or tuple, optional
812
- Can be used to configure custom Dask chunking for the final
813
- resampling step. The default of "auto" will automatically set
814
- x/y chunks to match those in `ds` if they exist, otherwise will
815
- set x/y chunks that cover the entire extent of the dataset.
816
- For custom chunks, provide a tuple in the form `(y, x)`, e.g.
817
- `(2048, 2048)`.
818
- dask_compute : bool, optional
819
- Whether to compute results of the resampling step using Dask.
820
- If False, this will return `tides_highres` as a Dask array.
821
-
822
- Returns
823
- -------
824
- tides_highres, tides_lowres : tuple of xr.DataArrays
825
- In addition to `tides_lowres` (see above), a high resolution
826
- array of tide heights will be generated matching the
827
- exact spatial resolution and extent of `ds`.
828
-
829
- """
830
- # Determine spatial dimensions
831
- y_dim, x_dim = ds.odc.spatial_dims
832
-
833
- # Convert array to Dask, using no chunking along y and x dims,
834
- # and a single chunk for each timestep/quantile and tide model
835
- tides_lowres_dask = tides_lowres.chunk({d: None if d in [y_dim, x_dim] else 1 for d in tides_lowres.dims})
836
-
837
- # Automatically set Dask chunks for reprojection if set to "auto".
838
- # This will either use x/y chunks if they exist in `ds`, else
839
- # will cover the entire x and y dims) so we don't end up with
840
- # hundreds of tiny x and y chunks due to the small size of
841
- # `tides_lowres` (possible odc.geo bug?)
842
- if dask_chunks == "auto":
843
- if ds.chunks is not None:
844
- if (y_dim in ds.chunks) & (x_dim in ds.chunks):
845
- dask_chunks = (ds.chunks[y_dim], ds.chunks[x_dim])
846
- else:
847
- dask_chunks = ds.odc.geobox.shape
848
- else:
849
- dask_chunks = ds.odc.geobox.shape
850
-
851
- # Reproject into the GeoBox of `ds` using odc.geo and Dask
852
- tides_highres = tides_lowres_dask.odc.reproject(
853
- how=ds.odc.geobox,
854
- chunks=dask_chunks,
855
- resampling=resample_method,
856
- ).rename("tide_height")
857
-
858
- # Optionally process and load into memory with Dask
859
- if dask_compute:
860
- tides_highres.load()
861
-
862
- return tides_highres, tides_lowres
863
-
864
-
865
- def pixel_tides(
866
- ds,
867
- times=None,
868
- resample=True,
869
- calculate_quantiles=None,
870
- resolution=None,
871
- buffer=None,
872
- resample_method="bilinear",
873
- model="FES2014",
874
- dask_chunks="auto",
875
- dask_compute=True,
876
- **model_tides_kwargs,
877
- ):
878
- """Obtain tide heights for each pixel in a dataset by modelling
879
- tides into a low-resolution grid surrounding the dataset,
880
- then (optionally) spatially resample this low-res data back
881
- into the original higher resolution dataset extent and resolution.
882
-
883
- Parameters
884
- ----------
885
- ds : xarray.Dataset
886
- A dataset whose geobox (`ds.odc.geobox`) will be used to define
887
- the spatial extent of the low resolution tide modelling grid.
888
- times : pandas.DatetimeIndex or list of pandas.Timestamps, optional
889
- By default, the function will model tides using the times
890
- contained in the `time` dimension of `ds`. Alternatively, this
891
- param can be used to model tides for a custom set of times
892
- instead. For example:
893
- `times=pd.date_range(start="2000", end="2001", freq="5h")`
894
- resample : bool, optional
895
- Whether to resample low resolution tides back into `ds`'s original
896
- higher resolution grid. Set this to `False` if you do not want
897
- low resolution tides to be re-projected back to higher resolution.
898
- calculate_quantiles : list or np.array, optional
899
- Rather than returning all individual tides, low-resolution tides
900
- can be first aggregated using a quantile calculation by passing in
901
- a list or array of quantiles to compute. For example, this could
902
- be used to calculate the min/max tide across all times:
903
- `calculate_quantiles=[0.0, 1.0]`.
904
- resolution : int, optional
905
- The desired resolution of the low-resolution grid used for tide
906
- modelling. The default None will create a 5000 m resolution grid
907
- if `ds` has a projected CRS (i.e. metre units), or a 0.05 degree
908
- resolution grid if `ds` has a geographic CRS (e.g. degree units).
909
- Note: higher resolutions do not necessarily provide better
910
- tide modelling performance, as results will be limited by the
911
- resolution of the underlying global tide model (e.g. 1/16th
912
- degree / ~5 km resolution grid for FES2014).
913
- buffer : int, optional
914
- The amount by which to buffer the higher resolution grid extent
915
- when creating the new low resolution grid. This buffering is
916
- important as it ensures that ensure pixel-based tides are seamless
917
- across dataset boundaries. This buffer will eventually be clipped
918
- away when the low-resolution data is re-projected back to the
919
- resolution and extent of the higher resolution dataset. To
920
- ensure that at least two pixels occur outside of the dataset
921
- bounds, the default None applies a 12000 m buffer if `ds` has a
922
- projected CRS (i.e. metre units), or a 0.12 degree buffer if
923
- `ds` has a geographic CRS (e.g. degree units).
924
- resample_method : string, optional
925
- If resampling is requested (see `resample` above), use this
926
- resampling method when converting from low resolution to high
927
- resolution pixels. Defaults to "bilinear"; valid options include
928
- "nearest", "cubic", "min", "max", "average" etc.
929
- model : string or list of strings
930
- The tide model or a list of models used to model tides, as
931
- supported by the `pyTMD` Python package. Options include:
932
- - "FES2014" (default; pre-configured on DEA Sandbox)
933
- - "FES2022"
934
- - "TPXO8-atlas"
935
- - "TPXO9-atlas-v5"
936
- - "EOT20"
937
- - "HAMTIDE11"
938
- - "GOT4.10"
939
- dask_chunks : str or tuple, optional
940
- Can be used to configure custom Dask chunking for the final
941
- resampling step. The default of "auto" will automatically set
942
- x/y chunks to match those in `ds` if they exist, otherwise will
943
- set x/y chunks that cover the entire extent of the dataset.
944
- For custom chunks, provide a tuple in the form `(y, x)`, e.g.
945
- `(2048, 2048)`.
946
- dask_compute : bool, optional
947
- Whether to compute results of the resampling step using Dask.
948
- If False, this will return `tides_highres` as a Dask array.
949
- **model_tides_kwargs :
950
- Optional parameters passed to the `dea_tools.coastal.model_tides`
951
- function. Important parameters include "directory" (used to
952
- specify the location of input tide modelling files) and "cutoff"
953
- (used to extrapolate modelled tides away from the coast; if not
954
- specified here, cutoff defaults to `np.inf`).
955
-
956
- Returns
957
- -------
958
- If `resample` is False:
959
-
960
- tides_lowres : xr.DataArray
961
- A low resolution data array giving either tide heights every
962
- timestep in `ds` (if `times` is None), tide heights at every
963
- time in `times` (if `times` is not None), or tide height quantiles
964
- for every quantile provided by `calculate_quantiles`.
965
-
966
- If `resample` is True:
967
-
968
- tides_highres, tides_lowres : tuple of xr.DataArrays
969
- In addition to `tides_lowres` (see above), a high resolution
970
- array of tide heights will be generated that matches the
971
- exact spatial resolution and extent of `ds`. This will contain
972
- either tide heights every timestep in `ds` (if `times` is None),
973
- tide heights at every time in `times` (if `times` is not None),
974
- or tide height quantiles for every quantile provided by
975
- `calculate_quantiles`.
976
-
977
- """
978
- from odc.geo.geobox import GeoBox
979
-
980
- # First test if no time dimension and nothing passed to `times`
981
- if ("time" not in ds.dims) & (times is None):
982
- raise ValueError(
983
- "`ds` does not contain a 'time' dimension. Times are required "
984
- "for modelling tides: please pass in a set of custom tides "
985
- "using the `times` parameter. For example: "
986
- "`times=pd.date_range(start='2000', end='2001', freq='5h')`",
987
- )
988
-
989
- # If custom times are provided, convert them to a consistent
990
- # pandas.DatetimeIndex format
991
- if times is not None:
992
- if isinstance(times, list):
993
- time_coords = pd.DatetimeIndex(times)
994
- elif isinstance(times, pd.Timestamp):
995
- time_coords = pd.DatetimeIndex([times])
996
- else:
997
- time_coords = times
998
-
999
- # Otherwise, use times from `ds` directly
1000
- else:
1001
- time_coords = ds.coords["time"]
1002
-
1003
- # Set defaults passed to `model_tides`
1004
- model_tides_kwargs.setdefault("cutoff", np.inf)
1005
-
1006
- # Standardise model into a list for easy handling
1007
- model = [model] if isinstance(model, str) else model
1008
-
1009
- # Test if no time dimension and nothing passed to `times`
1010
- if ("time" not in ds.dims) & (times is None):
1011
- raise ValueError(
1012
- "`ds` does not contain a 'time' dimension. Times are required "
1013
- "for modelling tides: please pass in a set of custom tides "
1014
- "using the `times` parameter. For example: "
1015
- "`times=pd.date_range(start='2000', end='2001', freq='5h')`",
1016
- )
1017
-
1018
- # If custom times are provided, convert them to a consistent
1019
- # pandas.DatetimeIndex format
1020
- if times is not None:
1021
- if isinstance(times, list):
1022
- time_coords = pd.DatetimeIndex(times)
1023
- elif isinstance(times, pd.Timestamp):
1024
- time_coords = pd.DatetimeIndex([times])
1025
- else:
1026
- time_coords = times
1027
-
1028
- # Otherwise, use times from `ds` directly
1029
- else:
1030
- time_coords = ds.coords["time"]
1031
-
1032
- # Determine spatial dimensions
1033
- y_dim, x_dim = ds.odc.spatial_dims
1034
-
1035
- # Determine resolution and buffer, using different defaults for
1036
- # geographic (i.e. degrees) and projected (i.e. metres) CRSs:
1037
- crs_units = ds.odc.geobox.crs.units[0][0:6]
1038
- if ds.odc.geobox.crs.geographic:
1039
- if resolution is None:
1040
- resolution = 0.05
1041
- elif resolution > 360:
1042
- raise ValueError(
1043
- f"A resolution of greater than 360 was "
1044
- f"provided, but `ds` has a geographic CRS "
1045
- f"in {crs_units} units. Did you accidently "
1046
- f"provide a resolution in projected "
1047
- f"(i.e. metre) units?",
1048
- )
1049
- if buffer is None:
1050
- buffer = 0.12
1051
- else:
1052
- if resolution is None:
1053
- resolution = 5000
1054
- elif resolution < 1:
1055
- raise ValueError(
1056
- f"A resolution of less than 1 was provided, "
1057
- f"but `ds` has a projected CRS in "
1058
- f"{crs_units} units. Did you accidently "
1059
- f"provide a resolution in geographic "
1060
- f"(degree) units?",
1061
- )
1062
- if buffer is None:
1063
- buffer = 12000
1064
-
1065
- # Raise error if resolution is less than dataset resolution
1066
- dataset_res = ds.odc.geobox.resolution.x
1067
- if resolution < dataset_res:
1068
- raise ValueError(
1069
- f"The resolution of the low-resolution tide "
1070
- f"modelling grid ({resolution:.2f}) is less "
1071
- f"than `ds`'s pixel resolution ({dataset_res:.2f}). "
1072
- f"This can cause extremely slow tide modelling "
1073
- f"performance. Please select provide a resolution "
1074
- f"greater than {dataset_res:.2f} using "
1075
- f"`pixel_tides`'s 'resolution' parameter.",
1076
- )
1077
-
1078
- # Create a new reduced resolution tide modelling grid after
1079
- # first buffering the grid
1080
- print(f"Creating reduced resolution {resolution} x {resolution} {crs_units} tide modelling array")
1081
- buffered_geobox = ds.odc.geobox.buffered(buffer)
1082
- rescaled_geobox = GeoBox.from_bbox(bbox=buffered_geobox.boundingbox, resolution=resolution)
1083
- rescaled_ds = odc.geo.xr.xr_zeros(rescaled_geobox)
1084
-
1085
- # Flatten grid to 1D, then add time dimension
1086
- flattened_ds = rescaled_ds.stack(z=(x_dim, y_dim))
1087
- flattened_ds = flattened_ds.expand_dims(dim={"time": time_coords.values})
1088
-
1089
- # Model tides in parallel, returning a pandas.DataFrame
1090
- tide_df = model_tides(
1091
- x=flattened_ds[x_dim],
1092
- y=flattened_ds[y_dim],
1093
- time=flattened_ds.time,
1094
- crs=f"EPSG:{ds.odc.geobox.crs.epsg}",
1095
- model=model,
1096
- **model_tides_kwargs,
1097
- )
1098
-
1099
- # Convert our pandas.DataFrame tide modelling outputs to xarray
1100
- tides_lowres = (
1101
- # Rename x and y dataframe indexes to match x and y xarray dims
1102
- tide_df.rename_axis(["time", x_dim, y_dim])
1103
- # Add tide model column to dataframe indexes so we can convert
1104
- # our dataframe to a multidimensional xarray
1105
- .set_index("tide_model", append=True)
1106
- # Convert to xarray and select our tide modelling xr.DataArray
1107
- .to_xarray()
1108
- .tide_height
1109
- # Re-index and transpose into our input coordinates and dim order
1110
- .reindex_like(rescaled_ds)
1111
- .transpose("tide_model", "time", y_dim, x_dim)
1112
- )
1113
-
1114
- # Optionally calculate and return quantiles rather than raw data.
1115
- # Set dtype to dtype of the input data as quantile always returns
1116
- # float64 (memory intensive)
1117
- if calculate_quantiles is not None:
1118
- print("Computing tide quantiles")
1119
- tides_lowres = tides_lowres.quantile(q=calculate_quantiles, dim="time").astype(tides_lowres.dtype)
1120
-
1121
- # If only one tidal model exists, squeeze out "tide_model" dim
1122
- if len(tides_lowres.tide_model) == 1:
1123
- tides_lowres = tides_lowres.squeeze("tide_model")
1124
-
1125
- # Ensure CRS is present before we apply any resampling
1126
- tides_lowres = tides_lowres.odc.assign_crs(ds.odc.geobox.crs)
1127
-
1128
- # Reproject into original high resolution grid
1129
- if resample:
1130
- print("Reprojecting tides into original array")
1131
- tides_highres, tides_lowres = _pixel_tides_resample(
1132
- tides_lowres,
1133
- ds,
1134
- resample_method,
1135
- dask_chunks,
1136
- dask_compute,
1137
- )
1138
- return tides_highres, tides_lowres
1139
-
1140
- print("Returning low resolution tide array")
1141
- return tides_lowres
1142
-
1143
-
1144
- if __name__ == "__main__": # pragma: no cover
1145
- pass