eo-tides 0.7.6.dev1__py3-none-any.whl → 0.7.6.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/model.py CHANGED
@@ -1,3 +1,9 @@
1
+ """Core tide modelling functionality.
2
+
3
+ This module provides tools for modelling ocean tide heights and phases
4
+ for any location or time period using one or more global tide models.
5
+ """
6
+
1
7
  # Used to postpone evaluation of type annotations
2
8
  from __future__ import annotations
3
9
 
@@ -23,7 +29,13 @@ import pyTMD
23
29
  import timescale.time
24
30
  from tqdm import tqdm
25
31
 
26
- from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw
32
+ from .utils import (
33
+ DatetimeLike,
34
+ _set_directory,
35
+ _standardise_models,
36
+ _standardise_time,
37
+ idw,
38
+ )
27
39
 
28
40
 
29
41
  def _parallel_splits(
@@ -32,12 +44,13 @@ def _parallel_splits(
32
44
  parallel_max: int | None = None,
33
45
  min_points_per_split: int = 1000,
34
46
  ) -> int:
35
- """
36
- Calculates the optimal number of parallel splits for data
37
- processing based on system resources and processing constraints.
47
+ """Calculate the optimal number of parallel splits for data processing.
48
+
49
+ Optimal parallelisation is estimated based on system resources
50
+ and processing constraints.
38
51
 
39
- Parameters:
40
- -----------
52
+ Parameters
53
+ ----------
41
54
  total_points : int
42
55
  Total number of data points to process
43
56
  model_count : int
@@ -46,6 +59,7 @@ def _parallel_splits(
46
59
  Maximum number of parallel processes to use. If None, uses CPU core count
47
60
  min_points_per_split : int, default=1000
48
61
  Minimum number of points that should be processed in each split
62
+
49
63
  """
50
64
  # Get available CPUs. First see if `CPU_GUARANTEE` exists in
51
65
  # environment (if running in JupyterHub); if not use psutil
@@ -81,13 +95,20 @@ def _model_tides(
81
95
  crop,
82
96
  crop_buffer,
83
97
  append_node,
98
+ constituents,
99
+ extra_databases,
84
100
  ):
85
- """Worker function applied in parallel by `model_tides`. Handles the
86
- extraction of tide modelling constituents and tide modelling using
87
- `pyTMD`.
101
+ """Worker function applied in parallel by `model_tides`.
102
+
103
+ Handles the extraction of tide modelling constituents and tide
104
+ modelling using `pyTMD`.
88
105
  """
89
- # Obtain model details
90
- pytmd_model = pyTMD.io.model(directory).elevation(model)
106
+ # Load models from pyTMD database
107
+ extra_databases = [] if extra_databases is None else extra_databases
108
+ pytmd_model = pyTMD.io.model(
109
+ directory=directory,
110
+ extra_databases=extra_databases,
111
+ ).elevation(model)
91
112
 
92
113
  # Reproject x, y to latitude/longitude
93
114
  transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
@@ -108,6 +129,8 @@ def _model_tides(
108
129
  extrapolate=extrapolate,
109
130
  cutoff=cutoff,
110
131
  append_node=append_node,
132
+ constituents=constituents,
133
+ extra_databases=extra_databases,
111
134
  )
112
135
 
113
136
  # TODO: Return constituents
@@ -124,7 +147,8 @@ def _model_tides(
124
147
  "affect your results but may lead to a minor slowdown. "
125
148
  "This can occur when analysing clipped model files restricted "
126
149
  "to the western hemisphere. To suppress this warning, manually "
127
- "set `crop=False`."
150
+ "set `crop=False`.",
151
+ stacklevel=2,
128
152
  )
129
153
 
130
154
  # Read tidal constants and interpolate to grid points
@@ -138,6 +162,8 @@ def _model_tides(
138
162
  extrapolate=extrapolate,
139
163
  cutoff=cutoff,
140
164
  append_node=append_node,
165
+ constituents=constituents,
166
+ extra_databases=extra_databases,
141
167
  )
142
168
 
143
169
  # Otherwise, raise error if cropping is set to True
@@ -208,13 +234,15 @@ def _model_tides(
208
234
 
209
235
  # Convert data to pandas.DataFrame, and set index to our input
210
236
  # time/x/y values
211
- tide_df = pd.DataFrame({
212
- "time": np.tile(time, points_repeat),
213
- "x": np.repeat(x, time_repeat),
214
- "y": np.repeat(y, time_repeat),
215
- "tide_model": model,
216
- "tide_height": tide,
217
- }).set_index(["time", "x", "y"])
237
+ tide_df = pd.DataFrame(
238
+ {
239
+ "time": np.tile(time, points_repeat),
240
+ "x": np.repeat(x, time_repeat),
241
+ "y": np.repeat(y, time_repeat),
242
+ "tide_model": model,
243
+ "tide_height": tide,
244
+ },
245
+ ).set_index(["time", "x", "y"])
218
246
 
219
247
  # Optionally convert outputs to integer units (can save memory)
220
248
  if output_units == "m":
@@ -237,10 +265,11 @@ def ensemble_tides(
237
265
  ranking_valid_perc=0.02,
238
266
  **idw_kwargs,
239
267
  ):
240
- """Combine multiple tide models into a single locally optimised
241
- ensemble tide model using external model ranking data (e.g.
242
- satellite altimetry or NDWI-tide correlations along the coastline)
243
- to inform the selection of the best local models.
268
+ """Combine multiple tide models into a single locally optimised ensemble tide model.
269
+
270
+ Uses external model ranking data (e.g. satellite altimetry or
271
+ NDWI-tide correlations along the coastline) to inform the
272
+ selection of the best local models.
244
273
 
245
274
  This function performs the following steps:
246
275
 
@@ -311,11 +340,12 @@ def ensemble_tides(
311
340
  """
312
341
  # Raise error if `tide_df` is provided in wide format
313
342
  if "tide_model" not in tide_df:
314
- raise Exception(
343
+ err_msg = (
315
344
  "`tide_df` does not contain the expected 'tide_model' and "
316
345
  "'tide_height' columns. Ensure that tides were modelled in "
317
- "long format (i.e. `output_format='long'` in `model_tides`)."
346
+ "long format (i.e. `output_format='long'` in `model_tides`).",
318
347
  )
348
+ raise Exception(err_msg)
319
349
 
320
350
  # Extract x and y coords from dataframe
321
351
  x = tide_df.index.get_level_values(level="x")
@@ -331,7 +361,8 @@ def ensemble_tides(
331
361
  gpd.read_file(ranking_points, engine="pyogrio")
332
362
  .to_crs(crs)
333
363
  .query(f"valid_perc > {ranking_valid_perc}")
334
- .dropna(how="all")[model_ranking_cols + ["geometry"]]
364
+ .dropna(how="all")
365
+ .filter(model_ranking_cols + ["geometry"]) # noqa: RUF005
335
366
  )
336
367
  except KeyError:
337
368
  error_msg = f"""
@@ -433,15 +464,15 @@ def model_tides(
433
464
  crop: bool | str = "auto",
434
465
  crop_buffer: float | None = 5,
435
466
  append_node: bool = False,
467
+ constituents: list[str] | None = None,
436
468
  parallel: bool = True,
437
469
  parallel_splits: int | str = "auto",
438
470
  parallel_max: int | None = None,
439
471
  ensemble_models: list[str] | None = None,
472
+ extra_databases: str | os.PathLike | list | None = None,
440
473
  **ensemble_kwargs,
441
474
  ) -> pd.DataFrame:
442
- """
443
- Model tide heights at multiple coordinates and/or timesteps
444
- using using one or more ocean tide models.
475
+ """Model tide heights at multiple coordinates or timesteps using multiple ocean tide models.
445
476
 
446
477
  This function is parallelised to improve performance, and
447
478
  supports all tidal models supported by `pyTMD`, including:
@@ -454,33 +485,30 @@ def model_tides(
454
485
  - Technical University of Denmark tide models (DTU23)
455
486
 
456
487
  This function requires access to tide model data files.
457
- These should be placed in a folder with subfolders matching
458
- the structure required by `pyTMD`. For more details:
459
- <https://geoscienceaustralia.github.io/eo-tides/setup/>
460
- <https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories>
488
+ For tide model setup instructions, refer to the guide:
489
+ https://geoscienceaustralia.github.io/eo-tides/setup/
461
490
 
462
491
  This function is a modification of the `pyTMD` package's
463
492
  `pyTMD.compute.tide_elevations` function. For more info:
464
- <https://pytmd.readthedocs.io/en/latest/api_reference/compute.html#pyTMD.compute.tide_elevations>
493
+ https://pytmd.readthedocs.io/en/latest/api_reference/compute.html#pyTMD.compute.tide_elevations
494
+ https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories
465
495
 
466
496
  Parameters
467
497
  ----------
468
- x : float or list of float
469
- One or more x coordinates used to define the location at
470
- which to model tides. By default these coordinates should
471
- be in "EPSG:4326" WGS84 degrees longitude; use "crs" if they
472
- are in a custom coordinate reference system.
473
- y : float or list of float
474
- One or more y coordinates used to define the location at
475
- which to model tides. By default these coordinates should
476
- be in "EPSG:4326" WGS84 degrees latitude; use "crs" if they
477
- are in a custom coordinate reference system.
498
+ x : float or list of floats
499
+ One or more x coordinates at which to model tides. Assumes
500
+ degrees longitude (EPSG:4326) by default; use `crs` to specify
501
+ a different coordinate reference system.
502
+ y : float or list of floats
503
+ One or more y coordinates at which to model tides. Assumes
504
+ degrees latitude (EPSG:4326) by default; use `crs` to specify
505
+ a different coordinate reference system.
478
506
  time : DatetimeLike
479
- Times at which to model tide heights (in UTC). Accepts
480
- any format that can be converted by `pandas.to_datetime()`;
481
- e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
482
- datetime.datetime and strings (e.g. "2020-01-01 23:00").
483
- For example: `time=pd.date_range(start="2000", end="2001", freq="5h")`
507
+ One or more UTC times at which to model tide heights. Accepts
508
+ any time format compatible with `pandas.to_datetime()`, e.g.
509
+ datetime.datetime, pd.Timestamp, pd.DatetimeIndex, numpy.datetime64,
510
+ or date/time strings (e.g. "2020-01-01 23:00"). For example:
511
+ `time = pd.date_range(start="2000", end="2001", freq="5h")`.
484
512
  model : str or list of str, optional
485
513
  The tide model (or list of models) to use to model tides.
486
514
  Defaults to "EOT20"; specify "all" to use all models available
@@ -494,65 +522,67 @@ def model_tides(
494
522
  model that match the structure required by `pyTMD`
495
523
  (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
496
524
  crs : str, optional
497
- Input coordinate reference system for x and y coordinates.
498
- Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
525
+ Input coordinate reference system for x/y coordinates.
526
+ Defaults to "EPSG:4326" (degrees latitude, longitude).
499
527
  mode : str, optional
500
- The analysis mode to use for tide modelling. Supports two options:
501
-
502
- - "one-to-many": Models tides for every timestep in "time" at
503
- every input x and y coordinate point. This is useful if you
504
- want to model tides for a specific list of timesteps across
505
- multiple spatial points (e.g. for the same set of satellite
506
- acquisition times at various locations across your study area).
507
- - "one-to-one": Model tides using a unique timestep for each
508
- x and y coordinate pair. In this mode, the number of x and
509
- y points must equal the number of timesteps provided in "time".
528
+ Tide modelling analysis mode. Supports two options:
529
+
530
+ - `"one-to-many"`: Models tides at every x/y coordinate for
531
+ every timestep in `time`. This is useful for Earth observation
532
+ workflows where you want to model tides at many spatial points
533
+ for a common set of acquisition times (e.g. satellite overpasses).
534
+
535
+ - `"one-to-one"`: Model tides using one timestep for each x/y
536
+ coordinate. In this mode, the number of x/y coordinates must
537
+ match the number of timesteps in `time`.
510
538
  output_format : str, optional
511
539
  Whether to return the output dataframe in long format (with
512
540
  results stacked vertically along "tide_model" and "tide_height"
513
541
  columns), or wide format (with a column for each tide model).
514
542
  Defaults to "long".
515
543
  output_units : str, optional
516
- Whether to return modelled tides in floating point metre units,
517
- or integer centimetre units (i.e. scaled by 100) or integer
518
- millimetre units (i.e. scaled by 1000. Returning outputs in
519
- integer units can be useful for reducing memory usage.
520
- Defaults to "m" for metres; set to "cm" for centimetres or "mm"
521
- for millimetres.
544
+ Units for the returned tide heights. Options are:
545
+
546
+ - `"m"` (default): floating point values in metres
547
+ - `"cm"`: integer values in centimetres (x100)
548
+ - `"mm"`: integer values in millimetres (x1000)
549
+
550
+ Using integer units can help reduce memory usage.
522
551
  method : str, optional
523
- Method used to interpolate tidal constituents
524
- from model files. Defaults to "linear"; options include:
552
+ Method used to interpolate tide model constituent files.
553
+ Defaults to "linear"; options include:
525
554
 
526
- - "linear", "nearest": scipy regular grid interpolations
527
- - "spline": scipy bivariate spline interpolation
528
- - "bilinear": quick bilinear interpolation
555
+ - `"linear"`, `"nearest"`: scipy regular grid interpolations
556
+ - `"spline"`: scipy bivariate spline interpolation
557
+ - `"bilinear"`: quick bilinear interpolation
529
558
  extrapolate : bool, optional
530
- Whether to extrapolate tides into x and y coordinates outside
531
- of the valid tide modelling domain using nearest-neighbor
532
- interpolation. The default of True ensures that modelled tides
533
- will be returned even if there is no underlying tide model
534
- data for a location (e.g. inside estuaries far from the
535
- coastline). However, this can also produce unreliable results.
559
+ If True (default), extrapolate tides inland of the valid tide
560
+ model extent using nearest-neighbor interpolation. This can
561
+ ensure tides are returned everywhere, but accuracy may degrade
562
+ with distance from the valid model extent (e.g. inland or along
563
+ complex estuaries or rivers). Set `cutoff` to define the
564
+ maximum extrapolation distance.
536
565
  cutoff : float, optional
537
- Extrapolation cutoff in kilometers. The default is None, which
538
- will extrapolate for all points regardless of distance from the
539
- valid tide modelling domain.
566
+ Maximum distance in kilometres to extrapolate tides inland of the
567
+ valid tide model extent. The default of None allows extrapolation
568
+ at any (i.e. infinite) distance.
540
569
  crop : bool or str, optional
541
- Whether to crop tide model constituent files on-the-fly to
542
- improve performance. Defaults to "auto", which will attempt to
543
- apply on-the-fly cropping where possible (some clipped model
544
- files restricted entirely to the western hemisphere are not
545
- suitable for on-the-fly cropping). Set `crop_buffer` to
546
- customise the buffer distance used to crop the files.
570
+ Whether to crop tide model files on-the-fly to improve performance.
571
+ Defaults to "auto", which enables cropping when supported (some
572
+ clipped model files limited to the western hemisphere may not support
573
+ on-the-fly cropping). Use `crop_buffer` to adjust the buffer
574
+ distance used for cropping.
547
575
  crop_buffer : int or float, optional
548
- The buffer distance in degrees used to crop tide model
549
- constituent files around the modelling area. Defaults to 5,
550
- which will crop constituents using a five degree buffer on
551
- either side of the analysis extent.
552
- append_node: bool, optional
576
+ The buffer distance in degrees to crop tide model files around the
577
+ requested x/y coordinates. Defaults to 5, which will crop model
578
+ files using a five degree buffer.
579
+ append_node : bool, optional
553
580
  Apply adjustments to harmonic constituents to allow for periodic
554
581
  modulations over the 18.6-year nodal period (lunar nodal tide).
555
582
  Default is False.
583
+ constituents : list, optional
584
+ Optional list of tide constituents to use for tide prediction.
585
+ Default is None, which will use all available constituents.
556
586
  parallel : bool, optional
557
587
  Whether to parallelise tide modelling. If multiple tide models
558
588
  are requested, these will be run in parallel. If enough workers
@@ -575,9 +605,14 @@ def model_tides(
575
605
  `["EOT20", "FES2012", "FES2014_extrapolated", "FES2022_extrapolated",
576
606
  "GOT4.10", "GOT5.5_extrapolated", "GOT5.6_extrapolated",
577
607
  "TPXO10-atlas-v2-nc", "TPXO8-atlas-nc", "TPXO9-atlas-v5-nc"]`.
608
+ extra_databases : str or path or list, optional
609
+ Additional custom tide model definitions to load, provided as
610
+ dictionaries or paths to JSON database files. Use this to
611
+ enable custom tide models not included with `pyTMD`.
612
+ See: https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#model-database
578
613
  **ensemble_kwargs :
579
614
  Keyword arguments used to customise the generation of optional
580
- ensemble tide models if "ensemble" modelling are requested.
615
+ ensemble tide models if "ensemble" tide modelling is requested.
581
616
  These are passed to the underlying `_ensemble_model` function.
582
617
  Useful parameters include `ranking_points` (path to model
583
618
  rankings data), `k` (for controlling how model rankings are
@@ -596,36 +631,55 @@ def model_tides(
596
631
  time = _standardise_time(time)
597
632
 
598
633
  # Validate input arguments
599
- assert time is not None, "Times for modelling tides must be provided via `time`."
600
- assert method in ("bilinear", "spline", "linear", "nearest")
601
- assert output_units in (
602
- "m",
603
- "cm",
604
- "mm",
605
- ), "Output units must be either 'm', 'cm', or 'mm'."
606
- assert output_format in (
607
- "long",
608
- "wide",
609
- ), "Output format must be either 'long' or 'wide'."
610
- assert np.issubdtype(x.dtype, np.number), "`x` must contain only valid numeric values, and must not be None."
611
- assert np.issubdtype(y.dtype, np.number), "`y` must contain only valid numeric values, and must not be None.."
612
- assert len(x) == len(y), "x and y must be the same length."
613
- if mode == "one-to-one":
614
- assert len(x) == len(time), (
615
- "The number of supplied x and y points and times must be "
616
- "identical in 'one-to-one' mode. Use 'one-to-many' mode if "
617
- "you intended to model multiple timesteps at each point."
634
+ if time is None:
635
+ err_msg = "Times for modelling tides must be provided via `time`."
636
+ raise ValueError(err_msg)
637
+
638
+ if method not in ("bilinear", "spline", "linear", "nearest"):
639
+ err_msg = (
640
+ f"Invalid interpolation method '{method}'. Must be one of 'bilinear', 'spline', 'linear', or 'nearest'."
618
641
  )
642
+ raise ValueError(err_msg)
643
+
644
+ if output_units not in ("m", "cm", "mm"):
645
+ err_msg = "Output units must be either 'm', 'cm', or 'mm'."
646
+ raise ValueError(err_msg)
647
+
648
+ if output_format not in ("long", "wide"):
649
+ err_msg = "Output format must be either 'long' or 'wide'."
650
+ raise ValueError(err_msg)
651
+
652
+ if not np.issubdtype(x.dtype, np.number):
653
+ err_msg = "`x` must contain only valid numeric values, and must not be None."
654
+ raise TypeError(err_msg)
655
+
656
+ if not np.issubdtype(y.dtype, np.number):
657
+ err_msg = "`y` must contain only valid numeric values, and must not be None."
658
+ raise TypeError(err_msg)
659
+
660
+ if len(x) != len(y):
661
+ err_msg = "`x` and `y` must be the same length."
662
+ raise ValueError(err_msg)
663
+
664
+ if mode == "one-to-one" and len(x) != len(time):
665
+ err_msg = (
666
+ "The number of supplied `x` and `y` points and `time` values must be "
667
+ "identical in 'one-to-one' mode. Use 'one-to-many' mode if you intended "
668
+ "to model multiple timesteps at each point."
669
+ )
670
+ raise ValueError(err_msg)
619
671
 
620
672
  # Set tide modelling files directory. If no custom path is
621
673
  # provided, try global environment variable.
622
674
  directory = _set_directory(directory)
623
675
 
624
- # Standardise model list, handling "all" and "ensemble" functionality
676
+ # Standardise model list, handling "all" and "ensemble" functionality,
677
+ # and any custom tide model definitions
625
678
  models_to_process, models_requested, ensemble_models = _standardise_models(
626
679
  model=model,
627
680
  directory=directory,
628
681
  ensemble_models=ensemble_models,
682
+ extra_databases=extra_databases,
629
683
  )
630
684
 
631
685
  # Update tide modelling func to add default keyword arguments that
@@ -642,6 +696,8 @@ def model_tides(
642
696
  crop=crop,
643
697
  crop_buffer=crop_buffer,
644
698
  append_node=append_node,
699
+ constituents=constituents,
700
+ extra_databases=extra_databases,
645
701
  )
646
702
 
647
703
  # If automatic parallel splits, calculate optimal value
@@ -655,15 +711,16 @@ def model_tides(
655
711
  )
656
712
 
657
713
  # Verify that parallel splits are not larger than number of points
658
- assert isinstance(parallel_splits, int)
714
+ assert isinstance(parallel_splits, int) # noqa: S101
659
715
  if parallel_splits > len(x):
660
- raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
716
+ err_msg = f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)})."
717
+ raise ValueError(err_msg)
661
718
 
662
719
  # Parallelise if either multiple models or multiple splits requested
663
720
  if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
664
721
  with ProcessPoolExecutor(max_workers=parallel_max) as executor:
665
722
  print(
666
- f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})"
723
+ f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})",
667
724
  )
668
725
 
669
726
  # Optionally split lon/lat points into `splits_n` chunks
@@ -680,6 +737,7 @@ def model_tides(
680
737
  if mode == "one-to-many":
681
738
  model_iters, x_iters, y_iters = zip(
682
739
  *[(m, x_split[i], y_split[i]) for m in models_to_process for i in range(parallel_splits)],
740
+ strict=False,
683
741
  )
684
742
  time_iters = [time] * len(model_iters)
685
743
  elif mode == "one-to-one":
@@ -690,22 +748,29 @@ def model_tides(
690
748
  for m in models_to_process
691
749
  for i in range(parallel_splits)
692
750
  ],
751
+ strict=False,
693
752
  )
694
753
 
695
754
  # Apply func in parallel, iterating through each input param
696
755
  try:
697
756
  model_outputs = list(
698
757
  tqdm(
699
- executor.map(iter_func, model_iters, x_iters, y_iters, time_iters),
758
+ executor.map(
759
+ iter_func,
760
+ model_iters,
761
+ x_iters,
762
+ y_iters,
763
+ time_iters,
764
+ ),
700
765
  total=len(model_iters),
701
766
  ),
702
767
  )
703
768
  except BrokenProcessPool:
704
- error_msg = (
769
+ err_msg = (
705
770
  "Parallelised tide modelling failed, likely to to an out-of-memory error. "
706
771
  "Try reducing the size of your analysis, or set `parallel=False`."
707
772
  )
708
- raise RuntimeError(error_msg)
773
+ raise RuntimeError(err_msg) from None
709
774
 
710
775
  # Model tides in series if parallelisation is off
711
776
  else:
@@ -725,20 +790,27 @@ def model_tides(
725
790
 
726
791
  # Update requested models with any custom ensemble models, then
727
792
  # filter the dataframe to keep only models originally requested
728
- models_requested = list(np.union1d(models_requested, ensemble_df.tide_model.unique()))
729
- tide_df = pd.concat([tide_df, ensemble_df]).query("tide_model in @models_requested")
793
+ models_requested = list(
794
+ np.union1d(models_requested, ensemble_df.tide_model.unique()),
795
+ )
796
+ tide_df = pd.concat([tide_df, ensemble_df]).query(
797
+ "tide_model in @models_requested",
798
+ )
730
799
 
731
800
  # Optionally convert to a wide format dataframe with a tide model in
732
801
  # each dataframe column
733
802
  if output_format == "wide":
734
803
  # Pivot into wide format with each tide model as a column
735
804
  print("Converting to a wide format dataframe")
736
- tide_df = tide_df.pivot(columns="tide_model", values="tide_height")
805
+ tide_df = tide_df.pivot(columns="tide_model", values="tide_height") # noqa: PD010
737
806
 
738
807
  # If in 'one-to-one' mode, reindex using our input time/x/y
739
808
  # values to ensure the output is sorted the same as our inputs
740
809
  if mode == "one-to-one":
741
- output_indices = pd.MultiIndex.from_arrays([time, x, y], names=["time", "x", "y"])
810
+ output_indices = pd.MultiIndex.from_arrays(
811
+ [time, x, y],
812
+ names=["time", "x", "y"],
813
+ )
742
814
  tide_df = tide_df.reindex(output_indices)
743
815
 
744
816
  return tide_df
@@ -754,41 +826,39 @@ def model_phases(
754
826
  return_tides: bool = False,
755
827
  **model_tides_kwargs,
756
828
  ) -> pd.DataFrame:
757
- """
758
- Model tide phases (low-flow, high-flow, high-ebb, low-ebb)
759
- at multiple coordinates and/or timesteps using using one
760
- or more ocean tide models.
761
-
762
- Ebb and low phases are calculated by running the
763
- `eo_tides.model.model_tides` function twice, once for
764
- the requested timesteps, and again after subtracting a
765
- small time offset (by default, 15 minutes). If tides
766
- increased over this period, they are assigned as "flow";
767
- if they decreased, they are assigned as "ebb".
829
+ """Model tide phases at multiple coordinates or timesteps using multiple ocean tide models.
830
+
831
+ Ebb and flow phases (low-flow, high-flow, high-ebb, low-ebb)
832
+ are calculated by running the `eo_tides.model.model_tides`
833
+ function twice, once for the requested timesteps, and again
834
+ after subtracting a small time offset (15 mins by default).
835
+ If tides increased over this period, they are assigned as
836
+ "flow"; if they decreased, they are assigned as "ebb".
768
837
  Tides are considered "high" if equal or greater than 0
769
838
  metres tide height, otherwise "low".
770
839
 
771
840
  This function supports all parameters that are supported
772
841
  by `model_tides`.
773
842
 
843
+ For tide model setup instructions, refer to the guide:
844
+ https://geoscienceaustralia.github.io/eo-tides/setup/
845
+
774
846
  Parameters
775
847
  ----------
776
- x : float or list of float
777
- One or more x coordinates used to define the location at
778
- which to model tide phases. By default these coordinates
779
- should be in "EPSG:4326" WGS84 degrees longitude; use "crs"
780
- if they are in a custom coordinate reference system.
781
- y : float or list of float
782
- One or more y coordinates used to define the location at
783
- which to model tide phases. By default these coordinates
784
- should be in "EPSG:4326" WGS84 degrees latitude; use "crs"
785
- if they are in a custom coordinate reference system.
848
+ x : float or list of floats
849
+ One or more x coordinates at which to model tides. Assumes
850
+ degrees longitude (EPSG:4326) by default; use `crs` to specify
851
+ a different coordinate reference system.
852
+ y : float or list of floats
853
+ One or more y coordinates at which to model tides. Assumes
854
+ degrees latitude (EPSG:4326) by default; use `crs` to specify
855
+ a different coordinate reference system.
786
856
  time : DatetimeLike
787
- Times at which to model tide phases (in UTC). Accepts
788
- any format that can be converted by `pandas.to_datetime()`;
789
- e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
790
- datetime.datetime and strings (e.g. "2020-01-01 23:00").
791
- For example: `time=pd.date_range(start="2000", end="2001", freq="5h")`
857
+ One or more UTC times at which to model tide heights. Accepts
858
+ any time format compatible with `pandas.to_datetime()`, e.g.
859
+ datetime.datetime, pd.Timestamp, pd.DatetimeIndex, numpy.datetime64,
860
+ or date/time strings (e.g. "2020-01-01 23:00"). For example:
861
+ `time = pd.date_range(start="2000", end="2001", freq="5h")`.
792
862
  model : str or list of str, optional
793
863
  The tide model (or list of models) to use to model tides.
794
864
  Defaults to "EOT20"; specify "all" to use all models available
@@ -821,7 +891,6 @@ def model_phases(
821
891
  A dataframe containing modelled tide phases.
822
892
 
823
893
  """
824
-
825
894
  # Pop output format and mode for special handling
826
895
  output_format = model_tides_kwargs.pop("output_format", "long")
827
896
  mode = model_tides_kwargs.pop("mode", "one-to-many")
@@ -851,7 +920,9 @@ def model_phases(
851
920
  # Compare tides computed for each timestep. If the previous tide
852
921
  # was higher than the current tide, the tide is 'ebbing'. If the
853
922
  # previous tide was lower, the tide is 'flowing'
854
- ebb_flow = (tide_df.tide_height < pre_df.tide_height.values).replace({True: "ebb", False: "flow"})
923
+ ebb_flow = (tide_df.tide_height < pre_df.tide_height.to_numpy()).replace(
924
+ {True: "ebb", False: "flow"},
925
+ )
855
926
 
856
927
  # If tides are greater than 0, then "high", otherwise "low"
857
928
  high_low = (tide_df.tide_height >= 0).replace({True: "high", False: "low"})
@@ -864,12 +935,15 @@ def model_phases(
864
935
  if output_format == "wide":
865
936
  # Pivot into wide format with each tide model as a column
866
937
  print("Converting to a wide format dataframe")
867
- tide_df = tide_df.pivot(columns="tide_model")
938
+ tide_df = tide_df.pivot(columns="tide_model") # noqa: PD010
868
939
 
869
940
  # If in 'one-to-one' mode, reindex using our input time/x/y
870
941
  # values to ensure the output is sorted the same as our inputs
871
942
  if mode == "one-to-one":
872
- output_indices = pd.MultiIndex.from_arrays([time, x, y], names=["time", "x", "y"])
943
+ output_indices = pd.MultiIndex.from_arrays(
944
+ [time, x, y],
945
+ names=["time", "x", "y"],
946
+ )
873
947
  tide_df = tide_df.reindex(output_indices)
874
948
 
875
949
  # Optionally drop tides