eo-tides 0.7.6.dev2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/stats.py CHANGED
@@ -1,8 +1,14 @@
1
+ """Tools for analysing local tide dynamics and satellite biases.
2
+
3
+ This module provides functions to assess how well satellite EO data
4
+ captures real-world tides, and reveals potential tide biases in
5
+ satellite EO data coverage.
6
+ """
7
+
1
8
  # Used to postpone evaluation of type annotations
2
9
  from __future__ import annotations
3
10
 
4
- import os
5
- from typing import TYPE_CHECKING
11
+ from typing import TYPE_CHECKING, cast
6
12
 
7
13
  import matplotlib.pyplot as plt
8
14
  import numpy as np
@@ -11,13 +17,21 @@ import xarray as xr
11
17
 
12
18
  # Only import if running type checking
13
19
  if TYPE_CHECKING:
20
+ import os
21
+
14
22
  from odc.geo.geobox import GeoBox
15
23
 
24
+ from .utils import DatetimeLike
25
+
16
26
  from .eo import _pixel_tides_resample, _resample_chunks, _standardise_inputs, pixel_tides, tag_tides
17
- from .utils import DatetimeLike
18
27
 
19
28
 
20
- def _tide_statistics(obs_tides, all_tides, min_max_q=(0.0, 1.0), dim="time"):
29
+ def _tide_statistics(
30
+ obs_tides: xr.DataArray,
31
+ all_tides: xr.DataArray,
32
+ min_max_q: tuple = (0.0, 1.0),
33
+ dim: str = "time",
34
+ ) -> xr.Dataset:
21
35
  # Calculate means of observed and modelled tides
22
36
  mot = obs_tides.mean(dim=dim)
23
37
  mat = all_tides.mean(dim=dim)
@@ -62,7 +76,19 @@ def _tide_statistics(obs_tides, all_tides, min_max_q=(0.0, 1.0), dim="time"):
62
76
  )
63
77
 
64
78
 
65
- def _stats_plain_english(mot, mat, hot, hat, lot, lat, otr, tr, spread, offset_low, offset_high):
79
+ def _stats_plain_english(
80
+ mot,
81
+ mat,
82
+ hot,
83
+ hat,
84
+ lot,
85
+ lat,
86
+ otr,
87
+ tr,
88
+ spread,
89
+ offset_low,
90
+ offset_high,
91
+ ) -> None:
66
92
  # Plain text descriptors
67
93
  mean_diff = "higher" if mot > mat else "lower"
68
94
  mean_diff_icon = "⬆️" if mot > mat else "⬇️"
@@ -75,26 +101,32 @@ def _stats_plain_english(mot, mat, hot, hat, lot, lat, otr, tr, spread, offset_l
75
101
  print(f"🛰️ Observed tide range: {otr:.2f} m ({lot:.2f} to {hot:.2f} m).\n")
76
102
  print(f"{spread_icon} {spread:.0%} of the modelled astronomical tide range was observed at this location.")
77
103
  print(
78
- f"{high_tide_icon} The highest {offset_high:.0%} ({offset_high * tr:.2f} m) of the tide range was never observed."
104
+ f"{high_tide_icon} The highest {offset_high:.0%} ({offset_high * tr:.2f} m) of the tide range was never observed.",
79
105
  )
80
106
  print(
81
- f"{low_tide_icon} The lowest {offset_low:.0%} ({offset_low * tr:.2f} m) of the tide range was never observed.\n"
107
+ f"{low_tide_icon} The lowest {offset_low:.0%} ({offset_low * tr:.2f} m) of the tide range was never observed.\n",
82
108
  )
83
109
  print(f"🌊 Mean modelled astronomical tide height: {mat:.2f} m.")
84
110
  print(f"🛰️ Mean observed tide height: {mot:.2f} m.")
85
111
  print(
86
- f"{mean_diff_icon} The mean observed tide height was {mot - mat:.2f} m {mean_diff} than the mean modelled astronomical tide height."
112
+ f"{mean_diff_icon} The mean observed tide height was {mot - mat:.2f} m {mean_diff} than the mean modelled astronomical tide height.",
87
113
  )
88
114
 
89
115
 
90
116
  def _stats_figure(
91
- all_tides_da, obs_tides_da, hot, hat, lot, lat, spread, offset_low, offset_high, plot_var, point_col=None
117
+ all_tides_da,
118
+ obs_tides_da,
119
+ hot,
120
+ hat,
121
+ lot,
122
+ lat,
123
+ spread,
124
+ offset_low,
125
+ offset_high,
126
+ plot_var,
127
+ point_col=None,
92
128
  ):
93
- """
94
- Plot tide bias statistics as a figure, including both
95
- satellite observations and all modelled tides.
96
- """
97
-
129
+ """Plot tide bias statistics as a figure comparing satellite observations and all modelled tides."""
98
130
  # Create plot and add all modelled tides
99
131
  fig, ax = plt.subplots(figsize=(10, 6))
100
132
  all_tides_da.plot(ax=ax, alpha=0.4, label="Modelled tides")
@@ -207,7 +239,8 @@ def tide_stats(
207
239
  round_stats: int = 3,
208
240
  **tag_tides_kwargs,
209
241
  ) -> pd.Series:
210
- """
242
+ """Generate tide statistics and satellite tide bias metrics for every dataset timestep.
243
+
211
244
  Takes a multi-dimensional dataset and generate tide statistics
212
245
  and satellite-observed tide bias metrics, calculated based on
213
246
  every timestep in the satellite data and the geographic centroid
@@ -222,7 +255,7 @@ def tide_stats(
222
255
 
223
256
  For more information about the tidal statistics computed by this
224
257
  function, refer to Figure 8 in Bishop-Taylor et al. 2018:
225
- <https://www.sciencedirect.com/science/article/pii/S0272771418308783#fig8>
258
+ https://www.sciencedirect.com/science/article/pii/S0272771418308783#fig8
226
259
 
227
260
  Parameters
228
261
  ----------
@@ -309,13 +342,13 @@ def tide_stats(
309
342
  - `spread`: proportion of the full modelled tidal range observed by the satellite
310
343
  - `offset_low`: proportion of the lowest tides never observed by the satellite
311
344
  - `offset_high`: proportion of the highest tides never observed by the satellite
312
- """
313
345
 
346
+ """
314
347
  # Standardise data inputs, time and models
315
348
  gbox, obs_times = _standardise_inputs(data, time)
316
349
 
317
350
  # Generate range of times covering entire period of satellite record
318
- assert obs_times is not None
351
+ assert obs_times is not None # noqa: S101
319
352
  all_times = pd.date_range(
320
353
  start=obs_times.min().item(),
321
354
  end=obs_times.max().item(),
@@ -333,8 +366,8 @@ def tide_stats(
333
366
  time=obs_times,
334
367
  model=model,
335
368
  directory=directory,
336
- tidepost_lat=tidepost_lat, # type: ignore
337
- tidepost_lon=tidepost_lon, # type: ignore
369
+ tidepost_lat=tidepost_lat,
370
+ tidepost_lon=tidepost_lon,
338
371
  **tag_tides_kwargs,
339
372
  )
340
373
 
@@ -344,13 +377,18 @@ def tide_stats(
344
377
  time=all_times,
345
378
  model=model,
346
379
  directory=directory,
347
- tidepost_lat=tidepost_lat, # type: ignore
348
- tidepost_lon=tidepost_lon, # type: ignore
380
+ tidepost_lat=tidepost_lat,
381
+ tidepost_lon=tidepost_lon,
349
382
  **tag_tides_kwargs,
350
383
  )
351
384
 
352
385
  # Calculate statistics
353
- stats_ds = _tide_statistics(obs_tides_da, all_tides_da, min_max_q=min_max_q)
386
+ # # (cast ensures typing knows these are always DataArrays)
387
+ stats_ds = _tide_statistics(
388
+ cast(xr.DataArray, obs_tides_da),
389
+ cast(xr.DataArray, all_tides_da),
390
+ min_max_q=min_max_q,
391
+ )
354
392
 
355
393
  # Convert to pandas and add tide post coordinates
356
394
  stats_df = stats_ds.to_pandas().astype("float32")
@@ -412,8 +450,9 @@ def pixel_stats(
412
450
  cutoff: float = 10,
413
451
  **pixel_tides_kwargs,
414
452
  ) -> xr.Dataset:
415
- """
416
- Takes a multi-dimensional dataset and generate spatial
453
+ """Generate tide statistics and satellite tide bias metrics for every dataset pixel.
454
+
455
+ Takes a multi-dimensional dataset and generate pixel-level
417
456
  tide statistics and satellite-observed tide bias metrics,
418
457
  calculated based on every timestep in the satellite data and
419
458
  modelled into the spatial extent of the imagery.
@@ -519,14 +558,13 @@ def pixel_stats(
519
558
  - `offset_high`: proportion of the highest tides never observed by the satellite
520
559
 
521
560
  """
522
-
523
561
  # Standardise data inputs, time and models
524
562
  gbox, obs_times = _standardise_inputs(data, time)
525
563
  dask_chunks = _resample_chunks(data, dask_chunks)
526
564
  model = [model] if isinstance(model, str) else model
527
565
 
528
566
  # Generate range of times covering entire period of satellite record
529
- assert obs_times is not None
567
+ assert obs_times is not None # noqa: S101
530
568
  all_times = pd.date_range(
531
569
  start=obs_times.min().item(),
532
570
  end=obs_times.max().item(),
eo_tides/utils.py CHANGED
@@ -1,3 +1,10 @@
1
+ """General-purpose utilities for tide model setup and data processing.
2
+
3
+ This module includes tools for listing and clipping model files,
4
+ performing spatial interpolation, and other helper tools used across
5
+ the eo_tides package.
6
+ """
7
+
1
8
  # Used to postpone evaluation of type annotations
2
9
  from __future__ import annotations
3
10
 
@@ -7,7 +14,14 @@ import pathlib
7
14
  import textwrap
8
15
  import warnings
9
16
  from collections import Counter
10
- from typing import TypeAlias
17
+ from typing import TYPE_CHECKING
18
+
19
+ # Only import if running type checking
20
+ if TYPE_CHECKING:
21
+ from collections.abc import Sequence
22
+ from typing import Any, TypeAlias
23
+
24
+ from odc.geo.geom import BoundingBox
11
25
 
12
26
  import numpy as np
13
27
  import odc.geo
@@ -15,7 +29,6 @@ import pandas as pd
15
29
  import pyTMD
16
30
  import xarray as xr
17
31
  from colorama import Style, init
18
- from odc.geo.geom import BoundingBox
19
32
  from pyTMD.io.model import load_database
20
33
  from scipy.spatial import cKDTree as KDTree
21
34
  from tqdm import tqdm
@@ -24,10 +37,8 @@ from tqdm import tqdm
24
37
  DatetimeLike: TypeAlias = np.ndarray | pd.DatetimeIndex | pd.Timestamp | datetime.datetime | str | list[str]
25
38
 
26
39
 
27
- def _get_duplicates(array):
28
- """
29
- Return any duplicates in a list or array.
30
- """
40
+ def _get_duplicates(array: Sequence[Any]) -> list[Any]:
41
+ """Return any duplicates in a list or array."""
31
42
  c = Counter(array)
32
43
  return [k for k in c if c[k] > 1]
33
44
 
@@ -35,35 +46,37 @@ def _get_duplicates(array):
35
46
  def _set_directory(
36
47
  directory: str | os.PathLike | None = None,
37
48
  ) -> os.PathLike:
38
- """
39
- Set tide modelling files directory. If no custom
40
- path is provided, try global `EO_TIDES_TIDE_MODELS`
49
+ """Set tide modelling files directory.
50
+
51
+ If no custom path is provided, try global `EO_TIDES_TIDE_MODELS`
41
52
  environmental variable instead.
42
53
  """
43
54
  if directory is None:
44
55
  if "EO_TIDES_TIDE_MODELS" in os.environ:
45
56
  directory = os.environ["EO_TIDES_TIDE_MODELS"]
46
57
  else:
47
- raise Exception(
58
+ err_msg = (
48
59
  "No tide model directory provided via `directory`, and/or no "
49
60
  "`EO_TIDES_TIDE_MODELS` environment variable found. "
50
61
  "Please provide a valid path to your tide model directory."
51
62
  )
63
+ raise Exception(err_msg)
52
64
 
53
65
  # Verify path exists
54
66
  directory = pathlib.Path(directory).expanduser()
55
67
  if not directory.exists():
56
- raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
68
+ err_msg = f"No valid tide model directory found at path `{directory}`"
69
+ raise FileNotFoundError(err_msg)
57
70
  return directory
58
71
 
59
72
 
60
73
  def _standardise_time(
61
74
  time: DatetimeLike | None,
62
75
  ) -> np.ndarray | None:
63
- """
64
- Accept any time format accepted by `pd.to_datetime`,
65
- and return a datetime64 ndarray. Return None if None
66
- passed.
76
+ """Standardise input times for analysis.
77
+
78
+ Accept any time format accepted by `pd.to_datetime`, and
79
+ return a datetime64 ndarray. Return None if None passed.
67
80
  """
68
81
  # Return time as-is if None
69
82
  if time is None:
@@ -82,7 +95,8 @@ def _standardise_models(
82
95
  ensemble_models: list[str] | None = None,
83
96
  extra_databases: str | os.PathLike | list | None = None,
84
97
  ) -> tuple[list[str], list[str], list[str] | None]:
85
- """
98
+ """Standardise lists of models for analysis.
99
+
86
100
  Take an input model name or list of names, and return a list
87
101
  of models to process, requested models, and ensemble models,
88
102
  as required by the `model_tides` function.
@@ -92,14 +106,14 @@ def _standardise_models(
92
106
  "ensemble", which will model tides for all models in a list
93
107
  of ensemble models.
94
108
  """
95
-
96
109
  # Turn inputs into arrays for consistent handling
97
- models_requested = list(np.atleast_1d(model))
110
+ models_requested = [str(m) for m in np.atleast_1d(model)]
98
111
 
99
112
  # Raise error if list contains duplications
100
113
  duplicates = _get_duplicates(models_requested)
101
114
  if len(duplicates) > 0:
102
- raise ValueError(f"The model parameter contains duplicate values: {duplicates}")
115
+ err_msg = f"The model parameter contains duplicate values: {duplicates}"
116
+ raise ValueError(err_msg)
103
117
 
104
118
  # Load supported models from pyTMD database
105
119
  available_models, valid_models = list_models(
@@ -114,22 +128,22 @@ def _standardise_models(
114
128
  # Error if any models are not supported
115
129
  if not all(m in valid_models + custom_options for m in models_requested):
116
130
  error_text = (
117
- f"One or more of the requested models are not valid:\n"
118
- f"{models_requested}\n\n"
119
- "The following models are supported:\n"
120
- f"{valid_models}"
131
+ f"One or more of the requested models are not valid.\n"
132
+ f"Requested models: {models_requested}\n"
133
+ f"Valid models: {valid_models}\n"
134
+ "For tide model setup instructions, refer to the guide: https://geoscienceaustralia.github.io/eo-tides/setup/"
121
135
  )
122
- raise ValueError(error_text)
136
+ raise ValueError(error_text) from None
123
137
 
124
138
  # Error if any models are not available in `directory`
125
139
  if not all(m in available_models + custom_options for m in models_requested):
126
140
  error_text = (
127
- f"One or more of the requested models are valid, but not available in `{directory}`:\n"
128
- f"{models_requested}\n\n"
129
- f"The following models are available in `{directory}`:\n"
130
- f"{available_models}"
141
+ f"One or more of the requested tide models are not available in `{directory}`.\n"
142
+ f"Requested models: {models_requested}\n"
143
+ f"Available models: {available_models}\n"
144
+ "For tide model setup instructions, refer to the guide: https://geoscienceaustralia.github.io/eo-tides/setup/"
131
145
  )
132
- raise ValueError(error_text)
146
+ raise ValueError(error_text) from None
133
147
 
134
148
  # If "all" models are requested, update requested list to include available models
135
149
  if "all" in models_requested:
@@ -182,8 +196,7 @@ def _clip_model_file(
182
196
  ycoord: str,
183
197
  xcoord: str,
184
198
  ) -> xr.Dataset:
185
- """
186
- Clips tide model netCDF datasets to a bounding box.
199
+ """Clips tide model netCDF datasets to a bounding box.
187
200
 
188
201
  If the bounding box crosses 0 degrees longitude (e.g. Greenwich prime
189
202
  meridian), the dataset will be clipped into two parts and concatenated
@@ -224,8 +237,8 @@ def _clip_model_file(
224
237
  >>> nc = xr.open_dataset("GOT5.5/ocean_tides/2n2.nc")
225
238
  >>> bbox = BoundingBox(left=108, bottom=-48, right=158, top=-6, crs='EPSG:4326')
226
239
  >>> clipped_nc = _clip_model_file(nc, bbox, xdim="lon", ydim="lat", ycoord="latitude", xcoord="longitude")
227
- """
228
240
 
241
+ """
229
242
  # Extract x and y coords from xarray and load into memory
230
243
  xcoords = nc[xcoord].compute()
231
244
  ycoords = nc[ycoord].compute()
@@ -270,12 +283,12 @@ def _clip_model_file(
270
283
  # Combine left and right data along x dimension
271
284
  nc_clipped = xr.concat([nc_left, nc_right], dim=xdim)
272
285
 
273
- # Hack fix to remove expanded x dim on lat variables issue
286
+ # Temporary fix to remove expanded x dim on lat variables issue
274
287
  # for TPXO data; remove x dim by selecting the first obs
275
288
  for i in ["lat_z", "lat_v", "lat_u", "con"]:
276
289
  try:
277
290
  nc_clipped[i] = nc_clipped[i].isel(nx=0)
278
- except KeyError:
291
+ except KeyError: # noqa: PERF203
279
292
  pass
280
293
 
281
294
  return nc_clipped
@@ -288,9 +301,8 @@ def clip_models(
288
301
  model: list | None = None,
289
302
  buffer: float = 5,
290
303
  overwrite: bool = False,
291
- ):
292
- """
293
- Clip NetCDF-format ocean tide models to a bounding box.
304
+ ) -> None:
305
+ """Clip NetCDF-format ocean tide models to a bounding box.
294
306
 
295
307
  This function identifies all NetCDF-format tide models in a
296
308
  given input directory, including "ATLAS-netcdf" (e.g. TPXO9-atlas-nc),
@@ -301,8 +313,8 @@ def clip_models(
301
313
  directory and verified with `pyTMD` to ensure the clipped data is
302
314
  suitable for tide modelling.
303
315
 
304
- For instructions on accessing and downloading tide models, see:
305
- <https://geoscienceaustralia.github.io/eo-tides/setup/>
316
+ For tide model setup instructions, refer to the guide:
317
+ https://geoscienceaustralia.github.io/eo-tides/setup/
306
318
 
307
319
  Parameters
308
320
  ----------
@@ -331,8 +343,8 @@ def clip_models(
331
343
  ... output_directory="tide_models_clipped/",
332
344
  ... bbox=(-8.968392, 50.070574, 2.447160, 59.367122),
333
345
  ... )
334
- """
335
346
 
347
+ """
336
348
  # Get input and output paths
337
349
  input_directory = _set_directory(input_directory)
338
350
  output_directory = pathlib.Path(output_directory)
@@ -352,7 +364,8 @@ def clip_models(
352
364
 
353
365
  # Raise error if no valid models found
354
366
  if len(available_netcdf_models) == 0:
355
- raise ValueError(f"No valid NetCDF models found in {input_directory}.")
367
+ err_msg = f"No valid NetCDF models found in {input_directory}."
368
+ raise ValueError(err_msg)
356
369
 
357
370
  # If model list is provided,
358
371
  print(f"Preparing to clip suitable NetCDF models: {available_netcdf_models}\n")
@@ -431,7 +444,8 @@ def clip_models(
431
444
  )
432
445
 
433
446
  else:
434
- raise Exception(f"Model {m} not supported")
447
+ err_msg = f"Model {m} not supported"
448
+ raise Exception(err_msg)
435
449
 
436
450
  # Create directory and export
437
451
  (output_directory / file).parent.mkdir(parents=True, exist_ok=True)
@@ -454,16 +468,15 @@ def list_models(
454
468
  raise_error: bool = False,
455
469
  extra_databases: str | os.PathLike | list | None = None,
456
470
  ) -> tuple[list[str], list[str]]:
457
- """
458
- List all tide models available for tide modelling.
471
+ """List all tide models available for tide modelling.
459
472
 
460
473
  This function scans the specified tide model directory
461
474
  and returns a list of models that are available in the
462
475
  directory as well as the full list of all models supported
463
476
  by `eo-tides` and `pyTMD`.
464
477
 
465
- For instructions on setting up tide models, see:
466
- <https://geoscienceaustralia.github.io/eo-tides/setup/>
478
+ For tide model setup instructions, refer to the guide:
479
+ https://geoscienceaustralia.github.io/eo-tides/setup/
467
480
 
468
481
  Parameters
469
482
  ----------
@@ -494,6 +507,7 @@ def list_models(
494
507
  A list of all tide models available within `directory`.
495
508
  supported_models : list of str
496
509
  A list of all tide models supported by `eo-tides`.
510
+
497
511
  """
498
512
  init() # Initialize colorama
499
513
 
@@ -515,7 +529,7 @@ def list_models(
515
529
 
516
530
  # Handle GOT5.6 differently to ensure we test for presence of GOT5.6 constituents
517
531
  if m in ("GOT5.6", "GOT5.6_extrapolated"):
518
- model_file = [file for file in model_file if "GOT5.6" in file][0]
532
+ model_file = next(file for file in model_file if "GOT5.6" in file)
519
533
  else:
520
534
  model_file = model_file[0] if isinstance(model_file, list) else model_file
521
535
 
@@ -548,13 +562,12 @@ def list_models(
548
562
  # Mark available models with a green tick
549
563
  status = "✅"
550
564
  print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
551
-
552
- except FileNotFoundError:
565
+ except FileNotFoundError: # noqa: PERF203
553
566
  if show_supported:
554
567
  # Mark unavailable models with a red cross
555
568
  status = "❌"
556
569
  print(
557
- f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
570
+ f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}",
558
571
  )
559
572
 
560
573
  if show_available or show_supported:
@@ -568,16 +581,15 @@ def list_models(
568
581
  if not available_models:
569
582
  warning_msg = textwrap.dedent(
570
583
  f"""
571
- No valid tide models are available in `{directory}`.
572
- Are you sure you have provided the correct `directory` path, or set the
573
- `EO_TIDES_TIDE_MODELS` environment variable to point to the location of your
574
- tide model directory?
575
- """
584
+ No valid tide models were found in `{directory}`.
585
+ Please ensure that the path you provided is correct, or set the `EO_TIDES_TIDE_MODELS` environment variable to point to a valid tide model directory.
586
+ For tide model setup instructions, refer to the guide: https://geoscienceaustralia.github.io/eo-tides/setup/
587
+ """,
576
588
  ).strip()
577
589
 
578
590
  if raise_error:
579
- raise Exception(warning_msg)
580
- warnings.warn(warning_msg, UserWarning)
591
+ raise Exception(warning_msg) from None
592
+ warnings.warn(warning_msg, UserWarning, stacklevel=2)
581
593
 
582
594
  # Return list of available and supported models
583
595
  return available_models, supported_models
@@ -669,18 +681,22 @@ def idw(
669
681
 
670
682
  # Verify input and outputs have matching lengths
671
683
  if not (input_z.shape[0] == len(input_x) == len(input_y)):
672
- raise ValueError("All of `input_z`, `input_x` and `input_y` must be the same length.")
673
- if not (len(output_x) == len(output_y)):
674
- raise ValueError("Both `output_x` and `output_y` must be the same length.")
684
+ err_msg = "All of `input_z`, `input_x` and `input_y` must be the same length."
685
+ raise ValueError(err_msg)
686
+ if len(output_x) != len(output_y):
687
+ err_msg = "Both `output_x` and `output_y` must be the same length."
688
+ raise ValueError(err_msg)
675
689
 
676
690
  # Verify k is smaller than total number of points, and non-zero
677
691
  if k > input_z.shape[0]:
678
- raise ValueError(
692
+ err_msg = (
679
693
  f"The requested number of nearest neighbours (`k={k}`) "
680
694
  f"is smaller than the total number of points ({input_z.shape[0]}).",
681
695
  )
696
+ raise ValueError(err_msg)
682
697
  if k == 0:
683
- raise ValueError("Interpolation based on `k=0` nearest neighbours is not valid.")
698
+ err_msg = "Interpolation based on `k=0` nearest neighbours is not valid."
699
+ raise ValueError(err_msg)
684
700
 
685
701
  # Create KDTree to efficiently find nearest neighbours
686
702
  points_xy = np.column_stack((input_y, input_x))
eo_tides/validation.py CHANGED
@@ -1,3 +1,9 @@
1
+ """Validation tools for comparing modelled tides to observed tide gauge data.
2
+
3
+ This module provides functions for loading, filtering, and analysing
4
+ observed tide gauge data to validate modelled tide heights.
5
+ """
6
+
1
7
  import datetime
2
8
  import warnings
3
9
  from math import sqrt
@@ -14,10 +20,8 @@ from shapely.geometry import Point
14
20
  from sklearn.metrics import mean_absolute_error, mean_squared_error
15
21
 
16
22
 
17
- def eval_metrics(x, y, round=3, all_regress=False):
18
- """
19
- Calculate a set of common statistical metrics
20
- based on two input actual and predicted vectors.
23
+ def eval_metrics(x, y, round=3, all_regress=False): # noqa: A002
24
+ """Calculate common statistical validation metrics.
21
25
 
22
26
  These include:
23
27
 
@@ -46,8 +50,8 @@ def eval_metrics(x, y, round=3, all_regress=False):
46
50
  -------
47
51
  pandas.Series
48
52
  A `pd.Series` containing all calculated metrics.
49
- """
50
53
 
54
+ """
51
55
  # Create dataframe to drop na
52
56
  xy_df = pd.DataFrame({"x": x, "y": y}).dropna()
53
57
 
@@ -77,9 +81,7 @@ def eval_metrics(x, y, round=3, all_regress=False):
77
81
 
78
82
 
79
83
  def _round_date_strings(date, round_type="end"):
80
- """
81
- Round a date string up or down to the start or end of a given time
82
- period.
84
+ """Round a date string up or down to the start or end of a time period.
83
85
 
84
86
  Parameters
85
87
  ----------
@@ -107,8 +109,8 @@ def _round_date_strings(date, round_type="end"):
107
109
 
108
110
  >>> round_date_strings('2020-01', round_type='end')
109
111
  '2020-01-31 00:00:00'
110
- """
111
112
 
113
+ """
112
114
  # Determine precision of input date string
113
115
  date_segments = len(date.split("-"))
114
116
 
@@ -194,8 +196,7 @@ def load_gauge_gesla(
194
196
  data_path="GESLA3.0_ALL",
195
197
  metadata_path="",
196
198
  ):
197
- """
198
- Load Global Extreme Sea Level Analysis (GESLA) tide gauge data.
199
+ """Load Global Extreme Sea Level Analysis (GESLA) tide gauge data.
199
200
 
200
201
  Load and process all available GESLA measured sea-level data
201
202
  with an `x, y, time` spatio-temporal query, or from a list of
@@ -259,24 +260,27 @@ def load_gauge_gesla(
259
260
  - "use_flag": Use-in-analysis flag (1 = use, 0 = do not use),
260
261
 
261
262
  ...and additional columns from station metadata.
263
+
262
264
  """
263
265
  # Expand and validate data and metadata paths
264
266
  data_path = Path(data_path).expanduser()
265
267
  metadata_path = Path(metadata_path).expanduser()
266
268
 
267
269
  if not data_path.exists():
268
- raise FileNotFoundError(
270
+ err_msg = (
269
271
  f"GESLA raw data directory not found at: {data_path}\n"
270
272
  "Download 'GESLA-3 DATA' from: "
271
- "https://gesla787883612.wordpress.com/downloads/"
273
+ "https://gesla787883612.wordpress.com/downloads/",
272
274
  )
275
+ raise FileNotFoundError(err_msg)
273
276
 
274
277
  if not metadata_path.exists():
275
- raise FileNotFoundError(
278
+ err_msg = (
276
279
  f"GESLA station metadata file not found at: {metadata_path}\n"
277
280
  "Download the 'GESLA-3 CSV META-DATA FILE' from: "
278
- "https://gesla787883612.wordpress.com/downloads/"
281
+ "https://gesla787883612.wordpress.com/downloads/",
279
282
  )
283
+ raise FileNotFoundError(err_msg)
280
284
 
281
285
  # Load tide gauge metadata
282
286
  metadata_df, metadata_gdf = _load_gauge_metadata(metadata_path)
@@ -297,20 +301,21 @@ def load_gauge_gesla(
297
301
  site_code = (
298
302
  _nearest_row(metadata_gdf, x, y, max_distance).rename({"index_right": "site_code"}, axis=1).site_code
299
303
  )
300
- # site_code = _nearest_row(metadata_gdf, x, y, max_distance).site_code
301
304
 
302
305
  # Raise exception if no valid tide gauges are found
303
- if site_code.isnull().all():
304
- raise Exception(f"No tide gauge found within {max_distance} degrees of {x}, {y}.")
306
+ if site_code.isna().all():
307
+ err_msg = f"No tide gauge found within {max_distance} degrees of {x}, {y}."
308
+ raise Exception(err_msg)
305
309
 
306
310
  # Otherwise if all are None, return all available site codes
307
311
  elif (site_code is None) & (x is None) & (y is None):
308
312
  site_code = metadata_df.index.to_list()
309
313
 
310
314
  else:
311
- raise TypeError(
312
- "`x` and `y` must be provided as either singular coordinates (e.g. `x=150`), or as a tuple bounding box (e.g. `x=(150, 152)`)."
315
+ err_msg = (
316
+ "`x` and `y` must be provided as either singular coordinates (e.g. `x=150`), or as a tuple bounding box (e.g. `x=(150, 152)`).",
313
317
  )
318
+ raise Exception(err_msg)
314
319
 
315
320
  # Prepare times
316
321
  if time is None:
@@ -342,7 +347,7 @@ def load_gauge_gesla(
342
347
  data_df = data_df.set_index("time", append=True)
343
348
  duplicates = data_df.index.duplicated()
344
349
  if duplicates.sum() > 0:
345
- warnings.warn("Duplicate timestamps were removed.")
350
+ warnings.warn("Duplicate timestamps were removed.", stacklevel=2)
346
351
  data_df = data_df.loc[~duplicates]
347
352
 
348
353
  # Remove observed mean sea level if requested
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eo-tides
3
- Version: 0.7.6.dev2
3
+ Version: 0.8.0
4
4
  Summary: Tide modelling tools for large-scale satellite earth observation analysis
5
5
  Project-URL: Homepage, https://GeoscienceAustralia.github.io/eo-tides/
6
6
  Project-URL: Repository, https://github.com/GeoscienceAustralia/eo-tides