eo-tides 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eo_tides/__init__.py +7 -4
- eo_tides/eo.py +184 -161
- eo_tides/model.py +350 -366
- eo_tides/stats.py +74 -36
- eo_tides/utils.py +453 -1
- eo_tides/validation.py +5 -5
- {eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/METADATA +20 -10
- eo_tides-0.3.0.dist-info/RECORD +11 -0
- {eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/WHEEL +1 -1
- eo_tides-0.1.1.dist-info/RECORD +0 -11
- {eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/LICENSE +0 -0
- {eo_tides-0.1.1.dist-info → eo_tides-0.3.0.dist-info}/top_level.txt +0 -0
eo_tides/model.py
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
from __future__ import annotations
|
3
3
|
|
4
4
|
import os
|
5
|
-
import pathlib
|
6
5
|
import textwrap
|
7
|
-
import warnings
|
8
6
|
from concurrent.futures import ProcessPoolExecutor
|
9
7
|
from concurrent.futures.process import BrokenProcessPool
|
10
8
|
from functools import partial
|
11
9
|
from typing import TYPE_CHECKING
|
12
10
|
|
11
|
+
import psutil
|
12
|
+
|
13
13
|
# Only import if running type checking
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
import xarray as xr
|
@@ -19,349 +19,9 @@ import numpy as np
|
|
19
19
|
import pandas as pd
|
20
20
|
import pyproj
|
21
21
|
import pyTMD
|
22
|
-
from colorama import Style, init
|
23
|
-
from pyTMD.io.model import load_database, model
|
24
22
|
from tqdm import tqdm
|
25
23
|
|
26
|
-
from .utils import idw
|
27
|
-
|
28
|
-
|
29
|
-
def _set_directory(directory):
|
30
|
-
"""
|
31
|
-
Set tide modelling files directory. If no custom
|
32
|
-
path is provided, try global environmental variable
|
33
|
-
instead.
|
34
|
-
"""
|
35
|
-
if directory is None:
|
36
|
-
if "EO_TIDES_TIDE_MODELS" in os.environ:
|
37
|
-
directory = os.environ["EO_TIDES_TIDE_MODELS"]
|
38
|
-
else:
|
39
|
-
raise Exception(
|
40
|
-
"No tide model directory provided via `directory`, and/or no "
|
41
|
-
"`EO_TIDES_TIDE_MODELS` environment variable found. "
|
42
|
-
"Please provide a valid path to your tide model directory."
|
43
|
-
)
|
44
|
-
|
45
|
-
# Verify path exists
|
46
|
-
directory = pathlib.Path(directory).expanduser()
|
47
|
-
if not directory.exists():
|
48
|
-
raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
|
49
|
-
else:
|
50
|
-
return directory
|
51
|
-
|
52
|
-
|
53
|
-
def list_models(
|
54
|
-
directory: str | os.PathLike | None = None,
|
55
|
-
show_available: bool = True,
|
56
|
-
show_supported: bool = True,
|
57
|
-
raise_error: bool = False,
|
58
|
-
) -> tuple[list[str], list[str]]:
|
59
|
-
"""
|
60
|
-
List all tide models available for tide modelling, and
|
61
|
-
all models supported by `eo-tides` and `pyTMD`.
|
62
|
-
|
63
|
-
This function scans the specified tide model directory
|
64
|
-
and returns a list of models that are available in the
|
65
|
-
directory as well as the full list of all supported models.
|
66
|
-
|
67
|
-
For instructions on setting up tide models, see:
|
68
|
-
<https://geoscienceaustralia.github.io/eo-tides/setup/>
|
69
|
-
|
70
|
-
Parameters
|
71
|
-
----------
|
72
|
-
directory : str, optional
|
73
|
-
The directory containing tide model data files. If no path is
|
74
|
-
provided, this will default to the environment variable
|
75
|
-
`EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
|
76
|
-
Tide modelling files should be stored in sub-folders for each
|
77
|
-
model that match the structure required by `pyTMD`
|
78
|
-
(<https://geoscienceaustralia.github.io/eo-tides/setup/>).
|
79
|
-
show_available : bool, optional
|
80
|
-
Whether to print a list of locally available models.
|
81
|
-
show_supported : bool, optional
|
82
|
-
Whether to print a list of all supported models, in
|
83
|
-
addition to models available locally.
|
84
|
-
raise_error : bool, optional
|
85
|
-
If True, raise an error if no available models are found.
|
86
|
-
If False, raise a warning.
|
87
|
-
|
88
|
-
Returns
|
89
|
-
-------
|
90
|
-
available_models : list of str
|
91
|
-
A list of all tide models available within `directory`.
|
92
|
-
supported_models : list of str
|
93
|
-
A list of all tide models supported by `eo-tides`.
|
94
|
-
"""
|
95
|
-
init() # Initialize colorama
|
96
|
-
|
97
|
-
# Set tide modelling files directory. If no custom path is
|
98
|
-
# provided, try global environment variable.
|
99
|
-
directory = _set_directory(directory)
|
100
|
-
|
101
|
-
# Get full list of supported models from pyTMD database
|
102
|
-
model_database = load_database()["elevation"]
|
103
|
-
supported_models = list(model_database.keys())
|
104
|
-
|
105
|
-
# Extract expected model paths
|
106
|
-
expected_paths = {}
|
107
|
-
for m in supported_models:
|
108
|
-
model_file = model_database[m]["model_file"]
|
109
|
-
model_file = model_file[0] if isinstance(model_file, list) else model_file
|
110
|
-
expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
|
111
|
-
|
112
|
-
# Define column widths
|
113
|
-
status_width = 4 # Width for emoji
|
114
|
-
name_width = max(len(name) for name in supported_models)
|
115
|
-
path_width = max(len(path) for path in expected_paths.values())
|
116
|
-
|
117
|
-
# Print list of supported models, marking available and
|
118
|
-
# unavailable models and appending available to list
|
119
|
-
if show_available or show_supported:
|
120
|
-
total_width = min(status_width + name_width + path_width + 6, 80)
|
121
|
-
print("─" * total_width)
|
122
|
-
print(f"{'🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
|
123
|
-
print("─" * total_width)
|
124
|
-
|
125
|
-
available_models = []
|
126
|
-
for m in supported_models:
|
127
|
-
try:
|
128
|
-
model_file = model(directory=directory).elevation(m=m)
|
129
|
-
available_models.append(m)
|
130
|
-
|
131
|
-
if show_available:
|
132
|
-
# Mark available models with a green tick
|
133
|
-
status = "✅"
|
134
|
-
print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
|
135
|
-
except FileNotFoundError:
|
136
|
-
if show_supported:
|
137
|
-
# Mark unavailable models with a red cross
|
138
|
-
status = "❌"
|
139
|
-
print(
|
140
|
-
f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
|
141
|
-
)
|
142
|
-
|
143
|
-
if show_available or show_supported:
|
144
|
-
print("─" * total_width)
|
145
|
-
|
146
|
-
# Print summary
|
147
|
-
print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
|
148
|
-
print(f"Available models: {len(available_models)}/{len(supported_models)}")
|
149
|
-
|
150
|
-
# Raise error or warning if no models are available
|
151
|
-
if not available_models:
|
152
|
-
warning_msg = textwrap.dedent(
|
153
|
-
f"""
|
154
|
-
No valid tide models are available in `{directory}`.
|
155
|
-
Are you sure you have provided the correct `directory` path, or set the
|
156
|
-
`EO_TIDES_TIDE_MODELS` environment variable to point to the location of your
|
157
|
-
tide model directory?
|
158
|
-
"""
|
159
|
-
).strip()
|
160
|
-
|
161
|
-
if raise_error:
|
162
|
-
raise Exception(warning_msg)
|
163
|
-
else:
|
164
|
-
warnings.warn(warning_msg, UserWarning)
|
165
|
-
|
166
|
-
# Return list of available and supported models
|
167
|
-
return available_models, supported_models
|
168
|
-
|
169
|
-
|
170
|
-
def _model_tides(
|
171
|
-
model,
|
172
|
-
x,
|
173
|
-
y,
|
174
|
-
time,
|
175
|
-
directory,
|
176
|
-
crs,
|
177
|
-
crop,
|
178
|
-
method,
|
179
|
-
extrapolate,
|
180
|
-
cutoff,
|
181
|
-
output_units,
|
182
|
-
mode,
|
183
|
-
):
|
184
|
-
"""Worker function applied in parallel by `model_tides`. Handles the
|
185
|
-
extraction of tide modelling constituents and tide modelling using
|
186
|
-
`pyTMD`.
|
187
|
-
"""
|
188
|
-
# Obtain model details
|
189
|
-
pytmd_model = pyTMD.io.model(directory).elevation(model)
|
190
|
-
|
191
|
-
# Convert x, y to latitude/longitude
|
192
|
-
transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
|
193
|
-
lon, lat = transformer.transform(x.flatten(), y.flatten())
|
194
|
-
|
195
|
-
# Convert datetime
|
196
|
-
timescale = pyTMD.time.timescale().from_datetime(time.flatten())
|
197
|
-
|
198
|
-
# Calculate bounds for cropping
|
199
|
-
buffer = 1 # one degree on either side
|
200
|
-
bounds = [
|
201
|
-
lon.min() - buffer,
|
202
|
-
lon.max() + buffer,
|
203
|
-
lat.min() - buffer,
|
204
|
-
lat.max() + buffer,
|
205
|
-
]
|
206
|
-
|
207
|
-
try:
|
208
|
-
# Read tidal constants and interpolate to grid points
|
209
|
-
if pytmd_model.format in ("OTIS", "ATLAS-compact", "TMD3"):
|
210
|
-
amp, ph, D, c = pyTMD.io.OTIS.extract_constants(
|
211
|
-
lon,
|
212
|
-
lat,
|
213
|
-
pytmd_model.grid_file,
|
214
|
-
pytmd_model.model_file,
|
215
|
-
pytmd_model.projection,
|
216
|
-
type=pytmd_model.type,
|
217
|
-
grid=pytmd_model.file_format,
|
218
|
-
crop=crop,
|
219
|
-
bounds=bounds,
|
220
|
-
method=method,
|
221
|
-
extrapolate=extrapolate,
|
222
|
-
cutoff=cutoff,
|
223
|
-
)
|
224
|
-
|
225
|
-
# Use delta time at 2000.0 to match TMD outputs
|
226
|
-
deltat = np.zeros((len(timescale)), dtype=np.float64)
|
227
|
-
|
228
|
-
elif pytmd_model.format in ("ATLAS-netcdf",):
|
229
|
-
amp, ph, D, c = pyTMD.io.ATLAS.extract_constants(
|
230
|
-
lon,
|
231
|
-
lat,
|
232
|
-
pytmd_model.grid_file,
|
233
|
-
pytmd_model.model_file,
|
234
|
-
type=pytmd_model.type,
|
235
|
-
crop=crop,
|
236
|
-
bounds=bounds,
|
237
|
-
method=method,
|
238
|
-
extrapolate=extrapolate,
|
239
|
-
cutoff=cutoff,
|
240
|
-
scale=pytmd_model.scale,
|
241
|
-
compressed=pytmd_model.compressed,
|
242
|
-
)
|
243
|
-
|
244
|
-
# Use delta time at 2000.0 to match TMD outputs
|
245
|
-
deltat = np.zeros((len(timescale)), dtype=np.float64)
|
246
|
-
|
247
|
-
elif pytmd_model.format in ("GOT-ascii", "GOT-netcdf"):
|
248
|
-
amp, ph, c = pyTMD.io.GOT.extract_constants(
|
249
|
-
lon,
|
250
|
-
lat,
|
251
|
-
pytmd_model.model_file,
|
252
|
-
grid=pytmd_model.file_format,
|
253
|
-
crop=crop,
|
254
|
-
bounds=bounds,
|
255
|
-
method=method,
|
256
|
-
extrapolate=extrapolate,
|
257
|
-
cutoff=cutoff,
|
258
|
-
scale=pytmd_model.scale,
|
259
|
-
compressed=pytmd_model.compressed,
|
260
|
-
)
|
261
|
-
|
262
|
-
# Delta time (TT - UT1)
|
263
|
-
deltat = timescale.tt_ut1
|
264
|
-
|
265
|
-
elif pytmd_model.format in ("FES-ascii", "FES-netcdf"):
|
266
|
-
amp, ph = pyTMD.io.FES.extract_constants(
|
267
|
-
lon,
|
268
|
-
lat,
|
269
|
-
pytmd_model.model_file,
|
270
|
-
type=pytmd_model.type,
|
271
|
-
version=pytmd_model.version,
|
272
|
-
crop=crop,
|
273
|
-
bounds=bounds,
|
274
|
-
method=method,
|
275
|
-
extrapolate=extrapolate,
|
276
|
-
cutoff=cutoff,
|
277
|
-
scale=pytmd_model.scale,
|
278
|
-
compressed=pytmd_model.compressed,
|
279
|
-
)
|
280
|
-
|
281
|
-
# Available model constituents
|
282
|
-
c = pytmd_model.constituents
|
283
|
-
|
284
|
-
# Delta time (TT - UT1)
|
285
|
-
deltat = timescale.tt_ut1
|
286
|
-
else:
|
287
|
-
raise Exception(
|
288
|
-
f"Unsupported model format ({pytmd_model.format}). This may be due to an incompatible version of `pyTMD`."
|
289
|
-
)
|
290
|
-
|
291
|
-
# Raise error if constituent files no not cover analysis extent
|
292
|
-
except IndexError:
|
293
|
-
error_msg = textwrap.dedent(
|
294
|
-
f"""
|
295
|
-
The {model} tide model constituent files do not cover the requested analysis extent.
|
296
|
-
This can occur if you are using clipped model files to improve run times.
|
297
|
-
Consider using model files that cover your entire analysis area, or set `crop=False`
|
298
|
-
to reduce the extent of tide model constituent files that is loaded.
|
299
|
-
"""
|
300
|
-
).strip()
|
301
|
-
raise Exception(error_msg)
|
302
|
-
|
303
|
-
# Calculate complex phase in radians for Euler's
|
304
|
-
cph = -1j * ph * np.pi / 180.0
|
305
|
-
|
306
|
-
# Calculate constituent oscillation
|
307
|
-
hc = amp * np.exp(cph)
|
308
|
-
|
309
|
-
# Determine the number of points and times to process. If in
|
310
|
-
# "one-to-many" mode, these counts are used to repeat our extracted
|
311
|
-
# constituents and timesteps so we can extract tides for all
|
312
|
-
# combinations of our input times and tide modelling points.
|
313
|
-
# If in "one-to-one" mode, we avoid this step by setting counts to 1
|
314
|
-
# (e.g. "repeat 1 times")
|
315
|
-
points_repeat = len(x) if mode == "one-to-many" else 1
|
316
|
-
time_repeat = len(time) if mode == "one-to-many" else 1
|
317
|
-
|
318
|
-
# If in "one-to-many" mode, repeat constituents to length of time
|
319
|
-
# and number of input coords before passing to `predict_tide_drift`
|
320
|
-
t, hc, deltat = (
|
321
|
-
np.tile(timescale.tide, points_repeat),
|
322
|
-
hc.repeat(time_repeat, axis=0),
|
323
|
-
np.tile(deltat, points_repeat),
|
324
|
-
)
|
325
|
-
|
326
|
-
# Predict tidal elevations at time and infer minor corrections
|
327
|
-
npts = len(t)
|
328
|
-
tide = np.ma.zeros((npts), fill_value=np.nan)
|
329
|
-
tide.mask = np.any(hc.mask, axis=1)
|
330
|
-
|
331
|
-
# Predict tides
|
332
|
-
tide.data[:] = pyTMD.predict.drift(t, hc, c, deltat=deltat, corrections=pytmd_model.corrections)
|
333
|
-
minor = pyTMD.predict.infer_minor(
|
334
|
-
t,
|
335
|
-
hc,
|
336
|
-
c,
|
337
|
-
deltat=deltat,
|
338
|
-
corrections=pytmd_model.corrections,
|
339
|
-
minor=pytmd_model.minor,
|
340
|
-
)
|
341
|
-
tide.data[:] += minor.data[:]
|
342
|
-
|
343
|
-
# Replace invalid values with fill value
|
344
|
-
tide.data[tide.mask] = tide.fill_value
|
345
|
-
|
346
|
-
# Convert data to pandas.DataFrame, and set index to our input
|
347
|
-
# time/x/y values
|
348
|
-
tide_df = pd.DataFrame({
|
349
|
-
"time": np.tile(time, points_repeat),
|
350
|
-
"x": np.repeat(x, time_repeat),
|
351
|
-
"y": np.repeat(y, time_repeat),
|
352
|
-
"tide_model": model,
|
353
|
-
"tide_height": tide,
|
354
|
-
}).set_index(["time", "x", "y"])
|
355
|
-
|
356
|
-
# Optionally convert outputs to integer units (can save memory)
|
357
|
-
if output_units == "m":
|
358
|
-
tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
|
359
|
-
elif output_units == "cm":
|
360
|
-
tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
|
361
|
-
elif output_units == "mm":
|
362
|
-
tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
|
363
|
-
|
364
|
-
return tide_df
|
24
|
+
from .utils import DatetimeLike, _set_directory, _standardise_time, idw, list_models
|
365
25
|
|
366
26
|
|
367
27
|
def _ensemble_model(
|
@@ -529,20 +189,195 @@ def _ensemble_model(
|
|
529
189
|
return pd.concat(ensemble_list)
|
530
190
|
|
531
191
|
|
192
|
+
def _parallel_splits(
|
193
|
+
total_points: int,
|
194
|
+
model_count: int,
|
195
|
+
parallel_max: int | None = None,
|
196
|
+
min_points_per_split: int = 1000,
|
197
|
+
) -> int:
|
198
|
+
"""
|
199
|
+
Calculates the optimal number of parallel splits for data
|
200
|
+
processing based on system resources and processing constraints.
|
201
|
+
|
202
|
+
Parameters
|
203
|
+
----------
|
204
|
+
total_points : int
|
205
|
+
Total number of data points to process
|
206
|
+
model_count : int
|
207
|
+
Number of models that will be run in parallel
|
208
|
+
parallel_max : int, optional
|
209
|
+
Maximum number of parallel processes to use. If None, uses CPU core count
|
210
|
+
min_points_per_split : int, default=1000
|
211
|
+
Minimum number of points that should be processed in each split
|
212
|
+
"""
|
213
|
+
# Get available CPUs. First see if `CPU_GUARANTEE` exists in
|
214
|
+
# environment (if running in JupyterHub); if not use psutil
|
215
|
+
# followed by standard CPU count
|
216
|
+
if parallel_max is None:
|
217
|
+
# Take the first valid output
|
218
|
+
raw_value = os.environ.get("CPU_GUARANTEE") or psutil.cpu_count(logical=False) or os.cpu_count() or 1
|
219
|
+
|
220
|
+
# Convert to integer
|
221
|
+
if isinstance(raw_value, str):
|
222
|
+
parallel_max = int(float(raw_value))
|
223
|
+
else:
|
224
|
+
parallel_max = int(raw_value)
|
225
|
+
|
226
|
+
# Calculate optimal number of splits based on constraints
|
227
|
+
splits_by_size = total_points / min_points_per_split
|
228
|
+
splits_by_cpu = parallel_max / model_count
|
229
|
+
optimal_splits = min(splits_by_size, splits_by_cpu)
|
230
|
+
|
231
|
+
# Convert to integer and ensure at least 1 split
|
232
|
+
final_split_count = int(max(1, optimal_splits))
|
233
|
+
return final_split_count
|
234
|
+
|
235
|
+
|
236
|
+
def _model_tides(
|
237
|
+
model,
|
238
|
+
x,
|
239
|
+
y,
|
240
|
+
time,
|
241
|
+
directory,
|
242
|
+
crs,
|
243
|
+
crop,
|
244
|
+
method,
|
245
|
+
extrapolate,
|
246
|
+
cutoff,
|
247
|
+
output_units,
|
248
|
+
mode,
|
249
|
+
):
|
250
|
+
"""Worker function applied in parallel by `model_tides`. Handles the
|
251
|
+
extraction of tide modelling constituents and tide modelling using
|
252
|
+
`pyTMD`.
|
253
|
+
"""
|
254
|
+
# Obtain model details
|
255
|
+
pytmd_model = pyTMD.io.model(directory).elevation(model)
|
256
|
+
|
257
|
+
# Reproject x, y to latitude/longitude
|
258
|
+
transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
|
259
|
+
lon, lat = transformer.transform(x.flatten(), y.flatten())
|
260
|
+
|
261
|
+
# Convert datetime
|
262
|
+
timescale = pyTMD.time.timescale().from_datetime(time.flatten())
|
263
|
+
|
264
|
+
try:
|
265
|
+
# Read tidal constants and interpolate to grid points
|
266
|
+
amp, ph, c = pytmd_model.extract_constants(
|
267
|
+
lon,
|
268
|
+
lat,
|
269
|
+
type=pytmd_model.type,
|
270
|
+
crop=crop,
|
271
|
+
method=method,
|
272
|
+
extrapolate=extrapolate,
|
273
|
+
cutoff=cutoff,
|
274
|
+
append_node=False,
|
275
|
+
# append_node=True,
|
276
|
+
)
|
277
|
+
|
278
|
+
# TODO: Return constituents
|
279
|
+
# print(amp.shape, ph.shape, c)
|
280
|
+
# print(pd.DataFrame({"amplitude": amp}))
|
281
|
+
|
282
|
+
# Raise error if constituent files do not cover analysis extent
|
283
|
+
except IndexError:
|
284
|
+
error_msg = f"""
|
285
|
+
The {model} tide model constituent files do not cover the analysis extent
|
286
|
+
({min(lon):.2f}, {max(lon):.2f}, {min(lat):.2f}, {max(lat):.2f}).
|
287
|
+
This can occur if you are using clipped model files to improve run times.
|
288
|
+
Consider using model files that cover your entire analysis area, or set `crop=False`
|
289
|
+
to reduce the extent of tide model constituent files that is loaded.
|
290
|
+
"""
|
291
|
+
raise Exception(textwrap.dedent(error_msg).strip()) from None
|
292
|
+
|
293
|
+
# Calculate complex phase in radians for Euler's formula
|
294
|
+
cph = -1j * ph * np.pi / 180.0
|
295
|
+
|
296
|
+
# Calculate constituent oscillation
|
297
|
+
hc = amp * np.exp(cph)
|
298
|
+
|
299
|
+
# Compute delta times based on model
|
300
|
+
if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
|
301
|
+
# Use delta time at 2000.0 to match TMD outputs
|
302
|
+
deltat = np.zeros_like(timescale.tt_ut1)
|
303
|
+
else:
|
304
|
+
# Use interpolated delta times
|
305
|
+
deltat = timescale.tt_ut1
|
306
|
+
|
307
|
+
# In "one-to-many" mode, extracted tidal constituents and timesteps
|
308
|
+
# are repeated/multiplied out to match the number of input points and
|
309
|
+
# timesteps, enabling the modeling of tides across all combinations
|
310
|
+
# of input times and points. In "one-to-one" mode, no repetition is
|
311
|
+
# needed, so each repeat count is set to 1.
|
312
|
+
points_repeat = len(x) if mode == "one-to-many" else 1
|
313
|
+
time_repeat = len(time) if mode == "one-to-many" else 1
|
314
|
+
t, hc, deltat = (
|
315
|
+
np.tile(timescale.tide, points_repeat),
|
316
|
+
hc.repeat(time_repeat, axis=0),
|
317
|
+
np.tile(deltat, points_repeat),
|
318
|
+
)
|
319
|
+
|
320
|
+
# Create arrays to hold outputs
|
321
|
+
tide = np.ma.zeros((len(t)), fill_value=np.nan)
|
322
|
+
tide.mask = np.any(hc.mask, axis=1)
|
323
|
+
|
324
|
+
# Predict tidal elevations at time and infer minor corrections
|
325
|
+
tide.data[:] = pyTMD.predict.drift(
|
326
|
+
t,
|
327
|
+
hc,
|
328
|
+
c,
|
329
|
+
deltat=deltat,
|
330
|
+
corrections=pytmd_model.corrections,
|
331
|
+
)
|
332
|
+
minor = pyTMD.predict.infer_minor(
|
333
|
+
t,
|
334
|
+
hc,
|
335
|
+
c,
|
336
|
+
deltat=deltat,
|
337
|
+
corrections=pytmd_model.corrections,
|
338
|
+
minor=pytmd_model.minor,
|
339
|
+
)
|
340
|
+
tide.data[:] += minor.data[:]
|
341
|
+
|
342
|
+
# Replace invalid values with fill value
|
343
|
+
tide.data[tide.mask] = tide.fill_value
|
344
|
+
|
345
|
+
# Convert data to pandas.DataFrame, and set index to our input
|
346
|
+
# time/x/y values
|
347
|
+
tide_df = pd.DataFrame({
|
348
|
+
"time": np.tile(time, points_repeat),
|
349
|
+
"x": np.repeat(x, time_repeat),
|
350
|
+
"y": np.repeat(y, time_repeat),
|
351
|
+
"tide_model": model,
|
352
|
+
"tide_height": tide,
|
353
|
+
}).set_index(["time", "x", "y"])
|
354
|
+
|
355
|
+
# Optionally convert outputs to integer units (can save memory)
|
356
|
+
if output_units == "m":
|
357
|
+
tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
|
358
|
+
elif output_units == "cm":
|
359
|
+
tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
|
360
|
+
elif output_units == "mm":
|
361
|
+
tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
|
362
|
+
|
363
|
+
return tide_df
|
364
|
+
|
365
|
+
|
532
366
|
def model_tides(
|
533
367
|
x: float | list[float] | xr.DataArray,
|
534
368
|
y: float | list[float] | xr.DataArray,
|
535
|
-
time:
|
369
|
+
time: DatetimeLike,
|
536
370
|
model: str | list[str] = "EOT20",
|
537
371
|
directory: str | os.PathLike | None = None,
|
538
372
|
crs: str = "EPSG:4326",
|
539
373
|
crop: bool = True,
|
540
|
-
method: str = "
|
374
|
+
method: str = "linear",
|
541
375
|
extrapolate: bool = True,
|
542
376
|
cutoff: float | None = None,
|
543
377
|
mode: str = "one-to-many",
|
544
378
|
parallel: bool = True,
|
545
|
-
parallel_splits: int =
|
379
|
+
parallel_splits: int | str = "auto",
|
380
|
+
parallel_max: int | None = None,
|
546
381
|
output_units: str = "m",
|
547
382
|
output_format: str = "long",
|
548
383
|
ensemble_models: list[str] | None = None,
|
@@ -578,10 +413,11 @@ def model_tides(
|
|
578
413
|
the location at which to model tides. By default these
|
579
414
|
coordinates should be lat/lon; use "crs" if they
|
580
415
|
are in a custom coordinate reference system.
|
581
|
-
time :
|
582
|
-
|
583
|
-
|
584
|
-
|
416
|
+
time : DatetimeLike
|
417
|
+
Times at which to model tide heights (in UTC). Accepts
|
418
|
+
any format that can be converted by `pandas.to_datetime()`;
|
419
|
+
e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
|
420
|
+
datetime.datetime and strings (e.g. "2020-01-01 23:00").
|
585
421
|
model : str or list of str, optional
|
586
422
|
The tide model (or models) to use to model tides.
|
587
423
|
Defaults to "EOT20"; for a full list of available/supported
|
@@ -602,11 +438,11 @@ def model_tides(
|
|
602
438
|
1 degree buffer around all input points. Defaults to True.
|
603
439
|
method : str, optional
|
604
440
|
Method used to interpolate tidal constituents
|
605
|
-
from model files.
|
441
|
+
from model files. Defaults to "linear"; options include:
|
606
442
|
|
607
|
-
- "spline": scipy bivariate spline interpolation (default)
|
608
|
-
- "bilinear": quick bilinear interpolation
|
609
443
|
- "linear", "nearest": scipy regular grid interpolations
|
444
|
+
- "spline": scipy bivariate spline interpolation
|
445
|
+
- "bilinear": quick bilinear interpolation
|
610
446
|
extrapolate : bool, optional
|
611
447
|
Whether to extrapolate tides for x and y coordinates outside of
|
612
448
|
the valid tide modelling domain using nearest-neighbor.
|
@@ -632,12 +468,16 @@ def model_tides(
|
|
632
468
|
parallel. Optionally, tide modelling can also be run in parallel
|
633
469
|
across input x and y coordinates (see "parallel_splits" below).
|
634
470
|
Default is True.
|
635
|
-
parallel_splits : int, optional
|
471
|
+
parallel_splits : str or int, optional
|
636
472
|
Whether to split the input x and y coordinates into smaller,
|
637
473
|
evenly-sized chunks that are processed in parallel. This can
|
638
474
|
provide a large performance boost when processing large numbers
|
639
|
-
of coordinates. The default is
|
640
|
-
|
475
|
+
of coordinates. The default is "auto", which will automatically
|
476
|
+
attempt to determine optimal splits based on available CPUs,
|
477
|
+
the number of input points, and the number of models.
|
478
|
+
parallel_max : int, optional
|
479
|
+
Maximum number of processes to run in parallel. The default of
|
480
|
+
None will automatically determine this from your available CPUs.
|
641
481
|
output_units : str, optional
|
642
482
|
Whether to return modelled tides in floating point metre units,
|
643
483
|
or integer centimetre units (i.e. scaled by 100) or integer
|
@@ -674,9 +514,10 @@ def model_tides(
|
|
674
514
|
models_requested = list(np.atleast_1d(model))
|
675
515
|
x = np.atleast_1d(x)
|
676
516
|
y = np.atleast_1d(y)
|
677
|
-
time =
|
517
|
+
time = _standardise_time(time)
|
678
518
|
|
679
519
|
# Validate input arguments
|
520
|
+
assert time is not None, "Times for modelling tides muyst be provided via `time`."
|
680
521
|
assert method in ("bilinear", "spline", "linear", "nearest")
|
681
522
|
assert output_units in (
|
682
523
|
"m",
|
@@ -695,10 +536,6 @@ def model_tides(
|
|
695
536
|
"you intended to model multiple timesteps at each point."
|
696
537
|
)
|
697
538
|
|
698
|
-
# If time passed as a single Timestamp, convert to datetime64
|
699
|
-
if isinstance(time, pd.Timestamp):
|
700
|
-
time = time.to_datetime64()
|
701
|
-
|
702
539
|
# Set tide modelling files directory. If no custom path is
|
703
540
|
# provided, try global environment variable.
|
704
541
|
directory = _set_directory(directory)
|
@@ -770,13 +607,28 @@ def model_tides(
|
|
770
607
|
mode=mode,
|
771
608
|
)
|
772
609
|
|
773
|
-
#
|
774
|
-
|
610
|
+
# If automatic parallel splits, calculate optimal value
|
611
|
+
# based on available parallelisation, number of points
|
612
|
+
# and number of models
|
613
|
+
if parallel_splits == "auto":
|
614
|
+
parallel_splits = _parallel_splits(
|
615
|
+
total_points=len(x),
|
616
|
+
model_count=len(models_to_process),
|
617
|
+
parallel_max=parallel_max,
|
618
|
+
)
|
619
|
+
|
620
|
+
# Verify that parallel splits are not larger than number of points
|
621
|
+
assert isinstance(parallel_splits, int)
|
622
|
+
if parallel_splits > len(x):
|
623
|
+
raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
|
775
624
|
|
776
625
|
# Parallelise if either multiple models or multiple splits requested
|
626
|
+
|
777
627
|
if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
|
778
|
-
with ProcessPoolExecutor() as executor:
|
779
|
-
print(
|
628
|
+
with ProcessPoolExecutor(max_workers=parallel_max) as executor:
|
629
|
+
print(
|
630
|
+
f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})"
|
631
|
+
)
|
780
632
|
|
781
633
|
# Optionally split lon/lat points into `splits_n` chunks
|
782
634
|
# that will be applied in parallel
|
@@ -824,7 +676,7 @@ def model_tides(
|
|
824
676
|
model_outputs = []
|
825
677
|
|
826
678
|
for model_i in models_to_process:
|
827
|
-
print(f"Modelling tides
|
679
|
+
print(f"Modelling tides with {model_i}")
|
828
680
|
tide_df = iter_func(model_i, x, y, time)
|
829
681
|
model_outputs.append(tide_df)
|
830
682
|
|
@@ -854,3 +706,135 @@ def model_tides(
|
|
854
706
|
tide_df = tide_df.reindex(output_indices)
|
855
707
|
|
856
708
|
return tide_df
|
709
|
+
|
710
|
+
|
711
|
+
def model_phases(
|
712
|
+
x: float | list[float] | xr.DataArray,
|
713
|
+
y: float | list[float] | xr.DataArray,
|
714
|
+
time: DatetimeLike,
|
715
|
+
model: str | list[str] = "EOT20",
|
716
|
+
directory: str | os.PathLike | None = None,
|
717
|
+
time_offset: str = "15 min",
|
718
|
+
return_tides: bool = False,
|
719
|
+
**model_tides_kwargs,
|
720
|
+
) -> pd.DataFrame:
|
721
|
+
"""
|
722
|
+
Model tide phases (low-flow, high-flow, high-ebb, low-ebb)
|
723
|
+
at multiple coordinates and/or timesteps using one
|
724
|
+
or more ocean tide models.
|
725
|
+
|
726
|
+
Ebb and flow phases are calculated by running the
|
727
|
+
`eo_tides.model.model_tides` function twice, once for
|
728
|
+
the requested timesteps, and again after subtracting a
|
729
|
+
small time offset (by default, 15 minutes). If tides
|
730
|
+
increased over this period, they are assigned as "flow";
|
731
|
+
if they decreased, they are assigned as "ebb".
|
732
|
+
Tides are considered "high" if equal or greater than 0
|
733
|
+
metres tide height, otherwise "low".
|
734
|
+
|
735
|
+
This function supports all parameters that are supported
|
736
|
+
by `model_tides`.
|
737
|
+
|
738
|
+
Parameters
|
739
|
+
----------
|
740
|
+
x, y : float or list of float
|
741
|
+
One or more x and y coordinates used to define
|
742
|
+
the location at which to model tide phases. By default
|
743
|
+
these coordinates should be lat/lon; use "crs" if they
|
744
|
+
are in a custom coordinate reference system.
|
745
|
+
time : DatetimeLike
|
746
|
+
Times at which to model tide phases (in UTC). Accepts
|
747
|
+
any format that can be converted by `pandas.to_datetime()`;
|
748
|
+
e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
|
749
|
+
datetime.datetime and strings (e.g. "2020-01-01 23:00").
|
750
|
+
model : str or list of str, optional
|
751
|
+
The tide model (or models) to use to compute tide phases.
|
752
|
+
Defaults to "EOT20"; for a full list of available/supported
|
753
|
+
models, run `eo_tides.model.list_models`.
|
754
|
+
directory : str, optional
|
755
|
+
The directory containing tide model data files. If no path is
|
756
|
+
provided, this will default to the environment variable
|
757
|
+
`EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
|
758
|
+
Tide modelling files should be stored in sub-folders for each
|
759
|
+
model that match the structure required by `pyTMD`
|
760
|
+
(<https://geoscienceaustralia.github.io/eo-tides/setup/>).
|
761
|
+
time_offset: str, optional
|
762
|
+
The time offset/delta used to generate a time series of
|
763
|
+
offset tide heights required for phase calculation. Defaults
|
764
|
+
to "15 min"; can be any string passed to `pandas.Timedelta`.
|
765
|
+
return_tides: bool, optional
|
766
|
+
Whether to return intermediate modelled tide heights as a
|
767
|
+
"tide_height" column in the output dataframe. Defaults to False.
|
768
|
+
**model_tides_kwargs :
|
769
|
+
Optional parameters passed to the `eo_tides.model.model_tides`
|
770
|
+
function. Important parameters include `output_format` (e.g.
|
771
|
+
whether to return results in wide or long format), `crop`
|
772
|
+
(whether to crop tide model constituent files on-the-fly to
|
773
|
+
improve performance) etc.
|
774
|
+
|
775
|
+
Returns
|
776
|
+
-------
|
777
|
+
pandas.DataFrame
|
778
|
+
A dataframe containing modelled tide phases.
|
779
|
+
|
780
|
+
"""
|
781
|
+
|
782
|
+
# Pop output format and mode for special handling
|
783
|
+
output_format = model_tides_kwargs.pop("output_format", "long")
|
784
|
+
mode = model_tides_kwargs.pop("mode", "one-to-many")
|
785
|
+
|
786
|
+
# Model tides
|
787
|
+
tide_df = model_tides(
|
788
|
+
x=x,
|
789
|
+
y=y,
|
790
|
+
time=time,
|
791
|
+
model=model,
|
792
|
+
directory=directory,
|
793
|
+
**model_tides_kwargs,
|
794
|
+
)
|
795
|
+
|
796
|
+
# Model tides at `time_offset` (15 minutes by default) prior to each previously
|
797
|
+
# modelled timestep. This allows us to compare
|
798
|
+
# tide heights to see if they are rising or falling.
|
799
|
+
pre_df = model_tides(
|
800
|
+
x=x,
|
801
|
+
y=y,
|
802
|
+
time=time - pd.Timedelta(time_offset),
|
803
|
+
model=model,
|
804
|
+
directory=directory,
|
805
|
+
**model_tides_kwargs,
|
806
|
+
)
|
807
|
+
|
808
|
+
# Compare tides computed for each timestep. If the previous tide
|
809
|
+
# was higher than the current tide, the tide is 'ebbing'. If the
|
810
|
+
# previous tide was lower, the tide is 'flowing'
|
811
|
+
ebb_flow = (tide_df.tide_height < pre_df.tide_height.values).replace({True: "ebb", False: "flow"})
|
812
|
+
|
813
|
+
# If tides are greater than or equal to 0, then "high", otherwise "low"
|
814
|
+
high_low = (tide_df.tide_height >= 0).replace({True: "high", False: "low"})
|
815
|
+
|
816
|
+
# Combine into one string and add to data
|
817
|
+
tide_df["tide_phase"] = high_low.astype(str) + "-" + ebb_flow.astype(str)
|
818
|
+
|
819
|
+
# Optionally convert to a wide format dataframe with a tide model in
|
820
|
+
# each dataframe column
|
821
|
+
if output_format == "wide":
|
822
|
+
# Pivot into wide format with each tide model as a column
|
823
|
+
print("Converting to a wide format dataframe")
|
824
|
+
tide_df = tide_df.pivot(columns="tide_model")
|
825
|
+
|
826
|
+
# If in 'one-to-one' mode, reindex using our input time/x/y
|
827
|
+
# values to ensure the output is sorted the same as our inputs
|
828
|
+
if mode == "one-to-one":
|
829
|
+
output_indices = pd.MultiIndex.from_arrays([time, x, y], names=["time", "x", "y"])
|
830
|
+
tide_df = tide_df.reindex(output_indices)
|
831
|
+
|
832
|
+
# Optionally drop tides
|
833
|
+
if not return_tides:
|
834
|
+
return tide_df.drop("tide_height", axis=1)["tide_phase"]
|
835
|
+
|
836
|
+
# Optionally drop tide heights
|
837
|
+
if not return_tides:
|
838
|
+
return tide_df.drop("tide_height", axis=1)
|
839
|
+
|
840
|
+
return tide_df
|