eo-tides 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
- eo_tides/__init__.py +4 -3
- eo_tides/eo.py +2 -4
- eo_tides/model.py +218 -371
- eo_tides/stats.py +2 -4
- eo_tides/utils.py +546 -1
- eo_tides/validation.py +5 -5
- {eo_tides-0.2.0.dist-info → eo_tides-0.3.1.dist-info}/METADATA +13 -9
- eo_tides-0.3.1.dist-info/RECORD +11 -0
- {eo_tides-0.2.0.dist-info → eo_tides-0.3.1.dist-info}/WHEEL +1 -1
- eo_tides-0.2.0.dist-info/RECORD +0 -11
- {eo_tides-0.2.0.dist-info → eo_tides-0.3.1.dist-info}/LICENSE +0 -0
- {eo_tides-0.2.0.dist-info → eo_tides-0.3.1.dist-info}/top_level.txt +0 -0
eo_tides/model.py
CHANGED
@@ -1,15 +1,14 @@
 # Used to postpone evaluation of type annotations
 from __future__ import annotations
 
-import datetime
 import os
-import pathlib
 import textwrap
-import warnings
 from concurrent.futures import ProcessPoolExecutor
 from concurrent.futures.process import BrokenProcessPool
 from functools import partial
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING
+
+import psutil
 
 # Only import if running type checking
 if TYPE_CHECKING:
@@ -20,309 +19,9 @@ import numpy as np
 import pandas as pd
 import pyproj
 import pyTMD
-from colorama import Style, init
-from pyTMD.io.model import load_database, model
 from tqdm import tqdm
 
-from .utils import idw
-
-# Type alias for all possible inputs to "time" params
-DatetimeLike = Union[np.ndarray, pd.DatetimeIndex, pd.Timestamp, datetime.datetime, str, List[str]]
-
-
-def _set_directory(
-    directory: str | os.PathLike | None = None,
-) -> os.PathLike:
-    """
-    Set tide modelling files directory. If no custom
-    path is provided, try global environmental variable
-    instead.
-    """
-    if directory is None:
-        if "EO_TIDES_TIDE_MODELS" in os.environ:
-            directory = os.environ["EO_TIDES_TIDE_MODELS"]
-        else:
-            raise Exception(
-                "No tide model directory provided via `directory`, and/or no "
-                "`EO_TIDES_TIDE_MODELS` environment variable found. "
-                "Please provide a valid path to your tide model directory."
-            )
-
-    # Verify path exists
-    directory = pathlib.Path(directory).expanduser()
-    if not directory.exists():
-        raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
-    else:
-        return directory
-
-
-def _standardise_time(
-    time: DatetimeLike | None,
-) -> np.ndarray | None:
-    """
-    Accept any time format accepted by `pd.to_datetime`,
-    and return a datetime64 ndarray. Return None if None
-    passed.
-    """
-    # Return time as-is if None
-    if time is None:
-        return None
-
-    # Use pd.to_datetime for conversion, then convert to numpy array
-    time = pd.to_datetime(time).to_numpy().astype("datetime64[ns]")
-
-    # Ensure that data has at least one dimension
-    return np.atleast_1d(time)
-
-
-def list_models(
-    directory: str | os.PathLike | None = None,
-    show_available: bool = True,
-    show_supported: bool = True,
-    raise_error: bool = False,
-) -> tuple[list[str], list[str]]:
-    """
-    List all tide models available for tide modelling.
-
-    This function scans the specified tide model directory
-    and returns a list of models that are available in the
-    directory as well as the full list of all models supported
-    by `eo-tides` and `pyTMD`.
-
-    For instructions on setting up tide models, see:
-    <https://geoscienceaustralia.github.io/eo-tides/setup/>
-
-    Parameters
-    ----------
-    directory : str, optional
-        The directory containing tide model data files. If no path is
-        provided, this will default to the environment variable
-        `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
-        Tide modelling files should be stored in sub-folders for each
-        model that match the structure required by `pyTMD`
-        (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
-    show_available : bool, optional
-        Whether to print a list of locally available models.
-    show_supported : bool, optional
-        Whether to print a list of all supported models, in
-        addition to models available locally.
-    raise_error : bool, optional
-        If True, raise an error if no available models are found.
-        If False, raise a warning.
-
-    Returns
-    -------
-    available_models : list of str
-        A list of all tide models available within `directory`.
-    supported_models : list of str
-        A list of all tide models supported by `eo-tides`.
-    """
-    init()  # Initialize colorama
-
-    # Set tide modelling files directory. If no custom path is
-    # provided, try global environment variable.
-    directory = _set_directory(directory)
-
-    # Get full list of supported models from pyTMD database
-    model_database = load_database()["elevation"]
-    supported_models = list(model_database.keys())
-
-    # Extract expected model paths
-    expected_paths = {}
-    for m in supported_models:
-        model_file = model_database[m]["model_file"]
-        model_file = model_file[0] if isinstance(model_file, list) else model_file
-        expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
-
-    # Define column widths
-    status_width = 4  # Width for emoji
-    name_width = max(len(name) for name in supported_models)
-    path_width = max(len(path) for path in expected_paths.values())
-
-    # Print list of supported models, marking available and
-    # unavailable models and appending available to list
-    if show_available or show_supported:
-        total_width = min(status_width + name_width + path_width + 6, 80)
-        print("─" * total_width)
-        print(f"{'🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
-        print("─" * total_width)
-
-    available_models = []
-    for m in supported_models:
-        try:
-            model_file = model(directory=directory).elevation(m=m)
-            available_models.append(m)
-
-            if show_available:
-                # Mark available models with a green tick
-                status = "✅"
-                print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
-        except FileNotFoundError:
-            if show_supported:
-                # Mark unavailable models with a red cross
-                status = "❌"
-                print(
-                    f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
-                )
-
-    if show_available or show_supported:
-        print("─" * total_width)
-
-        # Print summary
-        print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
-        print(f"Available models: {len(available_models)}/{len(supported_models)}")
-
-    # Raise error or warning if no models are available
-    if not available_models:
-        warning_msg = textwrap.dedent(
-            f"""
-            No valid tide models are available in `{directory}`.
-            Are you sure you have provided the correct `directory` path, or set the
-            `EO_TIDES_TIDE_MODELS` environment variable to point to the location of your
-            tide model directory?
-            """
-        ).strip()
-
-        if raise_error:
-            raise Exception(warning_msg)
-        else:
-            warnings.warn(warning_msg, UserWarning)
-
-    # Return list of available and supported models
-    return available_models, supported_models
-
-
-def _model_tides(
-    model,
-    x,
-    y,
-    time,
-    directory,
-    crs,
-    crop,
-    method,
-    extrapolate,
-    cutoff,
-    output_units,
-    mode,
-):
-    """Worker function applied in parallel by `model_tides`. Handles the
-    extraction of tide modelling constituents and tide modelling using
-    `pyTMD`.
-    """
-    # Obtain model details
-    pytmd_model = pyTMD.io.model(directory).elevation(model)
-
-    # Reproject x, y to latitude/longitude
-    transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
-    lon, lat = transformer.transform(x.flatten(), y.flatten())
-
-    # Convert datetime
-    timescale = pyTMD.time.timescale().from_datetime(time.flatten())
-
-    try:
-        # Read tidal constants and interpolate to grid points
-        amp, ph, c = pytmd_model.extract_constants(
-            lon,
-            lat,
-            type=pytmd_model.type,
-            crop=crop,
-            bounds=None,
-            method=method,
-            extrapolate=extrapolate,
-            cutoff=cutoff,
-            append_node=False,
-            # append_node=True,
-        )
-
-        # TODO: Return constituents
-        # print(amp.shape, ph.shape, c)
-        # print(pd.DataFrame({"amplitude": amp}))
-
-    # Raise error if constituent files no not cover analysis extent
-    except IndexError as e:
-        error_msg = f"""
-        The {model} tide model constituent files do not cover the requested analysis extent.
-        This can occur if you are using clipped model files to improve run times.
-        Consider using model files that cover your entire analysis area, or set `crop=False`
-        to reduce the extent of tide model constituent files that is loaded.
-        """
-        raise Exception(textwrap.dedent(error_msg).strip()) from None
-
-    # Calculate complex phase in radians for Euler's
-    cph = -1j * ph * np.pi / 180.0
-
-    # Calculate constituent oscillation
-    hc = amp * np.exp(cph)
-
-    # Compute deltat based on model
-    if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
-        # Use delta time at 2000.0 to match TMD outputs
-        deltat = np.zeros_like(timescale.tt_ut1)
-    else:
-        # Use interpolated delta times
-        deltat = timescale.tt_ut1
-
-    # Determine the number of points and times to process. If in
-    # "one-to-many" mode, these counts are used to repeat our extracted
-    # constituents and timesteps so we can extract tides for all
-    # combinations of our input times and tide modelling points.
-    # If in "one-to-many" mode, repeat constituents to length of time
-    # and number of input coords before passing to `predict_tide_drift`
-    # If in "one-to-one" mode, we avoid this step by setting counts to 1
-    # (e.g. "repeat 1 times")
-    points_repeat = len(x) if mode == "one-to-many" else 1
-    time_repeat = len(time) if mode == "one-to-many" else 1
-    t, hc, deltat = (
-        np.tile(timescale.tide, points_repeat),
-        hc.repeat(time_repeat, axis=0),
-        np.tile(deltat, points_repeat),
-    )
-
-    # Create arrays to hold outputs
-    tide = np.ma.zeros((len(t)), fill_value=np.nan)
-    tide.mask = np.any(hc.mask, axis=1)
-
-    # Predict tidal elevations at time and infer minor corrections
-    tide.data[:] = pyTMD.predict.drift(
-        t,
-        hc,
-        c,
-        deltat=deltat,
-        corrections=pytmd_model.corrections,
-    )
-    minor = pyTMD.predict.infer_minor(
-        t,
-        hc,
-        c,
-        deltat=deltat,
-        corrections=pytmd_model.corrections,
-        minor=pytmd_model.minor,
-    )
-    tide.data[:] += minor.data[:]
-
-    # Replace invalid values with fill value
-    tide.data[tide.mask] = tide.fill_value
-
-    # Convert data to pandas.DataFrame, and set index to our input
-    # time/x/y values
-    tide_df = pd.DataFrame({
-        "time": np.tile(time, points_repeat),
-        "x": np.repeat(x, time_repeat),
-        "y": np.repeat(y, time_repeat),
-        "tide_model": model,
-        "tide_height": tide,
-    }).set_index(["time", "x", "y"])
-
-    # Optionally convert outputs to integer units (can save memory)
-    if output_units == "m":
-        tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
-    elif output_units == "cm":
-        tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
-    elif output_units == "mm":
-        tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
-
-    return tide_df
+from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw, list_models
 
 
 def _ensemble_model(
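The large block of removals above is a relocation rather than a deletion: the `DatetimeLike` alias and the `_set_directory`, `_standardise_time` and `list_models` helpers now live in `eo_tides.utils` and are imported back into `eo_tides.model` (see the added `from .utils import ...` line), while the `_model_tides` worker is re-added further down in this file. A minimal sketch of the updated import path, assuming the helpers keep the same public names in 0.3.1; the directory path is a placeholder:

```python
# Sketch only: in 0.3.1 the helpers shown as removed above are provided by
# eo_tides.utils, and remain reachable via eo_tides.model through the
# re-import added in this diff.
from eo_tides.utils import list_models

available, supported = list_models(
    directory="/var/tide_models",  # placeholder tide model directory
    show_supported=False,
)
print(f"{len(available)} of {len(supported)} supported tide models found locally")
```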
@@ -490,6 +189,181 @@ def _ensemble_model(
     return pd.concat(ensemble_list)
 
 
+def _parallel_splits(
+    total_points: int,
+    model_count: int,
+    parallel_max: int | None = None,
+    min_points_per_split: int = 1000,
+) -> int:
+    """
+    Calculates the optimal number of parallel splits for data
+    processing based on system resources and processing constraints.
+
+    Parameters:
+    -----------
+    total_points : int
+        Total number of data points to process
+    model_count : int
+        Number of models that will be run in parallel
+    parallel_max : int, optional
+        Maximum number of parallel processes to use. If None, uses CPU core count
+    min_points_per_split : int, default=1000
+        Minimum number of points that should be processed in each split
+    """
+    # Get available CPUs. First see if `CPU_GUARANTEE` exists in
+    # environment (if running in JupyterHub); if not use psutil
+    # followed by standard CPU count
+    if parallel_max is None:
+        # Take the first valid output
+        raw_value = os.environ.get("CPU_GUARANTEE") or psutil.cpu_count(logical=False) or os.cpu_count() or 1
+
+        # Convert to integer
+        if isinstance(raw_value, str):
+            parallel_max = int(float(raw_value))
+        else:
+            parallel_max = int(raw_value)
+
+    # Calculate optimal number of splits based on constraints
+    splits_by_size = total_points / min_points_per_split
+    splits_by_cpu = parallel_max / model_count
+    optimal_splits = min(splits_by_size, splits_by_cpu)
+
+    # Convert to integer and ensure at least 1 split
+    final_split_count = int(max(1, optimal_splits))
+    return final_split_count
+
+
+def _model_tides(
+    model,
+    x,
+    y,
+    time,
+    directory,
+    crs,
+    crop,
+    method,
+    extrapolate,
+    cutoff,
+    output_units,
+    mode,
+):
+    """Worker function applied in parallel by `model_tides`. Handles the
+    extraction of tide modelling constituents and tide modelling using
+    `pyTMD`.
+    """
+    # Obtain model details
+    pytmd_model = pyTMD.io.model(directory).elevation(model)
+
+    # Reproject x, y to latitude/longitude
+    transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
+    lon, lat = transformer.transform(x.flatten(), y.flatten())
+
+    # Convert datetime
+    timescale = pyTMD.time.timescale().from_datetime(time.flatten())
+
+    try:
+        # Read tidal constants and interpolate to grid points
+        amp, ph, c = pytmd_model.extract_constants(
+            lon,
+            lat,
+            type=pytmd_model.type,
+            crop=crop,
+            method=method,
+            extrapolate=extrapolate,
+            cutoff=cutoff,
+            append_node=False,
+            # append_node=True,
+        )
+
+        # TODO: Return constituents
+        # print(model, amp.shape)
+        # print(amp.shape, ph.shape, c)
+        # print(pd.DataFrame({"amplitude": amp}))
+
+    # Raise error if constituent files no not cover analysis extent
+    except IndexError:
+        error_msg = f"""
+        The {model} tide model constituent files do not cover the analysis extent
+        ({min(lon):.2f}, {max(lon):.2f}, {min(lat):.2f}, {max(lat):.2f}).
+        This can occur if you are using clipped model files to improve run times.
+        Consider using model files that cover your entire analysis area, or set `crop=False`
+        to reduce the extent of tide model constituent files that is loaded.
+        """
+        raise Exception(textwrap.dedent(error_msg).strip()) from None
+
+    # Calculate complex phase in radians for Euler's
+    cph = -1j * ph * np.pi / 180.0
+
+    # Calculate constituent oscillation
+    hc = amp * np.exp(cph)
+
+    # Compute delta times based on model
+    if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
+        # Use delta time at 2000.0 to match TMD outputs
+        deltat = np.zeros_like(timescale.tt_ut1)
+    else:
+        # Use interpolated delta times
+        deltat = timescale.tt_ut1
+
+    # In "one-to-many" mode, extracted tidal constituents and timesteps
+    # are repeated/multiplied out to match the number of input points and
+    # timesteps, enabling the modeling of tides across all combinations
+    # of input times and points. In "one-to-one" mode, no repetition is
+    # needed, so each repeat count is set to 1.
+    points_repeat = len(x) if mode == "one-to-many" else 1
+    time_repeat = len(time) if mode == "one-to-many" else 1
+    t, hc, deltat = (
+        np.tile(timescale.tide, points_repeat),
+        hc.repeat(time_repeat, axis=0),
+        np.tile(deltat, points_repeat),
+    )
+
+    # Create arrays to hold outputs
+    tide = np.ma.zeros((len(t)), fill_value=np.nan)
+    tide.mask = np.any(hc.mask, axis=1)
+
+    # Predict tidal elevations at time and infer minor corrections
+    tide.data[:] = pyTMD.predict.drift(
+        t,
+        hc,
+        c,
+        deltat=deltat,
+        corrections=pytmd_model.corrections,
+    )
+    minor = pyTMD.predict.infer_minor(
+        t,
+        hc,
+        c,
+        deltat=deltat,
+        corrections=pytmd_model.corrections,
+        minor=pytmd_model.minor,
+    )
+    tide.data[:] += minor.data[:]
+
+    # Replace invalid values with fill value
+    tide.data[tide.mask] = tide.fill_value
+
+    # Convert data to pandas.DataFrame, and set index to our input
+    # time/x/y values
+    tide_df = pd.DataFrame({
+        "time": np.tile(time, points_repeat),
+        "x": np.repeat(x, time_repeat),
+        "y": np.repeat(y, time_repeat),
+        "tide_model": model,
+        "tide_height": tide,
+    }).set_index(["time", "x", "y"])
+
+    # Optionally convert outputs to integer units (can save memory)
+    if output_units == "m":
+        tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
+    elif output_units == "cm":
+        tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
+    elif output_units == "mm":
+        tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
+
+    return tide_df
+
+
 def model_tides(
     x: float | list[float] | xr.DataArray,
     y: float | list[float] | xr.DataArray,
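The new `_parallel_splits` helper above chooses how many coordinate chunks to process in parallel by taking the smaller of two limits: the number of roughly 1,000-point chunks the data supports, and the available CPUs divided by the number of models, so that each model still gets a worker. A standalone sketch of the same arithmetic, not the packaged function itself:

```python
# Illustrative re-implementation of the split calculation shown above.
def parallel_splits(total_points: int, model_count: int, parallel_max: int,
                    min_points_per_split: int = 1000) -> int:
    splits_by_size = total_points / min_points_per_split  # keep each split reasonably large
    splits_by_cpu = parallel_max / model_count            # leave CPUs free for each model
    return int(max(1, min(splits_by_size, splits_by_cpu)))

# Example: 50,000 points, 3 tide models, 12 usable CPUs
# splits_by_size = 50.0, splits_by_cpu = 4.0 -> 4 splits of ~12,500 points each
print(parallel_splits(total_points=50_000, model_count=3, parallel_max=12))  # 4
```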
@@ -498,12 +372,13 @@ def model_tides(
     directory: str | os.PathLike | None = None,
     crs: str = "EPSG:4326",
     crop: bool = True,
-    method: str = "spline",
+    method: str = "linear",
     extrapolate: bool = True,
     cutoff: float | None = None,
     mode: str = "one-to-many",
     parallel: bool = True,
-    parallel_splits: int =
+    parallel_splits: int | str = "auto",
+    parallel_max: int | None = None,
     output_units: str = "m",
     output_format: str = "long",
     ensemble_models: list[str] | None = None,
@@ -564,11 +439,11 @@ def model_tides(
         1 degree buffer around all input points. Defaults to True.
     method : str, optional
         Method used to interpolate tidal constituents
-        from model files.
+        from model files. Defaults to "linear"; options include:
 
-        - "spline": scipy bivariate spline interpolation (default)
-        - "bilinear": quick bilinear interpolation
         - "linear", "nearest": scipy regular grid interpolations
+        - "spline": scipy bivariate spline interpolation
+        - "bilinear": quick bilinear interpolation
     extrapolate : bool, optional
         Whether to extrapolate tides for x and y coordinates outside of
         the valid tide modelling domain using nearest-neighbor.
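Two user-facing defaults change in this block: `method` now defaults to "linear" rather than "spline", and the fixed integer `parallel_splits` is replaced by an "auto" default plus a new `parallel_max` cap. A hedged usage sketch against the 0.3.1 signature; coordinates, times, model name and directory are placeholders, and a correctly set up tide model directory is assumed:

```python
import pandas as pd
from eo_tides.model import model_tides

tide_df = model_tides(
    x=[122.21, 122.22],                 # placeholder longitudes
    y=[-18.20, -18.21],                 # placeholder latitudes
    time=pd.date_range("2024-01-01", "2024-01-02", freq="h"),
    model="EOT20",                      # any model available locally
    directory="/var/tide_models",       # placeholder tide model directory
    method="spline",                    # restore the pre-0.3.1 interpolation default
    parallel_splits="auto",             # new default: derived from CPUs, points and models
    parallel_max=4,                     # new: cap the number of worker processes
)
```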
@@ -594,12 +469,16 @@ def model_tides(
         parallel. Optionally, tide modelling can also be run in parallel
         across input x and y coordinates (see "parallel_splits" below).
         Default is True.
-    parallel_splits : int, optional
+    parallel_splits : str or int, optional
         Whether to split the input x and y coordinates into smaller,
         evenly-sized chunks that are processed in parallel. This can
         provide a large performance boost when processing large numbers
-        of coordinates. The default is
-
+        of coordinates. The default is "auto", which will automatically
+        attempt to determine optimal splits based on available CPUs,
+        the number of input points, and the number of models.
+    parallel_max : int, optional
+        Maximum number of processes to run in parallel. The default of
+        None will automatically determine this from your available CPUs.
     output_units : str, optional
         Whether to return modelled tides in floating point metre units,
         or integer centimetre units (i.e. scaled by 100) or integer
@@ -633,7 +512,6 @@ def model_tides(
 
     """
     # Turn inputs into arrays for consistent handling
-    models_requested = list(np.atleast_1d(model))
    x = np.atleast_1d(x)
     y = np.atleast_1d(y)
     time = _standardise_time(time)
@@ -662,58 +540,12 @@ def model_tides(
     # provided, try global environment variable.
     directory = _set_directory(directory)
 
-    #
-
-
-        directory
+    # Standardise model list, handling "all" and "ensemble" functionality
+    models_to_process, models_requested, ensemble_models = _standardise_models(
+        model=model,
+        directory=directory,
+        ensemble_models=ensemble_models,
     )
-    # TODO: This is hacky, find a better way. Perhaps a kwarg that
-    # turns ensemble functionality on, and checks that supplied
-    # models match models expected for ensemble?
-    available_models = available_models + ["ensemble"]
-    valid_models = valid_models + ["ensemble"]
-
-    # Error if any models are not supported
-    if not all(m in valid_models for m in models_requested):
-        error_text = (
-            f"One or more of the requested models are not valid:\n"
-            f"{models_requested}\n\n"
-            "The following models are supported:\n"
-            f"{valid_models}"
-        )
-        raise ValueError(error_text)
-
-    # Error if any models are not available in `directory`
-    if not all(m in available_models for m in models_requested):
-        error_text = (
-            f"One or more of the requested models are valid, but not available in `{directory}`:\n"
-            f"{models_requested}\n\n"
-            f"The following models are available in `{directory}`:\n"
-            f"{available_models}"
-        )
-        raise ValueError(error_text)
-
-    # If ensemble modelling is requested, use a custom list of models
-    # for subsequent processing
-    if "ensemble" in models_requested:
-        print("Running ensemble tide modelling")
-        models_to_process = (
-            ensemble_models
-            if ensemble_models is not None
-            else [
-                "FES2014",
-                "TPXO9-atlas-v5",
-                "EOT20",
-                "HAMTIDE11",
-                "GOT4.10",
-                "FES2012",
-                "TPXO8-atlas-v1",
-            ]
-        )
-
-    # Otherwise, models to process are the same as those requested
-    else:
-        models_to_process = models_requested
 
     # Update tide modelling func to add default keyword arguments that
     # are used for every iteration during parallel processing
@@ -729,13 +561,28 @@ def model_tides(
         mode=mode,
     )
 
-    #
-
+    # If automatic parallel splits, calculate optimal value
+    # based on available parallelisation, number of points
+    # and number of models
+    if parallel_splits == "auto":
+        parallel_splits = _parallel_splits(
+            total_points=len(x),
+            model_count=len(models_to_process),
+            parallel_max=parallel_max,
+        )
+
+    # Verify that parallel splits are not larger than number of points
+    assert isinstance(parallel_splits, int)
+    if parallel_splits > len(x):
+        raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
 
     # Parallelise if either multiple models or multiple splits requested
+
     if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
-        with ProcessPoolExecutor() as executor:
-            print(
+        with ProcessPoolExecutor(max_workers=parallel_max) as executor:
+            print(
+                f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})"
+            )
 
             # Optionally split lon/lat points into `splits_n` chunks
             # that will be applied in parallel
@@ -783,7 +630,7 @@ def model_tides(
         model_outputs = []
 
         for model_i in models_to_process:
-            print(f"Modelling tides
+            print(f"Modelling tides with {model_i}")
             tide_df = iter_func(model_i, x, y, time)
             model_outputs.append(tide_df)
 
@@ -792,7 +639,7 @@ def model_tides(
 
     # Optionally compute ensemble model and add to dataframe
     if "ensemble" in models_requested:
-        ensemble_df = _ensemble_model(tide_df, crs,
+        ensemble_df = _ensemble_model(tide_df, crs, ensemble_models, **ensemble_kwargs)
 
         # Update requested models with any custom ensemble models, then
        # filter the dataframe to keep only models originally requested
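Taken together, the model.py changes replace roughly fifty lines of inline model validation and a hard-coded ensemble member list with a single `_standardise_models()` call that returns the models to process, the models originally requested, and the resolved ensemble members. The public ensemble workflow looks unchanged in shape; a hedged sketch follows, where model names and the directory path are placeholders and ensemble mode may need additional ranking inputs not shown in this diff:

```python
from eo_tides.model import model_tides

# "ensemble" is still accepted as a requested model, and a custom member list
# can still be supplied via ensemble_models; validation now happens inside
# _standardise_models() rather than inline in model_tides().
ensemble_df = model_tides(
    x=122.21,
    y=-18.20,
    time="2024-01-01",
    model="ensemble",
    ensemble_models=["EOT20", "GOT4.10", "HAMTIDE11"],  # placeholder member models
    directory="/var/tide_models",                       # placeholder path
)
```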
eo_tides/stats.py
CHANGED
@@ -6,20 +6,18 @@ from typing import TYPE_CHECKING
 
 import matplotlib.pyplot as plt
 import numpy as np
-import odc.geo.xr
 import pandas as pd
 import xarray as xr
 from scipy import stats
 
 # Only import if running type checking
 if TYPE_CHECKING:
-    import datetime
-
     import xarray as xr
     from odc.geo.geobox import GeoBox
 
 from .eo import _standardise_inputs, pixel_tides, tag_tides
-from .model import
+from .model import model_tides
+from .utils import DatetimeLike
 
 
 def _plot_biases(