eo-tides 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eo_tides/__init__.py +2 -1
- eo_tides/eo.py +23 -12
- eo_tides/model.py +259 -274
- eo_tides/stats.py +261 -272
- eo_tides/utils.py +112 -3
- eo_tides/validation.py +23 -21
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/METADATA +23 -22
- eo_tides-0.4.0.dist-info/RECORD +11 -0
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/WHEEL +1 -1
- eo_tides-0.3.0.dist-info/RECORD +0 -11
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/LICENSE +0 -0
- {eo_tides-0.3.0.dist-info → eo_tides-0.4.0.dist-info}/top_level.txt +0 -0
eo_tides/model.py
CHANGED
@@ -21,172 +21,7 @@ import pyproj
|
|
21
21
|
import pyTMD
|
22
22
|
from tqdm import tqdm
|
23
23
|
|
24
|
-
from .utils import DatetimeLike, _set_directory, _standardise_time, idw
|
25
|
-
|
26
|
-
|
27
|
-
def _ensemble_model(
|
28
|
-
tide_df,
|
29
|
-
crs,
|
30
|
-
ensemble_models,
|
31
|
-
ensemble_func=None,
|
32
|
-
ensemble_top_n=3,
|
33
|
-
ranking_points="https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/derivative/dea_intertidal/supplementary/rankings_ensemble_2017-2019.geojson",
|
34
|
-
ranking_valid_perc=0.02,
|
35
|
-
**idw_kwargs,
|
36
|
-
):
|
37
|
-
"""Combine multiple tide models into a single locally optimised
|
38
|
-
ensemble tide model using external model ranking data (e.g.
|
39
|
-
satellite altimetry or NDWI-tide correlations along the coastline)
|
40
|
-
to inform the selection of the best local models.
|
41
|
-
|
42
|
-
This function performs the following steps:
|
43
|
-
1. Takes a dataframe of tide heights from multiple tide models, as
|
44
|
-
produced by `eo_tides.model.model_tides`
|
45
|
-
1. Loads model ranking points from a GeoJSON file, filters them
|
46
|
-
based on the valid data percentage, and retains relevant columns
|
47
|
-
2. Interpolates the model rankings into the "x" and "y" coordinates
|
48
|
-
of the original dataframe using Inverse Distance Weighted interpolation (IDW)
|
49
|
-
3. Uses rankings to combine multiple tide models into a single
|
50
|
-
optimised ensemble model (by default, by taking the mean of the
|
51
|
-
top 3 ranked models)
|
52
|
-
4. Returns a new dataFrame with the combined ensemble model predictions
|
53
|
-
|
54
|
-
Parameters
|
55
|
-
----------
|
56
|
-
tide_df : pandas.DataFrame
|
57
|
-
DataFrame produced by `eo_tides.model.model_tides`, containing
|
58
|
-
tide model predictions with columns:
|
59
|
-
`["time", "x", "y", "tide_height", "tide_model"]`.
|
60
|
-
crs : string
|
61
|
-
Coordinate reference system for the "x" and "y" coordinates in
|
62
|
-
`tide_df`. Used to ensure that interpolations are performed
|
63
|
-
in the correct CRS.
|
64
|
-
ensemble_models : list
|
65
|
-
A list of models to include in the ensemble modelling process.
|
66
|
-
All values must exist as columns with the prefix "rank_" in
|
67
|
-
`ranking_points`.
|
68
|
-
ensemble_func : dict, optional
|
69
|
-
By default, a simple ensemble model will be calculated by taking
|
70
|
-
the mean of the `ensemble_top_n` tide models at each location.
|
71
|
-
However, a dictionary containing more complex ensemble
|
72
|
-
calculations can also be provided. Dictionary keys are used
|
73
|
-
to name output ensemble models; functions should take a column
|
74
|
-
named "rank" and convert it to a weighting, e.g.:
|
75
|
-
`ensemble_func = {"ensemble-custom": lambda x: x["rank"] <= 3}`
|
76
|
-
ensemble_top_n : int, optional
|
77
|
-
If `ensemble_func` is None, this sets the number of top models
|
78
|
-
to include in the mean ensemble calculation. Defaults to 3.
|
79
|
-
ranking_points : str, optional
|
80
|
-
Path to the GeoJSON file containing model ranking points. This
|
81
|
-
dataset should include columns containing rankings for each tide
|
82
|
-
model, named with the prefix "rank_". e.g. "rank_EOT20".
|
83
|
-
Low values should represent high rankings (e.g. 1 = top ranked).
|
84
|
-
ranking_valid_perc : float, optional
|
85
|
-
Minimum percentage of valid data required to include a model
|
86
|
-
rank point in the analysis, as defined in a column named
|
87
|
-
"valid_perc". Defaults to 0.02.
|
88
|
-
**idw_kwargs
|
89
|
-
Optional keyword arguments to pass to the `idw` function used
|
90
|
-
for interpolation. Useful values include `k` (number of nearest
|
91
|
-
neighbours to use in interpolation), `max_dist` (maximum
|
92
|
-
distance to nearest neighbours), and `k_min` (minimum number of
|
93
|
-
neighbours required after `max_dist` is applied).
|
94
|
-
|
95
|
-
Returns
|
96
|
-
-------
|
97
|
-
pandas.DataFrame
|
98
|
-
DataFrame containing the ensemble model predictions, matching
|
99
|
-
the format of the input `tide_df` (e.g. columns `["time", "x",
|
100
|
-
"y", "tide_height", "tide_model"]`. By default the 'tide_model'
|
101
|
-
column will be labeled "ensemble" for the combined model
|
102
|
-
predictions (but if a custom dictionary of ensemble functions is
|
103
|
-
provided via `ensemble_func`, each ensemble will be named using
|
104
|
-
the provided dictionary keys).
|
105
|
-
|
106
|
-
"""
|
107
|
-
# Extract x and y coords from dataframe
|
108
|
-
x = tide_df.index.get_level_values(level="x")
|
109
|
-
y = tide_df.index.get_level_values(level="y")
|
110
|
-
|
111
|
-
# Load model ranks points and reproject to same CRS as x and y
|
112
|
-
model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
|
113
|
-
model_ranks_gdf = (
|
114
|
-
gpd.read_file(ranking_points)
|
115
|
-
.to_crs(crs)
|
116
|
-
.query(f"valid_perc > {ranking_valid_perc}")
|
117
|
-
.dropna()[model_ranking_cols + ["geometry"]]
|
118
|
-
)
|
119
|
-
|
120
|
-
# Use points to interpolate model rankings into requested x and y
|
121
|
-
id_kwargs_str = "" if idw_kwargs == {} else idw_kwargs
|
122
|
-
print(f"Interpolating model rankings using IDW interpolation {id_kwargs_str}")
|
123
|
-
ensemble_ranks_df = (
|
124
|
-
# Run IDW interpolation on subset of ranking columns
|
125
|
-
pd.DataFrame(
|
126
|
-
idw(
|
127
|
-
input_z=model_ranks_gdf[model_ranking_cols],
|
128
|
-
input_x=model_ranks_gdf.geometry.x,
|
129
|
-
input_y=model_ranks_gdf.geometry.y,
|
130
|
-
output_x=x,
|
131
|
-
output_y=y,
|
132
|
-
**idw_kwargs,
|
133
|
-
),
|
134
|
-
columns=model_ranking_cols,
|
135
|
-
)
|
136
|
-
.assign(x=x, y=y)
|
137
|
-
# Drop any duplicates then melt columns into long format
|
138
|
-
.drop_duplicates()
|
139
|
-
.melt(id_vars=["x", "y"], var_name="tide_model", value_name="rank")
|
140
|
-
# Remove "rank_" prefix to get plain model names
|
141
|
-
.replace({"^rank_": ""}, regex=True)
|
142
|
-
# Set index columns and rank across groups
|
143
|
-
.set_index(["tide_model", "x", "y"])
|
144
|
-
.groupby(["x", "y"])
|
145
|
-
.rank()
|
146
|
-
)
|
147
|
-
|
148
|
-
# If no custom ensemble funcs are provided, use a default ensemble
|
149
|
-
# calculation that takes the mean of the top N tide models
|
150
|
-
if ensemble_func is None:
|
151
|
-
ensemble_func = {"ensemble": lambda x: x["rank"] <= ensemble_top_n}
|
152
|
-
|
153
|
-
# Create output list to hold computed ensemble model outputs
|
154
|
-
ensemble_list = []
|
155
|
-
|
156
|
-
# Loop through all provided ensemble generation functions
|
157
|
-
for ensemble_n, ensemble_f in ensemble_func.items():
|
158
|
-
print(f"Combining models into single {ensemble_n} model")
|
159
|
-
|
160
|
-
# Join ranks to input tide data, compute weightings and group
|
161
|
-
grouped = (
|
162
|
-
# Add tide model as an index so we can join with model ranks
|
163
|
-
tide_df.set_index("tide_model", append=True)
|
164
|
-
.join(ensemble_ranks_df)
|
165
|
-
# Add temp columns containing weightings and weighted values
|
166
|
-
.assign(
|
167
|
-
weights=ensemble_f, # use custom func to compute weights
|
168
|
-
weighted=lambda i: i.tide_height * i.weights,
|
169
|
-
)
|
170
|
-
# Groupby is specified in a weird order here as this seems
|
171
|
-
# to be the easiest way to preserve correct index sorting
|
172
|
-
.groupby(["x", "y", "time"])
|
173
|
-
)
|
174
|
-
|
175
|
-
# Use weightings to combine multiple models into single ensemble
|
176
|
-
ensemble_df = (
|
177
|
-
# Calculate weighted mean and convert back to dataframe
|
178
|
-
grouped.weighted.sum()
|
179
|
-
.div(grouped.weights.sum())
|
180
|
-
.to_frame("tide_height")
|
181
|
-
# Label ensemble model and ensure indexes are in expected order
|
182
|
-
.assign(tide_model=ensemble_n)
|
183
|
-
.reorder_levels(["time", "x", "y"], axis=0)
|
184
|
-
)
|
185
|
-
|
186
|
-
ensemble_list.append(ensemble_df)
|
187
|
-
|
188
|
-
# Combine all ensemble models and return as a single dataframe
|
189
|
-
return pd.concat(ensemble_list)
|
24
|
+
from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw
|
190
25
|
|
191
26
|
|
192
27
|
def _parallel_splits(
|
@@ -240,12 +75,13 @@ def _model_tides(
|
|
240
75
|
time,
|
241
76
|
directory,
|
242
77
|
crs,
|
243
|
-
|
78
|
+
mode,
|
79
|
+
output_units,
|
244
80
|
method,
|
245
81
|
extrapolate,
|
246
82
|
cutoff,
|
247
|
-
|
248
|
-
|
83
|
+
crop,
|
84
|
+
crop_buffer,
|
249
85
|
):
|
250
86
|
"""Worker function applied in parallel by `model_tides`. Handles the
|
251
87
|
extraction of tide modelling constituents and tide modelling using
|
@@ -268,14 +104,15 @@ def _model_tides(
|
|
268
104
|
lat,
|
269
105
|
type=pytmd_model.type,
|
270
106
|
crop=crop,
|
107
|
+
buffer=crop_buffer,
|
271
108
|
method=method,
|
272
109
|
extrapolate=extrapolate,
|
273
110
|
cutoff=cutoff,
|
274
111
|
append_node=False,
|
275
|
-
# append_node=True,
|
276
112
|
)
|
277
113
|
|
278
114
|
# TODO: Return constituents
|
115
|
+
# print(model, amp.shape)
|
279
116
|
# print(amp.shape, ph.shape, c)
|
280
117
|
# print(pd.DataFrame({"amplitude": amp}))
|
281
118
|
|
@@ -363,6 +200,189 @@ def _model_tides(
|
|
363
200
|
return tide_df
|
364
201
|
|
365
202
|
|
203
|
+
def ensemble_tides(
|
204
|
+
tide_df,
|
205
|
+
crs,
|
206
|
+
ensemble_models,
|
207
|
+
ensemble_func=None,
|
208
|
+
ensemble_top_n=3,
|
209
|
+
ranking_points="https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/derivative/dea_intertidal/supplementary/rankings_ensemble_2017-2019.fgb",
|
210
|
+
ranking_valid_perc=0.02,
|
211
|
+
**idw_kwargs,
|
212
|
+
):
|
213
|
+
"""Combine multiple tide models into a single locally optimised
|
214
|
+
ensemble tide model using external model ranking data (e.g.
|
215
|
+
satellite altimetry or NDWI-tide correlations along the coastline)
|
216
|
+
to inform the selection of the best local models.
|
217
|
+
|
218
|
+
This function performs the following steps:
|
219
|
+
|
220
|
+
1. Takes a dataframe of tide heights from multiple tide models, as
|
221
|
+
produced by `eo_tides.model.model_tides`
|
222
|
+
2. Loads model ranking points from an external file, filters them
|
223
|
+
based on the valid data percentage, and retains relevant columns
|
224
|
+
3. Interpolates the model rankings into the coordinates of the
|
225
|
+
original dataframe using Inverse Distance Weighted interpolation (IDW)
|
226
|
+
4. Uses rankings to combine multiple tide models into a single
|
227
|
+
optimised ensemble model (by default, by taking the mean of the
|
228
|
+
top 3 ranked models)
|
229
|
+
5. Returns a new dataframe with the combined ensemble model predictions
|
230
|
+
|
231
|
+
Parameters
|
232
|
+
----------
|
233
|
+
tide_df : pandas.DataFrame
|
234
|
+
DataFrame produced by `eo_tides.model.model_tides`, containing
|
235
|
+
tide model predictions in long format with columns:
|
236
|
+
`["time", "x", "y", "tide_height", "tide_model"]`.
|
237
|
+
crs : string
|
238
|
+
Coordinate reference system for the "x" and "y" coordinates in
|
239
|
+
`tide_df`. Used to ensure that interpolations are performed
|
240
|
+
in the correct CRS.
|
241
|
+
ensemble_models : list
|
242
|
+
A list of models to include in the ensemble modelling process.
|
243
|
+
All values must exist as columns with the prefix "rank_" in
|
244
|
+
`ranking_points`.
|
245
|
+
ensemble_func : dict, optional
|
246
|
+
By default, a simple ensemble model will be calculated by taking
|
247
|
+
the mean of the `ensemble_top_n` tide models at each location.
|
248
|
+
However, a dictionary containing more complex ensemble
|
249
|
+
calculations can also be provided. Dictionary keys are used
|
250
|
+
to name output ensemble models; functions should take a column
|
251
|
+
named "rank" and convert it to a weighting, e.g.:
|
252
|
+
`ensemble_func = {"ensemble-custom": lambda x: x["rank"] <= 3}`
|
253
|
+
ensemble_top_n : int, optional
|
254
|
+
If `ensemble_func` is None, this sets the number of top models
|
255
|
+
to include in the mean ensemble calculation. Defaults to 3.
|
256
|
+
ranking_points : str, optional
|
257
|
+
Path to the file containing model ranking points. This dataset
|
258
|
+
should include columns containing rankings for each tide
|
259
|
+
model, named with the prefix "rank_". e.g. "rank_EOT20".
|
260
|
+
Low values should represent high rankings (e.g. 1 = top ranked).
|
261
|
+
The default value points to an example file covering Australia.
|
262
|
+
ranking_valid_perc : float, optional
|
263
|
+
Minimum percentage of valid data required to include a model
|
264
|
+
rank point in the analysis, as defined in a column named
|
265
|
+
"valid_perc". Defaults to 0.02.
|
266
|
+
**idw_kwargs
|
267
|
+
Optional keyword arguments to pass to the `idw` function used
|
268
|
+
for interpolation. Useful values include `k` (number of nearest
|
269
|
+
neighbours to use in interpolation), `max_dist` (maximum
|
270
|
+
distance to nearest neighbours), and `k_min` (minimum number of
|
271
|
+
neighbours required after `max_dist` is applied).
|
272
|
+
|
273
|
+
Returns
|
274
|
+
-------
|
275
|
+
pandas.DataFrame
|
276
|
+
DataFrame containing the ensemble model predictions, matching
|
277
|
+
the format of the input `tide_df` (e.g. columns `["time", "x",
|
278
|
+
"y", "tide_height", "tide_model"]`. By default the 'tide_model'
|
279
|
+
column will be labeled "ensemble" for the combined model
|
280
|
+
predictions (but if a custom dictionary of ensemble functions is
|
281
|
+
provided via `ensemble_func`, each ensemble will be named using
|
282
|
+
the provided dictionary keys).
|
283
|
+
|
284
|
+
"""
|
285
|
+
# Raise error if `tide_df` is provided in wide format
|
286
|
+
if "tide_model" not in tide_df:
|
287
|
+
raise Exception(
|
288
|
+
"`tide_df` does not contain the expected 'tide_model' and "
|
289
|
+
"'tide_height' columns. Ensure that tides were modelled in "
|
290
|
+
"long format (i.e. `output_format='long'` in `model_tides`)."
|
291
|
+
)
|
292
|
+
|
293
|
+
# Extract x and y coords from dataframe
|
294
|
+
x = tide_df.index.get_level_values(level="x")
|
295
|
+
y = tide_df.index.get_level_values(level="y")
|
296
|
+
|
297
|
+
# Load model ranks points and reproject to same CRS as x and y
|
298
|
+
model_ranking_cols = [f"rank_{m}" for m in ensemble_models]
|
299
|
+
try:
|
300
|
+
model_ranks_gdf = (
|
301
|
+
gpd.read_file(ranking_points, engine="pyogrio")
|
302
|
+
.to_crs(crs)
|
303
|
+
.query(f"valid_perc > {ranking_valid_perc}")
|
304
|
+
.dropna(how="all")[model_ranking_cols + ["geometry"]]
|
305
|
+
)
|
306
|
+
except KeyError:
|
307
|
+
error_msg = f"""
|
308
|
+
Not all of the expected "rank_" columns {model_ranking_cols} were
|
309
|
+
found in the columns of the ranking points file ({ranking_points}).
|
310
|
+
Consider passing a custom list of models using `ensemble_models`.
|
311
|
+
"""
|
312
|
+
raise Exception(textwrap.dedent(error_msg).strip()) from None
|
313
|
+
|
314
|
+
# Use points to interpolate model rankings into requested x and y
|
315
|
+
id_kwargs_str = "" if idw_kwargs == {} else idw_kwargs
|
316
|
+
print(f"Interpolating model rankings using IDW interpolation {id_kwargs_str}")
|
317
|
+
ensemble_ranks_df = (
|
318
|
+
# Run IDW interpolation on subset of ranking columns
|
319
|
+
pd.DataFrame(
|
320
|
+
idw(
|
321
|
+
input_z=model_ranks_gdf[model_ranking_cols],
|
322
|
+
input_x=model_ranks_gdf.geometry.x,
|
323
|
+
input_y=model_ranks_gdf.geometry.y,
|
324
|
+
output_x=x,
|
325
|
+
output_y=y,
|
326
|
+
**idw_kwargs,
|
327
|
+
),
|
328
|
+
columns=model_ranking_cols,
|
329
|
+
)
|
330
|
+
.assign(x=x, y=y)
|
331
|
+
# Drop any duplicates then melt columns into long format
|
332
|
+
.drop_duplicates()
|
333
|
+
.melt(id_vars=["x", "y"], var_name="tide_model", value_name="rank")
|
334
|
+
# Remove "rank_" prefix to get plain model names
|
335
|
+
.replace({"^rank_": ""}, regex=True)
|
336
|
+
# Set index columns and rank across groups
|
337
|
+
.set_index(["tide_model", "x", "y"])
|
338
|
+
.groupby(["x", "y"])
|
339
|
+
.rank()
|
340
|
+
)
|
341
|
+
|
342
|
+
# If no custom ensemble funcs are provided, use a default ensemble
|
343
|
+
# calculation that takes the mean of the top N tide models
|
344
|
+
if ensemble_func is None:
|
345
|
+
ensemble_func = {"ensemble": lambda x: x["rank"] <= ensemble_top_n}
|
346
|
+
|
347
|
+
# Create output list to hold computed ensemble model outputs
|
348
|
+
ensemble_list = []
|
349
|
+
|
350
|
+
# Loop through all provided ensemble generation functions
|
351
|
+
for ensemble_n, ensemble_f in ensemble_func.items():
|
352
|
+
print(f"Combining models into single {ensemble_n} model")
|
353
|
+
|
354
|
+
# Join ranks to input tide data, compute weightings and group
|
355
|
+
grouped = (
|
356
|
+
# Add tide model as an index so we can join with model ranks
|
357
|
+
tide_df.set_index("tide_model", append=True)
|
358
|
+
.join(ensemble_ranks_df)
|
359
|
+
# Add temp columns containing weightings and weighted values
|
360
|
+
.assign(
|
361
|
+
weights=ensemble_f, # use custom func to compute weights
|
362
|
+
weighted=lambda i: i.tide_height * i.weights,
|
363
|
+
)
|
364
|
+
# Groupby is specified in a weird order here as this seems
|
365
|
+
# to be the easiest way to preserve correct index sorting
|
366
|
+
.groupby(["x", "y", "time"])
|
367
|
+
)
|
368
|
+
|
369
|
+
# Use weightings to combine multiple models into single ensemble
|
370
|
+
ensemble_df = (
|
371
|
+
# Calculate weighted mean and convert back to dataframe
|
372
|
+
grouped.weighted.sum()
|
373
|
+
.div(grouped.weights.sum())
|
374
|
+
.to_frame("tide_height")
|
375
|
+
# Label ensemble model and ensure indexes are in expected order
|
376
|
+
.assign(tide_model=ensemble_n)
|
377
|
+
.reorder_levels(["time", "x", "y"], axis=0)
|
378
|
+
)
|
379
|
+
|
380
|
+
ensemble_list.append(ensemble_df)
|
381
|
+
|
382
|
+
# Combine all ensemble models and return as a single dataframe
|
383
|
+
return pd.concat(ensemble_list)
|
384
|
+
|
385
|
+
|
366
386
|
def model_tides(
|
367
387
|
x: float | list[float] | xr.DataArray,
|
368
388
|
y: float | list[float] | xr.DataArray,
|
@@ -370,16 +390,17 @@ def model_tides(
|
|
370
390
|
model: str | list[str] = "EOT20",
|
371
391
|
directory: str | os.PathLike | None = None,
|
372
392
|
crs: str = "EPSG:4326",
|
373
|
-
|
393
|
+
mode: str = "one-to-many",
|
394
|
+
output_format: str = "long",
|
395
|
+
output_units: str = "m",
|
374
396
|
method: str = "linear",
|
375
397
|
extrapolate: bool = True,
|
376
398
|
cutoff: float | None = None,
|
377
|
-
|
399
|
+
crop: bool = True,
|
400
|
+
crop_buffer: float | None = 5,
|
378
401
|
parallel: bool = True,
|
379
402
|
parallel_splits: int | str = "auto",
|
380
403
|
parallel_max: int | None = None,
|
381
|
-
output_units: str = "m",
|
382
|
-
output_format: str = "long",
|
383
404
|
ensemble_models: list[str] | None = None,
|
384
405
|
**ensemble_kwargs,
|
385
406
|
) -> pd.DataFrame:
|
@@ -418,10 +439,12 @@ def model_tides(
|
|
418
439
|
any format that can be converted by `pandas.to_datetime()`;
|
419
440
|
e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
|
420
441
|
datetime.datetime and strings (e.g. "2020-01-01 23:00").
|
442
|
+
For example: `time=pd.date_range(start="2000", end="2001", freq="5h")`
|
421
443
|
model : str or list of str, optional
|
422
|
-
The tide model (or models) to use to model tides.
|
423
|
-
Defaults to "EOT20";
|
424
|
-
|
444
|
+
The tide model (or list of models) to use to model tides.
|
445
|
+
Defaults to "EOT20"; specify "all" to use all models available
|
446
|
+
in `directory`. For a full list of available and supported models,
|
447
|
+
run `eo_tides.model.list_models`.
|
425
448
|
directory : str, optional
|
426
449
|
The directory containing tide model data files. If no path is
|
427
450
|
provided, this will default to the environment variable
|
@@ -432,10 +455,29 @@ def model_tides(
|
|
432
455
|
crs : str, optional
|
433
456
|
Input coordinate reference system for x and y coordinates.
|
434
457
|
Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
458
|
+
mode : str, optional
|
459
|
+
The analysis mode to use for tide modelling. Supports two options:
|
460
|
+
|
461
|
+
- "one-to-many": Models tides for every timestep in "time" at
|
462
|
+
every input x and y coordinate point. This is useful if you
|
463
|
+
want to model tides for a specific list of timesteps across
|
464
|
+
multiple spatial points (e.g. for the same set of satellite
|
465
|
+
acquisition times at various locations across your study area).
|
466
|
+
- "one-to-one": Model tides using a unique timestep for each
|
467
|
+
set of x and y coordinates. In this mode, the number of x and
|
468
|
+
y points must equal the number of timesteps provided in "time".
|
469
|
+
output_format : str, optional
|
470
|
+
Whether to return the output dataframe in long format (with
|
471
|
+
results stacked vertically along "tide_model" and "tide_height"
|
472
|
+
columns), or wide format (with a column for each tide model).
|
473
|
+
Defaults to "long".
|
474
|
+
output_units : str, optional
|
475
|
+
Whether to return modelled tides in floating point metre units,
|
476
|
+
or integer centimetre units (i.e. scaled by 100) or integer
|
477
|
+
millimetre units (i.e. scaled by 1000). Returning outputs in
|
478
|
+
integer units can be useful for reducing memory usage.
|
479
|
+
Defaults to "m" for metres; set to "cm" for centimetres or "mm"
|
480
|
+
for millimetres.
|
439
481
|
method : str, optional
|
440
482
|
Method used to interpolate tidal constituents
|
441
483
|
from model files. Defaults to "linear"; options include:
|
@@ -444,30 +486,27 @@ def model_tides(
|
|
444
486
|
- "spline": scipy bivariate spline interpolation
|
445
487
|
- "bilinear": quick bilinear interpolation
|
446
488
|
extrapolate : bool, optional
|
447
|
-
Whether to extrapolate tides
|
489
|
+
Whether to extrapolate tides into x and y coordinates outside of
|
448
490
|
the valid tide modelling domain using nearest-neighbor.
|
449
491
|
cutoff : float, optional
|
450
492
|
Extrapolation cutoff in kilometers. The default is None, which
|
451
493
|
will extrapolate for all points regardless of distance from the
|
452
494
|
valid tide modelling domain.
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
set of x and y coordinates. In this mode, the number of x and
|
463
|
-
y points must equal the number of timesteps provided in "time".
|
464
|
-
|
495
|
+
crop : bool, optional
|
496
|
+
Whether to crop tide model constituent files on-the-fly to
|
497
|
+
improve performance. Defaults to True; use `crop_buffer`
|
498
|
+
to customise the buffer distance used to crop the files.
|
499
|
+
crop_buffer : int or float, optional
|
500
|
+
The buffer distance in degrees used to crop tide model
|
501
|
+
constituent files around the modelling area. Defaults to 5,
|
502
|
+
which will crop constituents using a five degree buffer on either
|
503
|
+
side of the analysis extent.
|
465
504
|
parallel : bool, optional
|
466
|
-
Whether to parallelise tide modelling
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
Default is True.
|
505
|
+
Whether to parallelise tide modelling. If multiple tide models are
|
506
|
+
requested, these will be run in parallel using `concurrent.futures`.
|
507
|
+
If enough workers are available, the analysis will also be split
|
508
|
+
into spatial chunks for additional parallelisation (see "parallel_splits"
|
509
|
+
below). Default is True.
|
471
510
|
parallel_splits : str or int, optional
|
472
511
|
Whether to split the input x and y coordinates into smaller,
|
473
512
|
evenly-sized chunks that are processed in parallel. This can
|
@@ -478,23 +517,12 @@ def model_tides(
|
|
478
517
|
parallel_max : int, optional
|
479
518
|
Maximum number of processes to run in parallel. The default of
|
480
519
|
None will automatically determine this from your available CPUs.
|
481
|
-
output_units : str, optional
|
482
|
-
Whether to return modelled tides in floating point metre units,
|
483
|
-
or integer centimetre units (i.e. scaled by 100) or integer
|
484
|
-
millimetre units (i.e. scaled by 1000). Returning outputs in
|
485
|
-
integer units can be useful for reducing memory usage.
|
486
|
-
Defaults to "m" for metres; set to "cm" for centimetres or "mm"
|
487
|
-
for millimetres.
|
488
|
-
output_format : str, optional
|
489
|
-
Whether to return the output dataframe in long format (with
|
490
|
-
results stacked vertically along "tide_model" and "tide_height"
|
491
|
-
columns), or wide format (with a column for each tide model).
|
492
|
-
Defaults to "long".
|
493
520
|
ensemble_models : list of str, optional
|
494
521
|
An optional list of models used to generate the ensemble tide
|
495
522
|
model if "ensemble" tide modelling is requested. Defaults to
|
496
|
-
["
|
497
|
-
"
|
523
|
+
`["EOT20", "FES2012", "FES2014_extrapolated", "FES2022_extrapolated",
|
524
|
+
"GOT4.10", "GOT5.5_extrapolated", "GOT5.6_extrapolated",
|
525
|
+
"TPXO10-atlas-v2-nc", "TPXO8-atlas-nc", "TPXO9-atlas-v5-nc"]`.
|
498
526
|
**ensemble_kwargs :
|
499
527
|
Keyword arguments used to customise the generation of optional
|
500
528
|
ensemble tide models if "ensemble" modelling is requested.
|
@@ -511,13 +539,12 @@ def model_tides(
|
|
511
539
|
|
512
540
|
"""
|
513
541
|
# Turn inputs into arrays for consistent handling
|
514
|
-
models_requested = list(np.atleast_1d(model))
|
515
542
|
x = np.atleast_1d(x)
|
516
543
|
y = np.atleast_1d(y)
|
517
544
|
time = _standardise_time(time)
|
518
545
|
|
519
546
|
# Validate input arguments
|
520
|
-
assert time is not None, "Times for modelling tides
|
547
|
+
assert time is not None, "Times for modelling tides must be provided via `time`."
|
521
548
|
assert method in ("bilinear", "spline", "linear", "nearest")
|
522
549
|
assert output_units in (
|
523
550
|
"m",
|
@@ -528,6 +555,8 @@ def model_tides(
|
|
528
555
|
"long",
|
529
556
|
"wide",
|
530
557
|
), "Output format must be either 'long' or 'wide'."
|
558
|
+
assert np.issubdtype(x.dtype, np.number), "`x` must contain only valid numeric values, and must not be None."
|
559
|
+
assert np.issubdtype(y.dtype, np.number), "`y` must contain only valid numeric values, and must not be None.."
|
531
560
|
assert len(x) == len(y), "x and y must be the same length."
|
532
561
|
if mode == "one-to-one":
|
533
562
|
assert len(x) == len(time), (
|
@@ -540,58 +569,12 @@ def model_tides(
|
|
540
569
|
# provided, try global environment variable.
|
541
570
|
directory = _set_directory(directory)
|
542
571
|
|
543
|
-
#
|
544
|
-
|
545
|
-
|
546
|
-
directory
|
572
|
+
# Standardise model list, handling "all" and "ensemble" functionality
|
573
|
+
models_to_process, models_requested, ensemble_models = _standardise_models(
|
574
|
+
model=model,
|
575
|
+
directory=directory,
|
576
|
+
ensemble_models=ensemble_models,
|
547
577
|
)
|
548
|
-
# TODO: This is hacky, find a better way. Perhaps a kwarg that
|
549
|
-
# turns ensemble functionality on, and checks that supplied
|
550
|
-
# models match models expected for ensemble?
|
551
|
-
available_models = available_models + ["ensemble"]
|
552
|
-
valid_models = valid_models + ["ensemble"]
|
553
|
-
|
554
|
-
# Error if any models are not supported
|
555
|
-
if not all(m in valid_models for m in models_requested):
|
556
|
-
error_text = (
|
557
|
-
f"One or more of the requested models are not valid:\n"
|
558
|
-
f"{models_requested}\n\n"
|
559
|
-
"The following models are supported:\n"
|
560
|
-
f"{valid_models}"
|
561
|
-
)
|
562
|
-
raise ValueError(error_text)
|
563
|
-
|
564
|
-
# Error if any models are not available in `directory`
|
565
|
-
if not all(m in available_models for m in models_requested):
|
566
|
-
error_text = (
|
567
|
-
f"One or more of the requested models are valid, but not available in `{directory}`:\n"
|
568
|
-
f"{models_requested}\n\n"
|
569
|
-
f"The following models are available in `{directory}`:\n"
|
570
|
-
f"{available_models}"
|
571
|
-
)
|
572
|
-
raise ValueError(error_text)
|
573
|
-
|
574
|
-
# If ensemble modelling is requested, use a custom list of models
|
575
|
-
# for subsequent processing
|
576
|
-
if "ensemble" in models_requested:
|
577
|
-
print("Running ensemble tide modelling")
|
578
|
-
models_to_process = (
|
579
|
-
ensemble_models
|
580
|
-
if ensemble_models is not None
|
581
|
-
else [
|
582
|
-
"FES2014",
|
583
|
-
"TPXO9-atlas-v5",
|
584
|
-
"EOT20",
|
585
|
-
"HAMTIDE11",
|
586
|
-
"GOT4.10",
|
587
|
-
"FES2012",
|
588
|
-
"TPXO8-atlas-v1",
|
589
|
-
]
|
590
|
-
)
|
591
|
-
|
592
|
-
# Otherwise, models to process are the same as those requested
|
593
|
-
else:
|
594
|
-
models_to_process = models_requested
|
595
578
|
|
596
579
|
# Update tide modelling func to add default keyword arguments that
|
597
580
|
# are used for every iteration during parallel processing
|
@@ -599,12 +582,13 @@ def model_tides(
|
|
599
582
|
_model_tides,
|
600
583
|
directory=directory,
|
601
584
|
crs=crs,
|
602
|
-
|
585
|
+
mode=mode,
|
586
|
+
output_units=output_units,
|
603
587
|
method=method,
|
604
588
|
extrapolate=extrapolate,
|
605
589
|
cutoff=np.inf if cutoff is None else cutoff,
|
606
|
-
|
607
|
-
|
590
|
+
crop=crop,
|
591
|
+
crop_buffer=crop_buffer,
|
608
592
|
)
|
609
593
|
|
610
594
|
# If automatic parallel splits, calculate optimal value
|
@@ -623,7 +607,6 @@ def model_tides(
|
|
623
607
|
raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
|
624
608
|
|
625
609
|
# Parallelise if either multiple models or multiple splits requested
|
626
|
-
|
627
610
|
if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
|
628
611
|
with ProcessPoolExecutor(max_workers=parallel_max) as executor:
|
629
612
|
print(
|
@@ -685,7 +668,7 @@ def model_tides(
|
|
685
668
|
|
686
669
|
# Optionally compute ensemble model and add to dataframe
|
687
670
|
if "ensemble" in models_requested:
|
688
|
-
ensemble_df =
|
671
|
+
ensemble_df = ensemble_tides(tide_df, crs, ensemble_models, **ensemble_kwargs)
|
689
672
|
|
690
673
|
# Update requested models with any custom ensemble models, then
|
691
674
|
# filter the dataframe to keep only models originally requested
|
@@ -747,10 +730,12 @@ def model_phases(
|
|
747
730
|
any format that can be converted by `pandas.to_datetime()`;
|
748
731
|
e.g. np.ndarray[datetime64], pd.DatetimeIndex, pd.Timestamp,
|
749
732
|
datetime.datetime and strings (e.g. "2020-01-01 23:00").
|
733
|
+
For example: `time=pd.date_range(start="2000", end="2001", freq="5h")`
|
750
734
|
model : str or list of str, optional
|
751
|
-
The tide model (or models) to use to
|
752
|
-
Defaults to "EOT20";
|
753
|
-
|
735
|
+
The tide model (or list of models) to use to model tides.
|
736
|
+
Defaults to "EOT20"; specify "all" to use all models available
|
737
|
+
in `directory`. For a full list of available and supported models,
|
738
|
+
run `eo_tides.model.list_models`.
|
754
739
|
directory : str, optional
|
755
740
|
The directory containing tide model data files. If no path is
|
756
741
|
provided, this will default to the environment variable
|