eo-tides 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eo_tides/model.py CHANGED
@@ -1,15 +1,14 @@
1
1
  # Used to postpone evaluation of type annotations
2
2
  from __future__ import annotations
3
3
 
4
- import datetime
5
4
  import os
6
- import pathlib
7
5
  import textwrap
8
- import warnings
9
6
  from concurrent.futures import ProcessPoolExecutor
10
7
  from concurrent.futures.process import BrokenProcessPool
11
8
  from functools import partial
12
- from typing import TYPE_CHECKING, List, Union
9
+ from typing import TYPE_CHECKING
10
+
11
+ import psutil
13
12
 
14
13
  # Only import if running type checking
15
14
  if TYPE_CHECKING:
@@ -20,309 +19,9 @@ import numpy as np
20
19
  import pandas as pd
21
20
  import pyproj
22
21
  import pyTMD
23
- from colorama import Style, init
24
- from pyTMD.io.model import load_database, model
25
22
  from tqdm import tqdm
26
23
 
27
- from .utils import idw
28
-
29
- # Type alias for all possible inputs to "time" params
30
- DatetimeLike = Union[np.ndarray, pd.DatetimeIndex, pd.Timestamp, datetime.datetime, str, List[str]]
31
-
32
-
33
- def _set_directory(
34
- directory: str | os.PathLike | None = None,
35
- ) -> os.PathLike:
36
- """
37
- Set tide modelling files directory. If no custom
38
- path is provided, try global environmental variable
39
- instead.
40
- """
41
- if directory is None:
42
- if "EO_TIDES_TIDE_MODELS" in os.environ:
43
- directory = os.environ["EO_TIDES_TIDE_MODELS"]
44
- else:
45
- raise Exception(
46
- "No tide model directory provided via `directory`, and/or no "
47
- "`EO_TIDES_TIDE_MODELS` environment variable found. "
48
- "Please provide a valid path to your tide model directory."
49
- )
50
-
51
- # Verify path exists
52
- directory = pathlib.Path(directory).expanduser()
53
- if not directory.exists():
54
- raise FileNotFoundError(f"No valid tide model directory found at path `{directory}`")
55
- else:
56
- return directory
57
-
58
-
59
- def _standardise_time(
60
- time: DatetimeLike | None,
61
- ) -> np.ndarray | None:
62
- """
63
- Accept any time format accepted by `pd.to_datetime`,
64
- and return a datetime64 ndarray. Return None if None
65
- passed.
66
- """
67
- # Return time as-is if None
68
- if time is None:
69
- return None
70
-
71
- # Use pd.to_datetime for conversion, then convert to numpy array
72
- time = pd.to_datetime(time).to_numpy().astype("datetime64[ns]")
73
-
74
- # Ensure that data has at least one dimension
75
- return np.atleast_1d(time)
76
-
77
-
78
- def list_models(
79
- directory: str | os.PathLike | None = None,
80
- show_available: bool = True,
81
- show_supported: bool = True,
82
- raise_error: bool = False,
83
- ) -> tuple[list[str], list[str]]:
84
- """
85
- List all tide models available for tide modelling.
86
-
87
- This function scans the specified tide model directory
88
- and returns a list of models that are available in the
89
- directory as well as the full list of all models supported
90
- by `eo-tides` and `pyTMD`.
91
-
92
- For instructions on setting up tide models, see:
93
- <https://geoscienceaustralia.github.io/eo-tides/setup/>
94
-
95
- Parameters
96
- ----------
97
- directory : str, optional
98
- The directory containing tide model data files. If no path is
99
- provided, this will default to the environment variable
100
- `EO_TIDES_TIDE_MODELS` if set, or raise an error if not.
101
- Tide modelling files should be stored in sub-folders for each
102
- model that match the structure required by `pyTMD`
103
- (<https://geoscienceaustralia.github.io/eo-tides/setup/>).
104
- show_available : bool, optional
105
- Whether to print a list of locally available models.
106
- show_supported : bool, optional
107
- Whether to print a list of all supported models, in
108
- addition to models available locally.
109
- raise_error : bool, optional
110
- If True, raise an error if no available models are found.
111
- If False, raise a warning.
112
-
113
- Returns
114
- -------
115
- available_models : list of str
116
- A list of all tide models available within `directory`.
117
- supported_models : list of str
118
- A list of all tide models supported by `eo-tides`.
119
- """
120
- init() # Initialize colorama
121
-
122
- # Set tide modelling files directory. If no custom path is
123
- # provided, try global environment variable.
124
- directory = _set_directory(directory)
125
-
126
- # Get full list of supported models from pyTMD database
127
- model_database = load_database()["elevation"]
128
- supported_models = list(model_database.keys())
129
-
130
- # Extract expected model paths
131
- expected_paths = {}
132
- for m in supported_models:
133
- model_file = model_database[m]["model_file"]
134
- model_file = model_file[0] if isinstance(model_file, list) else model_file
135
- expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
136
-
137
- # Define column widths
138
- status_width = 4 # Width for emoji
139
- name_width = max(len(name) for name in supported_models)
140
- path_width = max(len(path) for path in expected_paths.values())
141
-
142
- # Print list of supported models, marking available and
143
- # unavailable models and appending available to list
144
- if show_available or show_supported:
145
- total_width = min(status_width + name_width + path_width + 6, 80)
146
- print("─" * total_width)
147
- print(f"{'󠀠🌊':^{status_width}} | {'Model':<{name_width}} | {'Expected path':<{path_width}}")
148
- print("─" * total_width)
149
-
150
- available_models = []
151
- for m in supported_models:
152
- try:
153
- model_file = model(directory=directory).elevation(m=m)
154
- available_models.append(m)
155
-
156
- if show_available:
157
- # Mark available models with a green tick
158
- status = "✅"
159
- print(f"{status:^{status_width}}│ {m:<{name_width}} │ {expected_paths[m]:<{path_width}}")
160
- except FileNotFoundError:
161
- if show_supported:
162
- # Mark unavailable models with a red cross
163
- status = "❌"
164
- print(
165
- f"{status:^{status_width}}│ {Style.DIM}{m:<{name_width}} │ {expected_paths[m]:<{path_width}}{Style.RESET_ALL}"
166
- )
167
-
168
- if show_available or show_supported:
169
- print("─" * total_width)
170
-
171
- # Print summary
172
- print(f"\n{Style.BRIGHT}Summary:{Style.RESET_ALL}")
173
- print(f"Available models: {len(available_models)}/{len(supported_models)}")
174
-
175
- # Raise error or warning if no models are available
176
- if not available_models:
177
- warning_msg = textwrap.dedent(
178
- f"""
179
- No valid tide models are available in `{directory}`.
180
- Are you sure you have provided the correct `directory` path, or set the
181
- `EO_TIDES_TIDE_MODELS` environment variable to point to the location of your
182
- tide model directory?
183
- """
184
- ).strip()
185
-
186
- if raise_error:
187
- raise Exception(warning_msg)
188
- else:
189
- warnings.warn(warning_msg, UserWarning)
190
-
191
- # Return list of available and supported models
192
- return available_models, supported_models
193
-
194
-
195
- def _model_tides(
196
- model,
197
- x,
198
- y,
199
- time,
200
- directory,
201
- crs,
202
- crop,
203
- method,
204
- extrapolate,
205
- cutoff,
206
- output_units,
207
- mode,
208
- ):
209
- """Worker function applied in parallel by `model_tides`. Handles the
210
- extraction of tide modelling constituents and tide modelling using
211
- `pyTMD`.
212
- """
213
- # Obtain model details
214
- pytmd_model = pyTMD.io.model(directory).elevation(model)
215
-
216
- # Reproject x, y to latitude/longitude
217
- transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
218
- lon, lat = transformer.transform(x.flatten(), y.flatten())
219
-
220
- # Convert datetime
221
- timescale = pyTMD.time.timescale().from_datetime(time.flatten())
222
-
223
- try:
224
- # Read tidal constants and interpolate to grid points
225
- amp, ph, c = pytmd_model.extract_constants(
226
- lon,
227
- lat,
228
- type=pytmd_model.type,
229
- crop=crop,
230
- bounds=None,
231
- method=method,
232
- extrapolate=extrapolate,
233
- cutoff=cutoff,
234
- append_node=False,
235
- # append_node=True,
236
- )
237
-
238
- # TODO: Return constituents
239
- # print(amp.shape, ph.shape, c)
240
- # print(pd.DataFrame({"amplitude": amp}))
241
-
242
- # Raise error if constituent files no not cover analysis extent
243
- except IndexError as e:
244
- error_msg = f"""
245
- The {model} tide model constituent files do not cover the requested analysis extent.
246
- This can occur if you are using clipped model files to improve run times.
247
- Consider using model files that cover your entire analysis area, or set `crop=False`
248
- to reduce the extent of tide model constituent files that is loaded.
249
- """
250
- raise Exception(textwrap.dedent(error_msg).strip()) from None
251
-
252
- # Calculate complex phase in radians for Euler's
253
- cph = -1j * ph * np.pi / 180.0
254
-
255
- # Calculate constituent oscillation
256
- hc = amp * np.exp(cph)
257
-
258
- # Compute deltat based on model
259
- if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
260
- # Use delta time at 2000.0 to match TMD outputs
261
- deltat = np.zeros_like(timescale.tt_ut1)
262
- else:
263
- # Use interpolated delta times
264
- deltat = timescale.tt_ut1
265
-
266
- # Determine the number of points and times to process. If in
267
- # "one-to-many" mode, these counts are used to repeat our extracted
268
- # constituents and timesteps so we can extract tides for all
269
- # combinations of our input times and tide modelling points.
270
- # If in "one-to-many" mode, repeat constituents to length of time
271
- # and number of input coords before passing to `predict_tide_drift`
272
- # If in "one-to-one" mode, we avoid this step by setting counts to 1
273
- # (e.g. "repeat 1 times")
274
- points_repeat = len(x) if mode == "one-to-many" else 1
275
- time_repeat = len(time) if mode == "one-to-many" else 1
276
- t, hc, deltat = (
277
- np.tile(timescale.tide, points_repeat),
278
- hc.repeat(time_repeat, axis=0),
279
- np.tile(deltat, points_repeat),
280
- )
281
-
282
- # Create arrays to hold outputs
283
- tide = np.ma.zeros((len(t)), fill_value=np.nan)
284
- tide.mask = np.any(hc.mask, axis=1)
285
-
286
- # Predict tidal elevations at time and infer minor corrections
287
- tide.data[:] = pyTMD.predict.drift(
288
- t,
289
- hc,
290
- c,
291
- deltat=deltat,
292
- corrections=pytmd_model.corrections,
293
- )
294
- minor = pyTMD.predict.infer_minor(
295
- t,
296
- hc,
297
- c,
298
- deltat=deltat,
299
- corrections=pytmd_model.corrections,
300
- minor=pytmd_model.minor,
301
- )
302
- tide.data[:] += minor.data[:]
303
-
304
- # Replace invalid values with fill value
305
- tide.data[tide.mask] = tide.fill_value
306
-
307
- # Convert data to pandas.DataFrame, and set index to our input
308
- # time/x/y values
309
- tide_df = pd.DataFrame({
310
- "time": np.tile(time, points_repeat),
311
- "x": np.repeat(x, time_repeat),
312
- "y": np.repeat(y, time_repeat),
313
- "tide_model": model,
314
- "tide_height": tide,
315
- }).set_index(["time", "x", "y"])
316
-
317
- # Optionally convert outputs to integer units (can save memory)
318
- if output_units == "m":
319
- tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
320
- elif output_units == "cm":
321
- tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
322
- elif output_units == "mm":
323
- tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
324
-
325
- return tide_df
24
+ from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw, list_models
326
25
 
327
26
 
328
27
  def _ensemble_model(
@@ -490,6 +189,181 @@ def _ensemble_model(
490
189
  return pd.concat(ensemble_list)
491
190
 
492
191
 
192
+ def _parallel_splits(
193
+ total_points: int,
194
+ model_count: int,
195
+ parallel_max: int | None = None,
196
+ min_points_per_split: int = 1000,
197
+ ) -> int:
198
+ """
199
+ Calculates the optimal number of parallel splits for data
200
+ processing based on system resources and processing constraints.
201
+
202
+ Parameters:
203
+ -----------
204
+ total_points : int
205
+ Total number of data points to process
206
+ model_count : int
207
+ Number of models that will be run in parallel
208
+ parallel_max : int, optional
209
+ Maximum number of parallel processes to use. If None, uses CPU core count
210
+ min_points_per_split : int, default=1000
211
+ Minimum number of points that should be processed in each split
212
+ """
213
+ # Get available CPUs. First see if `CPU_GUARANTEE` exists in
214
+ # environment (if running in JupyterHub); if not use psutil
215
+ # followed by standard CPU count
216
+ if parallel_max is None:
217
+ # Take the first valid output
218
+ raw_value = os.environ.get("CPU_GUARANTEE") or psutil.cpu_count(logical=False) or os.cpu_count() or 1
219
+
220
+ # Convert to integer
221
+ if isinstance(raw_value, str):
222
+ parallel_max = int(float(raw_value))
223
+ else:
224
+ parallel_max = int(raw_value)
225
+
226
+ # Calculate optimal number of splits based on constraints
227
+ splits_by_size = total_points / min_points_per_split
228
+ splits_by_cpu = parallel_max / model_count
229
+ optimal_splits = min(splits_by_size, splits_by_cpu)
230
+
231
+ # Convert to integer and ensure at least 1 split
232
+ final_split_count = int(max(1, optimal_splits))
233
+ return final_split_count
234
+
235
+
236
+ def _model_tides(
237
+ model,
238
+ x,
239
+ y,
240
+ time,
241
+ directory,
242
+ crs,
243
+ crop,
244
+ method,
245
+ extrapolate,
246
+ cutoff,
247
+ output_units,
248
+ mode,
249
+ ):
250
+ """Worker function applied in parallel by `model_tides`. Handles the
251
+ extraction of tide modelling constituents and tide modelling using
252
+ `pyTMD`.
253
+ """
254
+ # Obtain model details
255
+ pytmd_model = pyTMD.io.model(directory).elevation(model)
256
+
257
+ # Reproject x, y to latitude/longitude
258
+ transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
259
+ lon, lat = transformer.transform(x.flatten(), y.flatten())
260
+
261
+ # Convert datetime
262
+ timescale = pyTMD.time.timescale().from_datetime(time.flatten())
263
+
264
+ try:
265
+ # Read tidal constants and interpolate to grid points
266
+ amp, ph, c = pytmd_model.extract_constants(
267
+ lon,
268
+ lat,
269
+ type=pytmd_model.type,
270
+ crop=crop,
271
+ method=method,
272
+ extrapolate=extrapolate,
273
+ cutoff=cutoff,
274
+ append_node=False,
275
+ # append_node=True,
276
+ )
277
+
278
+ # TODO: Return constituents
279
+ # print(model, amp.shape)
280
+ # print(amp.shape, ph.shape, c)
281
+ # print(pd.DataFrame({"amplitude": amp}))
282
+
283
+ # Raise error if constituent files no not cover analysis extent
284
+ except IndexError:
285
+ error_msg = f"""
286
+ The {model} tide model constituent files do not cover the analysis extent
287
+ ({min(lon):.2f}, {max(lon):.2f}, {min(lat):.2f}, {max(lat):.2f}).
288
+ This can occur if you are using clipped model files to improve run times.
289
+ Consider using model files that cover your entire analysis area, or set `crop=False`
290
+ to reduce the extent of tide model constituent files that is loaded.
291
+ """
292
+ raise Exception(textwrap.dedent(error_msg).strip()) from None
293
+
294
+ # Calculate complex phase in radians for Euler's
295
+ cph = -1j * ph * np.pi / 180.0
296
+
297
+ # Calculate constituent oscillation
298
+ hc = amp * np.exp(cph)
299
+
300
+ # Compute delta times based on model
301
+ if pytmd_model.corrections in ("OTIS", "ATLAS", "TMD3", "netcdf"):
302
+ # Use delta time at 2000.0 to match TMD outputs
303
+ deltat = np.zeros_like(timescale.tt_ut1)
304
+ else:
305
+ # Use interpolated delta times
306
+ deltat = timescale.tt_ut1
307
+
308
+ # In "one-to-many" mode, extracted tidal constituents and timesteps
309
+ # are repeated/multiplied out to match the number of input points and
310
+ # timesteps, enabling the modeling of tides across all combinations
311
+ # of input times and points. In "one-to-one" mode, no repetition is
312
+ # needed, so each repeat count is set to 1.
313
+ points_repeat = len(x) if mode == "one-to-many" else 1
314
+ time_repeat = len(time) if mode == "one-to-many" else 1
315
+ t, hc, deltat = (
316
+ np.tile(timescale.tide, points_repeat),
317
+ hc.repeat(time_repeat, axis=0),
318
+ np.tile(deltat, points_repeat),
319
+ )
320
+
321
+ # Create arrays to hold outputs
322
+ tide = np.ma.zeros((len(t)), fill_value=np.nan)
323
+ tide.mask = np.any(hc.mask, axis=1)
324
+
325
+ # Predict tidal elevations at time and infer minor corrections
326
+ tide.data[:] = pyTMD.predict.drift(
327
+ t,
328
+ hc,
329
+ c,
330
+ deltat=deltat,
331
+ corrections=pytmd_model.corrections,
332
+ )
333
+ minor = pyTMD.predict.infer_minor(
334
+ t,
335
+ hc,
336
+ c,
337
+ deltat=deltat,
338
+ corrections=pytmd_model.corrections,
339
+ minor=pytmd_model.minor,
340
+ )
341
+ tide.data[:] += minor.data[:]
342
+
343
+ # Replace invalid values with fill value
344
+ tide.data[tide.mask] = tide.fill_value
345
+
346
+ # Convert data to pandas.DataFrame, and set index to our input
347
+ # time/x/y values
348
+ tide_df = pd.DataFrame({
349
+ "time": np.tile(time, points_repeat),
350
+ "x": np.repeat(x, time_repeat),
351
+ "y": np.repeat(y, time_repeat),
352
+ "tide_model": model,
353
+ "tide_height": tide,
354
+ }).set_index(["time", "x", "y"])
355
+
356
+ # Optionally convert outputs to integer units (can save memory)
357
+ if output_units == "m":
358
+ tide_df["tide_height"] = tide_df.tide_height.astype(np.float32)
359
+ elif output_units == "cm":
360
+ tide_df["tide_height"] = (tide_df.tide_height * 100).astype(np.int16)
361
+ elif output_units == "mm":
362
+ tide_df["tide_height"] = (tide_df.tide_height * 1000).astype(np.int16)
363
+
364
+ return tide_df
365
+
366
+
493
367
  def model_tides(
494
368
  x: float | list[float] | xr.DataArray,
495
369
  y: float | list[float] | xr.DataArray,
@@ -498,12 +372,13 @@ def model_tides(
498
372
  directory: str | os.PathLike | None = None,
499
373
  crs: str = "EPSG:4326",
500
374
  crop: bool = True,
501
- method: str = "spline",
375
+ method: str = "linear",
502
376
  extrapolate: bool = True,
503
377
  cutoff: float | None = None,
504
378
  mode: str = "one-to-many",
505
379
  parallel: bool = True,
506
- parallel_splits: int = 5,
380
+ parallel_splits: int | str = "auto",
381
+ parallel_max: int | None = None,
507
382
  output_units: str = "m",
508
383
  output_format: str = "long",
509
384
  ensemble_models: list[str] | None = None,
@@ -564,11 +439,11 @@ def model_tides(
564
439
  1 degree buffer around all input points. Defaults to True.
565
440
  method : str, optional
566
441
  Method used to interpolate tidal constituents
567
- from model files. Options include:
442
+ from model files. Defaults to "linear"; options include:
568
443
 
569
- - "spline": scipy bivariate spline interpolation (default)
570
- - "bilinear": quick bilinear interpolation
571
444
  - "linear", "nearest": scipy regular grid interpolations
445
+ - "spline": scipy bivariate spline interpolation
446
+ - "bilinear": quick bilinear interpolation
572
447
  extrapolate : bool, optional
573
448
  Whether to extrapolate tides for x and y coordinates outside of
574
449
  the valid tide modelling domain using nearest-neighbor.
@@ -594,12 +469,16 @@ def model_tides(
594
469
  parallel. Optionally, tide modelling can also be run in parallel
595
470
  across input x and y coordinates (see "parallel_splits" below).
596
471
  Default is True.
597
- parallel_splits : int, optional
472
+ parallel_splits : str or int, optional
598
473
  Whether to split the input x and y coordinates into smaller,
599
474
  evenly-sized chunks that are processed in parallel. This can
600
475
  provide a large performance boost when processing large numbers
601
- of coordinates. The default is 5 chunks, which will split
602
- coordinates into 5 parallelised chunks.
476
+ of coordinates. The default is "auto", which will automatically
477
+ attempt to determine optimal splits based on available CPUs,
478
+ the number of input points, and the number of models.
479
+ parallel_max : int, optional
480
+ Maximum number of processes to run in parallel. The default of
481
+ None will automatically determine this from your available CPUs.
603
482
  output_units : str, optional
604
483
  Whether to return modelled tides in floating point metre units,
605
484
  or integer centimetre units (i.e. scaled by 100) or integer
@@ -633,7 +512,6 @@ def model_tides(
633
512
 
634
513
  """
635
514
  # Turn inputs into arrays for consistent handling
636
- models_requested = list(np.atleast_1d(model))
637
515
  x = np.atleast_1d(x)
638
516
  y = np.atleast_1d(y)
639
517
  time = _standardise_time(time)
@@ -662,58 +540,12 @@ def model_tides(
662
540
  # provided, try global environment variable.
663
541
  directory = _set_directory(directory)
664
542
 
665
- # Get full list of supported models from pyTMD database;
666
- # add ensemble option to list of models
667
- available_models, valid_models = list_models(
668
- directory, show_available=False, show_supported=False, raise_error=True
543
+ # Standardise model list, handling "all" and "ensemble" functionality
544
+ models_to_process, models_requested, ensemble_models = _standardise_models(
545
+ model=model,
546
+ directory=directory,
547
+ ensemble_models=ensemble_models,
669
548
  )
670
- # TODO: This is hacky, find a better way. Perhaps a kwarg that
671
- # turns ensemble functionality on, and checks that supplied
672
- # models match models expected for ensemble?
673
- available_models = available_models + ["ensemble"]
674
- valid_models = valid_models + ["ensemble"]
675
-
676
- # Error if any models are not supported
677
- if not all(m in valid_models for m in models_requested):
678
- error_text = (
679
- f"One or more of the requested models are not valid:\n"
680
- f"{models_requested}\n\n"
681
- "The following models are supported:\n"
682
- f"{valid_models}"
683
- )
684
- raise ValueError(error_text)
685
-
686
- # Error if any models are not available in `directory`
687
- if not all(m in available_models for m in models_requested):
688
- error_text = (
689
- f"One or more of the requested models are valid, but not available in `{directory}`:\n"
690
- f"{models_requested}\n\n"
691
- f"The following models are available in `{directory}`:\n"
692
- f"{available_models}"
693
- )
694
- raise ValueError(error_text)
695
-
696
- # If ensemble modelling is requested, use a custom list of models
697
- # for subsequent processing
698
- if "ensemble" in models_requested:
699
- print("Running ensemble tide modelling")
700
- models_to_process = (
701
- ensemble_models
702
- if ensemble_models is not None
703
- else [
704
- "FES2014",
705
- "TPXO9-atlas-v5",
706
- "EOT20",
707
- "HAMTIDE11",
708
- "GOT4.10",
709
- "FES2012",
710
- "TPXO8-atlas-v1",
711
- ]
712
- )
713
-
714
- # Otherwise, models to process are the same as those requested
715
- else:
716
- models_to_process = models_requested
717
549
 
718
550
  # Update tide modelling func to add default keyword arguments that
719
551
  # are used for every iteration during parallel processing
@@ -729,13 +561,28 @@ def model_tides(
729
561
  mode=mode,
730
562
  )
731
563
 
732
- # Ensure requested parallel splits is not smaller than number of points
733
- parallel_splits = min(parallel_splits, len(x))
564
+ # If automatic parallel splits, calculate optimal value
565
+ # based on available parallelisation, number of points
566
+ # and number of models
567
+ if parallel_splits == "auto":
568
+ parallel_splits = _parallel_splits(
569
+ total_points=len(x),
570
+ model_count=len(models_to_process),
571
+ parallel_max=parallel_max,
572
+ )
573
+
574
+ # Verify that parallel splits are not larger than number of points
575
+ assert isinstance(parallel_splits, int)
576
+ if parallel_splits > len(x):
577
+ raise ValueError(f"Parallel splits ({parallel_splits}) cannot be larger than the number of points ({len(x)}).")
734
578
 
735
579
  # Parallelise if either multiple models or multiple splits requested
580
+
736
581
  if parallel & ((len(models_to_process) > 1) | (parallel_splits > 1)):
737
- with ProcessPoolExecutor() as executor:
738
- print(f"Modelling tides using {', '.join(models_to_process)} in parallel")
582
+ with ProcessPoolExecutor(max_workers=parallel_max) as executor:
583
+ print(
584
+ f"Modelling tides with {', '.join(models_to_process)} in parallel (models: {len(models_to_process)}, splits: {parallel_splits})"
585
+ )
739
586
 
740
587
  # Optionally split lon/lat points into `splits_n` chunks
741
588
  # that will be applied in parallel
@@ -783,7 +630,7 @@ def model_tides(
783
630
  model_outputs = []
784
631
 
785
632
  for model_i in models_to_process:
786
- print(f"Modelling tides using {model_i}")
633
+ print(f"Modelling tides with {model_i}")
787
634
  tide_df = iter_func(model_i, x, y, time)
788
635
  model_outputs.append(tide_df)
789
636
 
@@ -792,7 +639,7 @@ def model_tides(
792
639
 
793
640
  # Optionally compute ensemble model and add to dataframe
794
641
  if "ensemble" in models_requested:
795
- ensemble_df = _ensemble_model(tide_df, crs, models_to_process, **ensemble_kwargs)
642
+ ensemble_df = _ensemble_model(tide_df, crs, ensemble_models, **ensemble_kwargs)
796
643
 
797
644
  # Update requested models with any custom ensemble models, then
798
645
  # filter the dataframe to keep only models originally requested
eo_tides/stats.py CHANGED
@@ -6,20 +6,18 @@ from typing import TYPE_CHECKING
6
6
 
7
7
  import matplotlib.pyplot as plt
8
8
  import numpy as np
9
- import odc.geo.xr
10
9
  import pandas as pd
11
10
  import xarray as xr
12
11
  from scipy import stats
13
12
 
14
13
  # Only import if running type checking
15
14
  if TYPE_CHECKING:
16
- import datetime
17
-
18
15
  import xarray as xr
19
16
  from odc.geo.geobox import GeoBox
20
17
 
21
18
  from .eo import _standardise_inputs, pixel_tides, tag_tides
22
- from .model import DatetimeLike, model_tides
19
+ from .model import model_tides
20
+ from .utils import DatetimeLike
23
21
 
24
22
 
25
23
  def _plot_biases(