anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
|
@@ -9,25 +9,21 @@
|
|
|
9
9
|
|
|
10
10
|
import datetime
|
|
11
11
|
import re
|
|
12
|
+
from collections.abc import Generator
|
|
12
13
|
from typing import Any
|
|
13
|
-
from typing import Dict
|
|
14
|
-
from typing import Generator
|
|
15
|
-
from typing import List
|
|
16
|
-
from typing import Optional
|
|
17
|
-
from typing import Union
|
|
18
14
|
|
|
19
15
|
from anemoi.utils.humanize import did_you_mean
|
|
20
16
|
from earthkit.data import from_source
|
|
21
17
|
from earthkit.data.utils.availability import Availability
|
|
22
18
|
|
|
23
|
-
from anemoi.datasets.create.
|
|
19
|
+
from anemoi.datasets.create.sources import source_registry
|
|
24
20
|
|
|
25
|
-
from .legacy import
|
|
21
|
+
from .legacy import LegacySource
|
|
26
22
|
|
|
27
23
|
DEBUG = False
|
|
28
24
|
|
|
29
25
|
|
|
30
|
-
def to_list(x:
|
|
26
|
+
def to_list(x: list | tuple | Any) -> list:
|
|
31
27
|
"""Converts the input to a list if it is not already a list or tuple.
|
|
32
28
|
|
|
33
29
|
Parameters
|
|
@@ -46,8 +42,8 @@ def to_list(x: Union[list, tuple, Any]) -> list:
|
|
|
46
42
|
|
|
47
43
|
|
|
48
44
|
def _date_to_datetime(
|
|
49
|
-
d:
|
|
50
|
-
) ->
|
|
45
|
+
d: datetime.datetime | list | tuple | str,
|
|
46
|
+
) -> datetime.datetime | list[datetime.datetime]:
|
|
51
47
|
"""Converts the input date(s) to datetime objects.
|
|
52
48
|
|
|
53
49
|
Parameters
|
|
@@ -67,7 +63,7 @@ def _date_to_datetime(
|
|
|
67
63
|
return datetime.datetime.fromisoformat(d)
|
|
68
64
|
|
|
69
65
|
|
|
70
|
-
def expand_to_by(x:
|
|
66
|
+
def expand_to_by(x: str | int | list) -> str | int | list:
|
|
71
67
|
"""Expands a range expression to a list of values.
|
|
72
68
|
|
|
73
69
|
Parameters
|
|
@@ -97,7 +93,7 @@ def expand_to_by(x: Union[str, int, list]) -> Union[str, int, list]:
|
|
|
97
93
|
return x
|
|
98
94
|
|
|
99
95
|
|
|
100
|
-
def normalise_time_delta(t:
|
|
96
|
+
def normalise_time_delta(t: datetime.timedelta | str) -> datetime.timedelta:
|
|
101
97
|
"""Normalizes a time delta string to a datetime.timedelta object.
|
|
102
98
|
|
|
103
99
|
Parameters
|
|
@@ -120,7 +116,7 @@ def normalise_time_delta(t: Union[datetime.timedelta, str]) -> datetime.timedelt
|
|
|
120
116
|
return t
|
|
121
117
|
|
|
122
118
|
|
|
123
|
-
def _normalise_time(t:
|
|
119
|
+
def _normalise_time(t: int | str) -> str:
|
|
124
120
|
"""Normalizes a time value to a string in HHMM format.
|
|
125
121
|
|
|
126
122
|
Parameters
|
|
@@ -136,15 +132,15 @@ def _normalise_time(t: Union[int, str]) -> str:
|
|
|
136
132
|
t = int(t)
|
|
137
133
|
if t < 100:
|
|
138
134
|
t * 100
|
|
139
|
-
return "{:04d}"
|
|
135
|
+
return f"{t:04d}"
|
|
140
136
|
|
|
141
137
|
|
|
142
138
|
def _expand_mars_request(
|
|
143
|
-
request:
|
|
139
|
+
request: dict[str, Any],
|
|
144
140
|
date: datetime.datetime,
|
|
145
141
|
request_already_using_valid_datetime: bool = False,
|
|
146
142
|
date_key: str = "date",
|
|
147
|
-
) ->
|
|
143
|
+
) -> list[dict[str, Any]]:
|
|
148
144
|
"""Expands a MARS request with the given date and other parameters.
|
|
149
145
|
|
|
150
146
|
Parameters
|
|
@@ -222,11 +218,11 @@ def _expand_mars_request(
|
|
|
222
218
|
|
|
223
219
|
|
|
224
220
|
def factorise_requests(
|
|
225
|
-
dates:
|
|
226
|
-
*requests:
|
|
221
|
+
dates: list[datetime.datetime],
|
|
222
|
+
*requests: dict[str, Any],
|
|
227
223
|
request_already_using_valid_datetime: bool = False,
|
|
228
224
|
date_key: str = "date",
|
|
229
|
-
) -> Generator[
|
|
225
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
230
226
|
"""Factorizes the requests based on the given dates.
|
|
231
227
|
|
|
232
228
|
Parameters
|
|
@@ -268,7 +264,7 @@ def factorise_requests(
|
|
|
268
264
|
yield r
|
|
269
265
|
|
|
270
266
|
|
|
271
|
-
def use_grib_paramid(r:
|
|
267
|
+
def use_grib_paramid(r: dict[str, Any]) -> dict[str, Any]:
|
|
272
268
|
"""Converts the parameter short names to GRIB parameter IDs.
|
|
273
269
|
|
|
274
270
|
Parameters
|
|
@@ -362,135 +358,111 @@ MARS_KEYS = [
|
|
|
362
358
|
]
|
|
363
359
|
|
|
364
360
|
|
|
365
|
-
@
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
requests = [kwargs]
|
|
402
|
-
|
|
403
|
-
for r in requests:
|
|
404
|
-
param = r.get("param", [])
|
|
405
|
-
if not isinstance(param, (list, tuple)):
|
|
406
|
-
param = [param]
|
|
407
|
-
# check for "Norway bug" where yaml transforms 'no' into False, etc.
|
|
408
|
-
for p in param:
|
|
409
|
-
if p is False:
|
|
410
|
-
raise ValueError(
|
|
411
|
-
"'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
|
|
412
|
-
)
|
|
413
|
-
if p is None:
|
|
414
|
-
raise ValueError(
|
|
415
|
-
"'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
|
|
416
|
-
)
|
|
417
|
-
if p is True:
|
|
418
|
-
raise ValueError(
|
|
419
|
-
"'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
|
|
420
|
-
)
|
|
421
|
-
|
|
422
|
-
if len(dates) == 0: # When using `repeated_dates`
|
|
423
|
-
assert len(requests) == 1, requests
|
|
424
|
-
assert "date" in requests[0], requests[0]
|
|
425
|
-
if isinstance(requests[0]["date"], datetime.date):
|
|
426
|
-
requests[0]["date"] = requests[0]["date"].strftime("%Y%m%d")
|
|
427
|
-
else:
|
|
428
|
-
requests = factorise_requests(
|
|
429
|
-
dates,
|
|
430
|
-
*requests,
|
|
431
|
-
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
432
|
-
date_key=date_key,
|
|
433
|
-
)
|
|
434
|
-
|
|
435
|
-
requests = list(requests)
|
|
436
|
-
|
|
437
|
-
ds = from_source("empty")
|
|
438
|
-
context.trace("✅", f"{[str(d) for d in dates]}")
|
|
439
|
-
context.trace("✅", f"Will run {len(requests)} requests")
|
|
440
|
-
for r in requests:
|
|
441
|
-
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
442
|
-
context.trace("✅", f"mars {r}")
|
|
443
|
-
|
|
444
|
-
for r in requests:
|
|
445
|
-
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
446
|
-
|
|
447
|
-
if context.use_grib_paramid and "param" in r:
|
|
448
|
-
r = use_grib_paramid(r)
|
|
449
|
-
|
|
450
|
-
for k, v in r.items():
|
|
451
|
-
if k not in MARS_KEYS:
|
|
452
|
-
raise ValueError(
|
|
453
|
-
f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
|
|
454
|
-
)
|
|
455
|
-
try:
|
|
456
|
-
if use_cdsapi_dataset:
|
|
457
|
-
ds = ds + from_source("cds", use_cdsapi_dataset, r)
|
|
458
|
-
else:
|
|
459
|
-
ds = ds + from_source("mars", **r)
|
|
460
|
-
except Exception as e:
|
|
461
|
-
if "File is empty:" not in str(e):
|
|
462
|
-
raise
|
|
463
|
-
return ds
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
execute = mars
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
if __name__ == "__main__":
|
|
470
|
-
import yaml
|
|
471
|
-
|
|
472
|
-
config = yaml.safe_load(
|
|
361
|
+
@source_registry.register("mars")
|
|
362
|
+
class MarsSource(LegacySource):
|
|
363
|
+
|
|
364
|
+
@staticmethod
|
|
365
|
+
def _execute(
|
|
366
|
+
context: Any,
|
|
367
|
+
dates: list[datetime.datetime],
|
|
368
|
+
*requests: dict[str, Any],
|
|
369
|
+
request_already_using_valid_datetime: bool = False,
|
|
370
|
+
date_key: str = "date",
|
|
371
|
+
use_cdsapi_dataset: str | None = None,
|
|
372
|
+
**kwargs: Any,
|
|
373
|
+
) -> Any:
|
|
374
|
+
"""Executes MARS requests based on the given context, dates, and other parameters.
|
|
375
|
+
|
|
376
|
+
Parameters
|
|
377
|
+
----------
|
|
378
|
+
context : Any
|
|
379
|
+
The context for the requests.
|
|
380
|
+
dates : List[datetime.datetime]
|
|
381
|
+
The list of dates to be used in the requests.
|
|
382
|
+
requests : Dict[str, Any]
|
|
383
|
+
The input requests to be executed.
|
|
384
|
+
request_already_using_valid_datetime : bool, optional
|
|
385
|
+
Flag indicating if the requests already use valid datetime.
|
|
386
|
+
date_key : str, optional
|
|
387
|
+
The key for the date in the requests.
|
|
388
|
+
use_cdsapi_dataset : Optional[str], optional
|
|
389
|
+
The dataset to be used with CDS API.
|
|
390
|
+
kwargs : Any
|
|
391
|
+
Additional keyword arguments for the requests.
|
|
392
|
+
|
|
393
|
+
Returns
|
|
394
|
+
-------
|
|
395
|
+
Any
|
|
396
|
+
The resulting dataset.
|
|
473
397
|
"""
|
|
474
|
-
- class: ea
|
|
475
|
-
expver: '0001'
|
|
476
|
-
grid: 20.0/20.0
|
|
477
|
-
levtype: sfc
|
|
478
|
-
param: [2t]
|
|
479
|
-
# param: [10u, 10v, 2d, 2t, lsm, msl, sdor, skt, slor, sp, tcw, z]
|
|
480
|
-
number: [0, 1]
|
|
481
|
-
|
|
482
|
-
# - class: ea
|
|
483
|
-
# expver: '0001'
|
|
484
|
-
# grid: 20.0/20.0
|
|
485
|
-
# levtype: pl
|
|
486
|
-
# param: [q]
|
|
487
|
-
# levelist: [1000, 850]
|
|
488
398
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
399
|
+
if not requests:
|
|
400
|
+
requests = [kwargs]
|
|
401
|
+
|
|
402
|
+
for r in requests:
|
|
403
|
+
param = r.get("param", [])
|
|
404
|
+
if not isinstance(param, (list, tuple)):
|
|
405
|
+
param = [param]
|
|
406
|
+
# check for "Norway bug" where yaml transforms 'no' into False, etc.
|
|
407
|
+
for p in param:
|
|
408
|
+
if p is False:
|
|
409
|
+
raise ValueError(
|
|
410
|
+
"'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
|
|
411
|
+
)
|
|
412
|
+
if p is None:
|
|
413
|
+
raise ValueError(
|
|
414
|
+
"'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
|
|
415
|
+
)
|
|
416
|
+
if p is True:
|
|
417
|
+
raise ValueError(
|
|
418
|
+
"'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
if len(dates) == 0: # When using `repeated_dates`
|
|
422
|
+
assert len(requests) == 1, requests
|
|
423
|
+
assert "date" in requests[0], requests[0]
|
|
424
|
+
if isinstance(requests[0]["date"], datetime.date):
|
|
425
|
+
requests[0]["date"] = requests[0]["date"].strftime("%Y%m%d")
|
|
426
|
+
else:
|
|
427
|
+
requests = factorise_requests(
|
|
428
|
+
dates,
|
|
429
|
+
*requests,
|
|
430
|
+
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
431
|
+
date_key=date_key,
|
|
432
|
+
)
|
|
493
433
|
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
434
|
+
requests = list(requests)
|
|
435
|
+
|
|
436
|
+
ds = from_source("empty")
|
|
437
|
+
context.trace("✅", f"{[str(d) for d in dates]}")
|
|
438
|
+
context.trace("✅", f"Will run {len(requests)} requests")
|
|
439
|
+
for r in requests:
|
|
440
|
+
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
441
|
+
context.trace("✅", f"mars {r}")
|
|
442
|
+
|
|
443
|
+
for r in requests:
|
|
444
|
+
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
445
|
+
|
|
446
|
+
if context.use_grib_paramid and "param" in r:
|
|
447
|
+
r = use_grib_paramid(r)
|
|
448
|
+
|
|
449
|
+
for k, v in r.items():
|
|
450
|
+
if k not in MARS_KEYS:
|
|
451
|
+
raise ValueError(
|
|
452
|
+
f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
|
|
453
|
+
)
|
|
454
|
+
try:
|
|
455
|
+
if use_cdsapi_dataset:
|
|
456
|
+
ds = ds + from_source("cds", use_cdsapi_dataset, r)
|
|
457
|
+
else:
|
|
458
|
+
ds = ds + from_source("mars", **r)
|
|
459
|
+
except Exception as e:
|
|
460
|
+
if "File is empty:" not in str(e):
|
|
461
|
+
raise
|
|
462
|
+
return ds
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
# TODO: make clearer the interface between sources that use mars.
|
|
466
|
+
# Currently some sources use mars as a function rather than through the registry,
|
|
467
|
+
# e.g. accumulations, accumulations2, hindcasts, recentre, tendencies
|
|
468
|
+
mars = MarsSource._execute
|
|
@@ -9,34 +9,37 @@
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
from typing import Any
|
|
12
|
-
from typing import List
|
|
13
12
|
|
|
14
13
|
import earthkit.data as ekd
|
|
15
14
|
|
|
16
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
17
17
|
from .xarray import load_many
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
@
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
20
|
+
@source_registry.register("netcdf")
|
|
21
|
+
class NetCDFSource(LegacySource):
|
|
22
|
+
|
|
23
|
+
@staticmethod
|
|
24
|
+
def _execute(context: Any, dates: list[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
25
|
+
"""Execute the loading of multiple NetCDF files.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
context : object
|
|
30
|
+
The context in which the function is executed.
|
|
31
|
+
dates : list
|
|
32
|
+
List of dates for which data is to be loaded.
|
|
33
|
+
path : str
|
|
34
|
+
Path to the directory containing the NetCDF files.
|
|
35
|
+
*args : tuple
|
|
36
|
+
Additional positional arguments.
|
|
37
|
+
**kwargs : dict
|
|
38
|
+
Additional keyword arguments.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
object
|
|
43
|
+
The loaded data.
|
|
44
|
+
"""
|
|
45
|
+
return load_many("📁", context, dates, path, *args, **kwargs)
|
|
@@ -9,35 +9,37 @@
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
from typing import Any
|
|
12
|
-
from typing import Dict
|
|
13
|
-
from typing import List
|
|
14
12
|
|
|
15
13
|
import earthkit.data as ekd
|
|
16
14
|
|
|
17
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
18
17
|
from .xarray import load_many
|
|
19
18
|
|
|
20
19
|
|
|
21
|
-
@
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
20
|
+
@source_registry.register("opendap")
|
|
21
|
+
class OpenDAPSource(LegacySource):
|
|
22
|
+
|
|
23
|
+
@staticmethod
|
|
24
|
+
def _execute(context: dict[str, Any], dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
25
|
+
"""Execute the data loading process from an OpenDAP source.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
context : dict
|
|
30
|
+
The context in which the function is executed.
|
|
31
|
+
dates : list
|
|
32
|
+
List of dates for which data is to be loaded.
|
|
33
|
+
url : str
|
|
34
|
+
The URL of the OpenDAP source.
|
|
35
|
+
*args : tuple
|
|
36
|
+
Additional positional arguments.
|
|
37
|
+
**kwargs : dict
|
|
38
|
+
Additional keyword arguments.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
xarray.Dataset
|
|
43
|
+
The loaded dataset.
|
|
44
|
+
"""
|
|
45
|
+
return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -9,15 +9,13 @@
|
|
|
9
9
|
|
|
10
10
|
import datetime
|
|
11
11
|
import glob
|
|
12
|
+
from collections.abc import Generator
|
|
12
13
|
from typing import Any
|
|
13
|
-
from typing import Generator
|
|
14
|
-
from typing import List
|
|
15
|
-
from typing import Tuple
|
|
16
14
|
|
|
17
15
|
from earthkit.data.utils.patterns import Pattern
|
|
18
16
|
|
|
19
17
|
|
|
20
|
-
def _expand(paths:
|
|
18
|
+
def _expand(paths: list[str]) -> Generator[str, None, None]:
|
|
21
19
|
"""Expand the given paths to include all matching file paths.
|
|
22
20
|
|
|
23
21
|
Parameters
|
|
@@ -54,8 +52,8 @@ def _expand(paths: List[str]) -> Generator[str, None, None]:
|
|
|
54
52
|
|
|
55
53
|
|
|
56
54
|
def iterate_patterns(
|
|
57
|
-
path: str, dates:
|
|
58
|
-
) -> Generator[
|
|
55
|
+
path: str, dates: list[datetime.datetime], **kwargs: Any
|
|
56
|
+
) -> Generator[tuple[str, list[str]], None, None]:
|
|
59
57
|
"""Iterate over patterns and expand them with given dates and additional keyword arguments.
|
|
60
58
|
|
|
61
59
|
Parameters
|
|
@@ -9,17 +9,15 @@
|
|
|
9
9
|
|
|
10
10
|
from copy import deepcopy
|
|
11
11
|
from typing import Any
|
|
12
|
-
from typing import Dict
|
|
13
|
-
from typing import List
|
|
14
|
-
from typing import Union
|
|
15
12
|
|
|
16
13
|
from anemoi.datasets.compute.recentre import recentre as _recentre
|
|
17
14
|
|
|
18
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
19
17
|
from .mars import mars
|
|
20
18
|
|
|
21
19
|
|
|
22
|
-
def to_list(x:
|
|
20
|
+
def to_list(x: list | tuple | str) -> list:
|
|
23
21
|
"""Converts the input to a list. If the input is a string, it splits it by '/'.
|
|
24
22
|
|
|
25
23
|
Parameters
|
|
@@ -39,7 +37,7 @@ def to_list(x: Union[list, tuple, str]) -> List:
|
|
|
39
37
|
return [x]
|
|
40
38
|
|
|
41
39
|
|
|
42
|
-
def normalise_number(number:
|
|
40
|
+
def normalise_number(number: list | tuple | str) -> list[int]:
|
|
43
41
|
"""Normalises the input number to a list of integers.
|
|
44
42
|
|
|
45
43
|
Parameters
|
|
@@ -63,7 +61,7 @@ def normalise_number(number: Union[list, tuple, str]) -> List[int]:
|
|
|
63
61
|
return number
|
|
64
62
|
|
|
65
63
|
|
|
66
|
-
def normalise_request(request:
|
|
64
|
+
def normalise_request(request: dict) -> dict:
|
|
67
65
|
"""Normalises the request dictionary by converting certain fields to lists.
|
|
68
66
|
|
|
69
67
|
Parameters
|
|
@@ -85,7 +83,7 @@ def normalise_request(request: Dict) -> Dict:
|
|
|
85
83
|
return request
|
|
86
84
|
|
|
87
85
|
|
|
88
|
-
def load_if_needed(context: Any, dates: Any, dict_or_dataset:
|
|
86
|
+
def load_if_needed(context: Any, dates: Any, dict_or_dataset: dict | Any) -> Any:
|
|
89
87
|
"""Loads the dataset if the input is a dictionary, otherwise returns the input.
|
|
90
88
|
|
|
91
89
|
Parameters
|
|
@@ -108,43 +106,43 @@ def load_if_needed(context: Any, dates: Any, dict_or_dataset: Union[Dict, Any])
|
|
|
108
106
|
return dict_or_dataset
|
|
109
107
|
|
|
110
108
|
|
|
111
|
-
@
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
109
|
+
@source_registry.register("recentre")
|
|
110
|
+
class RecentreSource(LegacySource):
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _execute(
|
|
114
|
+
context: Any,
|
|
115
|
+
dates: Any,
|
|
116
|
+
members: dict | Any,
|
|
117
|
+
centre: dict | Any,
|
|
118
|
+
alpha: float = 1.0,
|
|
119
|
+
remapping: dict = {},
|
|
120
|
+
patches: dict = {},
|
|
121
|
+
) -> Any:
|
|
122
|
+
"""Recentres the members dataset using the centre dataset.
|
|
123
|
+
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
context : Any
|
|
127
|
+
The context for recentering.
|
|
128
|
+
dates : Any
|
|
129
|
+
The dates for recentering.
|
|
130
|
+
members : Union[dict, Any]
|
|
131
|
+
The members dataset or request dictionary.
|
|
132
|
+
centre : Union[dict, Any]
|
|
133
|
+
The centre dataset or request dictionary.
|
|
134
|
+
alpha : float, optional
|
|
135
|
+
The alpha value for recentering. Defaults to 1.0.
|
|
136
|
+
remapping : dict, optional
|
|
137
|
+
The remapping dictionary. Defaults to {}.
|
|
138
|
+
patches : dict, optional
|
|
139
|
+
The patches dictionary. Defaults to {}.
|
|
140
|
+
|
|
141
|
+
Returns
|
|
142
|
+
-------
|
|
143
|
+
Any
|
|
144
|
+
The recentred dataset.
|
|
145
|
+
"""
|
|
146
|
+
members = load_if_needed(context, dates, members)
|
|
147
|
+
centre = load_if_needed(context, dates, centre)
|
|
148
|
+
return _recentre(members=members, centre=centre, alpha=alpha)
|