anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import List
|
|
13
|
+
|
|
14
|
+
import earthkit.data as ekd
|
|
15
|
+
|
|
16
|
+
from .legacy import legacy_source
|
|
17
|
+
from .xarray import load_many
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@legacy_source(__file__)
def execute(context: Any, dates: List[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
    """Execute the loading of multiple NetCDF files.

    This is a thin wrapper: every argument is forwarded unchanged to
    ``load_many``.

    Parameters
    ----------
    context : object
        The context in which the function is executed.
    dates : list
        List of dates for which data is to be loaded.
    path : str
        Location of the NetCDF data, passed as-is to ``load_many``.
        NOTE(review): presumably a file path or path pattern -- confirm
        against ``load_many``.
    *args : tuple
        Additional positional arguments, forwarded to ``load_many``.
    **kwargs : dict
        Additional keyword arguments, forwarded to ``load_many``.

    Returns
    -------
    ekd.FieldList
        The result of ``load_many`` (annotated as a field list).
    """
    # The first argument is an emoji label (presumably used for tracing -- confirm).
    return load_many("📁", context, dates, path, *args, **kwargs)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import Dict
|
|
13
|
+
from typing import List
|
|
14
|
+
|
|
15
|
+
import earthkit.data as ekd
|
|
16
|
+
|
|
17
|
+
from .legacy import legacy_source
|
|
18
|
+
from .xarray import load_many
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@legacy_source(__file__)
def execute(context: Dict[str, Any], dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
    """Execute the data loading process from an OpenDAP source.

    This is a thin wrapper: every argument is forwarded unchanged to
    ``load_many``.

    Parameters
    ----------
    context : dict
        The context in which the function is executed.
    dates : list
        List of dates for which data is to be loaded.
    url : str
        The URL of the OpenDAP source, passed as-is to ``load_many``.
    *args : tuple
        Additional positional arguments, forwarded to ``load_many``.
    **kwargs : dict
        Additional keyword arguments, forwarded to ``load_many``.

    Returns
    -------
    ekd.FieldList
        The result of ``load_many`` (annotated as a field list).
    """
    # The first argument is an emoji label (presumably used for tracing -- confirm).
    return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -7,16 +7,29 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
+
import datetime
|
|
10
11
|
import glob
|
|
11
|
-
import
|
|
12
|
+
from typing import Any
|
|
13
|
+
from typing import Generator
|
|
14
|
+
from typing import List
|
|
15
|
+
from typing import Tuple
|
|
12
16
|
|
|
13
17
|
from earthkit.data.utils.patterns import Pattern
|
|
14
18
|
|
|
15
|
-
LOG = logging.getLogger(__name__)
|
|
16
19
|
|
|
20
|
+
def _expand(paths: List[str]) -> Generator[str, None, None]:
|
|
21
|
+
"""Expand the given paths to include all matching file paths.
|
|
17
22
|
|
|
18
|
-
|
|
23
|
+
Parameters
|
|
24
|
+
----------
|
|
25
|
+
paths : List[str]
|
|
26
|
+
List of paths to expand.
|
|
19
27
|
|
|
28
|
+
Returns
|
|
29
|
+
-------
|
|
30
|
+
Generator[str]
|
|
31
|
+
Expanded file paths.
|
|
32
|
+
"""
|
|
20
33
|
if not isinstance(paths, list):
|
|
21
34
|
paths = [paths]
|
|
22
35
|
|
|
@@ -40,7 +53,25 @@ def _expand(paths):
|
|
|
40
53
|
yield path
|
|
41
54
|
|
|
42
55
|
|
|
43
|
-
def iterate_patterns(
|
|
56
|
+
def iterate_patterns(
|
|
57
|
+
path: str, dates: List[datetime.datetime], **kwargs: Any
|
|
58
|
+
) -> Generator[Tuple[str, List[str]], None, None]:
|
|
59
|
+
"""Iterate over patterns and expand them with given dates and additional keyword arguments.
|
|
60
|
+
|
|
61
|
+
Parameters
|
|
62
|
+
----------
|
|
63
|
+
path : str
|
|
64
|
+
The pattern path to iterate over.
|
|
65
|
+
dates : List[datetime.datetime]
|
|
66
|
+
List of datetime objects to substitute in the pattern.
|
|
67
|
+
**kwargs : Any
|
|
68
|
+
Additional keyword arguments to substitute in the pattern.
|
|
69
|
+
|
|
70
|
+
Returns
|
|
71
|
+
-------
|
|
72
|
+
Generator[Tuple[str, List[str]]]
|
|
73
|
+
The expanded path and list of ISO formatted dates.
|
|
74
|
+
"""
|
|
44
75
|
given_paths = path if isinstance(path, list) else [path]
|
|
45
76
|
|
|
46
77
|
dates = [d.isoformat() for d in dates]
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
from copy import deepcopy
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import Dict
|
|
13
|
+
from typing import List
|
|
14
|
+
from typing import Union
|
|
15
|
+
|
|
16
|
+
from anemoi.datasets.compute.recentre import recentre as _recentre
|
|
17
|
+
|
|
18
|
+
from .legacy import legacy_source
|
|
19
|
+
from .mars import mars
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def to_list(x: Any) -> List:
    """Convert the input to a list.

    Parameters
    ----------
    x : Any
        The input to convert. A list is returned as-is, a tuple is copied
        into a new list, a string is split on ``'/'`` and any other value is
        wrapped in a single-element list.

    Returns
    -------
    list
        The converted list.
    """
    if isinstance(x, list):
        return x
    if isinstance(x, tuple):
        # Honour the ``List`` return contract instead of leaking the tuple.
        return list(x)
    if isinstance(x, str):
        return x.split("/")
    return [x]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def normalise_number(number: Union[list, tuple, str]) -> List[int]:
    """Normalise the input number to a list of integers.

    Three spellings are recognised once the input has been passed through
    ``to_list``:

    * ``first/to/last/by/step`` -- inclusive range with an explicit step;
    * ``first/to/last`` -- inclusive range with a step of one;
    * anything else -- an explicit enumeration of values.

    Parameters
    ----------
    number : Union[list, tuple, str]
        The number to normalise.

    Returns
    -------
    list
        The normalised list of integers.

    Raises
    ------
    ValueError
        If an element cannot be converted with ``int``.
    """
    number = to_list(number)

    if len(number) > 4 and (number[1] == "to" and number[3] == "by"):
        # ``+ 1`` makes the upper bound inclusive.
        return list(range(int(number[0]), int(number[2]) + 1, int(number[4])))

    if len(number) > 2 and number[1] == "to":
        return list(range(int(number[0]), int(number[2]) + 1))

    # Convert the explicit enumeration too, so every branch returns integers
    # as promised by the annotation (previously strings leaked through here).
    return [int(n) for n in number]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def normalise_request(request: Dict) -> Dict:
    """Return a normalised deep copy of a request dictionary.

    Parameters
    ----------
    request : dict
        The request dictionary to normalise. It is not modified.

    Returns
    -------
    dict
        A copy in which ``number`` (if present) went through
        ``normalise_number``, and ``time`` (if present) and ``param`` went
        through ``to_list``.

    Raises
    ------
    KeyError
        If the request has no ``param`` entry.
    """
    normalised = deepcopy(request)

    # Optional keys, each paired with the function that normalises it.
    optional_converters = (("number", normalise_number), ("time", to_list))
    for key, convert in optional_converters:
        if key in normalised:
            normalised[key] = convert(normalised[key])

    # "param" is accessed unconditionally, so it is effectively mandatory.
    normalised["param"] = to_list(normalised["param"])
    return normalised
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def load_if_needed(context: Any, dates: Any, dict_or_dataset: Union[Dict, Any]) -> Any:
    """Resolve a request dictionary into a dataset; pass anything else through.

    Parameters
    ----------
    context : Any
        The context, forwarded to ``mars``.
    dates : Any
        The dates, forwarded to ``mars``.
    dict_or_dataset : Union[dict, Any]
        Either a request dictionary or an already loaded dataset.

    Returns
    -------
    Any
        The result of ``mars`` on the normalised request when a dictionary
        was given, otherwise the input object itself.
    """
    # Anything that is not a dictionary is returned untouched.
    if not isinstance(dict_or_dataset, dict):
        return dict_or_dataset

    request = normalise_request(dict_or_dataset)
    return mars(context, dates, request)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@legacy_source(__file__)
def recentre(
    context: Any,
    dates: Any,
    members: Union[Dict, Any],
    centre: Union[Dict, Any],
    alpha: float = 1.0,
    remapping: Dict = {},
    patches: Dict = {},
) -> Any:
    """Recentre the members dataset using the centre dataset.

    ``members`` and ``centre`` are each passed through ``load_if_needed``
    and the results are handed to the ``recentre`` routine imported from
    ``anemoi.datasets.compute.recentre``.

    Parameters
    ----------
    context : Any
        The context, forwarded to ``load_if_needed``.
    dates : Any
        The dates, forwarded to ``load_if_needed``.
    members : Union[dict, Any]
        The members dataset or request dictionary.
    centre : Union[dict, Any]
        The centre dataset or request dictionary.
    alpha : float, optional
        The alpha value for recentering. Defaults to 1.0.
    remapping : dict, optional
        Accepted but not used by this function. Defaults to {}.
    patches : dict, optional
        Accepted but not used by this function. Defaults to {}.

    Returns
    -------
    Any
        The recentred dataset.
    """
    # Members are resolved before the centre, as in the call order below.
    member_fields = load_if_needed(context, dates, members)
    centre_fields = load_if_needed(context, dates, centre)

    return _recentre(members=member_fields, centre=centre_fields, alpha=alpha)


execute = recentre
|
|
@@ -7,14 +7,37 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import Dict
|
|
13
|
+
from typing import List
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
10
16
|
from earthkit.data import from_source
|
|
11
17
|
|
|
12
18
|
from anemoi.datasets.create.utils import to_datetime_list
|
|
13
19
|
|
|
14
|
-
|
|
20
|
+
from .legacy import legacy_source
|
|
21
|
+
|
|
15
22
|
|
|
23
|
+
@legacy_source(__file__)
|
|
24
|
+
def source(context: Optional[Any], dates: List[datetime], **kwargs: Any) -> Any:
|
|
25
|
+
"""Generates a source based on the provided context, dates, and additional keyword arguments.
|
|
16
26
|
|
|
17
|
-
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
context : Optional[Any]
|
|
30
|
+
The context in which the source is generated.
|
|
31
|
+
dates : List[datetime]
|
|
32
|
+
A list of datetime objects representing the dates.
|
|
33
|
+
**kwargs : Any
|
|
34
|
+
Additional keyword arguments for the source generation.
|
|
35
|
+
|
|
36
|
+
Returns
|
|
37
|
+
-------
|
|
38
|
+
Any
|
|
39
|
+
The generated source.
|
|
40
|
+
"""
|
|
18
41
|
name = kwargs.pop("name")
|
|
19
42
|
context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
|
|
20
43
|
if kwargs["date"] == "$from_dates":
|
|
@@ -29,7 +52,7 @@ execute = source
|
|
|
29
52
|
if __name__ == "__main__":
|
|
30
53
|
import yaml
|
|
31
54
|
|
|
32
|
-
config = yaml.safe_load(
|
|
55
|
+
config: Dict[str, Any] = yaml.safe_load(
|
|
33
56
|
"""
|
|
34
57
|
name: mars
|
|
35
58
|
class: ea
|
|
@@ -42,9 +65,8 @@ if __name__ == "__main__":
|
|
|
42
65
|
time: $from_dates
|
|
43
66
|
"""
|
|
44
67
|
)
|
|
45
|
-
dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
|
|
68
|
+
dates: List[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
|
|
46
69
|
dates = to_datetime_list(dates)
|
|
47
70
|
|
|
48
|
-
DEBUG = True
|
|
49
71
|
for f in source(None, dates, **config):
|
|
50
72
|
print(f, f.to_numpy().mean())
|
|
@@ -9,21 +9,50 @@
|
|
|
9
9
|
|
|
10
10
|
import datetime
|
|
11
11
|
from collections import defaultdict
|
|
12
|
+
from typing import Any
|
|
13
|
+
from typing import Dict
|
|
14
|
+
from typing import List
|
|
15
|
+
from typing import Tuple
|
|
12
16
|
|
|
13
17
|
from earthkit.data.core.temporary import temp_file
|
|
14
18
|
from earthkit.data.readers.grib.output import new_grib_output
|
|
15
19
|
|
|
16
|
-
from anemoi.datasets.create.functions import assert_is_fieldlist
|
|
17
20
|
from anemoi.datasets.create.utils import to_datetime_list
|
|
18
21
|
|
|
22
|
+
from .legacy import legacy_source
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
|
|
25
|
+
def _date_to_datetime(d: Any) -> Any:
|
|
26
|
+
"""Converts a date string or a list/tuple of date strings to datetime objects.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
d : Any
|
|
31
|
+
A date string or a list/tuple of date strings.
|
|
32
|
+
|
|
33
|
+
Returns
|
|
34
|
+
-------
|
|
35
|
+
Any
|
|
36
|
+
A datetime object or a list/tuple of datetime objects.
|
|
37
|
+
"""
|
|
21
38
|
if isinstance(d, (list, tuple)):
|
|
22
39
|
return [_date_to_datetime(x) for x in d]
|
|
23
40
|
return datetime.datetime.fromisoformat(d)
|
|
24
41
|
|
|
25
42
|
|
|
26
|
-
def normalise_time_delta(t):
|
|
43
|
+
def normalise_time_delta(t: Any) -> datetime.timedelta:
|
|
44
|
+
"""Normalizes a time delta string to a datetime.timedelta object.
|
|
45
|
+
|
|
46
|
+
Parameters
|
|
47
|
+
----------
|
|
48
|
+
t : Any
|
|
49
|
+
A time delta string ending with 'h' or a datetime.timedelta object.
|
|
50
|
+
|
|
51
|
+
Returns
|
|
52
|
+
-------
|
|
53
|
+
datetime.timedelta
|
|
54
|
+
A normalized datetime.timedelta object.
|
|
55
|
+
"""
|
|
27
56
|
if isinstance(t, datetime.timedelta):
|
|
28
57
|
assert t == datetime.timedelta(hours=t.hours), t
|
|
29
58
|
|
|
@@ -34,7 +63,19 @@ def normalise_time_delta(t):
|
|
|
34
63
|
return t
|
|
35
64
|
|
|
36
65
|
|
|
37
|
-
def group_by_field(ds):
|
|
66
|
+
def group_by_field(ds: Any) -> Dict[Tuple, List[Any]]:
|
|
67
|
+
"""Groups fields by their metadata excluding 'date', 'time', and 'step'.
|
|
68
|
+
|
|
69
|
+
Parameters
|
|
70
|
+
----------
|
|
71
|
+
ds : Any
|
|
72
|
+
A dataset object.
|
|
73
|
+
|
|
74
|
+
Returns
|
|
75
|
+
-------
|
|
76
|
+
Dict[Tuple, List[Any]]
|
|
77
|
+
A dictionary where keys are tuples of metadata items and values are lists of fields.
|
|
78
|
+
"""
|
|
38
79
|
d = defaultdict(list)
|
|
39
80
|
for field in ds.order_by("valid_datetime"):
|
|
40
81
|
m = field.metadata(namespace="mars")
|
|
@@ -45,7 +86,24 @@ def group_by_field(ds):
|
|
|
45
86
|
return d
|
|
46
87
|
|
|
47
88
|
|
|
48
|
-
|
|
89
|
+
@legacy_source(__file__)
|
|
90
|
+
def tendencies(dates: List[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
|
|
91
|
+
"""Computes tendencies for the given dates and time increment.
|
|
92
|
+
|
|
93
|
+
Parameters
|
|
94
|
+
----------
|
|
95
|
+
dates : List[datetime.datetime]
|
|
96
|
+
A list of datetime objects.
|
|
97
|
+
time_increment : Any
|
|
98
|
+
A time increment string ending with 'h' or a datetime.timedelta object.
|
|
99
|
+
**kwargs : Any
|
|
100
|
+
Additional keyword arguments.
|
|
101
|
+
|
|
102
|
+
Returns
|
|
103
|
+
-------
|
|
104
|
+
Any
|
|
105
|
+
A dataset object with computed tendencies.
|
|
106
|
+
"""
|
|
49
107
|
print("✅", kwargs)
|
|
50
108
|
time_increment = normalise_time_delta(time_increment)
|
|
51
109
|
|
|
@@ -53,7 +111,7 @@ def tendencies(dates, time_increment, **kwargs):
|
|
|
53
111
|
all_dates = sorted(list(set(dates + shifted_dates)))
|
|
54
112
|
|
|
55
113
|
# from .mars import execute as mars
|
|
56
|
-
from anemoi.datasets.create.
|
|
114
|
+
from anemoi.datasets.create.mars import execute as mars
|
|
57
115
|
|
|
58
116
|
ds = mars(dates=all_dates, **kwargs)
|
|
59
117
|
|
|
@@ -107,7 +165,6 @@ def tendencies(dates, time_increment, **kwargs):
|
|
|
107
165
|
from earthkit.data import from_source
|
|
108
166
|
|
|
109
167
|
ds = from_source("file", path)
|
|
110
|
-
assert_is_fieldlist(ds)
|
|
111
168
|
# save a reference to the tmp file so it is deleted
|
|
112
169
|
# only when the dataset is not used anymore
|
|
113
170
|
ds._tmp = tmp
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# (C) Copyright 2025 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
from typing import Dict
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import earthkit.data as ekd
|
|
15
|
+
|
|
16
|
+
from anemoi.datasets.create.typing import DateList
|
|
17
|
+
|
|
18
|
+
from ..source import Source
|
|
19
|
+
from .xarray_support import XarrayFieldList
|
|
20
|
+
from .xarray_support import load_many
|
|
21
|
+
from .xarray_support import load_one
|
|
22
|
+
|
|
23
|
+
__all__ = ["load_many", "load_one", "XarrayFieldList"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class XarraySourceBase(Source):
    """An Xarray base data source, intended to be subclassed.

    ``execute`` forwards the class-level settings below, together with the
    keyword arguments captured at construction time, to ``load_many``.
    """

    emoji = "✖️"  # For tracing

    # Forwarded to ``load_many`` as the ``options``, ``flavour`` and ``patch``
    # keyword arguments.
    options: Optional[Dict[str, Any]] = None
    flavour: Optional[Dict[str, Any]] = None
    patch: Optional[Dict[str, Any]] = None

    # Forwarded to ``load_many`` as ``pattern``; set from ``path`` or ``url``.
    path_or_url: Optional[str] = None

    def __init__(
        self,
        context: Any,
        path: Optional[str] = None,
        url: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Initialise the source.

        Parameters
        ----------
        context : Any
            The context for the data source.
        path : str, optional
            Location of the data given as a path. Mutually exclusive with ``url``.
        url : str, optional
            Location of the data given as a URL. Mutually exclusive with ``path``.
        *args : Any
            Additional positional arguments.
        **kwargs : Any
            Additional keyword arguments.

        Raises
        ------
        ValueError
            If both ``path`` and ``url`` are given.
        """
        super().__init__(context, *args, **kwargs)

        if path is not None and url is not None:
            raise ValueError("Cannot specify both path and url")

        # Both may be None, in which case ``path_or_url`` stays None.
        self.path_or_url = path if path is not None else url

        # NOTE(review): ``self.args`` is stored but ``execute`` only forwards
        # ``self.kwargs`` -- confirm that dropping positional arguments is intended.
        self.args = args
        self.kwargs = kwargs

    def execute(self, dates: DateList) -> ekd.FieldList:
        """Execute the data loading process for the given dates.

        Parameters
        ----------
        dates : DateList
            List of dates for which data needs to be loaded.

        Returns
        -------
        ekd.FieldList
            The loaded data fields.
        """

        # For now, just a simple wrapper around load_many
        # TODO: move the implementation here

        return load_many(
            self.emoji,
            self.context,
            dates,
            pattern=self.path_or_url,
            options=self.options,
            flavour=self.flavour,
            patch=self.patch,
            **self.kwargs,
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class XarraySource(XarraySourceBase):
    """Concrete Xarray source that inherits everything from ``XarraySourceBase`` unchanged."""

    pass
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from . import source_registry
|
|
12
|
+
from .xarray import XarraySourceBase
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@source_registry.register("xarray_kerchunk")
class XarrayKerchunkSource(XarraySourceBase):
    """An Xarray data source that uses the `kerchunk` engine."""

    emoji = "🧱"

    def __init__(self, context, json, *args, **kwargs: dict):
        """Initialise the source and configure it to open a ``reference://`` store.

        Parameters
        ----------
        context : Any
            The context for the data source.
        json : Any
            Value passed as the ``fo`` storage option.
            NOTE(review): presumably the kerchunk reference JSON -- confirm.
        *args : Any
            Additional positional arguments, forwarded to the base class.
        **kwargs : Any
            Additional keyword arguments, forwarded to the base class.
        """
        super().__init__(context, *args, **kwargs)

        # The references are read via ``fo``; the remote protocol is "s3"
        # with anonymous access.
        storage_options = {
            "fo": json,
            "remote_protocol": "s3",
            "remote_options": {"anon": True},
        }
        backend_kwargs = {
            "consolidated": False,
            "storage_options": storage_options,
        }

        # Overrides whatever the base initialiser derived from path/url.
        self.path_or_url = "reference://"
        self.options = {"engine": "zarr", "backend_kwargs": backend_kwargs}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
The code under this directory will be migrated to earthkit-data in the future.
|