anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +558 -62
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.18.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import glob
|
|
12
|
+
import logging
|
|
13
|
+
from typing import Any
|
|
14
|
+
from typing import Dict
|
|
15
|
+
from typing import List
|
|
16
|
+
from typing import Optional
|
|
17
|
+
from typing import Union
|
|
18
|
+
|
|
19
|
+
import earthkit.data as ekd
|
|
20
|
+
from earthkit.data import from_source
|
|
21
|
+
from earthkit.data.indexing.fieldlist import FieldArray
|
|
22
|
+
from earthkit.data.utils.patterns import Pattern
|
|
23
|
+
|
|
24
|
+
from .legacy import legacy_source
|
|
25
|
+
|
|
26
|
+
LOG = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _load(context: Any, name: str, record: Dict[str, Any]) -> tuple:
|
|
30
|
+
"""Load data from a given source.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
context : Any
|
|
35
|
+
The context in which the function is executed.
|
|
36
|
+
name : str
|
|
37
|
+
The name of the data source.
|
|
38
|
+
record : dict of str to Any
|
|
39
|
+
The record containing source information.
|
|
40
|
+
|
|
41
|
+
Returns
|
|
42
|
+
-------
|
|
43
|
+
tuple
|
|
44
|
+
A tuple containing the data as a numpy array and the UUID of the HGrid.
|
|
45
|
+
"""
|
|
46
|
+
ds = None
|
|
47
|
+
|
|
48
|
+
param = record["param"]
|
|
49
|
+
|
|
50
|
+
if "path" in record:
|
|
51
|
+
context.info(f"Using {name} from {record['path']} (param={param})")
|
|
52
|
+
ds = from_source("file", record["path"])
|
|
53
|
+
|
|
54
|
+
if "url" in record:
|
|
55
|
+
context.info(f"Using {name} from {record['url']} (param={param})")
|
|
56
|
+
ds = from_source("url", record["url"])
|
|
57
|
+
|
|
58
|
+
ds = ds.sel(param=param)
|
|
59
|
+
|
|
60
|
+
assert len(ds) == 1, f"{name} {param}, expected one field, got {len(ds)}"
|
|
61
|
+
ds = ds[0]
|
|
62
|
+
|
|
63
|
+
return ds.to_numpy(flatten=True), ds.metadata("uuidOfHGrid")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class Geography:
    """Hold the coordinates of an unstructured grid and validate fields against it.

    The latitudes and longitudes are read with ``_load``; both must carry the
    same ``uuidOfHGrid`` and have the same number of points.

    Parameters
    ----------
    context : Any
        The context in which the function is executed.
    latitudes : dict of str to Any
        Record describing where to read the latitudes from.
    longitudes : dict of str to Any
        Record describing where to read the longitudes from.
    """

    def __init__(self, context: Any, latitudes: Dict[str, Any], longitudes: Dict[str, Any]) -> None:
        """Load both coordinate fields and verify that they describe the same grid.

        Parameters
        ----------
        context : Any
            The context in which the function is executed.
        latitudes : dict of str to Any
            Record describing where to read the latitudes from.
        longitudes : dict of str to Any
            Record describing where to read the longitudes from.
        """
        lat_values, lat_uuid = _load(context, "latitudes", latitudes)
        lon_values, lon_uuid = _load(context, "longitudes", longitudes)

        assert lat_uuid == lon_uuid, f"uuidOfHGrid mismatch: lat={lat_uuid} != lon={lon_uuid}"

        context.info(f"Latitudes: {len(lat_values)}, Longitudes: {len(lon_values)}")
        assert len(lat_values) == len(lon_values)

        self.uuidOfHGrid = lat_uuid
        self.latitudes = lat_values
        self.longitudes = lon_values
        # Set to False once one field has been successfully checked.
        self.first = True

    def check(self, field: Any) -> None:
        """Verify that a field belongs to this grid.

        Only the first field passed in is actually compared (for performance
        reasons); subsequent calls return immediately.

        Parameters
        ----------
        field : Any
            The field to check. Its ``uuidOfHGrid`` metadata is compared with
            the one of the coordinates.
        """
        if not self.first:
            return

        assert (
            field.metadata("uuidOfHGrid") == self.uuidOfHGrid
        ), f"uuidOfHGrid mismatch: {field.metadata('uuidOfHGrid')} != {self.uuidOfHGrid}"

        self.first = False
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class AddGrid:
    """An earthkit-data field wrapper that adds grid information.

    Every attribute that is not defined on the wrapper itself is forwarded to
    the wrapped field; only ``grid_points`` and ``resolution`` are overridden.

    Parameters
    ----------
    field : Any
        The field to wrap.
    geography : Geography
        The geography information.
    """

    def __init__(self, field: Any, geography: "Geography") -> None:
        """Wrap ``field`` and attach the coordinates held by ``geography``.

        Parameters
        ----------
        field : Any
            The field to wrap.
        geography : Geography
            The geography information. Its ``check`` method is called with
            ``field`` before the coordinates are attached.
        """
        self._field = field

        geography.check(field)

        self._latitudes = geography.latitudes
        self._longitudes = geography.longitudes

    def __getattr__(self, name: str) -> Any:
        """Forward the lookup of unknown attributes to the wrapped field.

        Parameters
        ----------
        name : str
            The name of the attribute.

        Returns
        -------
        Any
            The attribute value.

        Raises
        ------
        AttributeError
            If the wrapped field is not set yet, or does not have the attribute.
        """
        # __getattr__ is only called when normal lookup fails. If "_field"
        # itself is missing (instance created through __new__, e.g. by copy
        # or pickle), evaluating self._field below would re-enter this method
        # forever; fail with a regular AttributeError instead.
        if name == "_field":
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
        return getattr(self._field, name)

    def __repr__(self) -> str:
        """Get the string representation of the wrapped field.

        Returns
        -------
        str
            The string representation.
        """
        return repr(self._field)

    def grid_points(self) -> tuple:
        """Get the grid points (latitudes and longitudes).

        Returns
        -------
        tuple
            The latitudes and longitudes taken from the geography.
        """
        return self._latitudes, self._longitudes

    @property
    def resolution(self) -> str:
        """Get the resolution of the grid, which is always ``"unknown"``."""
        return "unknown"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def check(ds: Any, paths: List[str], **kwargs: Any) -> None:
    """Check if the dataset matches the expected number of fields.

    The expected number of fields is the product of the lengths of all the
    list/tuple values in ``kwargs``; scalar values count as one.

    Parameters
    ----------
    ds : Any
        The dataset to check. Only its length is used.
    paths : list of str
        List of paths to the GRIB files (used in the error message only).
    **kwargs : Any
        The selection criteria that were used to build ``ds``.

    Raises
    ------
    ValueError
        If the number of fields does not match the expected count.
    """
    expected = 1
    for value in kwargs.values():
        if isinstance(value, (tuple, list)):
            expected *= len(value)

    found = len(ds)
    if found != expected:
        raise ValueError(f"Expected {expected} fields, got {found} (kwargs={kwargs}, paths={paths})")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _expand(paths: List[str]) -> Any:
|
|
219
|
+
"""Expand the given paths using glob.
|
|
220
|
+
|
|
221
|
+
Parameters
|
|
222
|
+
----------
|
|
223
|
+
paths : list of str
|
|
224
|
+
List of paths to expand.
|
|
225
|
+
|
|
226
|
+
Returns
|
|
227
|
+
-------
|
|
228
|
+
Any
|
|
229
|
+
The expanded paths.
|
|
230
|
+
"""
|
|
231
|
+
for path in paths:
|
|
232
|
+
cnt = 0
|
|
233
|
+
for p in glob.glob(path):
|
|
234
|
+
yield p
|
|
235
|
+
cnt += 1
|
|
236
|
+
if cnt == 0:
|
|
237
|
+
yield path
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@legacy_source(__file__)
def execute(
    context: Any,
    dates: List[Any],
    path: Union[str, List[str]],
    latitudes: Optional[Dict[str, Any]] = None,
    longitudes: Optional[Dict[str, Any]] = None,
    *args: Any,
    **kwargs: Any,
) -> ekd.FieldList:
    """Execute the function to load data from GRIB files.

    Parameters
    ----------
    context : Any
        The context in which the function is executed. Its ``trace`` method
        and ``partial_ok`` attribute are used here.
    dates : list of Any
        List of dates. Each one must provide an ``isoformat()`` method.
    path : str or list of str
        Path pattern, or list of path patterns, of the GRIB files. Patterns
        are substituted with the dates and ``kwargs``, then expanded with glob.
    latitudes : dict of str to Any, optional
        Latitude information. Only used if ``longitudes`` is also given.
    longitudes : dict of str to Any, optional
        Longitude information. Only used if ``latitudes`` is also given.
    *args : Any
        Additional positional arguments, passed to the pattern substitution.
    **kwargs : Any
        Additional keyword arguments, used both for the pattern substitution
        and as selection criteria on the fields.

    Returns
    -------
    ekd.FieldList
        The selected fields. When both ``latitudes`` and ``longitudes`` are
        given, each field is wrapped in ``AddGrid``.

    Raises
    ------
    ValueError
        If a MARS interpolation parameter is given, or if the number of
        fields found is not the expected one (unless ``context.partial_ok``).
    """
    # This validation does not depend on the files; do it once, before any I/O.
    for name in ("grid", "area", "rotation", "frame", "resol", "bitmap"):
        if name in kwargs:
            raise ValueError(f"MARS interpolation parameter '{name}' not supported")

    given_paths = path if isinstance(path, list) else [path]

    geography = None
    if latitudes is not None and longitudes is not None:
        geography = Geography(context, latitudes, longitudes)

    ds = from_source("empty")
    dates = [d.isoformat() for d in dates]

    for pattern in given_paths:
        paths = Pattern(pattern, ignore_missing_keys=True).substitute(*args, date=dates, **kwargs)

        for filename in _expand(paths):
            context.trace("📁", "PATH", filename)
            s = from_source("file", filename)
            s = s.sel(valid_datetime=dates, **kwargs)
            ds = ds + s

    if kwargs and not context.partial_ok:
        check(ds, given_paths, valid_datetime=dates, **kwargs)

    if geography is not None:
        ds = FieldArray([AddGrid(_, geography) for _ in ds])

    if len(ds) == 0:
        LOG.warning(f"No fields found for {dates} in {given_paths} (kwargs={kwargs})")

    return ds
|
|
@@ -8,22 +8,56 @@
|
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import Dict
|
|
13
|
+
from typing import List
|
|
14
|
+
from typing import Union
|
|
11
15
|
|
|
12
16
|
from earthkit.data.core.fieldlist import MultiFieldList
|
|
13
17
|
|
|
14
|
-
from anemoi.datasets.create.
|
|
18
|
+
from anemoi.datasets.create.sources.mars import mars
|
|
19
|
+
|
|
20
|
+
from .legacy import legacy_source
|
|
15
21
|
|
|
16
22
|
LOGGER = logging.getLogger(__name__)
|
|
17
23
|
|
|
18
24
|
|
|
19
|
-
def _to_list(x):
|
|
25
|
+
def _to_list(x: Union[list, tuple, Any]) -> List[Any]:
|
|
26
|
+
"""Converts the input to a list if it is not already a list or tuple.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
x : Any
|
|
31
|
+
The input to convert.
|
|
32
|
+
|
|
33
|
+
Returns
|
|
34
|
+
-------
|
|
35
|
+
list
|
|
36
|
+
A list containing the input elements.
|
|
37
|
+
"""
|
|
20
38
|
if isinstance(x, (list, tuple)):
|
|
21
39
|
return x
|
|
22
40
|
return [x]
|
|
23
41
|
|
|
24
42
|
|
|
25
|
-
|
|
26
|
-
|
|
43
|
+
@legacy_source(__file__)
|
|
44
|
+
def hindcasts(context: Any, dates: List[Any], **request: Dict[str, Any]) -> MultiFieldList:
|
|
45
|
+
"""Generates hindcast requests based on the provided dates and request parameters.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
context : Any
|
|
50
|
+
The context containing the dates provider and trace method.
|
|
51
|
+
dates : List[Any]
|
|
52
|
+
A list of dates for which to generate hindcast requests.
|
|
53
|
+
request : Dict[str, Any]
|
|
54
|
+
Additional request parameters.
|
|
55
|
+
|
|
56
|
+
Returns
|
|
57
|
+
-------
|
|
58
|
+
MultiFieldList
|
|
59
|
+
A MultiFieldList containing the hindcast data.
|
|
60
|
+
"""
|
|
27
61
|
from anemoi.datasets.dates import HindcastsDates
|
|
28
62
|
|
|
29
63
|
provider = context.dates_provider
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# (C) Copyright 2025- Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import inspect
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
from typing import Any
|
|
15
|
+
from typing import Callable
|
|
16
|
+
|
|
17
|
+
from ..source import Source
|
|
18
|
+
from . import source_registry
|
|
19
|
+
|
|
20
|
+
LOG = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LegacySource(Source):
    """Base class of the sources built from legacy ``execute`` functions.

    The positional and keyword arguments are remembered on the instance.

    Parameters
    ----------
    context : Any
        The context in which the source is created.
    *args : tuple
        Positional arguments.
    **kwargs : dict
        Keyword arguments.
    """

    def __init__(self, context: Any, *args: Any, **kwargs: Any) -> None:
        """Initialise the base class, then record the arguments.

        Parameters
        ----------
        context : Any
            The context in which the source is created.
        *args : Any
            Positional arguments, stored as ``self.args``.
        **kwargs : Any
            Keyword arguments, stored as ``self.kwargs``.
        """
        super().__init__(context, *args, **kwargs)
        self.args, self.kwargs = args, kwargs
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class legacy_source:
    """A decorator class registering a legacy ``execute`` function as a source.

    Parameters
    ----------
    name : str
        The name of the legacy source. This is typically the ``__file__`` of
        the module defining it: the directory and the extension are removed.
    """

    def __init__(self, name: str) -> None:
        """Derive the source name from a file name.

        Parameters
        ----------
        name : str
            A path; only its base name, without extension, is kept.
        """
        self.name = os.path.splitext(os.path.basename(name))[0]

    def __call__(self, execute: Callable) -> Callable:
        """Create a ``LegacySource`` subclass around ``execute`` and register it.

        Parameters
        ----------
        execute : function
            The execute function to be wrapped. It is called as
            ``execute(context, dates, *args, **kwargs)``.

        Returns
        -------
        function
            The ``execute`` function itself, unchanged.
        """
        decorator = self
        class_name = f"Legacy{self.name.title()}Source"
        qualified_name = f"{execute.__module__}.{execute.__name__}"

        def execute_wrapper(self, dates) -> Any:
            """Call the legacy function with the arguments stored on the source."""
            try:
                return execute(self.context, dates, *self.args, **self.kwargs)
            except TypeError:
                # Most likely a mismatch between the recipe and the function
                # signature: give some help before re-raising.
                LOG.error(f"Error executing source {decorator.name} from {qualified_name}")
                LOG.error(f"Function signature is: {inspect.signature(execute)}")
                LOG.error(f"Arguments are: {self.args=}, {self.kwargs=}")
                raise

        members = {"execute": execute_wrapper, "_source": qualified_name}
        source_class = type(class_name, (LegacySource,), members)

        source_registry.register(self.name)(source_class)

        return execute
|
|
@@ -9,6 +9,12 @@
|
|
|
9
9
|
|
|
10
10
|
import datetime
|
|
11
11
|
import re
|
|
12
|
+
from typing import Any
|
|
13
|
+
from typing import Dict
|
|
14
|
+
from typing import Generator
|
|
15
|
+
from typing import List
|
|
16
|
+
from typing import Optional
|
|
17
|
+
from typing import Union
|
|
12
18
|
|
|
13
19
|
from anemoi.utils.humanize import did_you_mean
|
|
14
20
|
from earthkit.data import from_source
|
|
@@ -16,16 +22,44 @@ from earthkit.data.utils.availability import Availability
|
|
|
16
22
|
|
|
17
23
|
from anemoi.datasets.create.utils import to_datetime_list
|
|
18
24
|
|
|
25
|
+
from .legacy import legacy_source
|
|
26
|
+
|
|
19
27
|
DEBUG = False
|
|
20
28
|
|
|
21
29
|
|
|
22
|
-
def to_list(x):
|
|
30
|
+
def to_list(x: Union[list, tuple, Any]) -> list:
    """Wrap a scalar in a list; leave lists and tuples untouched.

    Parameters
    ----------
    x : Any
        The input value to be converted.

    Returns
    -------
    list
        ``x`` itself if it is already a list or a tuple (tuples are returned
        as they are, not converted), otherwise a one-element list holding ``x``.
    """
    return x if isinstance(x, (list, tuple)) else [x]
|
|
26
46
|
|
|
27
47
|
|
|
28
|
-
def _date_to_datetime(
|
|
48
|
+
def _date_to_datetime(
|
|
49
|
+
d: Union[datetime.datetime, list, tuple, str],
|
|
50
|
+
) -> Union[datetime.datetime, List[datetime.datetime]]:
|
|
51
|
+
"""Converts the input date(s) to datetime objects.
|
|
52
|
+
|
|
53
|
+
Parameters
|
|
54
|
+
----------
|
|
55
|
+
d : Union[datetime.datetime, list, tuple, str]
|
|
56
|
+
The input date(s) to be converted.
|
|
57
|
+
|
|
58
|
+
Returns
|
|
59
|
+
-------
|
|
60
|
+
Union[datetime.datetime, List[datetime.datetime]]
|
|
61
|
+
A datetime object or a list of datetime objects.
|
|
62
|
+
"""
|
|
29
63
|
if isinstance(d, datetime.datetime):
|
|
30
64
|
return d
|
|
31
65
|
if isinstance(d, (list, tuple)):
|
|
@@ -33,8 +67,19 @@ def _date_to_datetime(d):
|
|
|
33
67
|
return datetime.datetime.fromisoformat(d)
|
|
34
68
|
|
|
35
69
|
|
|
36
|
-
def expand_to_by(x):
|
|
70
|
+
def expand_to_by(x: Union[str, int, list]) -> Union[str, int, list]:
|
|
71
|
+
"""Expands a range expression to a list of values.
|
|
72
|
+
|
|
73
|
+
Parameters
|
|
74
|
+
----------
|
|
75
|
+
x : Union[str, int, list]
|
|
76
|
+
The input range expression.
|
|
37
77
|
|
|
78
|
+
Returns
|
|
79
|
+
-------
|
|
80
|
+
Union[str, int, list]
|
|
81
|
+
A list of expanded values.
|
|
82
|
+
"""
|
|
38
83
|
if isinstance(x, (str, int)):
|
|
39
84
|
return expand_to_by(str(x).split("/"))
|
|
40
85
|
|
|
@@ -52,7 +97,19 @@ def expand_to_by(x):
|
|
|
52
97
|
return x
|
|
53
98
|
|
|
54
99
|
|
|
55
|
-
def normalise_time_delta(t):
|
|
100
|
+
def normalise_time_delta(t: Union[datetime.timedelta, str]) -> datetime.timedelta:
|
|
101
|
+
"""Normalizes a time delta string to a datetime.timedelta object.
|
|
102
|
+
|
|
103
|
+
Parameters
|
|
104
|
+
----------
|
|
105
|
+
t : Union[datetime.timedelta, str]
|
|
106
|
+
The input time delta string.
|
|
107
|
+
|
|
108
|
+
Returns
|
|
109
|
+
-------
|
|
110
|
+
datetime.timedelta
|
|
111
|
+
A normalized datetime.timedelta object.
|
|
112
|
+
"""
|
|
56
113
|
if isinstance(t, datetime.timedelta):
|
|
57
114
|
assert t == datetime.timedelta(hours=t.hours), t
|
|
58
115
|
|
|
@@ -63,14 +120,49 @@ def normalise_time_delta(t):
|
|
|
63
120
|
return t
|
|
64
121
|
|
|
65
122
|
|
|
66
|
-
def _normalise_time(t):
|
|
123
|
+
def _normalise_time(t: Union[int, str]) -> str:
|
|
124
|
+
"""Normalizes a time value to a string in HHMM format.
|
|
125
|
+
|
|
126
|
+
Parameters
|
|
127
|
+
----------
|
|
128
|
+
t : Union[int, str]
|
|
129
|
+
The input time value.
|
|
130
|
+
|
|
131
|
+
Returns
|
|
132
|
+
-------
|
|
133
|
+
str
|
|
134
|
+
A string representing the normalized time.
|
|
135
|
+
"""
|
|
67
136
|
t = int(t)
|
|
68
137
|
if t < 100:
|
|
69
138
|
t * 100
|
|
70
139
|
return "{:04d}".format(t)
|
|
71
140
|
|
|
72
141
|
|
|
73
|
-
def _expand_mars_request(
|
|
142
|
+
def _expand_mars_request(
|
|
143
|
+
request: Dict[str, Any],
|
|
144
|
+
date: datetime.datetime,
|
|
145
|
+
request_already_using_valid_datetime: bool = False,
|
|
146
|
+
date_key: str = "date",
|
|
147
|
+
) -> List[Dict[str, Any]]:
|
|
148
|
+
"""Expands a MARS request with the given date and other parameters.
|
|
149
|
+
|
|
150
|
+
Parameters
|
|
151
|
+
----------
|
|
152
|
+
request : Dict[str, Any]
|
|
153
|
+
The input MARS request.
|
|
154
|
+
date : datetime.datetime
|
|
155
|
+
The date to be used in the request.
|
|
156
|
+
request_already_using_valid_datetime : bool, optional
|
|
157
|
+
Flag indicating if the request already uses valid datetime.
|
|
158
|
+
date_key : str, optional
|
|
159
|
+
The key for the date in the request.
|
|
160
|
+
|
|
161
|
+
Returns
|
|
162
|
+
-------
|
|
163
|
+
List[Dict[str, Any]]
|
|
164
|
+
A list of expanded MARS requests.
|
|
165
|
+
"""
|
|
74
166
|
requests = []
|
|
75
167
|
|
|
76
168
|
user_step = to_list(expand_to_by(request.get("step", [0])))
|
|
@@ -130,11 +222,29 @@ def _expand_mars_request(request, date, request_already_using_valid_datetime=Fal
|
|
|
130
222
|
|
|
131
223
|
|
|
132
224
|
def factorise_requests(
|
|
133
|
-
dates,
|
|
134
|
-
*requests,
|
|
135
|
-
request_already_using_valid_datetime=False,
|
|
136
|
-
date_key="date",
|
|
137
|
-
):
|
|
225
|
+
dates: List[datetime.datetime],
|
|
226
|
+
*requests: Dict[str, Any],
|
|
227
|
+
request_already_using_valid_datetime: bool = False,
|
|
228
|
+
date_key: str = "date",
|
|
229
|
+
) -> Generator[Dict[str, Any], None, None]:
|
|
230
|
+
"""Factorizes the requests based on the given dates.
|
|
231
|
+
|
|
232
|
+
Parameters
|
|
233
|
+
----------
|
|
234
|
+
dates : List[datetime.datetime]
|
|
235
|
+
The list of dates to be used in the requests.
|
|
236
|
+
requests : Dict[str, Any]
|
|
237
|
+
The input requests to be factorized.
|
|
238
|
+
request_already_using_valid_datetime : bool, optional
|
|
239
|
+
Flag indicating if the requests already use valid datetime.
|
|
240
|
+
date_key : str, optional
|
|
241
|
+
The key for the date in the requests.
|
|
242
|
+
|
|
243
|
+
Returns
|
|
244
|
+
-------
|
|
245
|
+
Generator[Dict[str, Any], None, None]
|
|
246
|
+
Factorized requests.
|
|
247
|
+
"""
|
|
138
248
|
updates = []
|
|
139
249
|
for req in requests:
|
|
140
250
|
# req = normalise_request(req)
|
|
@@ -158,7 +268,19 @@ def factorise_requests(
|
|
|
158
268
|
yield r
|
|
159
269
|
|
|
160
270
|
|
|
161
|
-
def use_grib_paramid(r):
|
|
271
|
+
def use_grib_paramid(r: Dict[str, Any]) -> Dict[str, Any]:
|
|
272
|
+
"""Converts the parameter short names to GRIB parameter IDs.
|
|
273
|
+
|
|
274
|
+
Parameters
|
|
275
|
+
----------
|
|
276
|
+
r : Dict[str, Any]
|
|
277
|
+
The input request containing parameter short names.
|
|
278
|
+
|
|
279
|
+
Returns
|
|
280
|
+
-------
|
|
281
|
+
Dict[str, Any]
|
|
282
|
+
The request with parameter IDs.
|
|
283
|
+
"""
|
|
162
284
|
from anemoi.utils.grib import shortname_to_paramid
|
|
163
285
|
|
|
164
286
|
params = r["param"]
|
|
@@ -240,15 +362,40 @@ MARS_KEYS = [
|
|
|
240
362
|
]
|
|
241
363
|
|
|
242
364
|
|
|
365
|
+
@legacy_source(__file__)
|
|
243
366
|
def mars(
|
|
244
|
-
context,
|
|
245
|
-
dates,
|
|
246
|
-
*requests,
|
|
247
|
-
request_already_using_valid_datetime=False,
|
|
248
|
-
date_key="date",
|
|
249
|
-
use_cdsapi_dataset=None,
|
|
250
|
-
**kwargs,
|
|
251
|
-
):
|
|
367
|
+
context: Any,
|
|
368
|
+
dates: List[datetime.datetime],
|
|
369
|
+
*requests: Dict[str, Any],
|
|
370
|
+
request_already_using_valid_datetime: bool = False,
|
|
371
|
+
date_key: str = "date",
|
|
372
|
+
use_cdsapi_dataset: Optional[str] = None,
|
|
373
|
+
**kwargs: Any,
|
|
374
|
+
) -> Any:
|
|
375
|
+
"""Executes MARS requests based on the given context, dates, and other parameters.
|
|
376
|
+
|
|
377
|
+
Parameters
|
|
378
|
+
----------
|
|
379
|
+
context : Any
|
|
380
|
+
The context for the requests.
|
|
381
|
+
dates : List[datetime.datetime]
|
|
382
|
+
The list of dates to be used in the requests.
|
|
383
|
+
requests : Dict[str, Any]
|
|
384
|
+
The input requests to be executed.
|
|
385
|
+
request_already_using_valid_datetime : bool, optional
|
|
386
|
+
Flag indicating if the requests already use valid datetime.
|
|
387
|
+
date_key : str, optional
|
|
388
|
+
The key for the date in the requests.
|
|
389
|
+
use_cdsapi_dataset : Optional[str], optional
|
|
390
|
+
The dataset to be used with CDS API.
|
|
391
|
+
kwargs : Any
|
|
392
|
+
Additional keyword arguments for the requests.
|
|
393
|
+
|
|
394
|
+
Returns
|
|
395
|
+
-------
|
|
396
|
+
Any
|
|
397
|
+
The resulting dataset.
|
|
398
|
+
"""
|
|
252
399
|
|
|
253
400
|
if not requests:
|
|
254
401
|
requests = [kwargs]
|
|
@@ -318,6 +465,7 @@ def mars(
|
|
|
318
465
|
|
|
319
466
|
execute = mars
|
|
320
467
|
|
|
468
|
+
|
|
321
469
|
if __name__ == "__main__":
|
|
322
470
|
import yaml
|
|
323
471
|
|