anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/create/__init__.py +42 -1
- anemoi/datasets/create/config.py +2 -0
- anemoi/datasets/create/input/__init__.py +43 -63
- anemoi/datasets/create/input/action.py +296 -236
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +2 -1
- anemoi/datasets/create/input/misc.py +0 -71
- anemoi/datasets/create/input/repeated_dates.py +0 -114
- anemoi/datasets/create/input/result/__init__.py +17 -0
- anemoi/datasets/create/input/{result.py → result/field.py} +9 -89
- anemoi/datasets/create/sources/accumulations.py +74 -94
- anemoi/datasets/create/sources/accumulations2.py +16 -45
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -38
- anemoi/datasets/create/sources/empty.py +26 -22
- anemoi/datasets/create/sources/forcings.py +29 -28
- anemoi/datasets/create/sources/grib.py +92 -72
- anemoi/datasets/create/sources/grib_index.py +46 -42
- anemoi/datasets/create/sources/hindcasts.py +56 -55
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +107 -131
- anemoi/datasets/create/sources/netcdf.py +28 -24
- anemoi/datasets/create/sources/opendap.py +28 -24
- anemoi/datasets/create/sources/recentre.py +42 -41
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -48
- anemoi/datasets/create/sources/tendencies.py +67 -94
- anemoi/datasets/create/sources/xarray_support/__init__.py +29 -24
- anemoi/datasets/create/sources/xarray_support/field.py +4 -4
- anemoi/datasets/create/sources/xarray_zarr.py +28 -24
- anemoi/datasets/create/sources/zenodo.py +43 -39
- anemoi/datasets/create/utils.py +0 -42
- anemoi/datasets/data/dataset.py +6 -0
- anemoi/datasets/data/grids.py +0 -152
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/stores.py +7 -9
- anemoi/datasets/dates/__init__.py +2 -0
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +1 -178
- anemoi/datasets/schemas/recipe.json +131 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +5 -2
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/RECORD +51 -51
- anemoi/datasets/create/filter.py +0 -47
- anemoi/datasets/create/input/concat.py +0 -161
- anemoi/datasets/create/input/context.py +0 -86
- anemoi/datasets/create/input/empty.py +0 -53
- anemoi/datasets/create/input/filter.py +0 -117
- anemoi/datasets/create/input/function.py +0 -232
- anemoi/datasets/create/input/join.py +0 -129
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -173
- anemoi/datasets/create/input/template.py +0 -161
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,8 @@ from typing import Any
|
|
|
12
12
|
|
|
13
13
|
from anemoi.datasets.compute.recentre import recentre as _recentre
|
|
14
14
|
|
|
15
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
16
17
|
from .mars import mars
|
|
17
18
|
|
|
18
19
|
|
|
@@ -105,43 +106,43 @@ def load_if_needed(context: Any, dates: Any, dict_or_dataset: dict | Any) -> Any
|
|
|
105
106
|
return dict_or_dataset
|
|
106
107
|
|
|
107
108
|
|
|
108
|
-
@
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
109
|
+
@source_registry.register("recentre")
|
|
110
|
+
class RecentreSource(LegacySource):
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _execute(
|
|
114
|
+
context: Any,
|
|
115
|
+
dates: Any,
|
|
116
|
+
members: dict | Any,
|
|
117
|
+
centre: dict | Any,
|
|
118
|
+
alpha: float = 1.0,
|
|
119
|
+
remapping: dict = {},
|
|
120
|
+
patches: dict = {},
|
|
121
|
+
) -> Any:
|
|
122
|
+
"""Recentres the members dataset using the centre dataset.
|
|
123
|
+
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
context : Any
|
|
127
|
+
The context for recentering.
|
|
128
|
+
dates : Any
|
|
129
|
+
The dates for recentering.
|
|
130
|
+
members : Union[dict, Any]
|
|
131
|
+
The members dataset or request dictionary.
|
|
132
|
+
centre : Union[dict, Any]
|
|
133
|
+
The centre dataset or request dictionary.
|
|
134
|
+
alpha : float, optional
|
|
135
|
+
The alpha value for recentering. Defaults to 1.0.
|
|
136
|
+
remapping : dict, optional
|
|
137
|
+
The remapping dictionary. Defaults to {}.
|
|
138
|
+
patches : dict, optional
|
|
139
|
+
The patches dictionary. Defaults to {}.
|
|
140
|
+
|
|
141
|
+
Returns
|
|
142
|
+
-------
|
|
143
|
+
Any
|
|
144
|
+
The recentred dataset.
|
|
145
|
+
"""
|
|
146
|
+
members = load_if_needed(context, dates, members)
|
|
147
|
+
centre = load_if_needed(context, dates, centre)
|
|
148
|
+
return _recentre(members=members, centre=centre, alpha=alpha)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from anemoi.transform.fields import new_field_with_valid_datetime
|
|
15
|
+
from anemoi.transform.fields import new_fieldlist_from_list
|
|
16
|
+
|
|
17
|
+
from anemoi.datasets.create.input.repeated_dates import DateMapper
|
|
18
|
+
from anemoi.datasets.create.source import Source
|
|
19
|
+
from anemoi.datasets.create.sources import source_registry
|
|
20
|
+
|
|
21
|
+
LOG = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@source_registry.register("repeated_dates")
|
|
25
|
+
class RepeatedDatesSource(Source):
|
|
26
|
+
|
|
27
|
+
def __init__(self, context, source: Any, mode: str, **kwargs) -> None:
|
|
28
|
+
# assert False, (context, source, mode, kwargs)
|
|
29
|
+
super().__init__(context, **kwargs)
|
|
30
|
+
self.mapper = DateMapper.from_mode(mode, source, kwargs)
|
|
31
|
+
self.source = source
|
|
32
|
+
|
|
33
|
+
def execute(self, group_of_dates):
|
|
34
|
+
source = self.context.create_source(self.source, "data_sources", str(id(self)))
|
|
35
|
+
|
|
36
|
+
result = []
|
|
37
|
+
for one_date_group, many_dates_group in self.mapper.transform(group_of_dates):
|
|
38
|
+
print(f"one_date_group: {one_date_group}, many_dates_group: {many_dates_group}")
|
|
39
|
+
source_results = source(self.context, one_date_group)
|
|
40
|
+
for field in source_results:
|
|
41
|
+
for date in many_dates_group:
|
|
42
|
+
result.append(new_field_with_valid_datetime(field, date))
|
|
43
|
+
|
|
44
|
+
return new_fieldlist_from_list(result)
|
|
@@ -12,58 +12,36 @@ from typing import Any
|
|
|
12
12
|
|
|
13
13
|
from earthkit.data import from_source
|
|
14
14
|
|
|
15
|
-
from anemoi.datasets.create.
|
|
15
|
+
from anemoi.datasets.create.sources import source_registry
|
|
16
16
|
|
|
17
|
-
from .legacy import
|
|
17
|
+
from .legacy import LegacySource
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
@
|
|
21
|
-
|
|
22
|
-
"""Generates a source based on the provided context, dates, and additional keyword arguments.
|
|
20
|
+
@source_registry.register("source")
|
|
21
|
+
class GenericSource(LegacySource):
|
|
23
22
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
The context in which the source is generated.
|
|
28
|
-
dates : List[datetime]
|
|
29
|
-
A list of datetime objects representing the dates.
|
|
30
|
-
**kwargs : Any
|
|
31
|
-
Additional keyword arguments for the source generation.
|
|
23
|
+
@staticmethod
|
|
24
|
+
def _execute(context: Any | None, dates: list[datetime], **kwargs: Any) -> Any:
|
|
25
|
+
"""Generates a source based on the provided context, dates, and additional keyword arguments.
|
|
32
26
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
|
|
42
|
-
if kwargs["time"] == "$from_dates":
|
|
43
|
-
kwargs["time"] = list({d.strftime("%H%M") for d in dates})
|
|
44
|
-
return from_source(name, **kwargs)
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
context : Optional[Any]
|
|
30
|
+
The context in which the source is generated.
|
|
31
|
+
dates : List[datetime]
|
|
32
|
+
A list of datetime objects representing the dates.
|
|
33
|
+
**kwargs : Any
|
|
34
|
+
Additional keyword arguments for the source generation.
|
|
45
35
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
import yaml
|
|
51
|
-
|
|
52
|
-
config: dict[str, Any] = yaml.safe_load(
|
|
36
|
+
Returns
|
|
37
|
+
-------
|
|
38
|
+
Any
|
|
39
|
+
The generated source.
|
|
53
40
|
"""
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
date: $from_dates
|
|
62
|
-
time: $from_dates
|
|
63
|
-
"""
|
|
64
|
-
)
|
|
65
|
-
dates: list[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
|
|
66
|
-
dates = to_datetime_list(dates)
|
|
67
|
-
|
|
68
|
-
for f in source(None, dates, **config):
|
|
69
|
-
print(f, f.to_numpy().mean())
|
|
41
|
+
name = kwargs.pop("name")
|
|
42
|
+
context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
|
|
43
|
+
if kwargs["date"] == "$from_dates":
|
|
44
|
+
kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
|
|
45
|
+
if kwargs["time"] == "$from_dates":
|
|
46
|
+
kwargs["time"] = list({d.strftime("%H%M") for d in dates})
|
|
47
|
+
return from_source(name, **kwargs)
|
|
@@ -14,9 +14,9 @@ from typing import Any
|
|
|
14
14
|
from earthkit.data.core.temporary import temp_file
|
|
15
15
|
from earthkit.data.readers.grib.output import new_grib_output
|
|
16
16
|
|
|
17
|
-
from anemoi.datasets.create.
|
|
17
|
+
from anemoi.datasets.create.sources import source_registry
|
|
18
18
|
|
|
19
|
-
from .legacy import
|
|
19
|
+
from .legacy import LegacySource
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def _date_to_datetime(d: Any) -> Any:
|
|
@@ -83,116 +83,89 @@ def group_by_field(ds: Any) -> dict[tuple, list[Any]]:
|
|
|
83
83
|
return d
|
|
84
84
|
|
|
85
85
|
|
|
86
|
-
@
|
|
87
|
-
|
|
88
|
-
"""Computes tendencies for the given dates and time increment.
|
|
86
|
+
@source_registry.register("tendencies")
|
|
87
|
+
class TendenciesSource(LegacySource):
|
|
89
88
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
A list of datetime objects.
|
|
94
|
-
time_increment : Any
|
|
95
|
-
A time increment string ending with 'h' or a datetime.timedelta object.
|
|
96
|
-
**kwargs : Any
|
|
97
|
-
Additional keyword arguments.
|
|
98
|
-
|
|
99
|
-
Returns
|
|
100
|
-
-------
|
|
101
|
-
Any
|
|
102
|
-
A dataset object with computed tendencies.
|
|
103
|
-
"""
|
|
104
|
-
print("✅", kwargs)
|
|
105
|
-
time_increment = normalise_time_delta(time_increment)
|
|
106
|
-
|
|
107
|
-
shifted_dates = [d - time_increment for d in dates]
|
|
108
|
-
all_dates = sorted(list(set(dates + shifted_dates)))
|
|
89
|
+
@staticmethod
|
|
90
|
+
def _execute(dates: list[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
|
|
91
|
+
"""Computes tendencies for the given dates and time increment.
|
|
109
92
|
|
|
110
|
-
|
|
111
|
-
|
|
93
|
+
Parameters
|
|
94
|
+
----------
|
|
95
|
+
dates : List[datetime.datetime]
|
|
96
|
+
A list of datetime objects.
|
|
97
|
+
time_increment : Any
|
|
98
|
+
A time increment string ending with 'h' or a datetime.timedelta object.
|
|
99
|
+
**kwargs : Any
|
|
100
|
+
Additional keyword arguments.
|
|
112
101
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
|
|
121
|
-
|
|
122
|
-
assert len(ds1) == len(ds2), (len(ds1), len(ds2))
|
|
123
|
-
|
|
124
|
-
group1 = group_by_field(ds1)
|
|
125
|
-
group2 = group_by_field(ds2)
|
|
102
|
+
Returns
|
|
103
|
+
-------
|
|
104
|
+
Any
|
|
105
|
+
A dataset object with computed tendencies.
|
|
106
|
+
"""
|
|
107
|
+
print("✅", kwargs)
|
|
108
|
+
time_increment = normalise_time_delta(time_increment)
|
|
126
109
|
|
|
127
|
-
|
|
110
|
+
shifted_dates = [d - time_increment for d in dates]
|
|
111
|
+
all_dates = sorted(list(set(dates + shifted_dates)))
|
|
128
112
|
|
|
129
|
-
|
|
130
|
-
tmp = temp_file()
|
|
131
|
-
path = tmp.path
|
|
132
|
-
out = new_grib_output(path)
|
|
113
|
+
from .mars import mars
|
|
133
114
|
|
|
134
|
-
|
|
135
|
-
assert len(group1[k]) == len(group2[k]), k
|
|
136
|
-
print()
|
|
137
|
-
print("❌", k)
|
|
115
|
+
ds = mars(dates=all_dates, **kwargs)
|
|
138
116
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
k,
|
|
143
|
-
field.metadata(k),
|
|
144
|
-
b_field.metadata(k),
|
|
145
|
-
)
|
|
117
|
+
dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
|
|
118
|
+
for d in all_dates:
|
|
119
|
+
assert d.isoformat() in dates_in_data, d
|
|
146
120
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
assert c.shape == b.shape, (c.shape, b.shape)
|
|
121
|
+
ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
|
|
122
|
+
ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
|
|
150
123
|
|
|
151
|
-
|
|
152
|
-
# Actual computation happens here
|
|
153
|
-
x = c - b
|
|
154
|
-
################
|
|
124
|
+
assert len(ds1) == len(ds2), (len(ds1), len(ds2))
|
|
155
125
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
out.write(x, template=field)
|
|
126
|
+
group1 = group_by_field(ds1)
|
|
127
|
+
group2 = group_by_field(ds2)
|
|
159
128
|
|
|
160
|
-
|
|
129
|
+
assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
|
|
161
130
|
|
|
162
|
-
|
|
131
|
+
# prepare output tmp file so we can read it back
|
|
132
|
+
tmp = temp_file()
|
|
133
|
+
path = tmp.path
|
|
134
|
+
out = new_grib_output(path)
|
|
163
135
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
136
|
+
for k in group1:
|
|
137
|
+
assert len(group1[k]) == len(group2[k]), k
|
|
138
|
+
print()
|
|
139
|
+
print("❌", k)
|
|
168
140
|
|
|
169
|
-
|
|
141
|
+
for field, b_field in zip(group1[k], group2[k]):
|
|
142
|
+
for k in ["param", "level", "number", "grid", "shape"]:
|
|
143
|
+
assert field.metadata(k) == b_field.metadata(k), (
|
|
144
|
+
k,
|
|
145
|
+
field.metadata(k),
|
|
146
|
+
b_field.metadata(k),
|
|
147
|
+
)
|
|
170
148
|
|
|
149
|
+
c = field.to_numpy()
|
|
150
|
+
b = b_field.to_numpy()
|
|
151
|
+
assert c.shape == b.shape, (c.shape, b.shape)
|
|
171
152
|
|
|
172
|
-
|
|
153
|
+
################
|
|
154
|
+
# Actual computation happens here
|
|
155
|
+
x = c - b
|
|
156
|
+
################
|
|
173
157
|
|
|
174
|
-
|
|
175
|
-
|
|
158
|
+
assert x.shape == c.shape, c.shape
|
|
159
|
+
print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
|
|
160
|
+
out.write(x, template=field)
|
|
176
161
|
|
|
177
|
-
|
|
178
|
-
"""
|
|
162
|
+
out.close()
|
|
179
163
|
|
|
180
|
-
|
|
181
|
-
time_increment: 12h
|
|
182
|
-
database: marser
|
|
183
|
-
class: ea
|
|
184
|
-
# date: computed automatically
|
|
185
|
-
# time: computed automatically
|
|
186
|
-
expver: "0001"
|
|
187
|
-
grid: 20.0/20.0
|
|
188
|
-
levtype: sfc
|
|
189
|
-
param: [2t]
|
|
190
|
-
"""
|
|
191
|
-
)["config"]
|
|
164
|
+
from earthkit.data import from_source
|
|
192
165
|
|
|
193
|
-
|
|
194
|
-
|
|
166
|
+
ds = from_source("file", path)
|
|
167
|
+
# save a reference to the tmp file so it is deleted
|
|
168
|
+
# only when the dataset is not used anymore
|
|
169
|
+
ds._tmp = tmp
|
|
195
170
|
|
|
196
|
-
|
|
197
|
-
for f in tendencies(dates, **config):
|
|
198
|
-
print(f, f.to_numpy().mean())
|
|
171
|
+
return ds
|
|
@@ -17,7 +17,8 @@ from earthkit.data.core.fieldlist import MultiFieldList
|
|
|
17
17
|
|
|
18
18
|
from anemoi.datasets.create.sources.patterns import iterate_patterns
|
|
19
19
|
|
|
20
|
-
from ..
|
|
20
|
+
from .. import source_registry
|
|
21
|
+
from ..legacy import LegacySource
|
|
21
22
|
from .fieldlist import XarrayFieldList
|
|
22
23
|
|
|
23
24
|
LOG = logging.getLogger(__name__)
|
|
@@ -152,26 +153,30 @@ def load_many(emoji: str, context: Any, dates: list[datetime.datetime], pattern:
|
|
|
152
153
|
return MultiFieldList(result)
|
|
153
154
|
|
|
154
155
|
|
|
155
|
-
@
|
|
156
|
-
|
|
157
|
-
""
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
156
|
+
@source_registry.register("xarray")
|
|
157
|
+
class LegacyXarraySource(LegacySource):
|
|
158
|
+
name = "xarray"
|
|
159
|
+
|
|
160
|
+
@staticmethod
|
|
161
|
+
def _execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
162
|
+
"""Executes the loading of datasets.
|
|
163
|
+
|
|
164
|
+
Parameters
|
|
165
|
+
----------
|
|
166
|
+
context : Any
|
|
167
|
+
Context object.
|
|
168
|
+
dates : List[str]
|
|
169
|
+
List of dates.
|
|
170
|
+
url : str
|
|
171
|
+
URL pattern for loading datasets.
|
|
172
|
+
*args : Any
|
|
173
|
+
Additional arguments.
|
|
174
|
+
**kwargs : Any
|
|
175
|
+
Additional keyword arguments.
|
|
176
|
+
|
|
177
|
+
Returns
|
|
178
|
+
-------
|
|
179
|
+
ekd.FieldList
|
|
180
|
+
The loaded datasets.
|
|
181
|
+
"""
|
|
182
|
+
return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -121,16 +121,16 @@ class XArrayField(Field):
|
|
|
121
121
|
Index to select a specific element, by default None.
|
|
122
122
|
"""
|
|
123
123
|
if index is not None:
|
|
124
|
-
values = self.selection[index]
|
|
124
|
+
values = self.selection[index].values
|
|
125
125
|
else:
|
|
126
|
-
values = self.selection
|
|
126
|
+
values = self.selection.values
|
|
127
127
|
|
|
128
128
|
assert dtype is None
|
|
129
129
|
|
|
130
130
|
if flatten:
|
|
131
|
-
return values.
|
|
131
|
+
return values.flatten()
|
|
132
132
|
|
|
133
|
-
return values
|
|
133
|
+
return values
|
|
134
134
|
|
|
135
135
|
@cached_property
|
|
136
136
|
def _metadata(self) -> XArrayMetadata:
|
|
@@ -11,30 +11,34 @@ from typing import Any
|
|
|
11
11
|
|
|
12
12
|
import earthkit.data as ekd
|
|
13
13
|
|
|
14
|
-
from .
|
|
14
|
+
from . import source_registry
|
|
15
|
+
from .legacy import LegacySource
|
|
15
16
|
from .xarray import load_many
|
|
16
17
|
|
|
17
18
|
|
|
18
|
-
@
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
19
|
+
@source_registry.register("xarray_zarr")
|
|
20
|
+
class XarrayZarrSource(LegacySource):
|
|
21
|
+
|
|
22
|
+
@staticmethod
|
|
23
|
+
def _execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
24
|
+
"""Execute the data loading process.
|
|
25
|
+
|
|
26
|
+
Parameters
|
|
27
|
+
----------
|
|
28
|
+
context : Any
|
|
29
|
+
The context in which the execution occurs.
|
|
30
|
+
dates : List[str]
|
|
31
|
+
List of dates for which data is to be loaded.
|
|
32
|
+
url : str
|
|
33
|
+
The URL from which data is to be loaded.
|
|
34
|
+
*args : tuple
|
|
35
|
+
Additional positional arguments.
|
|
36
|
+
**kwargs : dict
|
|
37
|
+
Additional keyword arguments.
|
|
38
|
+
|
|
39
|
+
Returns
|
|
40
|
+
-------
|
|
41
|
+
ekd.FieldList
|
|
42
|
+
The loaded data.
|
|
43
|
+
"""
|
|
44
|
+
return load_many("🇿", context, dates, url, *args, **kwargs)
|
|
@@ -14,54 +14,58 @@ import earthkit.data as ekd
|
|
|
14
14
|
from earthkit.data.core.fieldlist import MultiFieldList
|
|
15
15
|
from earthkit.data.sources.url import download_and_cache
|
|
16
16
|
|
|
17
|
-
from .
|
|
17
|
+
from . import source_registry
|
|
18
|
+
from .legacy import LegacySource
|
|
18
19
|
from .patterns import iterate_patterns
|
|
19
20
|
from .xarray import load_one
|
|
20
21
|
|
|
21
22
|
|
|
22
|
-
@
|
|
23
|
-
|
|
24
|
-
"""Executes the download and processing of files from Zenodo.
|
|
23
|
+
@source_registry.register("zenodo")
|
|
24
|
+
class ZenodoSource(LegacySource):
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
The context in which the function is executed.
|
|
30
|
-
dates : Any
|
|
31
|
-
The dates for which the data is required.
|
|
32
|
-
record_id : str
|
|
33
|
-
The Zenodo record ID.
|
|
34
|
-
file_key : str
|
|
35
|
-
The key to identify the file.
|
|
36
|
-
*args : Any
|
|
37
|
-
Additional arguments.
|
|
38
|
-
**kwargs : Any
|
|
39
|
-
Additional keyword arguments.
|
|
26
|
+
@staticmethod
|
|
27
|
+
def _execute(context: Any, dates: Any, record_id: str, file_key: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
28
|
+
"""Executes the download and processing of files from Zenodo.
|
|
40
29
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
30
|
+
Parameters
|
|
31
|
+
----------
|
|
32
|
+
context : Any
|
|
33
|
+
The context in which the function is executed.
|
|
34
|
+
dates : Any
|
|
35
|
+
The dates for which the data is required.
|
|
36
|
+
record_id : str
|
|
37
|
+
The Zenodo record ID.
|
|
38
|
+
file_key : str
|
|
39
|
+
The key to identify the file.
|
|
40
|
+
*args : Any
|
|
41
|
+
Additional arguments.
|
|
42
|
+
**kwargs : Any
|
|
43
|
+
Additional keyword arguments.
|
|
47
44
|
|
|
48
|
-
|
|
45
|
+
Returns
|
|
46
|
+
-------
|
|
47
|
+
MultiFieldList
|
|
48
|
+
A list of fields loaded from the downloaded files.
|
|
49
|
+
"""
|
|
50
|
+
import requests
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
url = URLPATTERN.format(record_id=record_id)
|
|
52
|
-
r = requests.get(url)
|
|
53
|
-
r.raise_for_status()
|
|
54
|
-
record: dict[str, Any] = r.json()
|
|
52
|
+
result: list[Any] = []
|
|
55
53
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
URLPATTERN = "https://zenodo.org/api/records/{record_id}"
|
|
55
|
+
url = URLPATTERN.format(record_id=record_id)
|
|
56
|
+
r = requests.get(url)
|
|
57
|
+
r.raise_for_status()
|
|
58
|
+
record: dict[str, Any] = r.json()
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
60
|
+
urls: dict[str, str] = {}
|
|
61
|
+
for file in record["files"]:
|
|
62
|
+
urls[file["key"]] = file["links"]["self"]
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
for url, dates in iterate_patterns(file_key, dates, **kwargs):
|
|
65
|
+
if url not in urls:
|
|
66
|
+
continue
|
|
66
67
|
|
|
67
|
-
|
|
68
|
+
path = download_and_cache(urls[url])
|
|
69
|
+
result.append(load_one("?", context, dates, path, options={}, flavour=None, **kwargs))
|
|
70
|
+
|
|
71
|
+
return MultiFieldList(result)
|