anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +42 -1
  6. anemoi/datasets/create/config.py +2 -0
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +9 -89
  16. anemoi/datasets/create/sources/accumulations.py +74 -94
  17. anemoi/datasets/create/sources/accumulations2.py +16 -45
  18. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  19. anemoi/datasets/create/sources/constants.py +39 -38
  20. anemoi/datasets/create/sources/empty.py +26 -22
  21. anemoi/datasets/create/sources/forcings.py +29 -28
  22. anemoi/datasets/create/sources/grib.py +92 -72
  23. anemoi/datasets/create/sources/grib_index.py +46 -42
  24. anemoi/datasets/create/sources/hindcasts.py +56 -55
  25. anemoi/datasets/create/sources/legacy.py +10 -62
  26. anemoi/datasets/create/sources/mars.py +107 -131
  27. anemoi/datasets/create/sources/netcdf.py +28 -24
  28. anemoi/datasets/create/sources/opendap.py +28 -24
  29. anemoi/datasets/create/sources/recentre.py +42 -41
  30. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  31. anemoi/datasets/create/sources/source.py +26 -48
  32. anemoi/datasets/create/sources/tendencies.py +67 -94
  33. anemoi/datasets/create/sources/xarray_support/__init__.py +29 -24
  34. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  35. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  36. anemoi/datasets/create/sources/zenodo.py +43 -39
  37. anemoi/datasets/create/utils.py +0 -42
  38. anemoi/datasets/data/dataset.py +6 -0
  39. anemoi/datasets/data/grids.py +0 -152
  40. anemoi/datasets/data/rolling_average.py +141 -0
  41. anemoi/datasets/data/stores.py +7 -9
  42. anemoi/datasets/dates/__init__.py +2 -0
  43. anemoi/datasets/dumper.py +76 -0
  44. anemoi/datasets/grids.py +1 -178
  45. anemoi/datasets/schemas/recipe.json +131 -0
  46. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +5 -2
  47. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/RECORD +51 -51
  48. anemoi/datasets/create/filter.py +0 -47
  49. anemoi/datasets/create/input/concat.py +0 -161
  50. anemoi/datasets/create/input/context.py +0 -86
  51. anemoi/datasets/create/input/empty.py +0 -53
  52. anemoi/datasets/create/input/filter.py +0 -117
  53. anemoi/datasets/create/input/function.py +0 -232
  54. anemoi/datasets/create/input/join.py +0 -129
  55. anemoi/datasets/create/input/pipe.py +0 -66
  56. anemoi/datasets/create/input/step.py +0 -173
  57. anemoi/datasets/create/input/template.py +0 -161
  58. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  59. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  60. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  61. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,8 @@ from typing import Any
12
12
 
13
13
  from anemoi.datasets.compute.recentre import recentre as _recentre
14
14
 
15
- from .legacy import legacy_source
15
+ from . import source_registry
16
+ from .legacy import LegacySource
16
17
  from .mars import mars
17
18
 
18
19
 
@@ -105,43 +106,43 @@ def load_if_needed(context: Any, dates: Any, dict_or_dataset: dict | Any) -> Any
105
106
  return dict_or_dataset
106
107
 
107
108
 
108
- @legacy_source(__file__)
109
- def recentre(
110
- context: Any,
111
- dates: Any,
112
- members: dict | Any,
113
- centre: dict | Any,
114
- alpha: float = 1.0,
115
- remapping: dict = {},
116
- patches: dict = {},
117
- ) -> Any:
118
- """Recentres the members dataset using the centre dataset.
119
-
120
- Parameters
121
- ----------
122
- context : Any
123
- The context for recentering.
124
- dates : Any
125
- The dates for recentering.
126
- members : Union[dict, Any]
127
- The members dataset or request dictionary.
128
- centre : Union[dict, Any]
129
- The centre dataset or request dictionary.
130
- alpha : float, optional
131
- The alpha value for recentering. Defaults to 1.0.
132
- remapping : dict, optional
133
- The remapping dictionary. Defaults to {}.
134
- patches : dict, optional
135
- The patches dictionary. Defaults to {}.
136
-
137
- Returns
138
- -------
139
- Any
140
- The recentred dataset.
141
- """
142
- members = load_if_needed(context, dates, members)
143
- centre = load_if_needed(context, dates, centre)
144
- return _recentre(members=members, centre=centre, alpha=alpha)
145
-
146
-
147
- execute = recentre
109
+ @source_registry.register("recentre")
110
+ class RecentreSource(LegacySource):
111
+
112
+ @staticmethod
113
+ def _execute(
114
+ context: Any,
115
+ dates: Any,
116
+ members: dict | Any,
117
+ centre: dict | Any,
118
+ alpha: float = 1.0,
119
+ remapping: dict = {},
120
+ patches: dict = {},
121
+ ) -> Any:
122
+ """Recentres the members dataset using the centre dataset.
123
+
124
+ Parameters
125
+ ----------
126
+ context : Any
127
+ The context for recentering.
128
+ dates : Any
129
+ The dates for recentering.
130
+ members : Union[dict, Any]
131
+ The members dataset or request dictionary.
132
+ centre : Union[dict, Any]
133
+ The centre dataset or request dictionary.
134
+ alpha : float, optional
135
+ The alpha value for recentering. Defaults to 1.0.
136
+ remapping : dict, optional
137
+ The remapping dictionary. Defaults to {}.
138
+ patches : dict, optional
139
+ The patches dictionary. Defaults to {}.
140
+
141
+ Returns
142
+ -------
143
+ Any
144
+ The recentred dataset.
145
+ """
146
+ members = load_if_needed(context, dates, members)
147
+ centre = load_if_needed(context, dates, centre)
148
+ return _recentre(members=members, centre=centre, alpha=alpha)
@@ -0,0 +1,44 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import logging
12
+ from typing import Any
13
+
14
+ from anemoi.transform.fields import new_field_with_valid_datetime
15
+ from anemoi.transform.fields import new_fieldlist_from_list
16
+
17
+ from anemoi.datasets.create.input.repeated_dates import DateMapper
18
+ from anemoi.datasets.create.source import Source
19
+ from anemoi.datasets.create.sources import source_registry
20
+
21
+ LOG = logging.getLogger(__name__)
22
+
23
+
24
+ @source_registry.register("repeated_dates")
25
+ class RepeatedDatesSource(Source):
26
+
27
+ def __init__(self, context, source: Any, mode: str, **kwargs) -> None:
28
+ # assert False, (context, source, mode, kwargs)
29
+ super().__init__(context, **kwargs)
30
+ self.mapper = DateMapper.from_mode(mode, source, kwargs)
31
+ self.source = source
32
+
33
+ def execute(self, group_of_dates):
34
+ source = self.context.create_source(self.source, "data_sources", str(id(self)))
35
+
36
+ result = []
37
+ for one_date_group, many_dates_group in self.mapper.transform(group_of_dates):
38
+ print(f"one_date_group: {one_date_group}, many_dates_group: {many_dates_group}")
39
+ source_results = source(self.context, one_date_group)
40
+ for field in source_results:
41
+ for date in many_dates_group:
42
+ result.append(new_field_with_valid_datetime(field, date))
43
+
44
+ return new_fieldlist_from_list(result)
@@ -12,58 +12,36 @@ from typing import Any
12
12
 
13
13
  from earthkit.data import from_source
14
14
 
15
- from anemoi.datasets.create.utils import to_datetime_list
15
+ from anemoi.datasets.create.sources import source_registry
16
16
 
17
- from .legacy import legacy_source
17
+ from .legacy import LegacySource
18
18
 
19
19
 
20
- @legacy_source(__file__)
21
- def source(context: Any | None, dates: list[datetime], **kwargs: Any) -> Any:
22
- """Generates a source based on the provided context, dates, and additional keyword arguments.
20
+ @source_registry.register("source")
21
+ class GenericSource(LegacySource):
23
22
 
24
- Parameters
25
- ----------
26
- context : Optional[Any]
27
- The context in which the source is generated.
28
- dates : List[datetime]
29
- A list of datetime objects representing the dates.
30
- **kwargs : Any
31
- Additional keyword arguments for the source generation.
23
+ @staticmethod
24
+ def _execute(context: Any | None, dates: list[datetime], **kwargs: Any) -> Any:
25
+ """Generates a source based on the provided context, dates, and additional keyword arguments.
32
26
 
33
- Returns
34
- -------
35
- Any
36
- The generated source.
37
- """
38
- name = kwargs.pop("name")
39
- context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
40
- if kwargs["date"] == "$from_dates":
41
- kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
42
- if kwargs["time"] == "$from_dates":
43
- kwargs["time"] = list({d.strftime("%H%M") for d in dates})
44
- return from_source(name, **kwargs)
27
+ Parameters
28
+ ----------
29
+ context : Optional[Any]
30
+ The context in which the source is generated.
31
+ dates : List[datetime]
32
+ A list of datetime objects representing the dates.
33
+ **kwargs : Any
34
+ Additional keyword arguments for the source generation.
45
35
 
46
-
47
- execute = source
48
-
49
- if __name__ == "__main__":
50
- import yaml
51
-
52
- config: dict[str, Any] = yaml.safe_load(
36
+ Returns
37
+ -------
38
+ Any
39
+ The generated source.
53
40
  """
54
- name: mars
55
- class: ea
56
- expver: '0001'
57
- grid: 20.0/20.0
58
- levtype: sfc
59
- param: [2t]
60
- number: [0, 1]
61
- date: $from_dates
62
- time: $from_dates
63
- """
64
- )
65
- dates: list[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
66
- dates = to_datetime_list(dates)
67
-
68
- for f in source(None, dates, **config):
69
- print(f, f.to_numpy().mean())
41
+ name = kwargs.pop("name")
42
+ context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
43
+ if kwargs["date"] == "$from_dates":
44
+ kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
45
+ if kwargs["time"] == "$from_dates":
46
+ kwargs["time"] = list({d.strftime("%H%M") for d in dates})
47
+ return from_source(name, **kwargs)
@@ -14,9 +14,9 @@ from typing import Any
14
14
  from earthkit.data.core.temporary import temp_file
15
15
  from earthkit.data.readers.grib.output import new_grib_output
16
16
 
17
- from anemoi.datasets.create.utils import to_datetime_list
17
+ from anemoi.datasets.create.sources import source_registry
18
18
 
19
- from .legacy import legacy_source
19
+ from .legacy import LegacySource
20
20
 
21
21
 
22
22
  def _date_to_datetime(d: Any) -> Any:
@@ -83,116 +83,89 @@ def group_by_field(ds: Any) -> dict[tuple, list[Any]]:
83
83
  return d
84
84
 
85
85
 
86
- @legacy_source(__file__)
87
- def tendencies(dates: list[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
88
- """Computes tendencies for the given dates and time increment.
86
+ @source_registry.register("tendencies")
87
+ class TendenciesSource(LegacySource):
89
88
 
90
- Parameters
91
- ----------
92
- dates : List[datetime.datetime]
93
- A list of datetime objects.
94
- time_increment : Any
95
- A time increment string ending with 'h' or a datetime.timedelta object.
96
- **kwargs : Any
97
- Additional keyword arguments.
98
-
99
- Returns
100
- -------
101
- Any
102
- A dataset object with computed tendencies.
103
- """
104
- print("✅", kwargs)
105
- time_increment = normalise_time_delta(time_increment)
106
-
107
- shifted_dates = [d - time_increment for d in dates]
108
- all_dates = sorted(list(set(dates + shifted_dates)))
89
+ @staticmethod
90
+ def _execute(dates: list[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
91
+ """Computes tendencies for the given dates and time increment.
109
92
 
110
- # from .mars import execute as mars
111
- from anemoi.datasets.create.mars import execute as mars
93
+ Parameters
94
+ ----------
95
+ dates : List[datetime.datetime]
96
+ A list of datetime objects.
97
+ time_increment : Any
98
+ A time increment string ending with 'h' or a datetime.timedelta object.
99
+ **kwargs : Any
100
+ Additional keyword arguments.
112
101
 
113
- ds = mars(dates=all_dates, **kwargs)
114
-
115
- dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
116
- for d in all_dates:
117
- assert d.isoformat() in dates_in_data, d
118
-
119
- ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
120
- ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
121
-
122
- assert len(ds1) == len(ds2), (len(ds1), len(ds2))
123
-
124
- group1 = group_by_field(ds1)
125
- group2 = group_by_field(ds2)
102
+ Returns
103
+ -------
104
+ Any
105
+ A dataset object with computed tendencies.
106
+ """
107
+ print("✅", kwargs)
108
+ time_increment = normalise_time_delta(time_increment)
126
109
 
127
- assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
110
+ shifted_dates = [d - time_increment for d in dates]
111
+ all_dates = sorted(list(set(dates + shifted_dates)))
128
112
 
129
- # prepare output tmp file so we can read it back
130
- tmp = temp_file()
131
- path = tmp.path
132
- out = new_grib_output(path)
113
+ from .mars import mars
133
114
 
134
- for k in group1:
135
- assert len(group1[k]) == len(group2[k]), k
136
- print()
137
- print("❌", k)
115
+ ds = mars(dates=all_dates, **kwargs)
138
116
 
139
- for field, b_field in zip(group1[k], group2[k]):
140
- for k in ["param", "level", "number", "grid", "shape"]:
141
- assert field.metadata(k) == b_field.metadata(k), (
142
- k,
143
- field.metadata(k),
144
- b_field.metadata(k),
145
- )
117
+ dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
118
+ for d in all_dates:
119
+ assert d.isoformat() in dates_in_data, d
146
120
 
147
- c = field.to_numpy()
148
- b = b_field.to_numpy()
149
- assert c.shape == b.shape, (c.shape, b.shape)
121
+ ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
122
+ ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
150
123
 
151
- ################
152
- # Actual computation happens here
153
- x = c - b
154
- ################
124
+ assert len(ds1) == len(ds2), (len(ds1), len(ds2))
155
125
 
156
- assert x.shape == c.shape, c.shape
157
- print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
158
- out.write(x, template=field)
126
+ group1 = group_by_field(ds1)
127
+ group2 = group_by_field(ds2)
159
128
 
160
- out.close()
129
+ assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
161
130
 
162
- from earthkit.data import from_source
131
+ # prepare output tmp file so we can read it back
132
+ tmp = temp_file()
133
+ path = tmp.path
134
+ out = new_grib_output(path)
163
135
 
164
- ds = from_source("file", path)
165
- # save a reference to the tmp file so it is deleted
166
- # only when the dataset is not used anymore
167
- ds._tmp = tmp
136
+ for k in group1:
137
+ assert len(group1[k]) == len(group2[k]), k
138
+ print()
139
+ print("❌", k)
168
140
 
169
- return ds
141
+ for field, b_field in zip(group1[k], group2[k]):
142
+ for k in ["param", "level", "number", "grid", "shape"]:
143
+ assert field.metadata(k) == b_field.metadata(k), (
144
+ k,
145
+ field.metadata(k),
146
+ b_field.metadata(k),
147
+ )
170
148
 
149
+ c = field.to_numpy()
150
+ b = b_field.to_numpy()
151
+ assert c.shape == b.shape, (c.shape, b.shape)
171
152
 
172
- execute = tendencies
153
+ ################
154
+ # Actual computation happens here
155
+ x = c - b
156
+ ################
173
157
 
174
- if __name__ == "__main__":
175
- import yaml
158
+ assert x.shape == c.shape, c.shape
159
+ print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
160
+ out.write(x, template=field)
176
161
 
177
- config = yaml.safe_load(
178
- """
162
+ out.close()
179
163
 
180
- config:
181
- time_increment: 12h
182
- database: marser
183
- class: ea
184
- # date: computed automatically
185
- # time: computed automatically
186
- expver: "0001"
187
- grid: 20.0/20.0
188
- levtype: sfc
189
- param: [2t]
190
- """
191
- )["config"]
164
+ from earthkit.data import from_source
192
165
 
193
- dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
194
- dates = to_datetime_list(dates)
166
+ ds = from_source("file", path)
167
+ # save a reference to the tmp file so it is deleted
168
+ # only when the dataset is not used anymore
169
+ ds._tmp = tmp
195
170
 
196
- DEBUG = True
197
- for f in tendencies(dates, **config):
198
- print(f, f.to_numpy().mean())
171
+ return ds
@@ -17,7 +17,8 @@ from earthkit.data.core.fieldlist import MultiFieldList
17
17
 
18
18
  from anemoi.datasets.create.sources.patterns import iterate_patterns
19
19
 
20
- from ..legacy import legacy_source
20
+ from .. import source_registry
21
+ from ..legacy import LegacySource
21
22
  from .fieldlist import XarrayFieldList
22
23
 
23
24
  LOG = logging.getLogger(__name__)
@@ -152,26 +153,30 @@ def load_many(emoji: str, context: Any, dates: list[datetime.datetime], pattern:
152
153
  return MultiFieldList(result)
153
154
 
154
155
 
155
- @legacy_source("xarray")
156
- def execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
157
- """Executes the loading of datasets.
158
-
159
- Parameters
160
- ----------
161
- context : Any
162
- Context object.
163
- dates : List[str]
164
- List of dates.
165
- url : str
166
- URL pattern for loading datasets.
167
- *args : Any
168
- Additional arguments.
169
- **kwargs : Any
170
- Additional keyword arguments.
171
-
172
- Returns
173
- -------
174
- ekd.FieldList
175
- The loaded datasets.
176
- """
177
- return load_many("🌐", context, dates, url, *args, **kwargs)
156
+ @source_registry.register("xarray")
157
+ class LegacyXarraySource(LegacySource):
158
+ name = "xarray"
159
+
160
+ @staticmethod
161
+ def _execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
162
+ """Executes the loading of datasets.
163
+
164
+ Parameters
165
+ ----------
166
+ context : Any
167
+ Context object.
168
+ dates : List[str]
169
+ List of dates.
170
+ url : str
171
+ URL pattern for loading datasets.
172
+ *args : Any
173
+ Additional arguments.
174
+ **kwargs : Any
175
+ Additional keyword arguments.
176
+
177
+ Returns
178
+ -------
179
+ ekd.FieldList
180
+ The loaded datasets.
181
+ """
182
+ return load_many("🌐", context, dates, url, *args, **kwargs)
@@ -121,16 +121,16 @@ class XArrayField(Field):
121
121
  Index to select a specific element, by default None.
122
122
  """
123
123
  if index is not None:
124
- values = self.selection[index]
124
+ values = self.selection[index].values
125
125
  else:
126
- values = self.selection
126
+ values = self.selection.values
127
127
 
128
128
  assert dtype is None
129
129
 
130
130
  if flatten:
131
- return values.values.flatten()
131
+ return values.flatten()
132
132
 
133
- return values # .reshape(self.shape)
133
+ return values
134
134
 
135
135
  @cached_property
136
136
  def _metadata(self) -> XArrayMetadata:
@@ -11,30 +11,34 @@ from typing import Any
11
11
 
12
12
  import earthkit.data as ekd
13
13
 
14
- from .legacy import legacy_source
14
+ from . import source_registry
15
+ from .legacy import LegacySource
15
16
  from .xarray import load_many
16
17
 
17
18
 
18
- @legacy_source(__file__)
19
- def execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
20
- """Execute the data loading process.
21
-
22
- Parameters
23
- ----------
24
- context : Any
25
- The context in which the execution occurs.
26
- dates : List[str]
27
- List of dates for which data is to be loaded.
28
- url : str
29
- The URL from which data is to be loaded.
30
- *args : tuple
31
- Additional positional arguments.
32
- **kwargs : dict
33
- Additional keyword arguments.
34
-
35
- Returns
36
- -------
37
- ekd.FieldList
38
- The loaded data.
39
- """
40
- return load_many("🇿", context, dates, url, *args, **kwargs)
19
+ @source_registry.register("xarray_zarr")
20
+ class XarrayZarrSource(LegacySource):
21
+
22
+ @staticmethod
23
+ def _execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
24
+ """Execute the data loading process.
25
+
26
+ Parameters
27
+ ----------
28
+ context : Any
29
+ The context in which the execution occurs.
30
+ dates : List[str]
31
+ List of dates for which data is to be loaded.
32
+ url : str
33
+ The URL from which data is to be loaded.
34
+ *args : tuple
35
+ Additional positional arguments.
36
+ **kwargs : dict
37
+ Additional keyword arguments.
38
+
39
+ Returns
40
+ -------
41
+ ekd.FieldList
42
+ The loaded data.
43
+ """
44
+ return load_many("🇿", context, dates, url, *args, **kwargs)
@@ -14,54 +14,58 @@ import earthkit.data as ekd
14
14
  from earthkit.data.core.fieldlist import MultiFieldList
15
15
  from earthkit.data.sources.url import download_and_cache
16
16
 
17
- from .legacy import legacy_source
17
+ from . import source_registry
18
+ from .legacy import LegacySource
18
19
  from .patterns import iterate_patterns
19
20
  from .xarray import load_one
20
21
 
21
22
 
22
- @legacy_source(__file__)
23
- def execute(context: Any, dates: Any, record_id: str, file_key: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
24
- """Executes the download and processing of files from Zenodo.
23
+ @source_registry.register("zenodo")
24
+ class ZenodoSource(LegacySource):
25
25
 
26
- Parameters
27
- ----------
28
- context : Any
29
- The context in which the function is executed.
30
- dates : Any
31
- The dates for which the data is required.
32
- record_id : str
33
- The Zenodo record ID.
34
- file_key : str
35
- The key to identify the file.
36
- *args : Any
37
- Additional arguments.
38
- **kwargs : Any
39
- Additional keyword arguments.
26
+ @staticmethod
27
+ def _execute(context: Any, dates: Any, record_id: str, file_key: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
28
+ """Executes the download and processing of files from Zenodo.
40
29
 
41
- Returns
42
- -------
43
- MultiFieldList
44
- A list of fields loaded from the downloaded files.
45
- """
46
- import requests
30
+ Parameters
31
+ ----------
32
+ context : Any
33
+ The context in which the function is executed.
34
+ dates : Any
35
+ The dates for which the data is required.
36
+ record_id : str
37
+ The Zenodo record ID.
38
+ file_key : str
39
+ The key to identify the file.
40
+ *args : Any
41
+ Additional arguments.
42
+ **kwargs : Any
43
+ Additional keyword arguments.
47
44
 
48
- result: list[Any] = []
45
+ Returns
46
+ -------
47
+ MultiFieldList
48
+ A list of fields loaded from the downloaded files.
49
+ """
50
+ import requests
49
51
 
50
- URLPATTERN = "https://zenodo.org/api/records/{record_id}"
51
- url = URLPATTERN.format(record_id=record_id)
52
- r = requests.get(url)
53
- r.raise_for_status()
54
- record: dict[str, Any] = r.json()
52
+ result: list[Any] = []
55
53
 
56
- urls: dict[str, str] = {}
57
- for file in record["files"]:
58
- urls[file["key"]] = file["links"]["self"]
54
+ URLPATTERN = "https://zenodo.org/api/records/{record_id}"
55
+ url = URLPATTERN.format(record_id=record_id)
56
+ r = requests.get(url)
57
+ r.raise_for_status()
58
+ record: dict[str, Any] = r.json()
59
59
 
60
- for url, dates in iterate_patterns(file_key, dates, **kwargs):
61
- if url not in urls:
62
- continue
60
+ urls: dict[str, str] = {}
61
+ for file in record["files"]:
62
+ urls[file["key"]] = file["links"]["self"]
63
63
 
64
- path = download_and_cache(urls[url])
65
- result.append(load_one("?", context, dates, path, options={}, flavour=None, **kwargs))
64
+ for url, dates in iterate_patterns(file_key, dates, **kwargs):
65
+ if url not in urls:
66
+ continue
66
67
 
67
- return MultiFieldList(result)
68
+ path = download_and_cache(urls[url])
69
+ result.append(load_one("?", context, dates, path, options={}, flavour=None, **kwargs))
70
+
71
+ return MultiFieldList(result)