anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -7,253 +7,311 @@
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
- import json
11
10
  import logging
12
- from copy import deepcopy
13
- from typing import Any
14
- from typing import Dict
15
- from typing import List
16
11
 
17
- from earthkit.data.core.order import build_remapping
18
-
19
- from ...dates.groups import GroupOfDates
20
- from .context import Context
21
- from .template import substitute
12
+ from anemoi.datasets.dates import DatesProvider
22
13
 
23
14
  LOG = logging.getLogger(__name__)
24
15
 
25
16
 
26
17
  class Action:
27
- """Represents an action to be performed within a given context.
28
-
29
- Attributes
30
- ----------
31
- context : ActionContext
32
- The context in which the action exists.
33
- kwargs : Dict[str, Any]
34
- Additional keyword arguments.
35
- args : Any
36
- Additional positional arguments.
37
- action_path : List[str]
38
- The action path.
18
+ """An "Action" represents a single operation described in the yaml configuration, e.g. a source, a filter,
19
+ pipe, join, etc.
20
+
21
+ See :ref:`operations` for more details.
22
+
39
23
  """
40
24
 
41
- def __init__(
42
- self, context: "ActionContext", action_path: List[str], /, *args: Any, **kwargs: Dict[str, Any]
43
- ) -> None:
44
- """Initialize an Action instance.
45
-
46
- Parameters
47
- ----------
48
- context : ActionContext
49
- The context in which the action exists.
50
- action_path : List[str]
51
- The action path.
52
- args : Any
53
- Additional positional arguments.
54
- kwargs : Dict[str, Any]
55
- Additional keyword arguments.
56
- """
57
- if "args" in kwargs and "kwargs" in kwargs:
58
- """We have:
59
- args = []
60
- kwargs = {args: [...], kwargs: {...}}
61
- move the content of kwargs to args and kwargs.
62
- """
63
- assert len(kwargs) == 2, (args, kwargs)
64
- assert not args, (args, kwargs)
65
- args = kwargs.pop("args")
66
- kwargs = kwargs.pop("kwargs")
67
-
68
- assert isinstance(context, ActionContext), type(context)
69
- self.context = context
70
- self.kwargs = kwargs
71
- self.args = args
72
- self.action_path = action_path
73
-
74
- @classmethod
75
- def _short_str(cls, x: str) -> str:
76
- """Shorten the string representation if it exceeds 1000 characters.
77
-
78
- Parameters
79
- ----------
80
- x : str
81
- The string to shorten.
82
-
83
- Returns
84
- -------
85
- str
86
- The shortened string.
87
- """
88
- x = str(x)
89
- if len(x) < 1000:
90
- return x
91
- return x[:1000] + "..."
92
-
93
- def _repr(self, *args: Any, _indent_: str = "\n", _inline_: str = "", **kwargs: Any) -> str:
94
- """Generate a string representation of the Action instance.
95
-
96
- Parameters
97
- ----------
98
- args : Any
99
- Additional positional arguments.
100
- _indent_ : str, optional
101
- The indentation string, by default "\n".
102
- _inline_ : str, optional
103
- The inline string, by default "".
104
- kwargs : Any
105
- Additional keyword arguments.
106
-
107
- Returns
108
- -------
109
- str
110
- The string representation.
111
- """
112
- more = ",".join([str(a)[:5000] for a in args])
113
- more += ",".join([f"{k}={v}"[:5000] for k, v in kwargs.items()])
114
-
115
- more = more[:5000]
116
- txt = f"{self.__class__.__name__}: {_inline_}{_indent_}{more}"
117
- if _indent_:
118
- txt = txt.replace("\n", "\n ")
119
- return txt
120
-
121
- def __repr__(self) -> str:
122
- """Return the string representation of the Action instance.
123
-
124
- Returns
125
- -------
126
- str
127
- The string representation.
128
- """
129
- return self._repr()
130
-
131
- def select(self, dates: object, **kwargs: Any) -> None:
132
- """Select dates for the action.
133
-
134
- Parameters
135
- ----------
136
- dates : object
137
- The dates to select.
138
- kwargs : Any
139
- Additional keyword arguments.
140
- """
141
- self._raise_not_implemented()
142
-
143
- def _raise_not_implemented(self) -> None:
144
- """Raise a NotImplementedError indicating the method is not implemented."""
145
- raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")
146
-
147
- def _trace_select(self, group_of_dates: GroupOfDates) -> str:
148
- """Trace the selection of a group of dates.
149
-
150
- Parameters
151
- ----------
152
- group_of_dates : GroupOfDates
153
- The group of dates to trace.
154
-
155
- Returns
156
- -------
157
- str
158
- The trace string.
159
- """
160
- return f"{self.__class__.__name__}({group_of_dates})"
161
-
162
-
163
- class ActionContext(Context):
164
- """Represents the context in which an action is performed.
165
-
166
- Attributes
167
- ----------
168
- order_by : str
169
- The order by criteria.
170
- flatten_grid : bool
171
- Whether to flatten the grid.
172
- remapping : Dict[str, Any]
173
- The remapping configuration.
174
- use_grib_paramid : bool
175
- Whether to use GRIB parameter ID.
25
+ def __init__(self, config, *path):
26
+ self.config = config
27
+ self.path = path
28
+ assert path[0] in (
29
+ "input",
30
+ "data_sources",
31
+ ), f"{self.__class__.__name__}: path must start with 'input' or 'data_sources': {path}"
32
+
33
+
34
+ class Concat(Action):
35
+ """The Concat construct is used to concatenate different actions that are responsible
36
+ for delivering fields for different dates.
37
+
38
+ See :ref:`building-concat` for more details.
39
+
40
+ .. code-block:: yaml
41
+
42
+ input:
43
+ concat:
44
+ - dates:
45
+ start: 2023-01-01
46
+ end: 2023-01-31
47
+ frequency: 1d
48
+ action: # some action
49
+ ...
50
+
51
+ - dates:
52
+ start: 2023-02-01
53
+ end: 2023-02-28
54
+ frequency: 1d
55
+ action: # some action
56
+
176
57
  """
177
58
 
178
- def __init__(self, /, order_by: str, flatten_grid: bool, remapping: Dict[str, Any], use_grib_paramid: bool) -> None:
179
- """Initialize an ActionContext instance.
180
-
181
- Parameters
182
- ----------
183
- order_by : str
184
- The order by criteria.
185
- flatten_grid : bool
186
- Whether to flatten the grid.
187
- remapping : Dict[str, Any]
188
- The remapping configuration.
189
- use_grib_paramid : bool
190
- Whether to use GRIB parameter ID.
191
- """
192
- super().__init__()
193
- self.order_by = order_by
194
- self.flatten_grid = flatten_grid
195
- self.remapping = build_remapping(remapping)
196
- self.use_grib_paramid = use_grib_paramid
197
-
198
-
199
- def action_factory(config: Dict[str, Any], context: ActionContext, action_path: List[str]) -> Action:
200
- """Factory function to create an Action instance based on the configuration.
201
-
202
- Parameters
203
- ----------
204
- config : Dict[str, Any]
205
- The action configuration.
206
- context : ActionContext
207
- The context in which the action exists.
208
- action_path : List[str]
209
- The action path.
210
-
211
- Returns
212
- -------
213
- Action
214
- The created Action instance.
59
+ def __init__(self, config, *path):
60
+ super().__init__(config, *path, "concat")
61
+
62
+ assert isinstance(config, list), f"Value must be a dict {list}"
63
+
64
+ self.choices = []
65
+
66
+ for i, item in enumerate(config):
67
+
68
+ dates = item["dates"]
69
+ filtering_dates = DatesProvider.from_config(**dates)
70
+ action = action_factory({k: v for k, v in item.items() if k != "dates"}, *self.path, str(i))
71
+ self.choices.append((filtering_dates, action))
72
+
73
+ def __repr__(self):
74
+ return f"Concat({self.choices})"
75
+
76
+ def __call__(self, context, argument):
77
+
78
+ results = context.empty_result()
79
+
80
+ for filtering_dates, action in self.choices:
81
+ dates = context.matching_dates(filtering_dates, argument)
82
+ if len(dates) == 0:
83
+ continue
84
+ results += action(context, dates)
85
+
86
+ return context.register(results, self.path)
87
+
88
+
89
+ class Join(Action):
90
+ """Implement the join operation to combine results from multiple actions.
91
+
92
+ See :ref:`building-join` for more details.
93
+
94
+ .. code-block:: yaml
95
+
96
+ input:
97
+ join:
98
+ - grib:
99
+ ...
100
+
101
+ - netcdf: # some other action
102
+ ...
103
+
215
104
  """
216
- from .concat import ConcatAction
217
- from .data_sources import DataSourcesAction
218
- from .function import FunctionAction
219
- from .join import JoinAction
220
- from .pipe import PipeAction
221
- from .repeated_dates import RepeatedDatesAction
222
-
223
- # from .data_sources import DataSourcesAction
224
-
225
- assert isinstance(context, Context), (type, context)
226
- if not isinstance(config, dict):
227
- raise ValueError(f"Invalid input config {config}")
228
- if len(config) != 1:
229
- print(json.dumps(config, indent=2, default=str))
230
- raise ValueError(f"Invalid input config. Expecting dict with only one key, got {list(config.keys())}")
231
-
232
- config = deepcopy(config)
233
- key = list(config.keys())[0]
234
-
235
- if isinstance(config[key], list):
236
- args, kwargs = config[key], {}
237
- elif isinstance(config[key], dict):
238
- args, kwargs = [], config[key]
239
- else:
240
- raise ValueError(f"Invalid input config {config[key]} ({type(config[key])}")
241
-
242
- cls = {
243
- "data_sources": DataSourcesAction,
244
- "data-sources": DataSourcesAction,
245
- "concat": ConcatAction,
246
- "join": JoinAction,
247
- "pipe": PipeAction,
248
- "function": FunctionAction,
249
- "repeated_dates": RepeatedDatesAction,
250
- "repeated-dates": RepeatedDatesAction,
251
- }.get(key)
252
-
253
- if cls is None:
254
- from ..sources import create_source
255
-
256
- source = create_source(None, substitute(context, config))
257
- return FunctionAction(context, action_path + [key], key, source)
258
-
259
- return cls(context, action_path + [key], *args, **kwargs)
105
+
106
+ def __init__(self, config, *path):
107
+ super().__init__(config, *path, "join")
108
+
109
+ assert isinstance(config, list), f"Value of Join Action must be a list, got: {config}"
110
+
111
+ self.actions = [action_factory(item, *self.path, str(i)) for i, item in enumerate(config)]
112
+
113
+ def __repr__(self):
114
+ return f"Join({self.actions})"
115
+
116
+ def __call__(self, context, argument):
117
+ results = context.empty_result()
118
+
119
+ for action in self.actions:
120
+ results += action(context, argument)
121
+
122
+ return context.register(results, self.path)
123
+
124
+
125
+ class Pipe(Action):
126
+ """Implement the pipe operation to chain results from a
127
+ source through multiple filters.
128
+
129
+ See :ref:`building-pipe` for more details.
130
+
131
+ .. code-block:: yaml
132
+
133
+ input:
134
+ pipe:
135
+ - grib:
136
+ ...
137
+
138
+ - rename:
139
+ ...
140
+
141
+ """
142
+
143
+ def __init__(self, config, *path):
144
+ assert isinstance(config, list), f"Value of Pipe Action must be a list, got {config}"
145
+ super().__init__(config, *path, "pipe")
146
+ self.actions = [action_factory(item, *self.path, str(i)) for i, item in enumerate(config)]
147
+
148
+ def __repr__(self):
149
+ return f"Pipe({self.actions})"
150
+
151
+ def __call__(self, context, argument):
152
+ result = context.empty_result()
153
+
154
+ for i, action in enumerate(self.actions):
155
+ if i == 0:
156
+ result = action(context, argument)
157
+ else:
158
+ result = action(context, result)
159
+
160
+ return context.register(result, self.path)
161
+
162
+
163
+ class Function(Action):
164
+ """Base class for sources and filters."""
165
+
166
+ def __init__(self, config, *path):
167
+ super().__init__(config, *path, self.name)
168
+
169
+ def __call__(self, context, argument):
170
+
171
+ config = context.resolve(self.config) # Substitute the ${} variables in the config
172
+
173
+ config["_type"] = self.name # Find a better way to do this
174
+
175
+ source = self.create_object(context, config)
176
+
177
+ return context.register(self.call_object(context, source, argument), self.path)
178
+
179
+
180
+ class DatasetSourceMixin:
181
+ """Mixin class for sources defined in anemoi-datasets"""
182
+
183
+ def create_object(self, context, config):
184
+ from anemoi.datasets.create.sources import create_source as create_datasets_source
185
+
186
+ return create_datasets_source(context, config)
187
+
188
+ def call_object(self, context, source, argument):
189
+ return source.execute(context.source_argument(argument))
190
+
191
+
192
+ class TransformSourceMixin:
193
+ """Mixin class for sources defined in anemoi-transform"""
194
+
195
+ def create_object(self, context, config):
196
+ from anemoi.transform.sources import create_source as create_transform_source
197
+
198
+ return create_transform_source(context, config)
199
+
200
+
201
+ class TransformFilterMixin:
202
+ """Mixin class for filters defined in anemoi-transform"""
203
+
204
+ def create_object(self, context, config):
205
+ from anemoi.transform.filters import create_filter as create_transform_filter
206
+
207
+ return create_transform_filter(context, config)
208
+
209
+ def call_object(self, context, filter, argument):
210
+ return filter.forward(context.filter_argument(argument))
211
+
212
+
213
class FilterFunction(Function):
    """Action to call a filter on the argument (e.g. rename, regrid, etc.)."""

    def __call__(self, context, argument):
        """Run the filter through the generic ``Function`` call sequence.

        Parameters
        ----------
        context : Any
            The context used to resolve the configuration and register the result.
        argument : Any
            The input handed to the filter.

        Returns
        -------
        Any
            Whatever ``Function.__call__`` returns (the registered result).
        """
        # Neither Function nor Action defines a ``call`` method, so delegating to
        # it could only raise AttributeError. Function.__call__ relies on the
        # ``create_object`` / ``call_object`` hooks supplied by a mixin.
        return super().__call__(context, argument)
218
+
219
+
220
+ def _make_name(name, what):
221
+ name = name.replace("_", "-")
222
+ name = "".join(x.title() for x in name.split("-"))
223
+ return name + what.title()
224
+
225
+
226
+ def new_source(name, mixin):
227
+ return type(
228
+ _make_name(name, "source"),
229
+ (Function, mixin),
230
+ {"name": name},
231
+ )
232
+
233
+
234
+ def new_filter(name, mixin):
235
+ return type(
236
+ _make_name(name, "filter"),
237
+ (Function, mixin),
238
+ {"name": name},
239
+ )
240
+
241
+
242
class DataSources(Action):
    """Action holding the sources declared under ``data_sources``.

    Every entry of the configuration is turned into an action with
    ``action_factory``. Calling this action runs each of them and registers
    its result in the context under ``self.path + (name,)``.
    """

    def __init__(self, config, *path):
        """Create one sub-action per configured source.

        Parameters
        ----------
        config : dict or list
            Either a mapping of name to action description, or a list of
            action descriptions (named by their position).
        *path : str
            Path of this action; must start with ``"input"`` or ``"data_sources"``.
        """
        super().__init__(config, *path)
        assert isinstance(config, (dict, list)), f"Invalid config type: {type(config)}"
        if isinstance(config, dict):
            named = config.items()
        else:
            # Name list entries by their position *as a string*: the sub-action
            # path already uses str(i), and ``${a.b.c}`` references are split on
            # "." into string components, so an int key could never be matched.
            named = ((str(i), v) for i, v in enumerate(config))

        self.sources = {name: action_factory(v, *path, name) for name, v in named}

    def __call__(self, context, argument):
        """Run every source and register its result; nothing is returned."""
        for name, source in self.sources.items():
            context.register(source(context, argument), self.path + (name,))
256
+
257
+
258
+ class Recipe(Action):
259
+ """Action that represents a recipe (i.e. a sequence of data_sources and input)."""
260
+
261
+ def __init__(self, input, data_sources):
262
+ self.input = input
263
+ self.data_sources = data_sources
264
+
265
+ def __call__(self, context, argument):
266
+ # Load data_sources
267
+ self.data_sources(context, argument)
268
+ return self.input(context, argument)
269
+
270
+
271
+ KLASS = {
272
+ "concat": Concat,
273
+ "join": Join,
274
+ "pipe": Pipe,
275
+ "data-sources": DataSources,
276
+ }
277
+
278
+ LEN_KLASS = len(KLASS)
279
+
280
+
281
def make(key, config, *path):
    """Instantiate the action class registered under ``key``.

    While ``KLASS`` still has only its initial entries, the source and filter
    registries are loaded and an action class is added to ``KLASS`` for every
    registered name that is not already present.

    Parameters
    ----------
    key : str
        Name of the operation; underscores and dashes are interchangeable.
    config : Any
        The value found under ``key``, passed to the action constructor.
    *path : str
        Path of the enclosing action, passed to the action constructor.

    Returns
    -------
    Action
        The newly created action.

    Raises
    ------
    KeyError
        If no action is registered under ``key``.
    """
    if LEN_KLASS == len(KLASS):

        # Load plugins (imported lazily, only when the table is first populated)
        from anemoi.transform.filters import filter_registry as transform_filter_registry
        from anemoi.transform.sources import source_registry as transform_source_registry

        from anemoi.datasets.create.sources import source_registry as dataset_source_registry

        # Order matters: an earlier registry wins when two share a name.
        # Local sources come first, then anemoi-transform sources, then filters.
        plugins = (
            (dataset_source_registry, new_source, DatasetSourceMixin),
            (transform_source_registry, new_source, TransformSourceMixin),
            (transform_filter_registry, new_filter, TransformFilterMixin),
        )

        for registry, factory, mixin in plugins:
            for name in registry.registered:
                # KLASS keys are dash-separated, so the clash check must use the
                # normalised name; testing the raw name would let a later
                # registry overwrite an entry whose name contains "_".
                normalised = name.replace("_", "-")
                if normalised not in KLASS:
                    KLASS[normalised] = factory(name, mixin)

    return KLASS[key.replace("_", "-")](config, *path)
306
+
307
+
308
+ def action_factory(data, *path):
309
+
310
+ assert len(path) > 0, f"Path must contain at least one element {path}"
311
+ assert path[0] in ("input", "data_sources")
312
+
313
+ assert isinstance(data, dict), f"Input data must be a dictionary, got {type(data)}"
314
+ assert len(data) == 1, f"Input data must contain exactly one key-value pair {data} {'.'.join(x for x in path)}"
315
+
316
+ key, value = next(iter(data.items()))
317
+ return make(key, value, *path)
@@ -0,0 +1,71 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ import logging
11
+ from abc import ABC
12
+ from abc import abstractmethod
13
+ from typing import Any
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+
18
+ class Context(ABC):
19
+ """Context for building input data."""
20
+
21
+ def __init__(self, /, argument: Any) -> None:
22
+ self.results = {}
23
+ self.cache = {}
24
+ self.argument = argument
25
+
26
+ def trace(self, emoji, *message) -> None:
27
+
28
+ print(f"{emoji}: {message}")
29
+
30
+ def register(self, data: Any, path: list[str]) -> Any:
31
+
32
+ if not path:
33
+ return data
34
+
35
+ assert path[0] in ("input", "data_sources"), path
36
+
37
+ LOG.info(f"Registering data at path: {path}")
38
+ self.results[tuple(path)] = data
39
+ return data
40
+
41
+ def resolve(self, config):
42
+ config = config.copy()
43
+
44
+ for key, value in list(config.items()):
45
+ if isinstance(value, str) and value.startswith("${") and value.endswith("}"):
46
+ path = tuple(value[2:-1].split("."))
47
+ if path in self.results:
48
+ config[key] = self.results[path]
49
+ else:
50
+ LOG.warning(f"Path not found {path}")
51
+ for p in sorted(self.results):
52
+ LOG.info(f" Available paths: {p}")
53
+ raise KeyError(f"Path {path} not found in results: {self.results.keys()}")
54
+
55
+ return config
56
+
57
+ def create_source(self, config: Any, *path) -> Any:
58
+ from anemoi.datasets.create.input.action import action_factory
59
+
60
+ if not isinstance(config, dict):
61
+ # It is already a result (e.g. ekd.FieldList), loaded from ${a.b.c}
62
+ # TODO: something more elegant
63
+ return lambda *args, **kwargs: config
64
+
65
+ return action_factory(config, *path)
66
+
67
+ @abstractmethod
68
+ def empty_result(self) -> Any: ...
69
+
70
+ @abstractmethod
71
+ def create_result(self, data: Any) -> Any: ...
@@ -0,0 +1,54 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ from typing import Any
12
+
13
+ from earthkit.data.core.order import build_remapping
14
+
15
+ from ..result.field import FieldResult
16
+ from . import Context
17
+
18
+
19
+ class FieldContext(Context):
20
+
21
+ def __init__(
22
+ self,
23
+ /,
24
+ argument: Any,
25
+ order_by: str,
26
+ flatten_grid: bool,
27
+ remapping: dict[str, Any],
28
+ use_grib_paramid: bool,
29
+ ) -> None:
30
+ super().__init__(argument)
31
+ self.order_by = order_by
32
+ self.flatten_grid = flatten_grid
33
+ self.remapping = build_remapping(remapping)
34
+ self.use_grib_paramid = use_grib_paramid
35
+ self.partial_ok = False
36
+
37
+ def empty_result(self) -> Any:
38
+ import earthkit.data as ekd
39
+
40
+ return ekd.from_source("empty")
41
+
42
+ def source_argument(self, argument: Any) -> Any:
43
+ return argument # .dates
44
+
45
+ def filter_argument(self, argument: Any) -> Any:
46
+ return argument
47
+
48
+ def create_result(self, data):
49
+ return FieldResult(self, data)
50
+
51
+ def matching_dates(self, filtering_dates, group_of_dates: Any) -> Any:
52
+ from anemoi.datasets.dates.groups import GroupOfDates
53
+
54
+ return GroupOfDates(sorted(set(group_of_dates) & set(filtering_dates)), group_of_dates.provider)