anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +46 -13
  6. anemoi/datasets/create/config.py +52 -53
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +10 -92
  16. anemoi/datasets/create/sources/accumulate.py +517 -0
  17. anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
  18. anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
  19. anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +149 -0
  20. anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
  21. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  22. anemoi/datasets/create/sources/constants.py +39 -38
  23. anemoi/datasets/create/sources/empty.py +26 -22
  24. anemoi/datasets/create/sources/forcings.py +29 -28
  25. anemoi/datasets/create/sources/grib.py +92 -72
  26. anemoi/datasets/create/sources/grib_index.py +102 -54
  27. anemoi/datasets/create/sources/hindcasts.py +56 -55
  28. anemoi/datasets/create/sources/legacy.py +10 -62
  29. anemoi/datasets/create/sources/mars.py +159 -154
  30. anemoi/datasets/create/sources/netcdf.py +28 -24
  31. anemoi/datasets/create/sources/opendap.py +28 -24
  32. anemoi/datasets/create/sources/recentre.py +42 -41
  33. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  34. anemoi/datasets/create/sources/source.py +26 -48
  35. anemoi/datasets/create/sources/xarray_support/__init__.py +30 -24
  36. anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
  37. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  38. anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
  39. anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
  40. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  41. anemoi/datasets/create/sources/zenodo.py +43 -39
  42. anemoi/datasets/create/utils.py +0 -42
  43. anemoi/datasets/data/complement.py +26 -17
  44. anemoi/datasets/data/dataset.py +12 -0
  45. anemoi/datasets/data/grids.py +0 -152
  46. anemoi/datasets/data/masked.py +74 -13
  47. anemoi/datasets/data/missing.py +5 -0
  48. anemoi/datasets/data/rolling_average.py +141 -0
  49. anemoi/datasets/data/stores.py +7 -9
  50. anemoi/datasets/dates/__init__.py +2 -0
  51. anemoi/datasets/dumper.py +76 -0
  52. anemoi/datasets/grids.py +1 -178
  53. anemoi/datasets/schemas/recipe.json +131 -0
  54. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/METADATA +9 -6
  55. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/RECORD +59 -57
  56. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/WHEEL +1 -1
  57. anemoi/datasets/create/filter.py +0 -47
  58. anemoi/datasets/create/input/concat.py +0 -161
  59. anemoi/datasets/create/input/context.py +0 -86
  60. anemoi/datasets/create/input/empty.py +0 -53
  61. anemoi/datasets/create/input/filter.py +0 -117
  62. anemoi/datasets/create/input/function.py +0 -232
  63. anemoi/datasets/create/input/join.py +0 -129
  64. anemoi/datasets/create/input/pipe.py +0 -66
  65. anemoi/datasets/create/input/step.py +0 -173
  66. anemoi/datasets/create/input/template.py +0 -161
  67. anemoi/datasets/create/sources/accumulations.py +0 -1062
  68. anemoi/datasets/create/sources/accumulations2.py +0 -647
  69. anemoi/datasets/create/sources/tendencies.py +0 -198
  70. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/entry_points.txt +0 -0
  71. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/licenses/LICENSE +0 -0
  72. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/top_level.txt +0 -0
@@ -1,129 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
-
10
- import logging
11
- from functools import cached_property
12
- from typing import Any
13
-
14
- from earthkit.data import FieldList
15
-
16
- from ...dates.groups import GroupOfDates
17
- from .action import Action
18
- from .action import action_factory
19
- from .empty import EmptyResult
20
- from .misc import _tidy
21
- from .misc import assert_fieldlist
22
- from .result import Result
23
- from .template import notify_result
24
- from .trace import trace_datasource
25
- from .trace import trace_select
26
-
27
- LOG = logging.getLogger(__name__)
28
-
29
-
30
class JoinResult(Result):
    """Result whose datasource is built by adding together several results.

    Attributes
    ----------
    context : object
        The context object, forwarded to the base ``Result``.
    action_path : list
        The action path, forwarded to the base ``Result``.
    group_of_dates : GroupOfDates
        The group of dates, forwarded to the base ``Result``.
    results : list[Result]
        The results to combine; results whose ``empty`` attribute is truthy
        are discarded at construction time.
    """

    def __init__(
        self, context: object, action_path: list, group_of_dates: GroupOfDates, results: list[Result], **kwargs: Any
    ) -> None:
        """Store the non-empty results to be joined.

        Parameters
        ----------
        context : object
            The context object.
        action_path : list
            The action path.
        group_of_dates : GroupOfDates
            The group of dates.
        results : list[Result]
            Candidate results; only those that are not ``empty`` are kept.
        **kwargs : Any
            Accepted but not used by this constructor.
        """
        super().__init__(context, action_path, group_of_dates)
        kept: list[Result] = []
        for candidate in results:
            if not candidate.empty:
                kept.append(candidate)
        self.results: list[Result] = kept

    @cached_property
    @assert_fieldlist
    @notify_result
    @trace_datasource
    def datasource(self) -> FieldList:
        """Return the sum of the datasources of all kept results.

        The accumulation starts from the datasource of an ``EmptyResult``
        created with the same context, action path and group of dates; each
        kept result's datasource is then added in order and the total is
        passed through ``_tidy`` before being returned.
        """
        combined: FieldList = EmptyResult(self.context, self.action_path, self.group_of_dates).datasource
        for result in self.results:
            combined += result.datasource
        return _tidy(combined)

    def __repr__(self) -> str:
        """Return the base representation wrapping one line per kept result."""
        lines: str = "\n".join(map(str, self.results))
        return self._repr(lines)
79
-
80
-
81
class JoinAction(Action):
    """Action that runs several child actions and joins their results.

    Attributes
    ----------
    context : object
        The context object, forwarded to the base ``Action``.
    action_path : list
        The action path, forwarded to the base ``Action``.
    actions : list[Action]
        One child action per configuration, built with ``action_factory``.
    """

    def __init__(self, context: object, action_path: list, *configs: dict) -> None:
        """Build one child action per configuration.

        Parameters
        ----------
        context : object
            The context object.
        action_path : list
            The action path; each child gets this path extended with its
            position (as a string) among ``configs``.
        *configs : dict
            The configuration dictionaries, one per child action.
        """
        super().__init__(context, action_path, *configs)
        children: list[Action] = []
        for position, config in enumerate(configs):
            child_path = action_path + [str(position)]
            children.append(action_factory(config, context, child_path))
        self.actions: list[Action] = children

    def __repr__(self) -> str:
        """Return the base representation wrapping one line per child action."""
        lines: str = "\n".join(map(str, self.actions))
        return self._repr(lines)

    @trace_select
    def select(self, group_of_dates: GroupOfDates) -> JoinResult:
        """Select every child action for the given dates and join the results.

        Parameters
        ----------
        group_of_dates : GroupOfDates
            The group of dates passed to each child's ``select``.

        Returns
        -------
        JoinResult
            A result wrapping the children's results, in child order.
        """
        selected: list[Result] = []
        for action in self.actions:
            selected.append(action.select(group_of_dates))
        return JoinResult(self.context, self.action_path, group_of_dates, selected)
@@ -1,66 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
-
10
- import json
11
- import logging
12
- from typing import Any
13
-
14
- from .action import Action
15
- from .action import action_factory
16
- from .step import step_factory
17
- from .trace import trace_select
18
-
19
- LOG = logging.getLogger(__name__)
20
-
21
-
22
class PipeAction(Action):
    """A pipeline: a first action followed by one or more chained steps."""

    def __init__(self, context: Any, action_path: list, *configs: dict) -> None:
        """Initialize the PipeAction.

        The first configuration is turned into an action with
        ``action_factory``; every following configuration is turned into a
        step with ``step_factory``, each one receiving the previously built
        object as ``previous_step``. Only the last step is kept.

        Parameters
        ----------
        context : Any
            The context for the action.
        action_path : list
            The path of the action; element ``i`` of the pipeline is built
            with this path extended by ``str(i)``.
        configs : dict
            The configurations for the actions (at least two).

        Raises
        ------
        ValueError
            If fewer than two configurations are given.
        """
        super().__init__(context, action_path, *configs)
        if len(configs) <= 1:
            # The configs may hold values json cannot serialise (e.g. dates
            # coming from a YAML recipe); fall back to repr() so that the
            # intended ValueError is raised rather than a TypeError.
            try:
                rendered = json.dumps(configs, indent=2)
            except (TypeError, ValueError):
                rendered = repr(configs)
            raise ValueError(f"PipeAction requires at least two actions, got {len(configs)}\n{rendered}")

        current: Any = action_factory(configs[0], context, action_path + ["0"])
        for position, config in enumerate(configs[1:], start=1):
            current = step_factory(config, context, action_path + [str(position)], previous_step=current)
        self.last_step: Any = current

    @trace_select
    def select(self, group_of_dates: Any) -> Any:
        """Select data based on the group of dates.

        Parameters
        ----------
        group_of_dates : Any
            The group of dates to select data for.

        Returns
        -------
        Any
            Whatever the last step's ``select`` returns.
        """
        return self.last_step.select(group_of_dates)

    def __repr__(self) -> str:
        """Return a string representation of the PipeAction."""
        return f"PipeAction({self.last_step})"
@@ -1,173 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
-
10
- import logging
11
- from copy import deepcopy
12
- from typing import Any
13
-
14
- from .action import Action
15
- from .action import ActionContext
16
- from .context import Context
17
- from .result import Result
18
- from .template import notify_result
19
- from .trace import trace_datasource
20
- from .trace import trace_select
21
-
22
- LOG = logging.getLogger(__name__)
23
-
24
-
25
class StepResult(Result):
    """Base result for a pipeline step that consumes an upstream result."""

    def __init__(
        self, context: Context, action_path: list[str], group_of_dates: Any, action: Action, upstream_result: Result
    ) -> None:
        """Record the step's action and the result it is applied to.

        Parameters
        ----------
        context
            The context in which the step is executed.
        action_path
            The path of actions leading to this step.
        group_of_dates
            The group of dates associated with this step.
        action
            The action that produced this result.
        upstream_result
            The result of the upstream step; must be a ``Result`` instance.
        """
        super().__init__(context, action_path, group_of_dates)
        assert isinstance(upstream_result, Result), type(upstream_result)
        self.action: Action = action
        self.upstream_result: Result = upstream_result

    @property
    @notify_result
    @trace_datasource
    def datasource(self) -> Any:
        """Datasource of this step; always raises here, subclasses must override.

        Raises
        ------
        NotImplementedError
            Always, naming the concrete class.
        """
        message = f"Not implemented in {self.__class__.__name__}"
        raise NotImplementedError(message)
57
-
58
-
59
class StepAction(Action):
    """Represents an action that is part of a step in the data processing pipeline."""

    # Result type instantiated by ``select``; the default is None, so a
    # subclass has to set it before ``select`` can be used.
    result_class: type[StepResult] | None = None

    def __init__(
        self, context: ActionContext, action_path: list[str], previous_step: Any, *args: Any, **kwargs: Any
    ) -> None:
        """Initialize a StepAction instance.

        Parameters
        ----------
        context
            The context in which the action is executed.
        action_path
            The path of actions leading to this step.
        previous_step
            The previous step in the pipeline; its ``select`` provides the
            upstream result.
        *args, **kwargs
            Forwarded unchanged to the base ``Action`` constructor.
        """
        super().__init__(context, action_path, *args, **kwargs)
        self.previous_step: Any = previous_step

    @trace_select
    def select(self, group_of_dates: Any) -> StepResult:
        """Select the result for a given group of dates.

        Parameters
        ----------
        group_of_dates
            The group of dates to select the result for.

        Returns
        -------
        StepResult
            An instance of ``result_class`` wrapping the upstream result.

        Raises
        ------
        TypeError
            If the class does not define ``result_class``.
        """
        # Fail before running the upstream selection, with a message that
        # names the offending class instead of "'NoneType' is not callable".
        if self.result_class is None:
            raise TypeError(f"{self.__class__.__name__} does not define `result_class`")

        return self.result_class(
            self.context,
            self.action_path,
            group_of_dates,
            self,
            self.previous_step.select(group_of_dates),
        )

    def __repr__(self) -> str:
        """Return a string representation of the StepAction instance.

        Returns
        -------
        str
            The base representation of the previous step, with this step's
            keyword arguments shown inline.
        """
        return self._repr(self.previous_step, _inline_=str(self.kwargs))
112
-
113
-
114
def step_factory(config: dict[str, Any], context: ActionContext, action_path: list[str], previous_step: Any) -> Any:
    """Create the step action described by a single-key configuration.

    The only key of ``config`` names the step. ``filter`` builds a
    ``FilterStepAction``; any other key is looked up in the
    ``anemoi.transform`` filter registry and, when registered, wrapped in a
    ``FunctionStepAction``.

    Parameters
    ----------
    config
        The configuration dictionary for the step; must contain exactly one key.
    context
        The context in which the step is executed.
    action_path
        The path of actions leading to this step.
    previous_step
        The previous step in the pipeline.

    Returns
    -------
    Any
        An instance of a step action.

    Raises
    ------
    ValueError
        If ``config`` is not a dict with exactly one key, if the value of a
        ``filter`` step is not a list, dict or str, or if the step name is
        unknown.
    """
    assert isinstance(context, Context), type(context)
    if not isinstance(config, dict):
        raise ValueError(f"Invalid input config {config}")
    if len(config) != 1:
        raise ValueError(f"Invalid input config {config}: expected exactly one key")

    # Work on a private copy so the objects built below never share state
    # with the caller's configuration.
    config = deepcopy(config)
    key, value = next(iter(config.items()))

    if key == "filter":
        # Only this branch needs positional/keyword arguments; registry
        # filters receive the whole config and accept any value type.
        if isinstance(value, list):
            args, kwargs = value, {}
        elif isinstance(value, dict):
            args, kwargs = [], value
        elif isinstance(value, str):
            args, kwargs = [value], {}
        else:
            raise ValueError(f"Invalid input config {config}: `{key}` expects a list, dict or str")

        from .filter import FilterStepAction

        return FilterStepAction(context, action_path, previous_step, *args, **kwargs)

    # Try filters from transform filter registry
    from anemoi.transform.filters import filter_registry as transform_filter_registry

    if transform_filter_registry.is_registered(key):
        from ..filter import TransformFilter
        from .filter import FunctionStepAction

        return FunctionStepAction(
            context, action_path + [key], previous_step, key, TransformFilter(context, key, config)
        )

    raise ValueError(f"Unknown step action `{key}`")
@@ -1,161 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
-
10
-
11
- import logging
12
- import re
13
- from abc import ABC
14
- from abc import abstractmethod
15
- from collections.abc import Callable
16
- from functools import wraps
17
- from typing import Any
18
-
19
- from .context import Context
20
-
21
- LOG = logging.getLogger(__name__)
22
-
23
-
24
def notify_result(method: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap a method so its return value is reported to the instance's context.

    After the wrapped method returns, ``self.context.notify_result`` is
    called with ``self.action_path`` and the returned value; the value is
    then handed back to the caller unchanged.

    Parameters
    ----------
    method : Callable[..., Any]
        The method to wrap.

    Returns
    -------
    Callable[..., Any]
        The wrapped method.
    """

    @wraps(method)
    def _notifying(self: Any, *args: Any, **kwargs: Any) -> Any:
        outcome: Any = method(self, *args, **kwargs)
        self.context.notify_result(self.action_path, outcome)
        return outcome

    return _notifying
45
-
46
-
47
class Substitution(ABC):
    """Interface for placeholder objects that are replaced by a value at resolution time."""

    @abstractmethod
    def resolve(self, context: Context) -> Any:
        """Return the value this substitution stands for.

        Parameters
        ----------
        context : Context
            The context to use for resolution.

        Returns
        -------
        Any
            The resolved value.
        """
65
-
66
-
67
class Reference(Substitution):
    """Substitution that stands for the result stored under an action path."""

    def __init__(self, context: Any, action_path: list[str]) -> None:
        """Remember the context and the action path being referred to.

        Parameters
        ----------
        context : Any
            The context in which the reference exists.
        action_path : list of str
            The action path to resolve.
        """
        self.action_path: list[str] = action_path
        self.context: Any = context

    def resolve(self, context: Context) -> Any:
        """Look up the referenced result in the given context.

        The lookup uses the ``context`` argument, not the context stored on
        the instance.

        Parameters
        ----------
        context : Context
            The context to use for resolution.

        Returns
        -------
        Any
            Whatever ``context.get_result`` returns for the stored action path.
        """
        target = self.action_path
        return context.get_result(target)
97
-
98
-
99
def resolve(context: Context, x: Any) -> Any:
    """Replace every ``Substitution`` found in a nested structure by its value.

    Tuples, lists and dicts (values only) are rebuilt recursively; a
    ``Substitution`` is replaced by ``x.resolve(context)``; anything else is
    returned as is.

    Parameters
    ----------
    context : Context
        The context to use for resolution.
    x : Union[tuple, list, dict, Substitution, Any]
        The structure to resolve.

    Returns
    -------
    Any
        The resolved structure.
    """

    def _resolved(item: Any) -> Any:
        return resolve(context, item)

    if isinstance(x, dict):
        return {key: _resolved(value) for key, value in x.items()}

    if isinstance(x, list):
        return list(map(_resolved, x))

    if isinstance(x, tuple):
        return tuple(map(_resolved, x))

    # Checked after the containers, as in the original ordering.
    if isinstance(x, Substitution):
        return x.resolve(context)

    return x
127
-
128
-
129
def substitute(context: Context, x: Any) -> Any:
    """Recursively replace ``${...}`` strings in a structure by ``Reference`` objects.

    Tuples, lists and dicts (values only) are rebuilt recursively. A string
    that consists entirely of ``${path}``, where ``path`` is made of word
    characters, dots and dashes, is split on dots; the context is told about
    the path through ``will_need_reference`` and a ``Reference`` to it is
    returned. Every other value is returned unchanged.

    Parameters
    ----------
    context : Context
        The context to use for substitution.
    x : Union[tuple, list, dict, str, Any]
        The structure to substitute.

    Returns
    -------
    Any
        The substituted structure.
    """
    if isinstance(x, tuple):
        return tuple(substitute(context, y) for y in x)

    if isinstance(x, list):
        return [substitute(context, y) for y in x]

    if isinstance(x, dict):
        return {k: substitute(context, v) for k, v in x.items()}

    if not isinstance(x, str):
        return x

    # fullmatch, not match with a trailing `$`: `$` also matches just before
    # a final newline, which would let "${a}\n" through and make the slice
    # below produce a corrupted path ("a}").
    if re.fullmatch(r"\$\{[\.\w\-]+\}", x):
        path = x[2:-1].split(".")
        context.will_need_reference(path)
        return Reference(context, path)

    return x