anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. anemoi/datasets/create/functions/__init__.py +0 -66
  132. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  133. anemoi/datasets/create/functions/filters/empty.py +0 -17
  134. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  135. anemoi/datasets/create/functions/filters/rename.py +0 -79
  136. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  137. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  138. anemoi/datasets/create/functions/sources/empty.py +0 -15
  139. anemoi/datasets/create/functions/sources/grib.py +0 -150
  140. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  141. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  142. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  143. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  144. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  145. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  146. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  147. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  148. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  149. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  150. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  151. anemoi/datasets/utils/fields.py +0 -47
  152. anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
  153. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  154. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
  155. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,42 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ from typing import Any
12
+ from typing import List
13
+
14
+ import earthkit.data as ekd
15
+
16
+ from .legacy import legacy_source
17
+ from .xarray import load_many
18
+
19
+
20
@legacy_source(__file__)
def execute(context: Any, dates: List[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
    """Load NetCDF data by delegating to ``load_many``.

    Parameters
    ----------
    context : Any
        The execution context, forwarded unchanged to ``load_many``.
    dates : List[str]
        The dates for which data is requested.
    path : str
        Location of the NetCDF data, forwarded unchanged to ``load_many``.
    *args : Any
        Extra positional arguments forwarded to ``load_many``.
    **kwargs : Any
        Extra keyword arguments forwarded to ``load_many``.

    Returns
    -------
    ekd.FieldList
        Whatever ``load_many`` returns for this request.
    """
    # First argument of load_many; presumably the marker shown in trace output.
    emoji = "📁"
    loaded = load_many(emoji, context, dates, path, *args, **kwargs)
    return loaded
@@ -0,0 +1,43 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ from typing import Any
12
+ from typing import Dict
13
+ from typing import List
14
+
15
+ import earthkit.data as ekd
16
+
17
+ from .legacy import legacy_source
18
+ from .xarray import load_many
19
+
20
+
21
@legacy_source(__file__)
def execute(context: Dict[str, Any], dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
    """Load data from an OpenDAP source by delegating to ``load_many``.

    Parameters
    ----------
    context : Dict[str, Any]
        The execution context, forwarded unchanged to ``load_many``.
    dates : List[str]
        The dates for which data is requested.
    url : str
        The URL of the OpenDAP source, forwarded unchanged to ``load_many``.
    *args : Any
        Extra positional arguments forwarded to ``load_many``.
    **kwargs : Any
        Extra keyword arguments forwarded to ``load_many``.

    Returns
    -------
    ekd.FieldList
        Whatever ``load_many`` returns for this request.
    """
    # First argument of load_many; presumably the marker shown in trace output.
    emoji = "🌐"
    loaded = load_many(emoji, context, dates, url, *args, **kwargs)
    return loaded
@@ -7,16 +7,29 @@
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
+ import datetime
10
11
  import glob
11
- import logging
12
+ from typing import Any
13
+ from typing import Generator
14
+ from typing import List
15
+ from typing import Tuple
12
16
 
13
17
  from earthkit.data.utils.patterns import Pattern
14
18
 
15
- LOG = logging.getLogger(__name__)
16
19
 
20
+ def _expand(paths: List[str]) -> Generator[str, None, None]:
21
+ """Expand the given paths to include all matching file paths.
17
22
 
18
- def _expand(paths):
23
+ Parameters
24
+ ----------
25
+ paths : List[str]
26
+ List of paths to expand.
19
27
 
28
+ Returns
29
+ -------
30
+ Generator[str]
31
+ Expanded file paths.
32
+ """
20
33
  if not isinstance(paths, list):
21
34
  paths = [paths]
22
35
 
@@ -40,7 +53,25 @@ def _expand(paths):
40
53
  yield path
41
54
 
42
55
 
43
- def iterate_patterns(path, dates, **kwargs):
56
+ def iterate_patterns(
57
+ path: str, dates: List[datetime.datetime], **kwargs: Any
58
+ ) -> Generator[Tuple[str, List[str]], None, None]:
59
+ """Iterate over patterns and expand them with given dates and additional keyword arguments.
60
+
61
+ Parameters
62
+ ----------
63
+ path : str
64
+ The pattern path to iterate over.
65
+ dates : List[datetime.datetime]
66
+ List of datetime objects to substitute in the pattern.
67
+ **kwargs : Any
68
+ Additional keyword arguments to substitute in the pattern.
69
+
70
+ Returns
71
+ -------
72
+ Generator[Tuple[str, List[str]]]
73
+ The expanded path and list of ISO formatted dates.
74
+ """
44
75
  given_paths = path if isinstance(path, list) else [path]
45
76
 
46
77
  dates = [d.isoformat() for d in dates]
@@ -0,0 +1,150 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ from copy import deepcopy
11
+ from typing import Any
12
+ from typing import Dict
13
+ from typing import List
14
+ from typing import Union
15
+
16
+ from anemoi.datasets.compute.recentre import recentre as _recentre
17
+
18
+ from .legacy import legacy_source
19
+ from .mars import mars
20
+
21
+
22
def to_list(x: Any) -> List:
    """Convert the input to a list.

    Parameters
    ----------
    x : Any
        The input to convert. A list is returned as-is, a tuple is copied
        into a new list, a string is split on ``'/'`` and any other value
        is wrapped in a single-element list.

    Returns
    -------
    list
        The converted list.
    """
    if isinstance(x, list):
        return x
    if isinstance(x, tuple):
        # Honour the declared return type: callers always receive a list.
        return list(x)
    if isinstance(x, str):
        return x.split("/")
    return [x]
40
+
41
+
42
def normalise_number(number: Union[list, tuple, str]) -> List[int]:
    """Expand a ``number`` specification, resolving ``to``/``by`` ranges.

    Parameters
    ----------
    number : Union[list, tuple, str]
        The number specification. It is first passed through ``to_list``.

    Returns
    -------
    list
        For ``<a>/to/<b>/by/<s>`` the integers from ``a`` to ``b`` inclusive
        in steps of ``s``; for ``<a>/to/<b>`` the integers from ``a`` to
        ``b`` inclusive. Any other input is returned as produced by
        ``to_list``, without conversion of its items.
    """
    items = to_list(number)
    count = len(items)

    # Five or more items with "to" and "by" in place: explicit stride.
    if count > 4 and items[1] == "to" and items[3] == "by":
        first, last, stride = int(items[0]), int(items[2]), int(items[4])
        return list(range(first, last + 1, stride))

    # Three or more items with "to" in place: stride of one.
    if count > 2 and items[1] == "to":
        first, last = int(items[0]), int(items[2])
        return list(range(first, last + 1))

    return items
64
+
65
+
66
def normalise_request(request: Dict) -> Dict:
    """Return a normalised deep copy of a request dictionary.

    Parameters
    ----------
    request : dict
        The request dictionary to normalise. It is not modified.

    Returns
    -------
    dict
        A deep copy of ``request`` in which ``number`` (if present) has been
        passed through ``normalise_number``, and ``time`` (if present) and
        ``param`` have been passed through ``to_list``.

    Raises
    ------
    KeyError
        If ``request`` has no ``param`` entry.
    """
    normalised = deepcopy(request)

    # Optional keys are converted only when the request provides them.
    optional_converters = (("number", normalise_number), ("time", to_list))
    for key, convert in optional_converters:
        if key in normalised:
            normalised[key] = convert(normalised[key])

    # "param" is accessed unconditionally.
    normalised["param"] = to_list(normalised["param"])
    return normalised
86
+
87
+
88
def load_if_needed(context: Any, dates: Any, dict_or_dataset: Union[Dict, Any]) -> Any:
    """Resolve a request dictionary into data, leaving other inputs untouched.

    Parameters
    ----------
    context : Any
        The context forwarded to ``mars``.
    dates : Any
        The dates forwarded to ``mars``.
    dict_or_dataset : Union[dict, Any]
        Either a request dictionary or an already loaded object.

    Returns
    -------
    Any
        The result of ``mars`` called with the normalised request when a
        dictionary is given; otherwise the input itself.
    """
    if not isinstance(dict_or_dataset, dict):
        # Anything that is not a request dictionary is returned unchanged.
        return dict_or_dataset

    request = normalise_request(dict_or_dataset)
    return mars(context, dates, request)
109
+
110
+
111
@legacy_source(__file__)
def recentre(
    context: Any,
    dates: Any,
    members: Union[Dict, Any],
    centre: Union[Dict, Any],
    alpha: float = 1.0,
    remapping: Dict = {},
    patches: Dict = {},
) -> Any:
    """Recentre the members dataset using the centre dataset.

    Parameters
    ----------
    context : Any
        The context, used when ``members`` or ``centre`` must be loaded.
    dates : Any
        The dates, used when ``members`` or ``centre`` must be loaded.
    members : Union[dict, Any]
        The members dataset, or a request dictionary describing it.
    centre : Union[dict, Any]
        The centre dataset, or a request dictionary describing it.
    alpha : float, optional
        The alpha value forwarded to the recentring computation.
        Defaults to 1.0.
    remapping : dict, optional
        Accepted but not read by this function. Defaults to {}.
    patches : dict, optional
        Accepted but not read by this function. Defaults to {}.

    Returns
    -------
    Any
        The result of the recentring computation.
    """
    # Request dictionaries are resolved first; members are loaded before the centre.
    loaded_members = load_if_needed(context, dates, members)
    loaded_centre = load_if_needed(context, dates, centre)

    recentred = _recentre(members=loaded_members, centre=loaded_centre, alpha=alpha)
    return recentred


execute = recentre
@@ -7,14 +7,37 @@
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
+ from datetime import datetime
11
+ from typing import Any
12
+ from typing import Dict
13
+ from typing import List
14
+ from typing import Optional
15
+
10
16
  from earthkit.data import from_source
11
17
 
12
18
  from anemoi.datasets.create.utils import to_datetime_list
13
19
 
14
- DEBUG = True
20
+ from .legacy import legacy_source
21
+
15
22
 
23
+ @legacy_source(__file__)
24
+ def source(context: Optional[Any], dates: List[datetime], **kwargs: Any) -> Any:
25
+ """Generates a source based on the provided context, dates, and additional keyword arguments.
16
26
 
17
- def source(context, dates, **kwargs):
27
+ Parameters
28
+ ----------
29
+ context : Optional[Any]
30
+ The context in which the source is generated.
31
+ dates : List[datetime]
32
+ A list of datetime objects representing the dates.
33
+ **kwargs : Any
34
+ Additional keyword arguments for the source generation.
35
+
36
+ Returns
37
+ -------
38
+ Any
39
+ The generated source.
40
+ """
18
41
  name = kwargs.pop("name")
19
42
  context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
20
43
  if kwargs["date"] == "$from_dates":
@@ -29,7 +52,7 @@ execute = source
29
52
  if __name__ == "__main__":
30
53
  import yaml
31
54
 
32
- config = yaml.safe_load(
55
+ config: Dict[str, Any] = yaml.safe_load(
33
56
  """
34
57
  name: mars
35
58
  class: ea
@@ -42,9 +65,8 @@ if __name__ == "__main__":
42
65
  time: $from_dates
43
66
  """
44
67
  )
45
- dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
68
+ dates: List[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
46
69
  dates = to_datetime_list(dates)
47
70
 
48
- DEBUG = True
49
71
  for f in source(None, dates, **config):
50
72
  print(f, f.to_numpy().mean())
@@ -9,21 +9,50 @@
9
9
 
10
10
  import datetime
11
11
  from collections import defaultdict
12
+ from typing import Any
13
+ from typing import Dict
14
+ from typing import List
15
+ from typing import Tuple
12
16
 
13
17
  from earthkit.data.core.temporary import temp_file
14
18
  from earthkit.data.readers.grib.output import new_grib_output
15
19
 
16
- from anemoi.datasets.create.functions import assert_is_fieldlist
17
20
  from anemoi.datasets.create.utils import to_datetime_list
18
21
 
22
+ from .legacy import legacy_source
19
23
 
20
- def _date_to_datetime(d):
24
+
25
def _date_to_datetime(d: Any) -> Any:
    """Parse an ISO date string, or a nested list/tuple of them, into datetimes.

    Parameters
    ----------
    d : Any
        An ISO formatted date string, or a list/tuple whose items are
        themselves valid inputs to this function.

    Returns
    -------
    Any
        A ``datetime.datetime`` for a single string; a list (also for tuple
        input) of converted items otherwise.
    """
    if not isinstance(d, (list, tuple)):
        return datetime.datetime.fromisoformat(d)

    # Sequences are converted item by item and always come back as a list.
    return list(map(_date_to_datetime, d))
24
41
 
25
42
 
26
- def normalise_time_delta(t):
43
+ def normalise_time_delta(t: Any) -> datetime.timedelta:
44
+ """Normalizes a time delta string to a datetime.timedelta object.
45
+
46
+ Parameters
47
+ ----------
48
+ t : Any
49
+ A time delta string ending with 'h' or a datetime.timedelta object.
50
+
51
+ Returns
52
+ -------
53
+ datetime.timedelta
54
+ A normalized datetime.timedelta object.
55
+ """
27
56
  if isinstance(t, datetime.timedelta):
28
57
  assert t == datetime.timedelta(hours=t.hours), t
29
58
 
@@ -34,7 +63,19 @@ def normalise_time_delta(t):
34
63
  return t
35
64
 
36
65
 
37
- def group_by_field(ds):
66
+ def group_by_field(ds: Any) -> Dict[Tuple, List[Any]]:
67
+ """Groups fields by their metadata excluding 'date', 'time', and 'step'.
68
+
69
+ Parameters
70
+ ----------
71
+ ds : Any
72
+ A dataset object.
73
+
74
+ Returns
75
+ -------
76
+ Dict[Tuple, List[Any]]
77
+ A dictionary where keys are tuples of metadata items and values are lists of fields.
78
+ """
38
79
  d = defaultdict(list)
39
80
  for field in ds.order_by("valid_datetime"):
40
81
  m = field.metadata(namespace="mars")
@@ -45,7 +86,24 @@ def group_by_field(ds):
45
86
  return d
46
87
 
47
88
 
48
- def tendencies(dates, time_increment, **kwargs):
89
+ @legacy_source(__file__)
90
+ def tendencies(dates: List[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
91
+ """Computes tendencies for the given dates and time increment.
92
+
93
+ Parameters
94
+ ----------
95
+ dates : List[datetime.datetime]
96
+ A list of datetime objects.
97
+ time_increment : Any
98
+ A time increment string ending with 'h' or a datetime.timedelta object.
99
+ **kwargs : Any
100
+ Additional keyword arguments.
101
+
102
+ Returns
103
+ -------
104
+ Any
105
+ A dataset object with computed tendencies.
106
+ """
49
107
  print("✅", kwargs)
50
108
  time_increment = normalise_time_delta(time_increment)
51
109
 
@@ -53,7 +111,7 @@ def tendencies(dates, time_increment, **kwargs):
53
111
  all_dates = sorted(list(set(dates + shifted_dates)))
54
112
 
55
113
  # from .mars import execute as mars
56
- from anemoi.datasets.create.functions.mars import execute as mars
114
+ from anemoi.datasets.create.mars import execute as mars
57
115
 
58
116
  ds = mars(dates=all_dates, **kwargs)
59
117
 
@@ -107,7 +165,6 @@ def tendencies(dates, time_increment, **kwargs):
107
165
  from earthkit.data import from_source
108
166
 
109
167
  ds = from_source("file", path)
110
- assert_is_fieldlist(ds)
111
168
  # save a reference to the tmp file so it is deleted
112
169
  # only when the dataset is not used anymore
113
170
  ds._tmp = tmp
@@ -0,0 +1,92 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ from typing import Any
11
+ from typing import Dict
12
+ from typing import Optional
13
+
14
+ import earthkit.data as ekd
15
+
16
+ from anemoi.datasets.create.typing import DateList
17
+
18
+ from ..source import Source
19
+ from .xarray_support import XarrayFieldList
20
+ from .xarray_support import load_many
21
+ from .xarray_support import load_one
22
+
23
+ __all__ = ["load_many", "load_one", "XarrayFieldList"]
24
+
25
+
26
class XarraySourceBase(Source):
    """An Xarray base data source, intended to be subclassed."""

    emoji = "✖️"  # For tracing

    # Forwarded to load_many as its ``options``, ``flavour`` and ``patch`` keyword arguments.
    options: Optional[Dict[str, Any]] = None
    flavour: Optional[Dict[str, Any]] = None
    patch: Optional[Dict[str, Any]] = None

    # Forwarded to load_many as its ``pattern`` keyword argument.
    path_or_url: Optional[str] = None

    def __init__(
        self,
        context: Any,
        path: Optional[str] = None,
        url: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ):
        """Initialise the source.

        Parameters
        ----------
        context : Any
            The context for the data source.
        path : Optional[str]
            Location of the data as a path. Mutually exclusive with ``url``.
        url : Optional[str]
            Location of the data as a URL. Mutually exclusive with ``path``.
        *args : Any
            Additional positional arguments, passed to the parent
            initialiser and stored on the instance.
        **kwargs : Any
            Additional keyword arguments, passed to the parent initialiser
            and later forwarded to ``load_many``.

        Raises
        ------
        ValueError
            If both ``path`` and ``url`` are given.
        """
        super().__init__(context, *args, **kwargs)

        if path is not None and url is not None:
            raise ValueError("Cannot specify both path and url")

        # Both may be None here; the attribute then stays None unless the
        # subclass assigns it afterwards.
        self.path_or_url = path if path is not None else url

        self.args = args
        self.kwargs = kwargs

    def execute(self, dates: DateList) -> ekd.FieldList:
        """Execute the data loading process for the given dates.

        Parameters
        ----------
        dates : DateList
            List of dates for which data needs to be loaded.

        Returns
        -------
        ekd.FieldList
            The loaded data fields.
        """

        # For now, just a simple wrapper around load_many
        # TODO: move the implementation here

        return load_many(
            self.emoji,
            self.context,
            dates,
            pattern=self.path_or_url,
            options=self.options,
            flavour=self.flavour,
            patch=self.patch,
            **self.kwargs,
        )
89
+
90
+
91
class XarraySource(XarraySourceBase):
    """Concrete Xarray data source that adds nothing to ``XarraySourceBase``."""
@@ -0,0 +1,36 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ from . import source_registry
12
+ from .xarray import XarraySourceBase
13
+
14
+
15
@source_registry.register("xarray_kerchunk")
class XarrayKerchunkSource(XarraySourceBase):
    """An Xarray data source that uses the `kerchunk` engine."""

    emoji = "🧱"

    def __init__(self, context, json, *args, **kwargs: dict):
        """Initialise the source and prepare the options used for loading.

        Parameters
        ----------
        context : Any
            The context for the data source.
        json : Any
            Value stored as the ``fo`` storage option; presumably the
            kerchunk reference JSON (to be confirmed against callers).
        *args : Any
            Additional positional arguments passed to the parent initialiser.
        **kwargs : Any
            Additional keyword arguments passed to the parent initialiser.
        """
        # NOTE(review): ``**kwargs: dict`` types every keyword value as a dict;
        # ``Any`` was probably intended.
        super().__init__(context, *args, **kwargs)

        storage_options = {
            "fo": json,
            "remote_protocol": "s3",
            "remote_options": {"anon": True},
        }
        backend_kwargs = {
            "consolidated": False,
            "storage_options": storage_options,
        }

        # Set after the parent initialiser so these values replace whatever it assigned.
        self.path_or_url = "reference://"
        self.options = {"engine": "zarr", "backend_kwargs": backend_kwargs}
@@ -0,0 +1 @@
1
+ The code under this directory will be migrated to earthkit-data in the future.