anemoi-datasets 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +10 -7
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +1 -1
  132. anemoi/datasets/create/functions/__init__.py +0 -66
  133. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  134. anemoi/datasets/create/functions/filters/empty.py +0 -17
  135. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  136. anemoi/datasets/create/functions/filters/rename.py +0 -79
  137. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  138. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  139. anemoi/datasets/create/functions/sources/empty.py +0 -15
  140. anemoi/datasets/create/functions/sources/grib.py +0 -150
  141. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  142. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  143. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  144. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  145. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  146. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  147. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  148. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  149. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  150. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  151. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  152. anemoi/datasets/utils/fields.py +0 -47
  153. anemoi_datasets-0.5.15.dist-info/RECORD +0 -129
  154. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  155. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
@@ -16,6 +16,9 @@ import os
16
16
  import pickle
17
17
  import shutil
18
18
  import socket
19
+ from typing import Any
20
+ from typing import Iterator
21
+ from typing import Tuple
19
22
 
20
23
  import numpy as np
21
24
  from anemoi.utils.provenance import gather_provenance_info
@@ -24,30 +27,62 @@ LOG = logging.getLogger(__name__)
24
27
 
25
28
 
26
29
  class PersistentDict:
30
+ """A dictionary-like object that persists its contents to disk using pickle files.
31
+
32
+ Attributes
33
+ ----------
34
+ version : int
35
+ The version of the PersistentDict.
36
+ dirname : str
37
+ The directory where the data is stored.
38
+ name : str
39
+ The name of the directory.
40
+ ext : str
41
+ The extension of the directory.
42
+ """
43
+
27
44
  version = 3
28
45
 
29
46
  # Used in parallel, during data loading,
30
47
  # to write data in pickle files.
31
- def __init__(self, directory, create=True):
32
- """dirname: str The directory where the data will be stored."""
48
+ def __init__(self, directory: str, create: bool = True):
49
+ """Initialize the PersistentDict.
50
+
51
+ Parameters
52
+ ----------
53
+ directory : str
54
+ The directory where the data will be stored.
55
+ create : bool, optional
56
+ Whether to create the directory if it doesn't exist.
57
+ """
33
58
  self.dirname = directory
34
59
  self.name, self.ext = os.path.splitext(os.path.basename(self.dirname))
35
60
  if create:
36
61
  self.create()
37
62
 
38
- def create(self):
63
+ def create(self) -> None:
64
+ """Create the directory if it doesn't exist."""
39
65
  os.makedirs(self.dirname, exist_ok=True)
40
66
 
41
- def delete(self):
67
+ def delete(self) -> None:
68
+ """Delete the directory and its contents."""
42
69
  try:
43
70
  shutil.rmtree(self.dirname)
44
71
  except FileNotFoundError:
45
72
  pass
46
73
 
47
- def __str__(self):
74
+ def __str__(self) -> str:
75
+ """Return a string representation of the PersistentDict."""
48
76
  return f"{self.__class__.__name__}({self.dirname})"
49
77
 
50
- def items(self):
78
+ def items(self) -> Iterator[Any]:
79
+ """Yield items stored in the directory.
80
+
81
+ Yields
82
+ ------
83
+ Iterator[Any]
84
+ An iterator over the items.
85
+ """
51
86
  # use glob to read all pickles
52
87
  files = glob.glob(self.dirname + "/*.pickle")
53
88
  LOG.debug(f"Reading {self.name} data, found {len(files)} files in {self.dirname}")
@@ -56,7 +91,14 @@ class PersistentDict:
56
91
  with open(f, "rb") as f:
57
92
  yield pickle.load(f)
58
93
 
59
- def add_provenance(self, **kwargs):
94
+ def add_provenance(self, **kwargs: Any) -> None:
95
+ """Add provenance information to the directory.
96
+
97
+ Parameters
98
+ ----------
99
+ **kwargs : Any
100
+ Additional provenance information.
101
+ """
60
102
  path = os.path.join(self.dirname, "provenance.json")
61
103
  if os.path.exists(path):
62
104
  return
@@ -64,10 +106,28 @@ class PersistentDict:
64
106
  with open(path, "w") as f:
65
107
  json.dump(out, f)
66
108
 
67
- def add(self, elt, *, key):
109
+ def add(self, elt: Any, *, key: Any) -> None:
110
+ """Add an element to the PersistentDict.
111
+
112
+ Parameters
113
+ ----------
114
+ elt : Any
115
+ The element to add.
116
+ key : Any
117
+ The key associated with the element.
118
+ """
68
119
  self[key] = elt
69
120
 
70
- def __setitem__(self, key, elt):
121
+ def __setitem__(self, key: Any, elt: Any) -> None:
122
+ """Set an item in the PersistentDict.
123
+
124
+ Parameters
125
+ ----------
126
+ key : Any
127
+ The key associated with the element.
128
+ elt : Any
129
+ The element to set.
130
+ """
71
131
  h = hashlib.sha256(str(key).encode("utf-8")).hexdigest()
72
132
  path = os.path.join(self.dirname, f"{h}.pickle")
73
133
 
@@ -81,42 +141,99 @@ class PersistentDict:
81
141
 
82
142
  LOG.debug(f"Written {self.name} data for len {key} in {path}")
83
143
 
84
- def flush(self):
144
+ def flush(self) -> None:
145
+ """Flush the PersistentDict (no-op)."""
85
146
  pass
86
147
 
87
148
 
88
149
  class BufferedPersistentDict(PersistentDict):
89
- def __init__(self, buffer_size=1000, **kwargs):
150
+ """A buffered version of PersistentDict that stores elements in memory before persisting them to disk.
151
+
152
+ Attributes
153
+ ----------
154
+ buffer_size : int
155
+ The size of the buffer.
156
+ elements : list
157
+ The list of elements in the buffer.
158
+ keys : list
159
+ The list of keys in the buffer.
160
+ storage : PersistentDict
161
+ The underlying PersistentDict used for storage.
162
+ """
163
+
164
+ def __init__(self, buffer_size: int = 1000, **kwargs: Any):
165
+ """Initialize the BufferedPersistentDict.
166
+
167
+ Parameters
168
+ ----------
169
+ buffer_size : int, optional
170
+ The size of the buffer.
171
+ **kwargs : Any
172
+ Additional arguments for PersistentDict.
173
+ """
90
174
  self.buffer_size = buffer_size
91
175
  self.elements = []
92
176
  self.keys = []
93
177
  self.storage = PersistentDict(**kwargs)
94
178
 
95
- def add(self, elt, *, key):
179
+ def add(self, elt: Any, *, key: Any) -> None:
180
+ """Add an element to the BufferedPersistentDict.
181
+
182
+ Parameters
183
+ ----------
184
+ elt : Any
185
+ The element to add.
186
+ key : Any
187
+ The key associated with the element.
188
+ """
96
189
  self.elements.append(elt)
97
190
  self.keys.append(key)
98
191
  if len(self.keys) > self.buffer_size:
99
192
  self.flush()
100
193
 
101
- def flush(self):
194
+ def flush(self) -> None:
195
+ """Flush the buffer and store the elements in PersistentDict."""
102
196
  k = sorted(self.keys)
103
197
  self.storage.add(self.elements, key=k)
104
198
  self.elements = []
105
199
  self.keys = []
106
200
 
107
- def items(self):
201
+ def items(self) -> Iterator[Tuple[Any, Any]]:
202
+ """Yield items stored in the BufferedPersistentDict.
203
+
204
+ Yields
205
+ ------
206
+ Iterator[Tuple[Any, Any]]
207
+ An iterator over the items.
208
+ """
108
209
  for keys, elements in self.storage.items():
109
210
  for key, elt in zip(keys, elements):
110
211
  yield key, elt
111
212
 
112
- def delete(self):
213
+ def delete(self) -> None:
214
+ """Delete the storage directory and its contents."""
113
215
  self.storage.delete()
114
216
 
115
- def create(self):
217
+ def create(self) -> None:
218
+ """Create the storage directory if it doesn't exist."""
116
219
  self.storage.create()
117
220
 
118
221
 
119
- def build_storage(directory, create=True):
222
+ def build_storage(directory: str, create: bool = True) -> BufferedPersistentDict:
223
+ """Build a BufferedPersistentDict storage.
224
+
225
+ Parameters
226
+ ----------
227
+ directory : str
228
+ The directory where the data will be stored.
229
+ create : bool, optional
230
+ Whether to create the directory if it doesn't exist.
231
+
232
+ Returns
233
+ -------
234
+ BufferedPersistentDict
235
+ The created BufferedPersistentDict.
236
+ """
120
237
  return BufferedPersistentDict(directory=directory, create=create)
121
238
 
122
239
 
@@ -10,6 +10,8 @@
10
10
 
11
11
  import logging
12
12
  import os
13
+ from typing import Dict
14
+ from typing import Optional
13
15
 
14
16
  import tqdm
15
17
  from anemoi.utils.humanize import bytes_to_human
@@ -17,7 +19,19 @@ from anemoi.utils.humanize import bytes_to_human
17
19
  LOG = logging.getLogger(__name__)
18
20
 
19
21
 
20
- def compute_directory_sizes(path):
22
+ def compute_directory_sizes(path: str) -> Optional[Dict[str, int]]:
23
+ """Computes the total size and number of files in a directory.
24
+
25
+ Parameters
26
+ ----------
27
+ path : str
28
+ The path to the directory.
29
+
30
+ Returns
31
+ -------
32
+ dict of str to int or None
33
+ A dictionary with the total size and number of files, or None if the path is not a directory.
34
+ """
21
35
  if not os.path.isdir(path):
22
36
  return None
23
37
 
@@ -0,0 +1,51 @@
1
+ # (C) Copyright 2025- Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ from abc import ABC
11
+ from abc import abstractmethod
12
+
13
+ import earthkit.data as ekd
14
+
15
+ from anemoi.datasets.create.typing import DateList
16
+
17
+
18
+ class Source(ABC):
19
+ """Represents a data source with a given context."""
20
+
21
+ emoji = "📦" # For tracing
22
+
23
+ def __init__(self, context: any, *args: tuple, **kwargs: dict):
24
+ """Initialise the source.
25
+ Parameters
26
+ ----------
27
+ context : Any
28
+ The context for the data source.
29
+ *args : tuple
30
+ Additional positional arguments.
31
+ **kwargs : dict
32
+ Additional keyword arguments.
33
+ """
34
+ self.context = context
35
+
36
+ @abstractmethod
37
+ def execute(self, dates: DateList) -> ekd.FieldList:
38
+ """Execute the source.
39
+
40
+ Parameters
41
+ ----------
42
+ dates : DateList
43
+ The input dates.
44
+
45
+ Returns
46
+ -------
47
+ ekd.FieldList
48
+ The output data.
49
+ """
50
+
51
+ pass
@@ -0,0 +1,36 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ import logging
11
+ from typing import Any
12
+
13
+ from anemoi.utils.registry import Registry
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+
18
+ source_registry = Registry(__name__)
19
+
20
+
21
+ def create_source(context: Any, config: Any) -> Any:
22
+ """Create a source based on the provided configuration.
23
+
24
+ Parameters
25
+ ----------
26
+ context : Any
27
+ The context in which the source is created.
28
+ config : Any
29
+ The configuration for the source.
30
+
31
+ Returns
32
+ -------
33
+ Any
34
+ The created source.
35
+ """
36
+ return source_registry.from_config(config, context)