anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/grib-index.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +22 -25
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +3 -6
- anemoi/datasets/create/filter.py +21 -24
- anemoi/datasets/create/input/__init__.py +1 -2
- anemoi/datasets/create/input/action.py +3 -5
- anemoi/datasets/create/input/concat.py +5 -8
- anemoi/datasets/create/input/context.py +3 -6
- anemoi/datasets/create/input/data_sources.py +5 -8
- anemoi/datasets/create/input/empty.py +1 -2
- anemoi/datasets/create/input/filter.py +2 -3
- anemoi/datasets/create/input/function.py +1 -2
- anemoi/datasets/create/input/join.py +4 -5
- anemoi/datasets/create/input/misc.py +4 -6
- anemoi/datasets/create/input/repeated_dates.py +13 -18
- anemoi/datasets/create/input/result.py +29 -33
- anemoi/datasets/create/input/step.py +6 -24
- anemoi/datasets/create/input/template.py +3 -4
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +47 -52
- anemoi/datasets/create/sources/accumulations2.py +4 -8
- anemoi/datasets/create/sources/constants.py +1 -3
- anemoi/datasets/create/sources/empty.py +1 -2
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +1 -2
- anemoi/datasets/create/sources/grib.py +6 -10
- anemoi/datasets/create/sources/grib_index.py +13 -15
- anemoi/datasets/create/sources/hindcasts.py +2 -5
- anemoi/datasets/create/sources/legacy.py +1 -1
- anemoi/datasets/create/sources/mars.py +17 -21
- anemoi/datasets/create/sources/netcdf.py +1 -2
- anemoi/datasets/create/sources/opendap.py +1 -3
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/planetary_computer.py +44 -0
- anemoi/datasets/create/sources/recentre.py +8 -11
- anemoi/datasets/create/sources/source.py +3 -6
- anemoi/datasets/create/sources/tendencies.py +2 -5
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
- anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
- anemoi/datasets/create/sources/xarray_support/field.py +17 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
- anemoi/datasets/create/sources/xarray_zarr.py +1 -2
- anemoi/datasets/create/sources/zenodo.py +3 -5
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +2 -74
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +1 -2
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +52 -23
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +42 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +30 -28
- anemoi/datasets/data/grids.py +12 -16
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +23 -77
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +13 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/grids.py +11 -12
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
- anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
- anemoi/datasets/create/filters/__init__.py +0 -33
- anemoi/datasets/create/filters/empty.py +0 -37
- anemoi/datasets/create/filters/legacy.py +0 -93
- anemoi/datasets/create/filters/noop.py +0 -37
- anemoi/datasets/create/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
- anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
- anemoi/datasets/create/filters/rename.py +0 -205
- anemoi/datasets/create/filters/rotate_winds.py +0 -105
- anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
- anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
- anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
- anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
- anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
- anemoi/datasets/create/filters/sum.py +0 -68
- anemoi/datasets/create/filters/transform.py +0 -51
- anemoi/datasets/create/filters/unrotate_winds.py +0 -105
- anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
- anemoi/datasets/create/filters/wz_to_w.py +0 -98
- anemoi/datasets/utils/__init__.py +0 -8
- anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
anemoi/datasets/__init__.py
CHANGED
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
-
from typing import List
|
|
11
10
|
|
|
12
11
|
from .data import MissingDateError
|
|
13
12
|
from .data import add_dataset_path
|
|
@@ -23,7 +22,7 @@ except ImportError: # pragma: no cover
|
|
|
23
22
|
# Local copy or not installed with setuptools
|
|
24
23
|
__version__ = "999"
|
|
25
24
|
|
|
26
|
-
__all__: List[str] = [
|
|
25
|
+
__all__: list[str] = [
|
|
27
26
|
"add_dataset_path",
|
|
28
27
|
"add_named_dataset",
|
|
29
28
|
"list_dataset_names",
|
anemoi/datasets/_version.py
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '0.5.25'
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 5, 25)
|
|
31
|
+
__version__ = version = '0.5.27'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 27)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
anemoi/datasets/commands/check.py
CHANGED
|
@@ -77,7 +77,7 @@ class Check(Command):
|
|
|
77
77
|
|
|
78
78
|
recipe_filename = os.path.basename(recipe)
|
|
79
79
|
recipe_name = os.path.splitext(recipe_filename)[0]
|
|
80
|
-
in_recipe_name = yaml.safe_load(open(recipe,
|
|
80
|
+
in_recipe_name = yaml.safe_load(open(recipe, encoding="utf-8"))["name"]
|
|
81
81
|
if recipe_name != in_recipe_name:
|
|
82
82
|
print(f"Recipe name {recipe_name} does not match the name in the recipe file {in_recipe_name}")
|
|
83
83
|
|
anemoi/datasets/commands/copy.py
CHANGED
|
@@ -14,7 +14,6 @@ import sys
|
|
|
14
14
|
from concurrent.futures import ThreadPoolExecutor
|
|
15
15
|
from concurrent.futures import as_completed
|
|
16
16
|
from typing import Any
|
|
17
|
-
from typing import Optional
|
|
18
17
|
|
|
19
18
|
import tqdm
|
|
20
19
|
from anemoi.utils.remote import Transfer
|
|
@@ -136,7 +135,7 @@ class ZarrCopier:
|
|
|
136
135
|
return zarr.storage.NestedDirectoryStore(path)
|
|
137
136
|
return path
|
|
138
137
|
|
|
139
|
-
def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) ->
|
|
138
|
+
def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) -> slice | None:
|
|
140
139
|
"""Copy a chunk of data from source to target.
|
|
141
140
|
|
|
142
141
|
Parameters
|
|
anemoi/datasets/commands/grib-index.py
CHANGED
|
@@ -81,7 +81,7 @@ class GribIndexCmd(Command):
|
|
|
81
81
|
bool
|
|
82
82
|
True if the path matches, False otherwise.
|
|
83
83
|
"""
|
|
84
|
-
return fnmatch.fnmatch(path, args.match)
|
|
84
|
+
return fnmatch.fnmatch(os.path.basename(path), args.match)
|
|
85
85
|
|
|
86
86
|
from anemoi.datasets.create.sources.grib_index import GribIndex
|
|
87
87
|
|
|
anemoi/datasets/commands/inspect.py
CHANGED
|
@@ -14,10 +14,6 @@ import os
|
|
|
14
14
|
from copy import deepcopy
|
|
15
15
|
from functools import cached_property
|
|
16
16
|
from typing import Any
|
|
17
|
-
from typing import Dict
|
|
18
|
-
from typing import List
|
|
19
|
-
from typing import Optional
|
|
20
|
-
from typing import Union
|
|
21
17
|
|
|
22
18
|
import numpy as np
|
|
23
19
|
import semantic_version
|
|
@@ -39,7 +35,7 @@ from . import Command
|
|
|
39
35
|
LOG = logging.getLogger(__name__)
|
|
40
36
|
|
|
41
37
|
|
|
42
|
-
def compute_directory_size(path: str) ->
|
|
38
|
+
def compute_directory_size(path: str) -> tuple[int, int] | tuple[None, None]:
|
|
43
39
|
"""Compute the total size and number of files in a directory.
|
|
44
40
|
|
|
45
41
|
Parameters
|
|
@@ -104,7 +100,7 @@ def cos_local_time_bug(lon: float, date: datetime.datetime) -> float:
|
|
|
104
100
|
return np.cos(radians)
|
|
105
101
|
|
|
106
102
|
|
|
107
|
-
def find(config:
|
|
103
|
+
def find(config: dict | list, name: str) -> Any:
|
|
108
104
|
"""Recursively search for a key in a nested dictionary or list.
|
|
109
105
|
|
|
110
106
|
Parameters
|
|
@@ -167,7 +163,7 @@ class Version:
|
|
|
167
163
|
print(f"🔢 Format version: {self.version}")
|
|
168
164
|
|
|
169
165
|
@property
|
|
170
|
-
def name_to_index(self) ->
|
|
166
|
+
def name_to_index(self) -> dict[str, int]:
|
|
171
167
|
"""Get a mapping of variable names to their indices."""
|
|
172
168
|
return find(self.metadata, "name_to_index")
|
|
173
169
|
|
|
@@ -208,30 +204,30 @@ class Version:
|
|
|
208
204
|
return self.metadata["resolution"]
|
|
209
205
|
|
|
210
206
|
@property
|
|
211
|
-
def field_shape(self) ->
|
|
207
|
+
def field_shape(self) -> tuple | None:
|
|
212
208
|
"""Get the field shape of the dataset."""
|
|
213
209
|
return self.metadata.get("field_shape")
|
|
214
210
|
|
|
215
211
|
@property
|
|
216
|
-
def proj_string(self) ->
|
|
212
|
+
def proj_string(self) -> str | None:
|
|
217
213
|
"""Get the projection string of the dataset."""
|
|
218
214
|
return self.metadata.get("proj_string")
|
|
219
215
|
|
|
220
216
|
@property
|
|
221
|
-
def shape(self) ->
|
|
217
|
+
def shape(self) -> tuple | None:
|
|
222
218
|
"""Get the shape of the dataset."""
|
|
223
219
|
if self.data and hasattr(self.data, "shape"):
|
|
224
220
|
return self.data.shape
|
|
225
221
|
|
|
226
222
|
@property
|
|
227
|
-
def n_missing_dates(self) ->
|
|
223
|
+
def n_missing_dates(self) -> int | None:
|
|
228
224
|
"""Get the number of missing dates in the dataset."""
|
|
229
225
|
if "missing_dates" in self.metadata:
|
|
230
226
|
return len(self.metadata["missing_dates"])
|
|
231
227
|
return None
|
|
232
228
|
|
|
233
229
|
@property
|
|
234
|
-
def uncompressed_data_size(self) ->
|
|
230
|
+
def uncompressed_data_size(self) -> int | None:
|
|
235
231
|
"""Get the uncompressed data size of the dataset."""
|
|
236
232
|
if self.data and hasattr(self.data, "dtype") and hasattr(self.data, "size"):
|
|
237
233
|
return self.data.dtype.itemsize * self.data.size
|
|
@@ -258,7 +254,7 @@ class Version:
|
|
|
258
254
|
print()
|
|
259
255
|
shape_str = "📐 Shape : "
|
|
260
256
|
if self.shape:
|
|
261
|
-
shape_str += " × ".join(["{:,}"
|
|
257
|
+
shape_str += " × ".join([f"{s:,}" for s in self.shape])
|
|
262
258
|
if self.uncompressed_data_size:
|
|
263
259
|
shape_str += f" ({bytes(self.uncompressed_data_size)})"
|
|
264
260
|
print(shape_str)
|
|
@@ -293,17 +289,17 @@ class Version:
|
|
|
293
289
|
print()
|
|
294
290
|
|
|
295
291
|
@property
|
|
296
|
-
def variables(self) ->
|
|
292
|
+
def variables(self) -> list[str]:
|
|
297
293
|
"""Get the list of variables in the dataset."""
|
|
298
294
|
return [v[0] for v in sorted(self.name_to_index.items(), key=lambda x: x[1])]
|
|
299
295
|
|
|
300
296
|
@property
|
|
301
|
-
def total_size(self) ->
|
|
297
|
+
def total_size(self) -> int | None:
|
|
302
298
|
"""Get the total size of the dataset."""
|
|
303
299
|
return self.zarr.attrs.get("total_size")
|
|
304
300
|
|
|
305
301
|
@property
|
|
306
|
-
def total_number_of_files(self) ->
|
|
302
|
+
def total_number_of_files(self) -> int | None:
|
|
307
303
|
"""Get the total number of files in the dataset."""
|
|
308
304
|
return self.zarr.attrs.get("total_number_of_files")
|
|
309
305
|
|
|
@@ -348,7 +344,7 @@ class Version:
|
|
|
348
344
|
return False
|
|
349
345
|
|
|
350
346
|
@property
|
|
351
|
-
def statistics_started(self) ->
|
|
347
|
+
def statistics_started(self) -> datetime.datetime | None:
|
|
352
348
|
"""Get the timestamp when statistics computation started."""
|
|
353
349
|
for d in reversed(self.metadata.get("history", [])):
|
|
354
350
|
if d["action"] == "compute_statistics_start":
|
|
@@ -356,12 +352,12 @@ class Version:
|
|
|
356
352
|
return None
|
|
357
353
|
|
|
358
354
|
@property
|
|
359
|
-
def build_flags(self) ->
|
|
355
|
+
def build_flags(self) -> NDArray[Any] | None:
|
|
360
356
|
"""Get the build flags of the dataset."""
|
|
361
357
|
return self.zarr.get("_build_flags")
|
|
362
358
|
|
|
363
359
|
@cached_property
|
|
364
|
-
def copy_flags(self) ->
|
|
360
|
+
def copy_flags(self) -> NDArray[Any] | None:
|
|
365
361
|
"""Get the copy flags of the dataset."""
|
|
366
362
|
if "_copy" not in self.zarr:
|
|
367
363
|
return None
|
|
@@ -381,7 +377,7 @@ class Version:
|
|
|
381
377
|
return not all(self.copy_flags)
|
|
382
378
|
|
|
383
379
|
@property
|
|
384
|
-
def build_lengths(self) ->
|
|
380
|
+
def build_lengths(self) -> NDArray | None:
|
|
385
381
|
"""Get the build lengths of the dataset."""
|
|
386
382
|
return self.zarr.get("_build_lengths")
|
|
387
383
|
|
|
@@ -396,17 +392,13 @@ class Version:
|
|
|
396
392
|
print(
|
|
397
393
|
"📈 Progress:",
|
|
398
394
|
progress(built, total, width=50),
|
|
399
|
-
"{
|
|
395
|
+
f"{built / total * 100:.0f}%",
|
|
400
396
|
)
|
|
401
397
|
return
|
|
402
398
|
|
|
403
|
-
|
|
404
|
-
print("🪫 Dataset not initialised")
|
|
405
|
-
return
|
|
406
|
-
|
|
407
|
-
build_flags = self.build_flags
|
|
399
|
+
build_flags = self.build_flags or np.array([], dtype=bool)
|
|
408
400
|
|
|
409
|
-
build_lengths = self.build_lengths
|
|
401
|
+
build_lengths = self.build_lengths or np.array([], dtype=bool)
|
|
410
402
|
assert build_flags.size == build_lengths.size
|
|
411
403
|
|
|
412
404
|
latest_write_timestamp = self.zarr.attrs.get("latest_write_timestamp")
|
|
@@ -422,7 +414,7 @@ class Version:
|
|
|
422
414
|
print(
|
|
423
415
|
"📈 Progress:",
|
|
424
416
|
progress(built, total, width=50),
|
|
425
|
-
"{
|
|
417
|
+
f"{built / total * 100:.0f}%",
|
|
426
418
|
)
|
|
427
419
|
start = self.initialised
|
|
428
420
|
if self.initialised:
|
|
@@ -623,7 +615,7 @@ class Version0_6(Version):
|
|
|
623
615
|
"""Represents version 0.6 of a dataset."""
|
|
624
616
|
|
|
625
617
|
@property
|
|
626
|
-
def initialised(self) ->
|
|
618
|
+
def initialised(self) -> datetime.datetime | None:
|
|
627
619
|
"""Get the initialization timestamp of the dataset."""
|
|
628
620
|
for record in self.metadata.get("history", []):
|
|
629
621
|
if record["action"] == "initialised":
|
|
@@ -659,12 +651,12 @@ class Version0_6(Version):
|
|
|
659
651
|
return all(build_flags)
|
|
660
652
|
|
|
661
653
|
@property
|
|
662
|
-
def name_to_index(self) ->
|
|
654
|
+
def name_to_index(self) -> dict[str, int]:
|
|
663
655
|
"""Get a mapping of variable names to their indices."""
|
|
664
656
|
return {n: i for i, n in enumerate(self.metadata["variables"])}
|
|
665
657
|
|
|
666
658
|
@property
|
|
667
|
-
def variables(self) ->
|
|
659
|
+
def variables(self) -> list[str]:
|
|
668
660
|
"""Get the list of variables in the dataset."""
|
|
669
661
|
return self.metadata["variables"]
|
|
670
662
|
|
|
@@ -706,7 +698,7 @@ class Version0_13(Version0_12):
|
|
|
706
698
|
"""Represents version 0.13 of a dataset."""
|
|
707
699
|
|
|
708
700
|
@property
|
|
709
|
-
def build_flags(self) ->
|
|
701
|
+
def build_flags(self) -> NDArray | None:
|
|
710
702
|
"""Get the build flags for the dataset."""
|
|
711
703
|
if "_build" not in self.zarr:
|
|
712
704
|
return None
|
|
@@ -714,7 +706,7 @@ class Version0_13(Version0_12):
|
|
|
714
706
|
return build.get("flags")
|
|
715
707
|
|
|
716
708
|
@property
|
|
717
|
-
def build_lengths(self) ->
|
|
709
|
+
def build_lengths(self) -> NDArray | None:
|
|
718
710
|
"""Get the build lengths for the dataset."""
|
|
719
711
|
if "_build" not in self.zarr:
|
|
720
712
|
return None
|
|
@@ -792,10 +784,10 @@ class InspectZarr(Command):
|
|
|
792
784
|
|
|
793
785
|
try:
|
|
794
786
|
if progress:
|
|
795
|
-
|
|
787
|
+
version.progress()
|
|
796
788
|
|
|
797
789
|
if statistics:
|
|
798
|
-
|
|
790
|
+
version.brute_force_statistics()
|
|
799
791
|
|
|
800
792
|
version.info(detailed, size)
|
|
801
793
|
|
|
anemoi/datasets/commands/validate.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# (C) Copyright 2025 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
import importlib
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from anemoi.datasets.validate import validate_dataset
|
|
14
|
+
|
|
15
|
+
from . import Command
|
|
16
|
+
|
|
17
|
+
LOG = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
DEFAULT_DATASET = "aifs-ea-an-oper-0001-mars-o96-1979-2023-6h-v8"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Validate(Command):
|
|
23
|
+
"""Command to validate an anemoi dataset."""
|
|
24
|
+
|
|
25
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
26
|
+
"""Add arguments to the command parser.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
command_parser : Any
|
|
31
|
+
The command parser.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
command_parser.add_argument("--callable", metavar="DATASET", default="anemoi.datasets.open_dataset")
|
|
35
|
+
command_parser.add_argument("--costly-checks", action="store_true", help="Run costly checks")
|
|
36
|
+
command_parser.add_argument("--detailed", action="store_true", help="Give detailed report")
|
|
37
|
+
command_parser.add_argument("path", metavar="DATASET")
|
|
38
|
+
|
|
39
|
+
def run(self, args: Any) -> None:
|
|
40
|
+
"""Run the command.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
args : Any
|
|
45
|
+
The command arguments.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
module_path, func_name = args.callable.rsplit(".", 1)
|
|
49
|
+
module = importlib.import_module(module_path)
|
|
50
|
+
callable_func = getattr(module, func_name)
|
|
51
|
+
|
|
52
|
+
if args.path == "default":
|
|
53
|
+
args.path = DEFAULT_DATASET
|
|
54
|
+
|
|
55
|
+
dataset = callable_func(args.path)
|
|
56
|
+
validate_dataset(dataset, costly_checks=args.costly_checks, detailed=args.detailed)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
command = Validate
|
|
anemoi/datasets/compute/recentre.py
CHANGED
|
@@ -10,9 +10,6 @@
|
|
|
10
10
|
|
|
11
11
|
import logging
|
|
12
12
|
from typing import Any
|
|
13
|
-
from typing import Dict
|
|
14
|
-
from typing import Optional
|
|
15
|
-
from typing import Tuple
|
|
16
13
|
|
|
17
14
|
import numpy as np
|
|
18
15
|
from earthkit.data.core.temporary import temp_file
|
|
@@ -36,7 +33,7 @@ SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset"
|
|
|
36
33
|
|
|
37
34
|
|
|
38
35
|
def check_compatible(
|
|
39
|
-
f1: Any, f2: Any, centre_field_as_mars:
|
|
36
|
+
f1: Any, f2: Any, centre_field_as_mars: dict[str, Any], ensemble_field_as_mars: dict[str, Any]
|
|
40
37
|
) -> None:
|
|
41
38
|
"""Check if two fields are compatible.
|
|
42
39
|
|
|
@@ -75,9 +72,9 @@ def recentre(
|
|
|
75
72
|
*,
|
|
76
73
|
members: Any,
|
|
77
74
|
centre: Any,
|
|
78
|
-
clip_variables:
|
|
75
|
+
clip_variables: tuple[str, ...] = CLIP_VARIABLES,
|
|
79
76
|
alpha: float = 1.0,
|
|
80
|
-
output:
|
|
77
|
+
output: str | None = None,
|
|
81
78
|
) -> Any:
|
|
82
79
|
"""Recentre ensemble members around the centre field.
|
|
83
80
|
|
|
anemoi/datasets/create/__init__.py
CHANGED
|
@@ -16,8 +16,6 @@ import uuid
|
|
|
16
16
|
import warnings
|
|
17
17
|
from functools import cached_property
|
|
18
18
|
from typing import Any
|
|
19
|
-
from typing import Optional
|
|
20
|
-
from typing import Union
|
|
21
19
|
|
|
22
20
|
import cftime
|
|
23
21
|
import numpy as np
|
|
@@ -102,8 +100,8 @@ def json_tidy(o: Any) -> Any:
|
|
|
102
100
|
|
|
103
101
|
def build_statistics_dates(
|
|
104
102
|
dates: list[datetime.datetime],
|
|
105
|
-
start:
|
|
106
|
-
end:
|
|
103
|
+
start: datetime.datetime | None,
|
|
104
|
+
end: datetime.datetime | None,
|
|
107
105
|
) -> tuple[str, str]:
|
|
108
106
|
"""Compute the start and end dates for the statistics.
|
|
109
107
|
|
|
@@ -359,7 +357,7 @@ class Actor: # TODO: rename to Creator
|
|
|
359
357
|
|
|
360
358
|
dataset_class = WritableDataset
|
|
361
359
|
|
|
362
|
-
def __init__(self, path: str, cache:
|
|
360
|
+
def __init__(self, path: str, cache: str | None = None):
|
|
363
361
|
"""Initialize an Actor instance.
|
|
364
362
|
|
|
365
363
|
Parameters
|
|
@@ -577,10 +575,10 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
577
575
|
check_name: bool = False,
|
|
578
576
|
overwrite: bool = False,
|
|
579
577
|
use_threads: bool = False,
|
|
580
|
-
statistics_temp_dir:
|
|
578
|
+
statistics_temp_dir: str | None = None,
|
|
581
579
|
progress: Any = None,
|
|
582
580
|
test: bool = False,
|
|
583
|
-
cache:
|
|
581
|
+
cache: str | None = None,
|
|
584
582
|
**kwargs: Any,
|
|
585
583
|
):
|
|
586
584
|
"""Initialize an Init instance.
|
|
@@ -809,11 +807,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
809
807
|
def __init__(
|
|
810
808
|
self,
|
|
811
809
|
path: str,
|
|
812
|
-
parts:
|
|
810
|
+
parts: str | None = None,
|
|
813
811
|
use_threads: bool = False,
|
|
814
|
-
statistics_temp_dir:
|
|
812
|
+
statistics_temp_dir: str | None = None,
|
|
815
813
|
progress: Any = None,
|
|
816
|
-
cache:
|
|
814
|
+
cache: str | None = None,
|
|
817
815
|
**kwargs: Any,
|
|
818
816
|
):
|
|
819
817
|
"""Initialize a Load instance.
|
|
@@ -907,8 +905,8 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
907
905
|
print("Requested dates", compress_dates(dates))
|
|
908
906
|
print("Cube dates", compress_dates(dates_in_data))
|
|
909
907
|
|
|
910
|
-
a =
|
|
911
|
-
b =
|
|
908
|
+
a = {as_datetime(_) for _ in dates}
|
|
909
|
+
b = {as_datetime(_) for _ in dates_in_data}
|
|
912
910
|
|
|
913
911
|
print("Missing dates", compress_dates(a - b))
|
|
914
912
|
print("Extra dates", compress_dates(b - a))
|
|
@@ -958,7 +956,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
958
956
|
array.flush()
|
|
959
957
|
LOG.info("Flushed data array")
|
|
960
958
|
|
|
961
|
-
def _get_allow_nans(self) ->
|
|
959
|
+
def _get_allow_nans(self) -> bool | list:
|
|
962
960
|
"""Get the allow_nans configuration.
|
|
963
961
|
|
|
964
962
|
Returns
|
|
@@ -991,7 +989,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
991
989
|
total = cube.count(reading_chunks)
|
|
992
990
|
LOG.debug(f"Loading datacube: {cube}")
|
|
993
991
|
|
|
994
|
-
def position(x: Any) ->
|
|
992
|
+
def position(x: Any) -> int | None:
|
|
995
993
|
if isinstance(x, str) and "/" in x:
|
|
996
994
|
x = x.split("/")
|
|
997
995
|
return int(x[0])
|
|
@@ -1038,7 +1036,7 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
|
|
|
1038
1036
|
def __init__(
|
|
1039
1037
|
self,
|
|
1040
1038
|
path: str,
|
|
1041
|
-
statistics_temp_dir:
|
|
1039
|
+
statistics_temp_dir: str | None = None,
|
|
1042
1040
|
delta: list = [],
|
|
1043
1041
|
use_threads: bool = False,
|
|
1044
1042
|
**kwargs: Any,
|
|
@@ -1217,19 +1215,19 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
|
|
|
1217
1215
|
LOG.info(f"Cleaned temporary storage {self.tmp_storage_path}")
|
|
1218
1216
|
|
|
1219
1217
|
|
|
1220
|
-
class
|
|
1218
|
+
class _LoadAdditions(Actor, HasRegistryMixin, AdditionsMixin):
|
|
1221
1219
|
"""A class to run dataset additions."""
|
|
1222
1220
|
|
|
1223
1221
|
def __init__(
|
|
1224
1222
|
self,
|
|
1225
1223
|
path: str,
|
|
1226
1224
|
delta: str,
|
|
1227
|
-
parts:
|
|
1225
|
+
parts: str | None = None,
|
|
1228
1226
|
use_threads: bool = False,
|
|
1229
1227
|
progress: Any = None,
|
|
1230
1228
|
**kwargs: Any,
|
|
1231
1229
|
):
|
|
1232
|
-
"""Initialize a
|
|
1230
|
+
"""Initialize a _LoadAdditions instance.
|
|
1233
1231
|
|
|
1234
1232
|
Parameters
|
|
1235
1233
|
----------
|
|
@@ -1469,7 +1467,7 @@ def multi_addition(cls: type) -> type:
|
|
|
1469
1467
|
|
|
1470
1468
|
|
|
1471
1469
|
InitAdditions = multi_addition(_InitAdditions)
|
|
1472
|
-
|
|
1470
|
+
LoadAdditions = multi_addition(_LoadAdditions)
|
|
1473
1471
|
FinaliseAdditions = multi_addition(_FinaliseAdditions)
|
|
1474
1472
|
|
|
1475
1473
|
|
|
@@ -1480,7 +1478,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
|
|
|
1480
1478
|
self,
|
|
1481
1479
|
path: str,
|
|
1482
1480
|
use_threads: bool = False,
|
|
1483
|
-
statistics_temp_dir:
|
|
1481
|
+
statistics_temp_dir: str | None = None,
|
|
1484
1482
|
progress: Any = None,
|
|
1485
1483
|
**kwargs: Any,
|
|
1486
1484
|
):
|
|
@@ -1539,7 +1537,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
|
|
|
1539
1537
|
LOG.info(f"Wrote statistics in {self.path}")
|
|
1540
1538
|
|
|
1541
1539
|
@cached_property
|
|
1542
|
-
def allow_nans(self) ->
|
|
1540
|
+
def allow_nans(self) -> bool | list:
|
|
1543
1541
|
"""Check if NaNs are allowed."""
|
|
1544
1542
|
import zarr
|
|
1545
1543
|
|
|
@@ -1581,7 +1579,7 @@ def chain(tasks: list) -> type:
|
|
|
1581
1579
|
return Chain
|
|
1582
1580
|
|
|
1583
1581
|
|
|
1584
|
-
def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> Any:
|
|
1582
|
+
def creator_factory(name: str, trace: str | None = None, **kwargs: Any) -> Any:
|
|
1585
1583
|
"""Create a dataset creator.
|
|
1586
1584
|
|
|
1587
1585
|
Parameters
|
|
@@ -1612,10 +1610,9 @@ def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> An
|
|
|
1612
1610
|
cleanup=Cleanup,
|
|
1613
1611
|
verify=Verify,
|
|
1614
1612
|
init_additions=InitAdditions,
|
|
1615
|
-
load_additions=
|
|
1616
|
-
run_additions=RunAdditions,
|
|
1613
|
+
load_additions=LoadAdditions,
|
|
1617
1614
|
finalise_additions=chain([FinaliseAdditions, Size]),
|
|
1618
|
-
additions=chain([InitAdditions,
|
|
1615
|
+
additions=chain([InitAdditions, LoadAdditions, FinaliseAdditions, Size, Cleanup]),
|
|
1619
1616
|
)[name]
|
|
1620
1617
|
LOG.debug(f"Creating {cls.__name__} with {kwargs}")
|
|
1621
1618
|
return cls(**kwargs)
|
anemoi/datasets/create/check.py
CHANGED
|
@@ -12,10 +12,8 @@ import datetime
|
|
|
12
12
|
import logging
|
|
13
13
|
import re
|
|
14
14
|
import warnings
|
|
15
|
+
from collections.abc import Callable
|
|
15
16
|
from typing import Any
|
|
16
|
-
from typing import Callable
|
|
17
|
-
from typing import Optional
|
|
18
|
-
from typing import Union
|
|
19
17
|
|
|
20
18
|
import numpy as np
|
|
21
19
|
from anemoi.utils.config import load_config
|
|
@@ -31,10 +29,10 @@ class DatasetName:
|
|
|
31
29
|
def __init__(
|
|
32
30
|
self,
|
|
33
31
|
name: str,
|
|
34
|
-
resolution:
|
|
35
|
-
start_date:
|
|
36
|
-
end_date:
|
|
37
|
-
frequency:
|
|
32
|
+
resolution: str | None = None,
|
|
33
|
+
start_date: datetime.date | None = None,
|
|
34
|
+
end_date: datetime.date | None = None,
|
|
35
|
+
frequency: datetime.timedelta | None = None,
|
|
38
36
|
):
|
|
39
37
|
"""Initialize a DatasetName instance.
|
|
40
38
|
|
|
@@ -146,7 +144,7 @@ class DatasetName:
|
|
|
146
144
|
"https://anemoi-registry.readthedocs.io/en/latest/naming-conventions.html"
|
|
147
145
|
)
|
|
148
146
|
|
|
149
|
-
def check_resolution(self, resolution:
|
|
147
|
+
def check_resolution(self, resolution: str | None) -> None:
|
|
150
148
|
"""Check if the resolution matches the expected format.
|
|
151
149
|
|
|
152
150
|
Parameters
|
|
@@ -175,7 +173,7 @@ class DatasetName:
|
|
|
175
173
|
if not c.isalnum() and c not in "-":
|
|
176
174
|
self.messages.append(f"the {self.name} should only contain alphanumeric characters and '-'.")
|
|
177
175
|
|
|
178
|
-
def check_frequency(self, frequency:
|
|
176
|
+
def check_frequency(self, frequency: datetime.timedelta | None) -> None:
|
|
179
177
|
"""Check if the frequency matches the expected format.
|
|
180
178
|
|
|
181
179
|
Parameters
|
|
@@ -189,7 +187,7 @@ class DatasetName:
|
|
|
189
187
|
self._check_missing("frequency", frequency_str)
|
|
190
188
|
self._check_mismatch("frequency", frequency_str)
|
|
191
189
|
|
|
192
|
-
def check_start_date(self, start_date:
|
|
190
|
+
def check_start_date(self, start_date: datetime.date | None) -> None:
|
|
193
191
|
"""Check if the start date matches the expected format.
|
|
194
192
|
|
|
195
193
|
Parameters
|
|
@@ -203,7 +201,7 @@ class DatasetName:
|
|
|
203
201
|
self._check_missing("start_date", start_date_str)
|
|
204
202
|
self._check_mismatch("start_date", start_date_str)
|
|
205
203
|
|
|
206
|
-
def check_end_date(self, end_date:
|
|
204
|
+
def check_end_date(self, end_date: datetime.date | None) -> None:
|
|
207
205
|
"""Check if the end date matches the expected format.
|
|
208
206
|
|
|
209
207
|
Parameters
|
|
@@ -251,7 +249,7 @@ class StatisticsValueError(ValueError):
|
|
|
251
249
|
|
|
252
250
|
|
|
253
251
|
def check_data_values(
|
|
254
|
-
arr: NDArray[Any], *, name: str, log: list = [], allow_nans:
|
|
252
|
+
arr: NDArray[Any], *, name: str, log: list = [], allow_nans: bool | list | set | tuple | dict = False
|
|
255
253
|
) -> None:
|
|
256
254
|
"""Check the values in the data array for validity.
|
|
257
255
|
|
anemoi/datasets/create/chunks.py
CHANGED
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import warnings
|
|
12
|
-
from typing import Union
|
|
13
12
|
|
|
14
13
|
LOG = logging.getLogger(__name__)
|
|
15
14
|
|
|
@@ -27,7 +26,7 @@ class ChunkFilter:
|
|
|
27
26
|
The chunks that are allowed to be processed.
|
|
28
27
|
"""
|
|
29
28
|
|
|
30
|
-
def __init__(self, *, parts:
|
|
29
|
+
def __init__(self, *, parts: str | list, total: int):
|
|
31
30
|
"""Initializes the ChunkFilter with the given parts and total number of chunks.
|
|
32
31
|
|
|
33
32
|
Parameters
|