anemoi-datasets 0.5.20__py3-none-any.whl → 0.5.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/check.py +93 -0
- anemoi/datasets/commands/check.py +101 -0
- anemoi/datasets/commands/copy.py +43 -3
- anemoi/datasets/commands/create.py +2 -3
- anemoi/datasets/commands/grib-index.py +0 -3
- anemoi/datasets/commands/inspect.py +2 -2
- anemoi/datasets/commands/scan.py +17 -5
- anemoi/datasets/create/__init__.py +19 -8
- anemoi/datasets/create/check.py +19 -1
- anemoi/datasets/create/input/action.py +2 -0
- anemoi/datasets/create/input/result.py +6 -2
- anemoi/datasets/create/sources/accumulations.py +400 -34
- anemoi/datasets/create/sources/forcings.py +1 -1
- anemoi/datasets/create/sources/grib.py +27 -181
- anemoi/datasets/create/sources/xarray_support/metadata.py +6 -0
- anemoi/datasets/create/sources/xarray_zarr.py +1 -1
- anemoi/datasets/create/writer.py +1 -1
- anemoi/datasets/data/complement.py +28 -11
- anemoi/datasets/data/forwards.py +4 -0
- anemoi/datasets/data/grids.py +3 -3
- anemoi/datasets/data/misc.py +1 -1
- anemoi/datasets/data/stores.py +36 -4
- {anemoi_datasets-0.5.20.dist-info → anemoi_datasets-0.5.22.dist-info}/METADATA +5 -3
- {anemoi_datasets-0.5.20.dist-info → anemoi_datasets-0.5.22.dist-info}/RECORD +29 -27
- {anemoi_datasets-0.5.20.dist-info → anemoi_datasets-0.5.22.dist-info}/WHEEL +1 -1
- {anemoi_datasets-0.5.20.dist-info → anemoi_datasets-0.5.22.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.20.dist-info → anemoi_datasets-0.5.22.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.20.dist-info → anemoi_datasets-0.5.22.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
CHANGED
anemoi/datasets/check.py
ADDED
@@ -0,0 +1,93 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+# A collection of functions to support pytest testing
+
+import logging
+import math
+import os
+import re
+
+LOG = logging.getLogger(__name__)
+
+
+def _check_group(group, verbosity: int, *path) -> None:
+    import zarr
+
+    group_keys = sorted(group.keys())
+    if not group_keys:
+        raise ValueError(f"Check group: {group} is empty.")
+
+    for name in sorted(group_keys):
+        if name.startswith("."):
+            if verbosity > 1:
+                LOG.info(f"Check group: skipping {name}")
+            continue
+
+        if isinstance(group[name], zarr.hierarchy.Group):
+            _check_group(group[name], verbosity, *path, name)
+        else:
+            _check_array(group[name], verbosity, *path, name)
+
+
+def _check_array(array, verbosity: int, *path) -> None:
+    assert len(array.chunks) == len(array.shape)
+    assert math.prod(array.shape) % math.prod(array.chunks) == 0
+
+    file_count = math.prod(array.shape) // math.prod(array.chunks)
+
+    full = os.path.join(*path)
+
+    chunks = array.chunks
+
+    count = 0
+    for f in os.listdir(full):
+        if verbosity > 1:
+            LOG.info(f"Check array: checking {f}")
+
+        if f.startswith("."):
+            if verbosity > 1:
+                LOG.info(f"Check array: skipping {f}")
+            continue
+
+        bits = f.split(".")
+
+        if len(bits) != len(chunks):
+            raise ValueError(f"File {f} is not a valid chunk file.")
+
+        if not all(re.match(r"^\d+$", bit) for bit in bits):
+            raise ValueError(f"File {f} is not a valid chunk file.")
+
+        count += 1
+
+    if count != file_count:
+        raise ValueError(f"File count {count} does not match expected {file_count} for {array.name}.")
+
+
+def check_zarr(path: str, verbosity: int = 0) -> None:
+    """Check if a Zarr archive is valid, that no files are missing, and that the chunking is correct.
+
+    Parameters
+    ----------
+    path : str
+        Path to the Zarr archive.
+    verbosity : int, optional
+        Verbosity level for logging. Default is 0 (no logging).
+    """
+    import zarr
+
+    if verbosity > 0:
+        LOG.info(f"Checking Zarr archive {path}")
+
+    if not os.path.exists(path) and not os.path.isdir(path):
+        # This does not work with non-directory Zarr archives
+        raise ValueError(f"Path {path} does not exist.")
+
+    _check_group(zarr.open(path, mode="r"), verbosity, path)
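The chunk check in `_check_array` is plain arithmetic: the number of chunk files on disk must equal the number of chunks implied by the array's shape and chunk sizes, and each file name must be a dot-separated list of integer indices. A minimal sketch of that bookkeeping (the shape and chunking below are invented for illustration):

    import math
    import re

    # Hypothetical array layout: 365 dates, chunked one date at a time.
    shape = (365, 10, 1, 40320)
    chunks = (1, 10, 1, 40320)

    # _check_array expects one file per chunk, named "0.0.0.0" ... "364.0.0.0".
    expected_files = math.prod(shape) // math.prod(chunks)
    print(expected_files)  # 365

    # Each dot-separated part of a chunk file name must be an integer index.
    print(bool(re.match(r"^\d+$", "364")))  # True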
anemoi/datasets/commands/check.py
ADDED
@@ -0,0 +1,101 @@
+# (C) Copyright 2024 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import logging
+import os
+from typing import Any
+
+import yaml
+
+from anemoi.datasets.create.check import DatasetName
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class Check(Command):
+    """Check if a dataset name follows naming conventions."""
+
+    timestamp = True
+
+    def add_arguments(self, command_parser: Any) -> None:
+        """Add command line arguments to the parser.
+
+        Parameters
+        ----------
+        command_parser : Any
+            The command line argument parser.
+        """
+
+        exclusive_group = command_parser.add_mutually_exclusive_group(required=True)
+
+        exclusive_group.add_argument(
+            "--name",
+            help="Check a dataset name.",
+        )
+
+        exclusive_group.add_argument(
+            "--recipe",
+            help="Specify the recipe file to check.",
+        )
+
+        exclusive_group.add_argument(
+            "--zarr",
+            help="Specify the Zarr archive to check.",
+        )
+
+        exclusive_group.add_argument(
+            "--metadata",
+            help="Specify the metadata file to check.",
+        )
+
+    def run(self, args: Any) -> None:
+
+        if args.recipe:
+            self._check_recipe(args.recipe)
+
+        if args.metadata:
+            self._check_metadata(args.metadata)
+
+        if args.name:
+            self._check_name(args.name)
+
+        if args.zarr:
+            self._check_zarr(args.zarr)
+
+    def _check_metadata(self, metadata: str) -> None:
+        pass
+
+    def _check_recipe(self, recipe: str) -> None:
+
+        recipe_filename = os.path.basename(recipe)
+        recipe_name = os.path.splitext(recipe_filename)[0]
+        in_recipe_name = yaml.safe_load(open(recipe, "r", encoding="utf-8"))["name"]
+        if recipe_name != in_recipe_name:
+            print(f"Recipe name {recipe_name} does not match the name in the recipe file {in_recipe_name}")
+
+        name = in_recipe_name
+        DatasetName(name=name).raise_if_not_valid()
+
+    def _check_name(self, name: str) -> None:
+
+        DatasetName(name=name).raise_if_not_valid()
+
+    def _check_zarr(self, zarr: str) -> None:
+
+        from anemoi.datasets.check import check_zarr
+
+        check_zarr(zarr)
+
+        # ds = xr.open_dataset(zarr)
+        # print(ds)
+
+
+command = Check
anemoi/datasets/commands/copy.py
CHANGED
@@ -20,6 +20,8 @@ import tqdm
 from anemoi.utils.remote import Transfer
 from anemoi.utils.remote import TransferMethodNotImplementedError
 
+from anemoi.datasets.check import check_zarr
+
 from . import Command
 
 LOG = logging.getLogger(__name__)
@@ -319,10 +321,30 @@ class ZarrCopier:
         """
         import zarr
 
+        if self.verbosity > 0:
+            LOG.info(f"Copying group {source} to {target}")
+
         for k, v in source.attrs.items():
+            if self.verbosity > 1:
+                import textwrap
+
+                LOG.info(f"Copying attribute {k} = {textwrap.shorten(str(v), 40)}")
             target.attrs[k] = v
 
-        for name in sorted(source.keys()):
+        source_keys = list(source.keys())
+
+        if not source_keys:
+            raise ValueError(f"Source group {source} is empty.")
+
+        if self.verbosity > 1:
+            LOG.info(f"Keys {source_keys}")
+
+        for name in sorted(source_keys):
+            if name.startswith("."):
+                if self.verbosity > 1:
+                    LOG.info(f"Skipping {name}")
+                continue
+
             if isinstance(source[name], zarr.hierarchy.Group):
                 group = target[name] if name in target else target.create_group(name)
                 self.copy_group(
@@ -362,6 +384,11 @@ class ZarrCopier:
         _copy = target["_copy"]
         _copy_np = _copy[:]
 
+        if self.verbosity > 1:
+            import numpy as np
+
+            LOG.info(f"copy {np.sum(_copy_np)} of {len(_copy_np)}")
+
         self.copy_group(source, target, _copy_np, verbosity)
         del target["_copy"]
 
@@ -417,12 +444,25 @@ class ZarrCopier:
             LOG.error("Target already exists, use either --overwrite or --resume.")
             sys.exit(1)
 
+        if self.verbosity > 0:
+            LOG.info(f"Open target: {self.target}")
+
         target = open_target()
 
         assert target is not None, target
 
+        if self.verbosity > 0:
+            LOG.info(f"Open source: {self.source}")
+
         source = zarr.open(self._store(self.source), mode="r")
+        # zarr.consolidate_metadata(source)
+
         self.copy(source, target, self.verbosity)
+        if os.path.exists(self.target) and os.path.isdir(self.target):
+            LOG.info(f"Checking target: {self.target}")
+            check_zarr(self.target, self.verbosity)
+        else:
+            LOG.info(f"Target {self.target} is not a local directory, skipping check.")
 
 
 class CopyMixin:
@@ -488,8 +528,8 @@ class CopyMixin:
         if args.source.startswith("s3://") and not args.source.endswith("/"):
             args.source = args.source + "/"
         copier = Transfer(
-            args.source,
-            args.target,
+            source=args.source,
+            target=args.target,
             overwrite=args.overwrite,
             resume=args.resume,
             verbosity=args.verbosity,
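The copier now validates the result, but only when the target is a local directory store. A sketch of the same pattern outside the copier (the target path is hypothetical):

    import os

    from anemoi.datasets.check import check_zarr

    target = "/tmp/copied-dataset.zarr"  # hypothetical local target
    if os.path.exists(target) and os.path.isdir(target):
        check_zarr(target, verbosity=1)  # raises ValueError on missing or misnamed chunk files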
anemoi/datasets/commands/create.py
CHANGED
@@ -180,10 +180,9 @@ class Create(Command):
             executor.submit(task, "init-additions", options).result()
 
         with ExecutorClass(max_workers=parallel) as executor:
-            opt = options.copy()
-            opt["parts"] = f"{n+1}/{total}"
-            futures.append(executor.submit(task, "load", opt))
             for n in range(total):
+                opt = options.copy()
+                opt["parts"] = f"{n+1}/{total}"
                 futures.append(executor.submit(task, "load-additions", opt))
 
             for future in tqdm.tqdm(
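This fixes a scoping bug: `opt` was built once, outside the loop, so the submitted tasks did not each get their own `parts` value. The corrected idiom, shown with a hypothetical stand-in task:

    from concurrent.futures import ThreadPoolExecutor

    def task(kind, options):  # hypothetical stand-in for the real task runner
        return options["parts"]

    options, total, futures = {}, 4, []
    with ThreadPoolExecutor(max_workers=2) as executor:
        for n in range(total):
            opt = options.copy()  # a fresh dict per part, built inside the loop
            opt["parts"] = f"{n+1}/{total}"
            futures.append(executor.submit(task, "load-additions", opt))
    print([f.result() for f in futures])  # ['1/4', '2/4', '3/4', '4/4']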
anemoi/datasets/commands/grib-index.py
CHANGED
@@ -29,8 +29,6 @@ class GribIndexCmd(Command):
             The command parser to which arguments are added.
         """
 
-        from anemoi.datasets.create.sources.grib_index import KEYS
-
         command_parser.add_argument(
            "--index",
            help="Path to the index file to create or update",
@@ -52,7 +50,6 @@ class GribIndexCmd(Command):
         command_parser.add_argument(
             "--keys",
             help="GRIB keys to add to the index, separated by commas. If the list starts with a +, the keys are added to default list.",
-            default=",".join(KEYS),
         )
 
         command_parser.add_argument(
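With the eager import and hard-coded default removed, the default key list is presumably resolved later, when the index is actually built. A hypothetical helper (not the package's actual code) illustrating the "+" convention the help text describes:

    from typing import Optional, Sequence

    def resolve_keys(arg: Optional[str], defaults: Sequence[str]) -> list:
        # Hypothetical illustration of the "+" prefix from the --keys help text.
        if not arg:
            return list(defaults)
        if arg.startswith("+"):
            return list(defaults) + arg[1:].split(",")
        return arg.split(",")

    print(resolve_keys("+number,step", ("class", "type")))  # ['class', 'type', 'number', 'step']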
anemoi/datasets/commands/inspect.py
CHANGED
@@ -401,7 +401,7 @@ class Version:
             return
 
         if self.build_flags is None:
-            print("🪫 Dataset not initialized")
+            print("🪫 Dataset not initialised")
             return
 
         build_flags = self.build_flags
@@ -426,7 +426,7 @@ class Version:
         )
         start = self.initialised
         if self.initialised:
-            print(f"🕰️ Dataset initialized {when(start)}.")
+            print(f"🕰️ Dataset initialised {when(start)}.")
         if built and latest:
             speed = (latest - start) / built
             eta = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + speed * (total - built)
anemoi/datasets/commands/scan.py
CHANGED
@@ -23,6 +23,16 @@ KEYS = ("class", "type", "stream", "expver", "levtype", "domain")
 
 
 class Scan(Command):
+    """Command to scan files and generate a configuration file.
+
+    Attributes
+    ----------
+    internal : bool
+        Indicates whether the command is internal.
+    timestamp : bool
+        Indicates whether to include a timestamp.
+    """
+
     internal = True
     timestamp = True
 
@@ -32,8 +42,9 @@ class Scan(Command):
         Parameters
         ----------
         command_parser : Any
-            The command parser
+            The command-line argument parser.
         """
+
         command_parser.add_argument(
             "--match",
             help="Give a glob pattern to match files (default: *.grib)",
@@ -51,22 +62,23 @@ class Scan(Command):
         Parameters
         ----------
         args : Any
-
+            Parsed command-line arguments.
         """
 
         def match(path: str) -> bool:
-            """Check if a path matches the given pattern.
+            """Check if a file path matches the given glob pattern.
 
             Parameters
             ----------
             path : str
-                The path to check.
+                The file path to check.
 
             Returns
             -------
             bool
-                True if the path matches, False otherwise.
+                True if the path matches the pattern, False otherwise.
             """
+
             return fnmatch.fnmatch(path, args.match)
 
         paths = []
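The `match` helper is a straight `fnmatch` call, so `--match` takes shell-style globs rather than regular expressions, for example:

    import fnmatch

    print(fnmatch.fnmatch("2020-01-01.grib", "*.grib"))   # True
    print(fnmatch.fnmatch("2020-01-01.grib2", "*.grib"))  # False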
anemoi/datasets/create/__init__.py
CHANGED
@@ -938,13 +938,23 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         check_shape(cube, dates, dates_in_data)
 
         def check_dates_in_data(dates_in_data, requested_dates):
-
-
-
-                    "Dates in data are not the requested ones:"
-
-
-
+            _requested_dates = [np.datetime64(_) for _ in requested_dates]
+            _dates_in_data = [np.datetime64(_) for _ in dates_in_data]
+            if _dates_in_data != _requested_dates:
+                LOG.error("Dates in data are not the requested ones:")
+
+                dates_in_data = set(dates_in_data)
+                requested_dates = set(requested_dates)
+
+                missing = sorted(requested_dates - dates_in_data)
+                extra = sorted(dates_in_data - requested_dates)
+
+                if missing:
+                    LOG.error(f"Missing dates: {[_.isoformat() for _ in missing]}")
+                if extra:
+                    LOG.error(f"Extra dates: {[_.isoformat() for _ in extra]}")
+
+                raise ValueError("Dates in data are not the requested ones")
 
         check_dates_in_data(dates_in_data, dates)
 
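Converting both lists to `np.datetime64` before comparing makes the equality check independent of how the dates were represented; a small sketch with illustrative values:

    import datetime

    import numpy as np

    requested = [datetime.datetime(2020, 1, 1), datetime.datetime(2020, 1, 2)]
    in_data = ["2020-01-01", "2020-01-02"]

    # Mixed representations compare equal once normalised to np.datetime64.
    print([np.datetime64(d) for d in in_data] == [np.datetime64(d) for d in requested])  # True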
@@ -1075,6 +1085,7 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
 
     def run(self) -> None:
         """Run the cleanup."""
+
         self.tmp_statistics.delete()
         self.registry.clean()
         for actor in self.actors:
@@ -1215,7 +1226,7 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=True)
         self.tmp_storage.delete()
         self.tmp_storage.create()
-        LOG.info(f"Dataset {self.tmp_storage_path} additions initialized.")
+        LOG.info(f"Dataset {self.tmp_storage_path} additions initialised.")
 
     def cleanup(self) -> None:
         """Clean up the temporary storage."""
anemoi/datasets/create/check.py
CHANGED
@@ -18,6 +18,7 @@ from typing import Optional
 from typing import Union
 
 import numpy as np
+from anemoi.utils.config import load_config
 from anemoi.utils.dates import frequency_to_string
 from numpy.typing import NDArray
 
@@ -25,7 +26,7 @@ LOG = logging.getLogger(__name__)
 
 
 class DatasetName:
-    """
+    """Validate and parse dataset names according to naming conventions."""
 
     def __init__(
         self,
@@ -58,6 +59,14 @@ class DatasetName:
 
         self.messages = []
 
+        config = load_config().get("datasets", {})
+
+        if config.get("ignore_naming_conventions", False):
+            # setting the env variable ANEMOI_CONFIG_DATASETS_IGNORE_NAMING_CONVENTIONS=1
+            # will ignore the naming conventions
+            return
+
+        self.check_characters()
         self.check_parsed()
         self.check_resolution(resolution)
         self.check_frequency(frequency)
@@ -157,6 +166,15 @@ class DatasetName:
         self._check_missing("resolution", resolution_str)
         self._check_mismatch("resolution", resolution_str)
 
+    def check_characters(self) -> None:
+        if not self.name.islower():
+            self.messages.append(f"the {self.name} should be in lower case.")
+        if "_" in self.name:
+            self.messages.append(f"the {self.name} should use '-' instead of '_'.")
+        for c in self.name:
+            if not c.isalnum() and c not in "-":
+                self.messages.append(f"the {self.name} should only contain alphanumeric characters and '-'.")
+
     def check_frequency(self, frequency: Optional[datetime.timedelta]) -> None:
         """Check if the frequency matches the expected format.
 
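The new escape hatch is read from anemoi's user configuration, and the code comment points to an environment-variable route as well. A sketch of both, assuming anemoi-utils' usual config location:

    import os

    # Route 1: environment variable, as noted in the code comment.
    os.environ["ANEMOI_CONFIG_DATASETS_IGNORE_NAMING_CONVENTIONS"] = "1"

    # Route 2 (assumption: ~/.config/anemoi/settings.toml):
    #   [datasets]
    #   ignore_naming_conventions = true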
anemoi/datasets/create/input/action.py
CHANGED
@@ -7,6 +7,7 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
+import json
 import logging
 from copy import deepcopy
 from typing import Any
@@ -225,6 +226,7 @@ def action_factory(config: Dict[str, Any], context: ActionContext, action_path:
     if not isinstance(config, dict):
         raise ValueError(f"Invalid input config {config}")
     if len(config) != 1:
+        print(json.dumps(config, indent=2, default=str))
         raise ValueError(f"Invalid input config. Expecting dict with only one key, got {list(config.keys())}")
 
     config = deepcopy(config)
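The extra `print` makes the offending config visible before the factory raises. For instance, a config that accidentally carries two top-level keys (invented here) would be dumped like this:

    import json

    config = {"mars": {"param": "2t"}, "grib": {"path": "data.grib"}}  # invented example
    if len(config) != 1:
        print(json.dumps(config, indent=2, default=str))
        # then: ValueError: Invalid input config. Expecting dict with only one key, got ['mars', 'grib']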
anemoi/datasets/create/input/result.py
CHANGED
@@ -132,7 +132,8 @@ def _fields_metatata(variables: Tuple[str, ...], cube: Any) -> Dict[str, Any]:
 
     # GRIB1 precipitation accumulations are not correctly encoded
     if startStep == endStep and stepTypeForConversion == "accum":
-
+        endStep = f.metadata("P1")
+        startStep = f.metadata("P2")
 
     if startStep != endStep:
         # https://codes.ecmwf.int/grib/format/grib2/ctables/4/10/
@@ -415,7 +416,10 @@ class Result:
         print()
         print("Number of unique values found for each coordinate:")
         for k, v in user_coords.items():
-            print(f"  {k:20}:", len(v))
+            print(f"  {k:20}:", len(v))
+            for n in sorted(v):
+                print("   ", n)
+
         print()
         user_shape: Tuple[int, ...] = tuple(len(v) for k, v in user_coords.items())
         print("Shape of the hypercube :", user_shape)
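The trace now lists each coordinate's values rather than only their count; with a made-up coordinate dict, the new loop prints:

    user_coords = {"valid_datetime": ["2020-01-01T00:00:00", "2020-01-01T06:00:00"]}  # invented

    for k, v in user_coords.items():
        print(f"  {k:20}:", len(v))  # "  valid_datetime      : 2"
        for n in sorted(v):
            print("   ", n)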