anemoi-datasets 0.5.6__py3-none-any.whl → 0.5.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +11 -3
- anemoi/datasets/__main__.py +2 -3
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/__init__.py +2 -3
- anemoi/datasets/commands/cleanup.py +9 -0
- anemoi/datasets/commands/compare.py +3 -3
- anemoi/datasets/commands/copy.py +38 -68
- anemoi/datasets/commands/create.py +20 -5
- anemoi/datasets/commands/finalise-additions.py +9 -0
- anemoi/datasets/commands/finalise.py +9 -0
- anemoi/datasets/commands/init-additions.py +9 -0
- anemoi/datasets/commands/init.py +9 -0
- anemoi/datasets/commands/inspect.py +7 -1
- anemoi/datasets/commands/load-additions.py +9 -0
- anemoi/datasets/commands/load.py +9 -0
- anemoi/datasets/commands/patch.py +9 -0
- anemoi/datasets/commands/publish.py +9 -0
- anemoi/datasets/commands/scan.py +9 -0
- anemoi/datasets/compute/__init__.py +8 -0
- anemoi/datasets/compute/recentre.py +3 -2
- anemoi/datasets/create/__init__.py +64 -48
- anemoi/datasets/create/check.py +4 -3
- anemoi/datasets/create/chunks.py +3 -2
- anemoi/datasets/create/config.py +5 -5
- anemoi/datasets/create/functions/__init__.py +22 -7
- anemoi/datasets/create/functions/filters/__init__.py +2 -1
- anemoi/datasets/create/functions/filters/empty.py +3 -2
- anemoi/datasets/create/functions/filters/noop.py +2 -2
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/rename.py +16 -10
- anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
- anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
- anemoi/datasets/create/functions/sources/__init__.py +2 -2
- anemoi/datasets/create/functions/sources/accumulations.py +10 -4
- anemoi/datasets/create/functions/sources/constants.py +3 -2
- anemoi/datasets/create/functions/sources/empty.py +3 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -2
- anemoi/datasets/create/functions/sources/grib.py +2 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
- anemoi/datasets/create/functions/sources/mars.py +97 -17
- anemoi/datasets/create/functions/sources/netcdf.py +3 -2
- anemoi/datasets/create/functions/sources/opendap.py +2 -2
- anemoi/datasets/create/functions/sources/recentre.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -2
- anemoi/datasets/create/functions/sources/tendencies.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -2
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +5 -2
- anemoi/datasets/create/functions/sources/xarray/field.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -2
- anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -16
- anemoi/datasets/create/functions/sources/xarray/grid.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/time.py +39 -4
- anemoi/datasets/create/functions/sources/xarray/variable.py +6 -6
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
- anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
- anemoi/datasets/create/functions/sources/zenodo.py +2 -2
- anemoi/datasets/create/input/__init__.py +3 -17
- anemoi/datasets/create/input/action.py +3 -2
- anemoi/datasets/create/input/concat.py +3 -2
- anemoi/datasets/create/input/context.py +3 -2
- anemoi/datasets/create/input/data_sources.py +3 -2
- anemoi/datasets/create/input/empty.py +3 -2
- anemoi/datasets/create/input/filter.py +3 -2
- anemoi/datasets/create/input/function.py +3 -2
- anemoi/datasets/create/input/join.py +3 -2
- anemoi/datasets/create/input/misc.py +3 -2
- anemoi/datasets/create/input/pipe.py +3 -2
- anemoi/datasets/create/input/repeated_dates.py +3 -2
- anemoi/datasets/create/input/result.py +187 -3
- anemoi/datasets/create/input/step.py +4 -2
- anemoi/datasets/create/input/template.py +3 -2
- anemoi/datasets/create/input/trace.py +3 -2
- anemoi/datasets/create/patch.py +9 -1
- anemoi/datasets/create/persistent.py +7 -3
- anemoi/datasets/create/size.py +3 -2
- anemoi/datasets/create/statistics/__init__.py +7 -3
- anemoi/datasets/create/statistics/summary.py +3 -2
- anemoi/datasets/create/utils.py +15 -2
- anemoi/datasets/create/writer.py +3 -2
- anemoi/datasets/create/zarr.py +8 -3
- anemoi/datasets/data/__init__.py +27 -1
- anemoi/datasets/data/concat.py +5 -1
- anemoi/datasets/data/dataset.py +216 -37
- anemoi/datasets/data/debug.py +4 -1
- anemoi/datasets/data/ensemble.py +4 -1
- anemoi/datasets/data/fill_missing.py +165 -0
- anemoi/datasets/data/forwards.py +27 -2
- anemoi/datasets/data/grids.py +236 -58
- anemoi/datasets/data/indexing.py +4 -1
- anemoi/datasets/data/interpolate.py +4 -1
- anemoi/datasets/data/join.py +17 -1
- anemoi/datasets/data/masked.py +36 -10
- anemoi/datasets/data/merge.py +180 -0
- anemoi/datasets/data/misc.py +18 -3
- anemoi/datasets/data/missing.py +4 -1
- anemoi/datasets/data/rescale.py +4 -1
- anemoi/datasets/data/select.py +15 -1
- anemoi/datasets/data/statistics.py +4 -1
- anemoi/datasets/data/stores.py +70 -3
- anemoi/datasets/data/subset.py +6 -1
- anemoi/datasets/data/unchecked.py +9 -1
- anemoi/datasets/data/xy.py +20 -5
- anemoi/datasets/dates/__init__.py +9 -7
- anemoi/datasets/dates/groups.py +3 -1
- anemoi/datasets/fields.py +3 -1
- anemoi/datasets/grids.py +86 -2
- anemoi/datasets/testing.py +60 -0
- anemoi/datasets/utils/__init__.py +8 -0
- anemoi/datasets/utils/fields.py +2 -2
- {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/METADATA +11 -29
- anemoi_datasets-0.5.10.dist-info/RECORD +124 -0
- {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.5.6.dist-info/RECORD +0 -121
- {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/top_level.txt +0 -0
anemoi/datasets/__init__.py
CHANGED
|
@@ -1,19 +1,27 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
8
|
-
from ._version import __version__
|
|
9
10
|
from .data import MissingDateError
|
|
10
11
|
from .data import add_dataset_path
|
|
11
12
|
from .data import add_named_dataset
|
|
12
13
|
from .data import list_dataset_names
|
|
13
14
|
from .data import open_dataset
|
|
14
15
|
|
|
16
|
+
try:
|
|
17
|
+
# NOTE: the `_version.py` file must not be present in the git repository
|
|
18
|
+
# as it is generated by setuptools at install time
|
|
19
|
+
from ._version import __version__ # type: ignore
|
|
20
|
+
except ImportError: # pragma: no cover
|
|
21
|
+
# Local copy or not installed with setuptools
|
|
22
|
+
__version__ = "999"
|
|
23
|
+
|
|
15
24
|
__all__ = [
|
|
16
|
-
"__version__",
|
|
17
25
|
"add_dataset_path",
|
|
18
26
|
"add_named_dataset",
|
|
19
27
|
"list_dataset_names",
|
anemoi/datasets/__main__.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
# (C) Copyright 2024 ECMWF.
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
3
2
|
#
|
|
4
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
|
-
#
|
|
10
9
|
|
|
11
10
|
from anemoi.utils.cli import cli_main
|
|
12
11
|
from anemoi.utils.cli import make_parser
|
anemoi/datasets/_version.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
# (C) Copyright 2024 ECMWF.
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
3
2
|
#
|
|
4
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
|
-
#
|
|
10
9
|
|
|
11
10
|
import os
|
|
12
11
|
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
# (C) Copyright 2024 ECMWF.
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
3
2
|
#
|
|
4
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
import tqdm
|
anemoi/datasets/commands/copy.py
CHANGED
|
@@ -1,20 +1,22 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
import os
|
|
10
|
-
import shutil
|
|
11
13
|
import sys
|
|
12
14
|
from concurrent.futures import ThreadPoolExecutor
|
|
13
15
|
from concurrent.futures import as_completed
|
|
14
16
|
|
|
15
17
|
import tqdm
|
|
16
|
-
from anemoi.utils.
|
|
17
|
-
from anemoi.utils.
|
|
18
|
+
from anemoi.utils.remote import Transfer
|
|
19
|
+
from anemoi.utils.remote import TransferMethodNotImplementedError
|
|
18
20
|
|
|
19
21
|
from . import Command
|
|
20
22
|
|
|
@@ -26,54 +28,7 @@ except AttributeError:
|
|
|
26
28
|
isatty = False
|
|
27
29
|
|
|
28
30
|
|
|
29
|
-
class
|
|
30
|
-
def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
|
|
31
|
-
self.source = source
|
|
32
|
-
self.target = target
|
|
33
|
-
self.transfers = transfers
|
|
34
|
-
self.overwrite = overwrite
|
|
35
|
-
self.resume = resume
|
|
36
|
-
self.verbosity = verbosity
|
|
37
|
-
|
|
38
|
-
def run(self):
|
|
39
|
-
if self.target == ".":
|
|
40
|
-
self.target = os.path.basename(self.source)
|
|
41
|
-
|
|
42
|
-
if self.overwrite and os.path.exists(self.target):
|
|
43
|
-
LOG.info(f"Deleting {self.target}")
|
|
44
|
-
shutil.rmtree(self.target)
|
|
45
|
-
|
|
46
|
-
download(
|
|
47
|
-
self.source + "/" if not self.source.endswith("/") else self.source,
|
|
48
|
-
self.target,
|
|
49
|
-
overwrite=self.overwrite,
|
|
50
|
-
resume=self.resume,
|
|
51
|
-
verbosity=self.verbosity,
|
|
52
|
-
threads=self.transfers,
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class S3Uploader:
|
|
57
|
-
def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
|
|
58
|
-
self.source = source
|
|
59
|
-
self.target = target
|
|
60
|
-
self.transfers = transfers
|
|
61
|
-
self.overwrite = overwrite
|
|
62
|
-
self.resume = resume
|
|
63
|
-
self.verbosity = verbosity
|
|
64
|
-
|
|
65
|
-
def run(self):
|
|
66
|
-
upload(
|
|
67
|
-
self.source,
|
|
68
|
-
self.target,
|
|
69
|
-
overwrite=self.overwrite,
|
|
70
|
-
resume=self.resume,
|
|
71
|
-
verbosity=self.verbosity,
|
|
72
|
-
threads=self.transfers,
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
class DefaultCopier:
|
|
31
|
+
class ZarrCopier:
|
|
77
32
|
def __init__(self, source, target, transfers, block_size, overwrite, resume, verbosity, nested, rechunk, **kwargs):
|
|
78
33
|
self.source = source
|
|
79
34
|
self.target = target
|
|
@@ -87,6 +42,14 @@ class DefaultCopier:
|
|
|
87
42
|
|
|
88
43
|
self.rechunking = rechunk.split(",") if rechunk else []
|
|
89
44
|
|
|
45
|
+
source_is_ssh = self.source.startswith("ssh://")
|
|
46
|
+
target_is_ssh = self.target.startswith("ssh://")
|
|
47
|
+
|
|
48
|
+
if source_is_ssh or target_is_ssh:
|
|
49
|
+
if self.rechunk:
|
|
50
|
+
raise NotImplementedError("Rechunking with SSH not implemented.")
|
|
51
|
+
assert NotImplementedError("SSH not implemented.")
|
|
52
|
+
|
|
90
53
|
def _store(self, path, nested=False):
|
|
91
54
|
if nested:
|
|
92
55
|
import zarr
|
|
@@ -334,26 +297,33 @@ class CopyMixin:
|
|
|
334
297
|
if args.source == args.target:
|
|
335
298
|
raise ValueError("Source and target are the same.")
|
|
336
299
|
|
|
337
|
-
kwargs = vars(args)
|
|
338
|
-
|
|
339
300
|
if args.overwrite and args.resume:
|
|
340
301
|
raise ValueError("Cannot use --overwrite and --resume together.")
|
|
341
302
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
303
|
+
if not args.rechunk:
|
|
304
|
+
# rechunking is only supported for ZARR datasets, it is implemented in this package
|
|
305
|
+
try:
|
|
306
|
+
if args.source.startswith("s3://") and not args.source.endswith("/"):
|
|
307
|
+
args.source = args.source + "/"
|
|
308
|
+
copier = Transfer(
|
|
309
|
+
args.source,
|
|
310
|
+
args.target,
|
|
311
|
+
overwrite=args.overwrite,
|
|
312
|
+
resume=args.resume,
|
|
313
|
+
verbosity=args.verbosity,
|
|
314
|
+
threads=args.transfers,
|
|
315
|
+
)
|
|
316
|
+
copier.run()
|
|
317
|
+
return
|
|
318
|
+
except TransferMethodNotImplementedError:
|
|
319
|
+
# DataTransfer relies on anemoi-utils which is agnostic to the source and target format
|
|
320
|
+
# it transfers file and folders, ignoring that it is zarr data
|
|
321
|
+
# if it is not implemented, we fallback to the ZarrCopier
|
|
322
|
+
pass
|
|
323
|
+
|
|
324
|
+
copier = ZarrCopier(**vars(args))
|
|
356
325
|
copier.run()
|
|
326
|
+
return
|
|
357
327
|
|
|
358
328
|
|
|
359
329
|
class Copy(CopyMixin, Command):
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import datetime
|
|
2
11
|
import logging
|
|
3
12
|
import time
|
|
@@ -14,9 +23,7 @@ LOG = logging.getLogger(__name__)
|
|
|
14
23
|
|
|
15
24
|
|
|
16
25
|
def task(what, options, *args, **kwargs):
|
|
17
|
-
"""
|
|
18
|
-
Make sure `import Creator` is done in the sub-processes, and not in the main one.
|
|
19
|
-
"""
|
|
26
|
+
"""Make sure `import Creator` is done in the sub-processes, and not in the main one."""
|
|
20
27
|
|
|
21
28
|
now = datetime.datetime.now()
|
|
22
29
|
LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
|
|
@@ -117,7 +124,9 @@ class Create(Command):
|
|
|
117
124
|
opt["parts"] = f"{n+1}/{total}"
|
|
118
125
|
futures.append(executor.submit(task, "load", opt))
|
|
119
126
|
|
|
120
|
-
for future in tqdm.tqdm(
|
|
127
|
+
for future in tqdm.tqdm(
|
|
128
|
+
as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
|
|
129
|
+
):
|
|
121
130
|
future.result()
|
|
122
131
|
|
|
123
132
|
with ExecutorClass(max_workers=1) as executor:
|
|
@@ -133,7 +142,13 @@ class Create(Command):
|
|
|
133
142
|
for n in range(total):
|
|
134
143
|
futures.append(executor.submit(task, "load-additions", opt))
|
|
135
144
|
|
|
136
|
-
for future in tqdm.tqdm(
|
|
145
|
+
for future in tqdm.tqdm(
|
|
146
|
+
as_completed(futures),
|
|
147
|
+
desc="Computing additions",
|
|
148
|
+
total=len(futures),
|
|
149
|
+
colour="green",
|
|
150
|
+
position=parallel + 1,
|
|
151
|
+
):
|
|
137
152
|
future.result()
|
|
138
153
|
|
|
139
154
|
with ExecutorClass(max_workers=1) as executor:
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
anemoi/datasets/commands/init.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -506,6 +508,10 @@ class Version0_6(Version):
|
|
|
506
508
|
def variables(self):
|
|
507
509
|
return self.metadata["variables"]
|
|
508
510
|
|
|
511
|
+
@property
|
|
512
|
+
def variables_metadata(self):
|
|
513
|
+
return self.metadata.get("variables_metadata", {})
|
|
514
|
+
|
|
509
515
|
|
|
510
516
|
class Version0_12(Version0_6):
|
|
511
517
|
def details(self):
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
anemoi/datasets/commands/load.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
import time
|
|
3
12
|
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import logging
|
|
2
11
|
|
|
3
12
|
from . import Command
|
anemoi/datasets/commands/scan.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
1
10
|
import fnmatch
|
|
2
11
|
import os
|
|
3
12
|
import sys
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
2
|
#
|
|
3
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
5
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
8
|
# nor does it submit to any jurisdiction.
|
|
8
|
-
|
|
9
|
+
|
|
9
10
|
|
|
10
11
|
import logging
|
|
11
12
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
2
|
#
|
|
3
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
5
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
8
|
# nor does it submit to any jurisdiction.
|
|
8
|
-
#
|
|
9
9
|
|
|
10
10
|
import datetime
|
|
11
11
|
import json
|
|
@@ -14,9 +14,9 @@ import os
|
|
|
14
14
|
import time
|
|
15
15
|
import uuid
|
|
16
16
|
import warnings
|
|
17
|
-
from copy import deepcopy
|
|
18
17
|
from functools import cached_property
|
|
19
18
|
|
|
19
|
+
import cftime
|
|
20
20
|
import numpy as np
|
|
21
21
|
import tqdm
|
|
22
22
|
from anemoi.utils.config import DotDict as DotDict
|
|
@@ -25,6 +25,7 @@ from anemoi.utils.dates import frequency_to_string
|
|
|
25
25
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
26
26
|
from anemoi.utils.humanize import compress_dates
|
|
27
27
|
from anemoi.utils.humanize import seconds_to_human
|
|
28
|
+
from anemoi.utils.sanitise import sanitise
|
|
28
29
|
from earthkit.data.core.order import build_remapping
|
|
29
30
|
|
|
30
31
|
from anemoi.datasets import MissingDateError
|
|
@@ -52,7 +53,7 @@ from .writer import ViewCacheArray
|
|
|
52
53
|
|
|
53
54
|
LOG = logging.getLogger(__name__)
|
|
54
55
|
|
|
55
|
-
VERSION = "0.
|
|
56
|
+
VERSION = "0.30"
|
|
56
57
|
|
|
57
58
|
|
|
58
59
|
def json_tidy(o):
|
|
@@ -66,6 +67,19 @@ def json_tidy(o):
|
|
|
66
67
|
if isinstance(o, datetime.timedelta):
|
|
67
68
|
return frequency_to_string(o)
|
|
68
69
|
|
|
70
|
+
if isinstance(o, cftime.DatetimeJulian):
|
|
71
|
+
import pandas as pd
|
|
72
|
+
|
|
73
|
+
o = pd.Timestamp(
|
|
74
|
+
o.year,
|
|
75
|
+
o.month,
|
|
76
|
+
o.day,
|
|
77
|
+
o.hour,
|
|
78
|
+
o.minute,
|
|
79
|
+
o.second,
|
|
80
|
+
)
|
|
81
|
+
return o.isoformat()
|
|
82
|
+
|
|
69
83
|
raise TypeError(repr(o) + " is not JSON serializable")
|
|
70
84
|
|
|
71
85
|
|
|
@@ -93,10 +107,6 @@ def build_statistics_dates(dates, start, end):
|
|
|
93
107
|
return (start.isoformat(), end.isoformat())
|
|
94
108
|
|
|
95
109
|
|
|
96
|
-
def _ignore(*args, **kwargs):
|
|
97
|
-
pass
|
|
98
|
-
|
|
99
|
-
|
|
100
110
|
def _path_readable(path):
|
|
101
111
|
import zarr
|
|
102
112
|
|
|
@@ -277,6 +287,16 @@ class Size(Actor):
|
|
|
277
287
|
metadata = compute_directory_sizes(self.path)
|
|
278
288
|
self.update_metadata(**metadata)
|
|
279
289
|
|
|
290
|
+
# Look for constant fields
|
|
291
|
+
ds = open_dataset(self.path)
|
|
292
|
+
constants = ds.computed_constant_fields()
|
|
293
|
+
|
|
294
|
+
variables_metadata = self.dataset.zarr_metadata.get("variables_metadata", {}).copy()
|
|
295
|
+
for k in constants:
|
|
296
|
+
variables_metadata[k]["constant_in_time"] = True
|
|
297
|
+
|
|
298
|
+
self.update_metadata(constant_fields=constants, variables_metadata=variables_metadata)
|
|
299
|
+
|
|
280
300
|
|
|
281
301
|
class HasRegistryMixin:
|
|
282
302
|
@cached_property
|
|
@@ -325,46 +345,22 @@ def build_input_(main_config, output_config):
|
|
|
325
345
|
return builder
|
|
326
346
|
|
|
327
347
|
|
|
328
|
-
def tidy_recipe(config: object):
|
|
329
|
-
"""Remove potentially private information in the config"""
|
|
330
|
-
config = deepcopy(config)
|
|
331
|
-
if isinstance(config, (tuple, list)):
|
|
332
|
-
return [tidy_recipe(_) for _ in config]
|
|
333
|
-
if isinstance(config, (dict, DotDict)):
|
|
334
|
-
for k, v in config.items():
|
|
335
|
-
if k.startswith("_"):
|
|
336
|
-
config[k] = "*** REMOVED FOR SECURITY ***"
|
|
337
|
-
else:
|
|
338
|
-
config[k] = tidy_recipe(v)
|
|
339
|
-
if isinstance(config, str):
|
|
340
|
-
if config.startswith("_"):
|
|
341
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
342
|
-
if config.startswith("s3://"):
|
|
343
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
344
|
-
if config.startswith("gs://"):
|
|
345
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
346
|
-
if config.startswith("http"):
|
|
347
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
348
|
-
if config.startswith("ftp"):
|
|
349
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
350
|
-
if config.startswith("file"):
|
|
351
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
352
|
-
if config.startswith("ssh"):
|
|
353
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
354
|
-
if config.startswith("scp"):
|
|
355
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
356
|
-
if config.startswith("rsync"):
|
|
357
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
358
|
-
if config.startswith("/"):
|
|
359
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
360
|
-
if "@" in config:
|
|
361
|
-
return "*** REMOVED FOR SECURITY ***"
|
|
362
|
-
return config
|
|
363
|
-
|
|
364
|
-
|
|
365
348
|
class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
|
|
366
349
|
dataset_class = NewDataset
|
|
367
|
-
|
|
350
|
+
|
|
351
|
+
def __init__(
|
|
352
|
+
self,
|
|
353
|
+
path,
|
|
354
|
+
config,
|
|
355
|
+
check_name=False,
|
|
356
|
+
overwrite=False,
|
|
357
|
+
use_threads=False,
|
|
358
|
+
statistics_temp_dir=None,
|
|
359
|
+
progress=None,
|
|
360
|
+
test=False,
|
|
361
|
+
cache=None,
|
|
362
|
+
**kwargs,
|
|
363
|
+
):
|
|
368
364
|
if _path_readable(path) and not overwrite:
|
|
369
365
|
raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
|
|
370
366
|
|
|
@@ -448,7 +444,24 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
448
444
|
metadata.update(self.main_config.get("add_metadata", {}))
|
|
449
445
|
|
|
450
446
|
metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
|
|
451
|
-
|
|
447
|
+
|
|
448
|
+
recipe = sanitise(self.main_config.get_serialisable_dict())
|
|
449
|
+
|
|
450
|
+
# Remove stuff added by prepml
|
|
451
|
+
for k in [
|
|
452
|
+
"build_dataset",
|
|
453
|
+
"config_format_version",
|
|
454
|
+
"config_path",
|
|
455
|
+
"dataset_status",
|
|
456
|
+
"ecflow",
|
|
457
|
+
"metadata",
|
|
458
|
+
"platform",
|
|
459
|
+
"reading_chunks",
|
|
460
|
+
"upload",
|
|
461
|
+
]:
|
|
462
|
+
recipe.pop(k, None)
|
|
463
|
+
|
|
464
|
+
metadata["recipe"] = recipe
|
|
452
465
|
|
|
453
466
|
metadata["description"] = self.main_config.description
|
|
454
467
|
metadata["licence"] = self.main_config["licence"]
|
|
@@ -467,6 +480,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
467
480
|
metadata["data_request"] = self.minimal_input.data_request
|
|
468
481
|
metadata["field_shape"] = self.minimal_input.field_shape
|
|
469
482
|
metadata["proj_string"] = self.minimal_input.proj_string
|
|
483
|
+
metadata["variables_metadata"] = self.minimal_input.variables_metadata
|
|
470
484
|
|
|
471
485
|
metadata["start_date"] = dates[0].isoformat()
|
|
472
486
|
metadata["end_date"] = dates[-1].isoformat()
|
|
@@ -531,7 +545,9 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
531
545
|
|
|
532
546
|
|
|
533
547
|
class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
|
|
534
|
-
def __init__(
|
|
548
|
+
def __init__(
|
|
549
|
+
self, path, parts=None, use_threads=False, statistics_temp_dir=None, progress=None, cache=None, **kwargs
|
|
550
|
+
):
|
|
535
551
|
super().__init__(path, cache=cache)
|
|
536
552
|
self.use_threads = use_threads
|
|
537
553
|
self.statistics_temp_dir = statistics_temp_dir
|