anemoi-datasets 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +10 -7
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.15.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
anemoi/datasets/__init__.py
CHANGED
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
+
from typing import List
|
|
11
|
+
|
|
10
12
|
from .data import MissingDateError
|
|
11
13
|
from .data import add_dataset_path
|
|
12
14
|
from .data import add_named_dataset
|
|
@@ -21,10 +23,11 @@ except ImportError: # pragma: no cover
|
|
|
21
23
|
# Local copy or not installed with setuptools
|
|
22
24
|
__version__ = "999"
|
|
23
25
|
|
|
24
|
-
__all__ = [
|
|
26
|
+
__all__: List[str] = [
|
|
25
27
|
"add_dataset_path",
|
|
26
28
|
"add_named_dataset",
|
|
27
29
|
"list_dataset_names",
|
|
28
30
|
"MissingDateError",
|
|
29
31
|
"open_dataset",
|
|
32
|
+
"__version__",
|
|
30
33
|
]
|
anemoi/datasets/__main__.py
CHANGED
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
10
12
|
from anemoi.utils.cli import cli_main
|
|
11
13
|
from anemoi.utils.cli import make_parser
|
|
12
14
|
|
|
@@ -15,11 +17,19 @@ from .commands import COMMANDS
|
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
# For read-the-docs
|
|
18
|
-
def create_parser():
|
|
20
|
+
def create_parser() -> Any:
|
|
21
|
+
"""Create the argument parser for the CLI.
|
|
22
|
+
|
|
23
|
+
Returns
|
|
24
|
+
-------
|
|
25
|
+
Any
|
|
26
|
+
The argument parser instance.
|
|
27
|
+
"""
|
|
19
28
|
return make_parser(__doc__, COMMANDS)
|
|
20
29
|
|
|
21
30
|
|
|
22
|
-
def main():
|
|
31
|
+
def main() -> None:
|
|
32
|
+
"""The main entry point for the CLI application."""
|
|
23
33
|
cli_main(__version__, __doc__, COMMANDS)
|
|
24
34
|
|
|
25
35
|
|
anemoi/datasets/_version.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
# file generated by
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
+
|
|
3
6
|
TYPE_CHECKING = False
|
|
4
7
|
if TYPE_CHECKING:
|
|
5
|
-
from typing import Tuple
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
6
11
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
12
|
else:
|
|
8
13
|
VERSION_TUPLE = object
|
|
@@ -12,5 +17,5 @@ __version__: str
|
|
|
12
17
|
__version_tuple__: VERSION_TUPLE
|
|
13
18
|
version_tuple: VERSION_TUPLE
|
|
14
19
|
|
|
15
|
-
__version__ = version = '0.5.15'
|
|
16
|
-
__version_tuple__ = version_tuple = (0, 5, 15)
|
|
20
|
+
__version__ = version = '0.5.17'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 5, 17)
|
|
anemoi/datasets/commands/cleanup.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import time
|
|
12
|
+
from typing import Any
|
|
12
13
|
|
|
13
14
|
from anemoi.utils.humanize import seconds_to_human
|
|
14
15
|
|
|
@@ -25,7 +26,14 @@ class Cleanup(Command):
|
|
|
25
26
|
internal = True
|
|
26
27
|
timestamp = True
|
|
27
28
|
|
|
28
|
-
def add_arguments(self, subparser):
|
|
29
|
+
def add_arguments(self, subparser: Any) -> None:
|
|
30
|
+
"""Add command line arguments to the parser.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
subparser : Any
|
|
35
|
+
The argument parser.
|
|
36
|
+
"""
|
|
29
37
|
subparser.add_argument("path", help="Path to store the created data.")
|
|
30
38
|
subparser.add_argument(
|
|
31
39
|
"--delta",
|
|
@@ -33,7 +41,14 @@ class Cleanup(Command):
|
|
|
33
41
|
nargs="+",
|
|
34
42
|
)
|
|
35
43
|
|
|
36
|
-
def run(self, args):
|
|
44
|
+
def run(self, args: Any) -> None:
|
|
45
|
+
"""Execute the cleanup command.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
args : Any
|
|
50
|
+
The command line arguments.
|
|
51
|
+
"""
|
|
37
52
|
options = vars(args)
|
|
38
53
|
options.pop("command")
|
|
39
54
|
now = time.time()
|
|
anemoi/datasets/commands/compare.py
CHANGED
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
11
13
|
import numpy as np
|
|
12
14
|
import tqdm
|
|
13
15
|
import zarr
|
|
@@ -20,13 +22,27 @@ from . import Command
|
|
|
20
22
|
class Compare(Command):
|
|
21
23
|
"""Compare two datasets. This command compares the variables in two datasets and prints the mean of the common variables. It does not compare the data itself (yet)."""
|
|
22
24
|
|
|
23
|
-
def add_arguments(self, command_parser):
|
|
25
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
26
|
+
"""Add arguments to the command parser.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
command_parser : Any
|
|
31
|
+
The command parser to which arguments are added.
|
|
32
|
+
"""
|
|
24
33
|
command_parser.add_argument("dataset1")
|
|
25
34
|
command_parser.add_argument("dataset2")
|
|
26
35
|
command_parser.add_argument("--data", action="store_true", help="Compare the data.")
|
|
27
36
|
command_parser.add_argument("--statistics", action="store_true", help="Compare the statistics.")
|
|
28
37
|
|
|
29
|
-
def run(self, args):
|
|
38
|
+
def run(self, args: Any) -> None:
|
|
39
|
+
"""Run the compare command with the provided arguments.
|
|
40
|
+
|
|
41
|
+
Parameters
|
|
42
|
+
----------
|
|
43
|
+
args : Any
|
|
44
|
+
The arguments passed to the command.
|
|
45
|
+
"""
|
|
30
46
|
ds1 = open_dataset(args.dataset1)
|
|
31
47
|
ds2 = open_dataset(args.dataset2)
|
|
32
48
|
|
anemoi/datasets/commands/copy.py
CHANGED
|
@@ -13,6 +13,8 @@ import os
|
|
|
13
13
|
import sys
|
|
14
14
|
from concurrent.futures import ThreadPoolExecutor
|
|
15
15
|
from concurrent.futures import as_completed
|
|
16
|
+
from typing import Any
|
|
17
|
+
from typing import Optional
|
|
16
18
|
|
|
17
19
|
import tqdm
|
|
18
20
|
from anemoi.utils.remote import Transfer
|
|
@@ -29,7 +31,68 @@ except AttributeError:
|
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class ZarrCopier:
|
|
32
|
-
|
|
34
|
+
"""Class to handle copying of Zarr datasets.
|
|
35
|
+
|
|
36
|
+
Attributes
|
|
37
|
+
----------
|
|
38
|
+
source : str
|
|
39
|
+
Source location of the dataset.
|
|
40
|
+
target : str
|
|
41
|
+
Target location of the dataset.
|
|
42
|
+
transfers : int
|
|
43
|
+
Number of parallel transfers.
|
|
44
|
+
block_size : int
|
|
45
|
+
Size of data blocks to transfer.
|
|
46
|
+
overwrite : bool
|
|
47
|
+
Flag to overwrite existing dataset.
|
|
48
|
+
resume : bool
|
|
49
|
+
Flag to resume copying an existing dataset.
|
|
50
|
+
verbosity : int
|
|
51
|
+
Verbosity level of logging.
|
|
52
|
+
nested : bool
|
|
53
|
+
Flag to use ZARR's nested directory backend.
|
|
54
|
+
rechunk : str
|
|
55
|
+
Rechunk size for the target data array.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
source: str,
|
|
61
|
+
target: str,
|
|
62
|
+
transfers: int,
|
|
63
|
+
block_size: int,
|
|
64
|
+
overwrite: bool,
|
|
65
|
+
resume: bool,
|
|
66
|
+
verbosity: int,
|
|
67
|
+
nested: bool,
|
|
68
|
+
rechunk: str,
|
|
69
|
+
**kwargs: Any,
|
|
70
|
+
) -> None:
|
|
71
|
+
"""Initialize the ZarrCopier.
|
|
72
|
+
|
|
73
|
+
Parameters
|
|
74
|
+
----------
|
|
75
|
+
source : str
|
|
76
|
+
Source location of the dataset.
|
|
77
|
+
target : str
|
|
78
|
+
Target location of the dataset.
|
|
79
|
+
transfers : int
|
|
80
|
+
Number of parallel transfers.
|
|
81
|
+
block_size : int
|
|
82
|
+
Size of data blocks to transfer.
|
|
83
|
+
overwrite : bool
|
|
84
|
+
Flag to overwrite existing dataset.
|
|
85
|
+
resume : bool
|
|
86
|
+
Flag to resume copying an existing dataset.
|
|
87
|
+
verbosity : int
|
|
88
|
+
Verbosity level of logging.
|
|
89
|
+
nested : bool
|
|
90
|
+
Flag to use ZARR's nested directory backend.
|
|
91
|
+
rechunk : str
|
|
92
|
+
Rechunk size for the target data array.
|
|
93
|
+
**kwargs : Any
|
|
94
|
+
Additional keyword arguments.
|
|
95
|
+
"""
|
|
33
96
|
self.source = source
|
|
34
97
|
self.target = target
|
|
35
98
|
self.transfers = transfers
|
|
@@ -50,14 +113,50 @@ class ZarrCopier:
|
|
|
50
113
|
raise NotImplementedError("Rechunking with SSH not implemented.")
|
|
51
114
|
assert NotImplementedError("SSH not implemented.")
|
|
52
115
|
|
|
53
|
-
def _store(self, path, nested=False):
|
|
116
|
+
def _store(self, path: str, nested: bool = False) -> Any:
|
|
117
|
+
"""Get the storage path.
|
|
118
|
+
|
|
119
|
+
Parameters
|
|
120
|
+
----------
|
|
121
|
+
path : str
|
|
122
|
+
Path to the storage.
|
|
123
|
+
nested : bool, optional
|
|
124
|
+
Flag to use nested directory storage.
|
|
125
|
+
|
|
126
|
+
Returns
|
|
127
|
+
-------
|
|
128
|
+
Any
|
|
129
|
+
Storage path.
|
|
130
|
+
"""
|
|
54
131
|
if nested:
|
|
55
132
|
import zarr
|
|
56
133
|
|
|
57
134
|
return zarr.storage.NestedDirectoryStore(path)
|
|
58
135
|
return path
|
|
59
136
|
|
|
60
|
-
def copy_chunk(self, n, m, source, target, _copy, verbosity):
|
|
137
|
+
def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) -> Optional[slice]:
|
|
138
|
+
"""Copy a chunk of data from source to target.
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
n : int
|
|
143
|
+
Start index of the chunk.
|
|
144
|
+
m : int
|
|
145
|
+
End index of the chunk.
|
|
146
|
+
source : Any
|
|
147
|
+
Source data.
|
|
148
|
+
target : Any
|
|
149
|
+
Target data.
|
|
150
|
+
_copy : Any
|
|
151
|
+
Copy status array.
|
|
152
|
+
verbosity : int
|
|
153
|
+
Verbosity level of logging.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
slice or None
|
|
158
|
+
Slice of copied data or None if skipped.
|
|
159
|
+
"""
|
|
61
160
|
if _copy[n:m].all():
|
|
62
161
|
LOG.info(f"Skipping {n} to {m}")
|
|
63
162
|
return None
|
|
@@ -83,7 +182,21 @@ class ZarrCopier:
|
|
|
83
182
|
|
|
84
183
|
return slice(n, m)
|
|
85
184
|
|
|
86
|
-
def parse_rechunking(self, rechunking, source_data):
|
|
185
|
+
def parse_rechunking(self, rechunking: list[str], source_data: Any) -> tuple:
|
|
186
|
+
"""Parse the rechunking configuration.
|
|
187
|
+
|
|
188
|
+
Parameters
|
|
189
|
+
----------
|
|
190
|
+
rechunking : list of str
|
|
191
|
+
List of rechunk sizes.
|
|
192
|
+
source_data : Any
|
|
193
|
+
Source data.
|
|
194
|
+
|
|
195
|
+
Returns
|
|
196
|
+
-------
|
|
197
|
+
tuple
|
|
198
|
+
Parsed chunk sizes.
|
|
199
|
+
"""
|
|
87
200
|
shape = source_data.shape
|
|
88
201
|
chunks = list(source_data.chunks)
|
|
89
202
|
for i, c in enumerate(rechunking):
|
|
@@ -102,7 +215,20 @@ class ZarrCopier:
|
|
|
102
215
|
# raise NotImplementedError("Rechunking with multiple transfers is not implemented")
|
|
103
216
|
return chunks
|
|
104
217
|
|
|
105
|
-
def copy_data(self, source, target, _copy, verbosity):
|
|
218
|
+
def copy_data(self, source: Any, target: Any, _copy: Any, verbosity: int) -> None:
|
|
219
|
+
"""Copy data from source to target.
|
|
220
|
+
|
|
221
|
+
Parameters
|
|
222
|
+
----------
|
|
223
|
+
source : Any
|
|
224
|
+
Source data.
|
|
225
|
+
target : Any
|
|
226
|
+
Target data.
|
|
227
|
+
_copy : Any
|
|
228
|
+
Copy status array.
|
|
229
|
+
verbosity : int
|
|
230
|
+
Verbosity level of logging.
|
|
231
|
+
"""
|
|
106
232
|
LOG.info("Copying data")
|
|
107
233
|
source_data = source["data"]
|
|
108
234
|
|
|
@@ -147,7 +273,22 @@ class ZarrCopier:
|
|
|
147
273
|
|
|
148
274
|
LOG.info("Copied data")
|
|
149
275
|
|
|
150
|
-
def copy_array(self, name, source, target, _copy, verbosity):
|
|
276
|
+
def copy_array(self, name: str, source: Any, target: Any, _copy: Any, verbosity: int) -> None:
|
|
277
|
+
"""Copy an array from source to target.
|
|
278
|
+
|
|
279
|
+
Parameters
|
|
280
|
+
----------
|
|
281
|
+
name : str
|
|
282
|
+
Name of the array.
|
|
283
|
+
source : Any
|
|
284
|
+
Source data.
|
|
285
|
+
target : Any
|
|
286
|
+
Target data.
|
|
287
|
+
_copy : Any
|
|
288
|
+
Copy status array.
|
|
289
|
+
verbosity : int
|
|
290
|
+
Verbosity level of logging.
|
|
291
|
+
"""
|
|
151
292
|
for k, v in source.attrs.items():
|
|
152
293
|
target.attrs[k] = v
|
|
153
294
|
|
|
@@ -162,7 +303,20 @@ class ZarrCopier:
|
|
|
162
303
|
target[name] = source[name]
|
|
163
304
|
LOG.info(f"Copied {name}")
|
|
164
305
|
|
|
165
|
-
def copy_group(self, source, target, _copy, verbosity):
|
|
306
|
+
def copy_group(self, source: Any, target: Any, _copy: Any, verbosity: int) -> None:
|
|
307
|
+
"""Copy a group from source to target.
|
|
308
|
+
|
|
309
|
+
Parameters
|
|
310
|
+
----------
|
|
311
|
+
source : Any
|
|
312
|
+
Source data.
|
|
313
|
+
target : Any
|
|
314
|
+
Target data.
|
|
315
|
+
_copy : Any
|
|
316
|
+
Copy status array.
|
|
317
|
+
verbosity : int
|
|
318
|
+
Verbosity level of logging.
|
|
319
|
+
"""
|
|
166
320
|
import zarr
|
|
167
321
|
|
|
168
322
|
for k, v in source.attrs.items():
|
|
@@ -186,7 +340,18 @@ class ZarrCopier:
|
|
|
186
340
|
verbosity,
|
|
187
341
|
)
|
|
188
342
|
|
|
189
|
-
def copy(self, source, target, verbosity):
|
|
343
|
+
def copy(self, source: Any, target: Any, verbosity: int) -> None:
|
|
344
|
+
"""Copy the entire dataset from source to target.
|
|
345
|
+
|
|
346
|
+
Parameters
|
|
347
|
+
----------
|
|
348
|
+
source : Any
|
|
349
|
+
Source data.
|
|
350
|
+
target : Any
|
|
351
|
+
Target data.
|
|
352
|
+
verbosity : int
|
|
353
|
+
Verbosity level of logging.
|
|
354
|
+
"""
|
|
190
355
|
import zarr
|
|
191
356
|
|
|
192
357
|
if "_copy" not in target:
|
|
@@ -200,7 +365,8 @@ class ZarrCopier:
|
|
|
200
365
|
self.copy_group(source, target, _copy_np, verbosity)
|
|
201
366
|
del target["_copy"]
|
|
202
367
|
|
|
203
|
-
def run(self):
|
|
368
|
+
def run(self) -> None:
|
|
369
|
+
"""Execute the copy operation."""
|
|
204
370
|
import zarr
|
|
205
371
|
|
|
206
372
|
# base, ext = os.path.splitext(os.path.basename(args.source))
|
|
@@ -208,14 +374,14 @@ class ZarrCopier:
|
|
|
208
374
|
# assert "." not in base, base
|
|
209
375
|
LOG.info(f"Copying {self.source} to {self.target}")
|
|
210
376
|
|
|
211
|
-
def target_exists():
|
|
377
|
+
def target_exists() -> bool:
|
|
212
378
|
try:
|
|
213
379
|
zarr.open(self._store(self.target), mode="r")
|
|
214
380
|
return True
|
|
215
381
|
except ValueError:
|
|
216
382
|
return False
|
|
217
383
|
|
|
218
|
-
def target_finished():
|
|
384
|
+
def target_finished() -> bool:
|
|
219
385
|
target = zarr.open(self._store(self.target), mode="r")
|
|
220
386
|
if "_copy" in target:
|
|
221
387
|
done = sum(1 if x else 0 for x in target["_copy"])
|
|
@@ -231,7 +397,7 @@ class ZarrCopier:
|
|
|
231
397
|
return True
|
|
232
398
|
return False
|
|
233
399
|
|
|
234
|
-
def open_target():
|
|
400
|
+
def open_target() -> Any:
|
|
235
401
|
|
|
236
402
|
if not target_exists():
|
|
237
403
|
return zarr.open(self._store(self.target, self.nested), mode="w")
|
|
@@ -260,10 +426,19 @@ class ZarrCopier:
|
|
|
260
426
|
|
|
261
427
|
|
|
262
428
|
class CopyMixin:
|
|
429
|
+
"""Mixin class for adding copy command arguments and running the copy operation."""
|
|
430
|
+
|
|
263
431
|
internal = True
|
|
264
432
|
timestamp = True
|
|
265
433
|
|
|
266
|
-
def add_arguments(self, command_parser):
|
|
434
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
435
|
+
"""Add arguments to the command parser.
|
|
436
|
+
|
|
437
|
+
Parameters
|
|
438
|
+
----------
|
|
439
|
+
command_parser : Any
|
|
440
|
+
Command parser object.
|
|
441
|
+
"""
|
|
267
442
|
group = command_parser.add_mutually_exclusive_group()
|
|
268
443
|
group.add_argument(
|
|
269
444
|
"--overwrite",
|
|
@@ -293,7 +468,14 @@ class CopyMixin:
|
|
|
293
468
|
command_parser.add_argument("source", help="Source location.")
|
|
294
469
|
command_parser.add_argument("target", help="Target location.")
|
|
295
470
|
|
|
296
|
-
def run(self, args):
|
|
471
|
+
def run(self, args: Any) -> None:
|
|
472
|
+
"""Run the copy command with the provided arguments.
|
|
473
|
+
|
|
474
|
+
Parameters
|
|
475
|
+
----------
|
|
476
|
+
args : Any
|
|
477
|
+
Command arguments.
|
|
478
|
+
"""
|
|
297
479
|
if args.source == args.target:
|
|
298
480
|
raise ValueError("Source and target are the same.")
|
|
299
481
|
|
|
anemoi/datasets/commands/create.py
CHANGED
|
@@ -13,6 +13,7 @@ import time
|
|
|
13
13
|
from concurrent.futures import ProcessPoolExecutor
|
|
14
14
|
from concurrent.futures import ThreadPoolExecutor
|
|
15
15
|
from concurrent.futures import as_completed
|
|
16
|
+
from typing import Any
|
|
16
17
|
|
|
17
18
|
import tqdm
|
|
18
19
|
from anemoi.utils.humanize import seconds_to_human
|
|
@@ -22,9 +23,25 @@ from . import Command
|
|
|
22
23
|
LOG = logging.getLogger(__name__)
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
def task(what, options, *args, **kwargs):
|
|
26
|
-
"""Make sure `import Creator` is done in the sub-processes, and not in the main one.
|
|
27
|
-
|
|
26
|
+
def task(what: str, options: dict, *args: Any, **kwargs: Any) -> Any:
|
|
27
|
+
"""Make sure `import Creator` is done in the sub-processes, and not in the main one.
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
what : str
|
|
32
|
+
The task to be executed.
|
|
33
|
+
options : dict
|
|
34
|
+
Options for the task.
|
|
35
|
+
*args : Any
|
|
36
|
+
Additional arguments.
|
|
37
|
+
**kwargs : Any
|
|
38
|
+
Additional keyword arguments.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
Any
|
|
43
|
+
The result of the task.
|
|
44
|
+
"""
|
|
28
45
|
now = datetime.datetime.now()
|
|
29
46
|
LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
|
|
30
47
|
|
|
@@ -45,7 +62,14 @@ class Create(Command):
|
|
|
45
62
|
internal = True
|
|
46
63
|
timestamp = True
|
|
47
64
|
|
|
48
|
-
def add_arguments(self, command_parser):
|
|
65
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
66
|
+
"""Add command line arguments to the parser.
|
|
67
|
+
|
|
68
|
+
Parameters
|
|
69
|
+
----------
|
|
70
|
+
command_parser : Any
|
|
71
|
+
The command line argument parser.
|
|
72
|
+
"""
|
|
49
73
|
command_parser.add_argument(
|
|
50
74
|
"--overwrite",
|
|
51
75
|
action="store_true",
|
|
@@ -63,8 +87,14 @@ class Create(Command):
|
|
|
63
87
|
group.add_argument("--processes", help="Use `n` parallel process workers.", type=int, default=0)
|
|
64
88
|
command_parser.add_argument("--trace", action="store_true")
|
|
65
89
|
|
|
66
|
-
def run(self, args):
|
|
90
|
+
def run(self, args: Any) -> None:
|
|
91
|
+
"""Execute the create command.
|
|
67
92
|
|
|
93
|
+
Parameters
|
|
94
|
+
----------
|
|
95
|
+
args : Any
|
|
96
|
+
Command line arguments.
|
|
97
|
+
"""
|
|
68
98
|
now = time.time()
|
|
69
99
|
if args.threads + args.processes:
|
|
70
100
|
self.parallel_create(args)
|
|
@@ -72,8 +102,14 @@ class Create(Command):
|
|
|
72
102
|
self.serial_create(args)
|
|
73
103
|
LOG.info(f"Create completed in {seconds_to_human(time.time()-now)}")
|
|
74
104
|
|
|
75
|
-
def serial_create(self, args):
|
|
105
|
+
def serial_create(self, args: Any) -> None:
|
|
106
|
+
"""Create the dataset in serial mode.
|
|
76
107
|
|
|
108
|
+
Parameters
|
|
109
|
+
----------
|
|
110
|
+
args : Any
|
|
111
|
+
Command line arguments.
|
|
112
|
+
"""
|
|
77
113
|
options = vars(args)
|
|
78
114
|
options.pop("command")
|
|
79
115
|
options.pop("threads")
|
|
@@ -92,7 +128,14 @@ class Create(Command):
|
|
|
92
128
|
task("cleanup", options)
|
|
93
129
|
task("verify", options)
|
|
94
130
|
|
|
95
|
-
def parallel_create(self, args):
|
|
131
|
+
def parallel_create(self, args: Any) -> None:
|
|
132
|
+
"""Create the dataset in parallel mode.
|
|
133
|
+
|
|
134
|
+
Parameters
|
|
135
|
+
----------
|
|
136
|
+
args : Any
|
|
137
|
+
Command line arguments.
|
|
138
|
+
"""
|
|
96
139
|
"""Some modules, like fsspec do not work well with fork()
|
|
97
140
|
Other modules may not be thread safe. So we implement
|
|
98
141
|
parallel loadining using multiprocessing before any
|
|
anemoi/datasets/commands/finalise-additions.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import time
|
|
12
|
+
from typing import Any
|
|
12
13
|
|
|
13
14
|
from anemoi.utils.humanize import seconds_to_human
|
|
14
15
|
|
|
@@ -25,7 +26,14 @@ class FinaliseAdditions(Command):
|
|
|
25
26
|
internal = True
|
|
26
27
|
timestamp = True
|
|
27
28
|
|
|
28
|
-
def add_arguments(self, command_parser):
|
|
29
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
30
|
+
"""Add command line arguments to the parser.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
command_parser : Any
|
|
35
|
+
The argument parser instance to which arguments will be added.
|
|
36
|
+
"""
|
|
29
37
|
command_parser.add_argument(
|
|
30
38
|
"--delta",
|
|
31
39
|
help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
|
|
@@ -35,7 +43,14 @@ class FinaliseAdditions(Command):
|
|
|
35
43
|
command_parser.add_argument("path", help="Path to store the created data.")
|
|
36
44
|
command_parser.add_argument("--trace", action="store_true")
|
|
37
45
|
|
|
38
|
-
def run(self, args):
|
|
46
|
+
def run(self, args: Any) -> None:
|
|
47
|
+
"""Execute the command with the given arguments.
|
|
48
|
+
|
|
49
|
+
Parameters
|
|
50
|
+
----------
|
|
51
|
+
args : Any
|
|
52
|
+
The arguments passed to the command.
|
|
53
|
+
"""
|
|
39
54
|
options = vars(args)
|
|
40
55
|
options.pop("command")
|
|
41
56
|
step = "finalise-additions"
|
|
anemoi/datasets/commands/finalise.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import time
|
|
12
|
+
from typing import Any
|
|
12
13
|
|
|
13
14
|
from anemoi.utils.humanize import seconds_to_human
|
|
14
15
|
|
|
@@ -25,11 +26,25 @@ class Finalise(Command):
|
|
|
25
26
|
internal = True
|
|
26
27
|
timestamp = True
|
|
27
28
|
|
|
28
|
-
def add_arguments(self, command_parser):
|
|
29
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
30
|
+
"""Add arguments to the command parser.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
command_parser : Any
|
|
35
|
+
The command parser to which arguments will be added.
|
|
36
|
+
"""
|
|
29
37
|
command_parser.add_argument("path", help="Path to store the created data.")
|
|
30
38
|
command_parser.add_argument("--trace", action="store_true")
|
|
31
39
|
|
|
32
|
-
def run(self, args):
|
|
40
|
+
def run(self, args: Any) -> None:
|
|
41
|
+
"""Execute the finalise command.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
args : Any
|
|
46
|
+
The arguments passed to the command.
|
|
47
|
+
"""
|
|
33
48
|
options = vars(args)
|
|
34
49
|
options.pop("command")
|
|
35
50
|
now = time.time()
|
|
anemoi/datasets/commands/init-additions.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import time
|
|
12
|
+
from typing import Any
|
|
12
13
|
|
|
13
14
|
from anemoi.utils.humanize import seconds_to_human
|
|
14
15
|
|
|
@@ -25,7 +26,14 @@ class InitAdditions(Command):
|
|
|
25
26
|
internal = True
|
|
26
27
|
timestamp = True
|
|
27
28
|
|
|
28
|
-
def add_arguments(self, command_parser):
|
|
29
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
30
|
+
"""Add command line arguments to the parser.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
command_parser : Any
|
|
35
|
+
The argument parser instance.
|
|
36
|
+
"""
|
|
29
37
|
command_parser.add_argument(
|
|
30
38
|
"--delta",
|
|
31
39
|
help="Compute statistics tendencies on a given time delta, if possible. Must be a multiple of the frequency.",
|
|
@@ -35,7 +43,14 @@ class InitAdditions(Command):
|
|
|
35
43
|
command_parser.add_argument("path", help="Path to store the created data.")
|
|
36
44
|
command_parser.add_argument("--trace", action="store_true")
|
|
37
45
|
|
|
38
|
-
def run(self, args):
|
|
46
|
+
def run(self, args: Any) -> None:
|
|
47
|
+
"""Execute the command with the given arguments.
|
|
48
|
+
|
|
49
|
+
Parameters
|
|
50
|
+
----------
|
|
51
|
+
args : Any
|
|
52
|
+
The command line arguments.
|
|
53
|
+
"""
|
|
39
54
|
options = vars(args)
|
|
40
55
|
options.pop("command")
|
|
41
56
|
step = "init-additions"
|