anemoi-datasets 0.5.16 (py3-none-any.whl) → 0.5.17 (py3-none-any.whl)
This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
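The most visible structural change in the file list above is that source and filter plugins move out of `anemoi.datasets.create.functions` (whose `__init__.py` is removed) into `anemoi.datasets.create.sources` and `anemoi.datasets.create.filters`. The sketch below only probes the new import paths; the module names are read off the paths listed above, and it assumes anemoi-datasets 0.5.17 is installed.

```python
# Hedged illustration of the package reorganisation implied by the file list above.
# Module names come from the listed paths; availability depends on the installed
# version (0.5.17) and its optional dependencies.
import importlib

for module in (
    "anemoi.datasets.create.sources.mars",    # was anemoi/datasets/create/functions/sources/mars.py
    "anemoi.datasets.create.filters.rename",  # was anemoi/datasets/create/functions/filters/rename.py
):
    try:
        importlib.import_module(module)
        print(f"{module}: importable")
    except ImportError as exc:  # older version installed, or a missing optional dependency
        print(f"{module}: not importable ({exc})")
```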
anemoi/datasets/create/utils.py
CHANGED
```diff
@@ -12,12 +12,28 @@ import datetime
 import os
 import warnings
 from contextlib import contextmanager
+from typing import Any
+from typing import Union
 
 import numpy as np
 from earthkit.data import settings
+from numpy.typing import NDArray
 
 
-def cache_context(dirname):
+def cache_context(dirname: str) -> contextmanager:
+    """Context manager for setting a temporary cache directory.
+
+    Parameters
+    ----------
+    dirname : str
+        The directory name for the cache.
+
+    Returns
+    -------
+    contextmanager
+        A context manager that sets the cache directory.
+    """
+
     @contextmanager
     def no_cache_context():
         yield
@@ -30,7 +46,21 @@ def cache_context(dirname):
     return settings.temporary({"cache-policy": "user", "user-cache-directory": dirname})
 
 
-def to_datetime_list(*args, **kwargs):
+def to_datetime_list(*args: Any, **kwargs: Any) -> list[datetime.datetime]:
+    """Convert various date formats to a list of datetime objects.
+
+    Parameters
+    ----------
+    *args : Any
+        Positional arguments for date conversion.
+    **kwargs : Any
+        Keyword arguments for date conversion.
+
+    Returns
+    -------
+    list[datetime.datetime]
+        A list of datetime objects.
+    """
     from earthkit.data.utils.dates import to_datetime_list as to_datetime_list_
 
     warnings.warn(
@@ -41,7 +71,21 @@ def to_datetime_list(*args, **kwargs):
     return to_datetime_list_(*args, **kwargs)
 
 
-def to_datetime(*args, **kwargs):
+def to_datetime(*args: Any, **kwargs: Any) -> datetime.datetime:
+    """Convert various date formats to a single datetime object.
+
+    Parameters
+    ----------
+    *args : Any
+        Positional arguments for date conversion.
+    **kwargs : Any
+        Keyword arguments for date conversion.
+
+    Returns
+    -------
+    datetime.datetime
+        A datetime object.
+    """
     from earthkit.data.utils.dates import to_datetime as to_datetime_
 
     warnings.warn(
@@ -53,7 +97,24 @@ def to_datetime(*args, **kwargs):
     return to_datetime_(*args, **kwargs)
 
 
-def make_list_int(value):
+def make_list_int(value: Union[str, list, tuple, int]) -> list[int]:
+    """Convert a string, list, tuple, or integer to a list of integers.
+
+    Parameters
+    ----------
+    value : str or list or tuple or int
+        The value to convert.
+
+    Returns
+    -------
+    list[int]
+        A list of integers.
+
+    Raises
+    ------
+    ValueError
+        If the value cannot be converted to a list of integers.
+    """
     # Convert a string like "1/2/3" or "1/to/3" or "1/to/10/by/2" to a list of integers.
     # Moved to anemoi.utils.humanize
     # replace with from anemoi.utils.humanize import make_list_int
@@ -78,8 +139,38 @@ def make_list_int(value):
     raise ValueError(f"Cannot make list from {value}")
 
 
-def normalize_and_check_dates(
-
+def normalize_and_check_dates(
+    dates: list[datetime.datetime],
+    start: datetime.datetime,
+    end: datetime.datetime,
+    frequency: datetime.timedelta,
+    dtype: str = "datetime64[s]",
+) -> NDArray[Any]:
+    """Normalize and check a list of dates against a specified frequency.
+
+    Parameters
+    ----------
+    dates : list[datetime.datetime]
+        The list of dates to check.
+    start : datetime.datetime
+        The start date.
+    end : datetime.datetime
+        The end date.
+    frequency : datetime.timedelta
+        The frequency of the dates.
+    dtype : str, optional
+        The data type of the dates, by default "datetime64[s]".
+
+    Returns
+    -------
+    NDArray[Any]
+        An array of normalized dates.
+
+    Raises
+    ------
+    ValueError
+        If the final date size does not match the data shape.
+    """
    dates = [d.hdate if hasattr(d, "hdate") else d for d in dates]
 
     assert isinstance(frequency, datetime.timedelta), frequency
```
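The comments kept in `make_list_int` describe the MARS-style range syntax it accepts ("1/2/3", "1/to/3", "1/to/10/by/2") and note that the canonical implementation now lives in `anemoi.utils.humanize`. The standalone sketch below re-implements only that documented behaviour for illustration; `parse_int_list` is a hypothetical helper, not the library function.

```python
# Illustrative re-implementation of the range syntax documented in make_list_int's
# comments. A sketch only; the real implementation is anemoi.utils.humanize.make_list_int.
def parse_int_list(value):
    if isinstance(value, int):
        return [value]
    if isinstance(value, (list, tuple)):
        return [int(v) for v in value]
    if isinstance(value, str):
        parts = value.split("/")
        if len(parts) == 5 and parts[1].lower() == "to" and parts[3].lower() == "by":
            start, end, step = int(parts[0]), int(parts[2]), int(parts[4])
            return list(range(start, end + 1, step))  # "1/to/10/by/2" -> [1, 3, 5, 7, 9]
        if len(parts) == 3 and parts[1].lower() == "to":
            return list(range(int(parts[0]), int(parts[2]) + 1))  # "1/to/3" -> [1, 2, 3]
        return [int(p) for p in parts]  # "1/2/3" -> [1, 2, 3]
    raise ValueError(f"Cannot make list from {value}")


print(parse_int_list("1/to/10/by/2"))
```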
anemoi/datasets/create/writer.py
CHANGED
```diff
@@ -9,8 +9,10 @@
 
 
 import logging
+from typing import Any
 
 import numpy as np
+from numpy.typing import NDArray
 
 LOG = logging.getLogger(__name__)
 
@@ -23,20 +25,40 @@ class ViewCacheArray:
     temporarily store the data before flushing it to the array.
 
     The `flush` method copies the contents of the cache to the final array.
-
     """
 
-    def __init__(self, array, *, shape, indexes):
+    def __init__(self, array: NDArray[Any], *, shape: tuple[int, ...], indexes: list[int]):
+        """Initialize the ViewCacheArray.
+
+        Parameters
+        ----------
+        array : NDArray[Any]
+            The NumPy-like array to store the final data.
+        shape : tuple[int, ...]
+            The shape of the cache array.
+        indexes : list[int]
+            List to reindex the first dimension.
+        """
         assert len(indexes) == shape[0], (len(indexes), shape[0])
         self.array = array
         self.dtype = array.dtype
         self.cache = np.full(shape, np.nan, dtype=self.dtype)
         self.indexes = indexes
 
-    def __setitem__(self, key, value):
+    def __setitem__(self, key: tuple[int, ...], value: NDArray[Any]) -> None:
+        """Set the value in the cache array at the specified key.
+
+        Parameters
+        ----------
+        key : tuple[int, ...]
+            The index key to set the value.
+        value : NDArray[Any]
+            The value to set in the cache array.
+        """
         self.cache[key] = value
 
-    def flush(self):
+    def flush(self) -> None:
+        """Copy the contents of the cache to the final array."""
         for i in range(self.cache.shape[0]):
             global_i = self.indexes[i]
             self.array[global_i] = self.cache[i]
```
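The annotated signatures spell out how `ViewCacheArray` is used: values are staged in an in-memory cache indexed locally and only written to the backing array on `flush()`, with `indexes` mapping the cache's first dimension onto positions in the target. A minimal usage sketch, assuming anemoi-datasets is installed; the array sizes are arbitrary:

```python
# Usage sketch of ViewCacheArray, based on the behaviour visible in the diff above:
# cache row i is copied to array[indexes[i]] when flush() is called.
import numpy as np

from anemoi.datasets.create.writer import ViewCacheArray

target = np.zeros((10, 3))  # final array (in practice this could be a Zarr array)
cache = ViewCacheArray(target, shape=(2, 3), indexes=[4, 7])

cache[0] = np.array([1.0, 2.0, 3.0])  # staged in memory, target untouched
cache[1] = np.array([4.0, 5.0, 6.0])
cache.flush()  # rows land at target[4] and target[7]

print(target[4], target[7])
```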
anemoi/datasets/create/zarr.py
CHANGED
```diff
@@ -10,24 +10,56 @@
 import datetime
 import logging
 import shutil
+from typing import Any
+from typing import Optional
 
 import numpy as np
+import zarr
+from numpy.typing import NDArray
 
 LOG = logging.getLogger(__name__)
 
 
 def add_zarr_dataset(
     *,
-    name,
-    dtype=None,
-    fill_value=None,
-    zarr_root,
-    shape=None,
-    array=None,
-    overwrite=True,
-    dimensions=None,
+    name: str,
+    dtype: np.dtype = None,
+    fill_value: np.generic = None,
+    zarr_root: zarr.Group,
+    shape: tuple[int, ...] = None,
+    array: NDArray[Any] = None,
+    overwrite: bool = True,
+    dimensions: tuple[str, ...] = None,
     **kwargs,
-):
+) -> zarr.Array:
+    """Add a dataset to a Zarr group.
+
+    Parameters
+    ----------
+    name : str
+        Name of the dataset.
+    dtype : np.dtype, optional
+        Data type of the dataset.
+    fill_value : np.generic, optional
+        Fill value for the dataset.
+    zarr_root : zarr.Group
+        Root Zarr group.
+    shape : tuple[int, ...], optional
+        Shape of the dataset.
+    array : NDArray[Any], optional
+        Array to initialize the dataset with.
+    overwrite : bool
+        Whether to overwrite existing dataset.
+    dimensions : tuple[str, ...]
+        Dimensions of the dataset.
+    **kwargs
+        Additional arguments for Zarr dataset creation.
+
+    Returns
+    -------
+    zarr.Array
+        The created Zarr array.
+    """
     assert dimensions is not None, "Please pass dimensions to add_zarr_dataset."
     assert isinstance(dimensions, (tuple, list))
 
@@ -80,13 +112,26 @@ def add_zarr_dataset(
 
 
 class ZarrBuiltRegistry:
+    """A class to manage the creation and access of Zarr datasets."""
+
     name_lengths = "lengths"
     name_flags = "flags"
     lengths = None
     flags = None
     z = None
 
-    def __init__(self, path, synchronizer_path=None, use_threads=False):
+    def __init__(self, path: str, synchronizer_path: Optional[str] = None, use_threads: bool = False):
+        """Initialize the ZarrBuiltRegistry.
+
+        Parameters
+        ----------
+        path : str
+            Path to the Zarr store.
+        synchronizer_path : Optional[str], optional
+            Path to the synchronizer.
+        use_threads : bool
+            Whether to use thread-based synchronization.
+        """
         import zarr
 
         assert isinstance(path, str), path
@@ -101,19 +146,33 @@ class ZarrBuiltRegistry:
         self.synchronizer_path = synchronizer_path
         self.synchronizer = zarr.ProcessSynchronizer(self.synchronizer_path)
 
-    def clean(self):
+    def clean(self) -> None:
+        """Clean up the synchronizer path."""
         if self.synchronizer_path is not None:
             try:
                 shutil.rmtree(self.synchronizer_path)
             except FileNotFoundError:
                 pass
 
-    def _open_write(self):
+    def _open_write(self) -> zarr.Group:
+        """Open the Zarr store in write mode."""
         import zarr
 
         return zarr.open(self.zarr_path, mode="r+", synchronizer=self.synchronizer)
 
-    def _open_read(self, sync=True):
+    def _open_read(self, sync: bool = True) -> zarr.Group:
+        """Open the Zarr store in read mode.
+
+        Parameters
+        ----------
+        sync : bool
+            Whether to use synchronization.
+
+        Returns
+        -------
+        zarr.Group
+            The opened Zarr group.
+        """
         import zarr
 
         if sync:
@@ -121,12 +180,30 @@ class ZarrBuiltRegistry:
         else:
             return zarr.open(self.zarr_path, mode="r")
 
-    def new_dataset(self, *args, **kwargs):
+    def new_dataset(self, *args, **kwargs) -> None:
+        """Create a new dataset in the Zarr store.
+
+        Parameters
+        ----------
+        *args
+            Positional arguments for dataset creation.
+        **kwargs
+            Keyword arguments for dataset creation.
+        """
         z = self._open_write()
         zarr_root = z["_build"]
         add_zarr_dataset(*args, zarr_root=zarr_root, overwrite=True, dimensions=("tmp",), **kwargs)
 
-    def add_to_history(self, action, **kwargs):
+    def add_to_history(self, action: str, **kwargs) -> None:
+        """Add an action to the history attribute of the Zarr store.
+
+        Parameters
+        ----------
+        action : str
+            The action to record.
+        **kwargs
+            Additional information about the action.
+        """
         new = dict(
             action=action,
             timestamp=datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
@@ -138,37 +215,107 @@ class ZarrBuiltRegistry:
         history.append(new)
         z.attrs["history"] = history
 
-    def get_lengths(self):
+    def get_lengths(self) -> list[int]:
+        """Get the lengths dataset.
+
+        Returns
+        -------
+        list[int]
+            The lengths dataset.
+        """
         z = self._open_read()
         return list(z["_build"][self.name_lengths][:])
 
-    def get_flags(self, **kwargs):
+    def get_flags(self, **kwargs) -> list[bool]:
+        """Get the flags dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            Additional arguments for reading the dataset.
+
+        Returns
+        -------
+        list[bool]
+            The flags dataset.
+        """
         z = self._open_read(**kwargs)
         return list(z["_build"][self.name_flags][:])
 
-    def get_flag(self, i):
+    def get_flag(self, i: int) -> bool:
+        """Get a specific flag.
+
+        Parameters
+        ----------
+        i : int
+            Index of the flag.
+
+        Returns
+        -------
+        bool
+            The flag value.
+        """
         z = self._open_read()
         return z["_build"][self.name_flags][i]
 
-    def set_flag(self, i, value=True):
+    def set_flag(self, i: int, value: bool = True) -> None:
+        """Set a specific flag.
+
+        Parameters
+        ----------
+        i : int
+            Index of the flag.
+        value : bool
+            Value to set the flag to.
+        """
         z = self._open_write()
         z.attrs["latest_write_timestamp"] = (
             datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
         )
         z["_build"][self.name_flags][i] = value
 
-    def ready(self):
+    def ready(self) -> bool:
+        """Check if all flags are set.
+
+        Returns
+        -------
+        bool
+            True if all flags are set, False otherwise.
+        """
         return all(self.get_flags())
 
-    def create(self, lengths, overwrite=False):
+    def create(self, lengths: list[int], overwrite: bool = False) -> None:
+        """Create the lengths and flags datasets.
+
+        Parameters
+        ----------
+        lengths : list[int]
+            Lengths to initialize the dataset with.
+        overwrite : bool
+            Whether to overwrite existing datasets.
+        """
         self.new_dataset(name=self.name_lengths, array=np.array(lengths, dtype="i4"))
         self.new_dataset(name=self.name_flags, array=np.array([False] * len(lengths), dtype=bool))
         self.add_to_history("initialised")
 
-    def reset(self, lengths):
+    def reset(self, lengths: list[int]) -> None:
+        """Reset the lengths and flags datasets.
+
+        Parameters
+        ----------
+        lengths : list[int]
+            Lengths to initialize the dataset with.
+        """
         return self.create(lengths, overwrite=True)
 
-    def add_provenance(self, name):
+    def add_provenance(self, name: str) -> None:
+        """Add provenance information to the Zarr store.
+
+        Parameters
+        ----------
+        name : str
+            Name of the provenance attribute.
+        """
         z = self._open_write()
 
         if name in z.attrs:
```
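The new signature makes the calling convention of `add_zarr_dataset` explicit: a keyword-only API that takes either a shape/dtype pair or a ready-made array, plus a mandatory `dimensions` tuple (enforced by the assertion kept below the docstring), mirroring how `ZarrBuiltRegistry.new_dataset` calls it. A hedged usage sketch, based only on the signature and calls shown above, assuming anemoi-datasets and zarr are installed and writing to an in-memory group:

```python
# Usage sketch for add_zarr_dataset; arguments mirror the internal call made by
# ZarrBuiltRegistry.create/new_dataset. Not verified beyond the signature in the diff.
import numpy as np
import zarr

from anemoi.datasets.create.zarr import add_zarr_dataset

root = zarr.group()  # in-memory Zarr group

lengths = add_zarr_dataset(
    name="lengths",
    array=np.array([24, 24, 23], dtype="i4"),
    zarr_root=root,
    dimensions=("tmp",),  # required: the function asserts dimensions is not None
)
print(lengths[:])
```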
anemoi/datasets/data/__init__.py
CHANGED
```diff
@@ -8,11 +8,20 @@
 # nor does it submit to any jurisdiction.
 
 import logging
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Set
 
+# from .dataset import FullIndex
+# from .dataset import Shape
+# from .dataset import TupleIndex
 from .misc import _open_dataset
 from .misc import add_dataset_path
 from .misc import add_named_dataset
 
+if TYPE_CHECKING:
+    from .dataset import Dataset
+
 LOG = logging.getLogger(__name__)
 
 __all__ = [
@@ -27,8 +36,19 @@ class MissingDateError(Exception):
     pass
 
 
-def _convert(x):
+def _convert(x: Any) -> Any:
+    """Convert OmegaConf objects to standard Python containers.
+
+    Parameters
+    ----------
+    x : Any
+        The object to convert.
 
+    Returns
+    -------
+    Any
+        The converted object.
+    """
     if isinstance(x, list):
         return [_convert(a) for a in x]
 
@@ -46,8 +66,21 @@ def _convert(x):
     return x
 
 
-def open_dataset(*args, **kwargs):
+def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
+    """Open a dataset.
+
+    Parameters
+    ----------
+    *args : Any
+        Positional arguments.
+    **kwargs : Any
+        Keyword arguments.
 
+    Returns
+    -------
+    Dataset
+        The opened dataset.
+    """
     # That will get rid of OmegaConf objects
 
     args, kwargs = _convert(args), _convert(kwargs)
@@ -59,8 +92,22 @@ def open_dataset(*args, **kwargs):
     return ds
 
 
-def list_dataset_names(*args, **kwargs):
+def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
+    """List the names of datasets.
+
+    Parameters
+    ----------
+    *args : Any
+        Positional arguments.
+    **kwargs : Any
+        Keyword arguments.
+
+    Returns
+    -------
+    list of str
+        The list of dataset names.
+    """
     ds = _open_dataset(*args, **kwargs)
-    names = set()
+    names: Set[str] = set()
     ds.get_dataset_names(names)
     return sorted(names)
```