anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +558 -62
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.18.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/top_level.txt +0 -0
anemoi/datasets/commands/init.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import time
|
|
12
|
+
from typing import Any
|
|
12
13
|
|
|
13
14
|
from anemoi.utils.humanize import seconds_to_human
|
|
14
15
|
|
|
@@ -25,8 +26,14 @@ class Init(Command):
|
|
|
25
26
|
internal = True
|
|
26
27
|
timestamp = True
|
|
27
28
|
|
|
28
|
-
def add_arguments(self, subparser):
|
|
29
|
+
def add_arguments(self, subparser: Any) -> None:
|
|
30
|
+
"""Add command-line arguments to the parser.
|
|
29
31
|
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
subparser : Any
|
|
35
|
+
The argument parser to which the arguments will be added.
|
|
36
|
+
"""
|
|
30
37
|
subparser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
|
|
31
38
|
subparser.add_argument("path", help="Path to store the created data.")
|
|
32
39
|
|
|
@@ -57,7 +64,14 @@ class Init(Command):
|
|
|
57
64
|
|
|
58
65
|
subparser.add_argument("--trace", action="store_true")
|
|
59
66
|
|
|
60
|
-
def run(self, args):
|
|
67
|
+
def run(self, args: Any) -> None:
|
|
68
|
+
"""Execute the command with the provided arguments.
|
|
69
|
+
|
|
70
|
+
Parameters
|
|
71
|
+
----------
|
|
72
|
+
args : Any
|
|
73
|
+
The arguments passed to the command.
|
|
74
|
+
"""
|
|
61
75
|
options = vars(args)
|
|
62
76
|
options.pop("command")
|
|
63
77
|
now = time.time()
|
|
@@ -13,6 +13,11 @@ import logging
|
|
|
13
13
|
import os
|
|
14
14
|
from copy import deepcopy
|
|
15
15
|
from functools import cached_property
|
|
16
|
+
from typing import Any
|
|
17
|
+
from typing import Dict
|
|
18
|
+
from typing import List
|
|
19
|
+
from typing import Optional
|
|
20
|
+
from typing import Union
|
|
16
21
|
|
|
17
22
|
import numpy as np
|
|
18
23
|
import semantic_version
|
|
@@ -23,6 +28,7 @@ from anemoi.utils.humanize import when
|
|
|
23
28
|
from anemoi.utils.text import dotted_line
|
|
24
29
|
from anemoi.utils.text import progress
|
|
25
30
|
from anemoi.utils.text import table
|
|
31
|
+
from numpy.typing import NDArray
|
|
26
32
|
|
|
27
33
|
from anemoi.datasets import open_dataset
|
|
28
34
|
from anemoi.datasets.data.stores import open_zarr
|
|
@@ -33,7 +39,20 @@ from . import Command
|
|
|
33
39
|
LOG = logging.getLogger(__name__)
|
|
34
40
|
|
|
35
41
|
|
|
36
|
-
def compute_directory_size(path):
|
|
42
|
+
def compute_directory_size(path: str) -> Union[tuple[int, int], tuple[None, None]]:
|
|
43
|
+
"""Compute the total size and number of files in a directory.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
path : str
|
|
48
|
+
The path to the directory.
|
|
49
|
+
|
|
50
|
+
Returns
|
|
51
|
+
-------
|
|
52
|
+
tuple[int, int] | tuple[None, None]
|
|
53
|
+
The total size in bytes and the number of files,
|
|
54
|
+
or (None, None) if the path is not a directory.
|
|
55
|
+
"""
|
|
37
56
|
if not os.path.isdir(path):
|
|
38
57
|
return None, None
|
|
39
58
|
size = 0
|
|
@@ -46,18 +65,60 @@ def compute_directory_size(path):
|
|
|
46
65
|
return size, n
|
|
47
66
|
|
|
48
67
|
|
|
49
|
-
def local_time_bug(lon, date):
|
|
68
|
+
def local_time_bug(lon: float, date: datetime.datetime) -> float:
|
|
69
|
+
"""Calculate the local time bug based on longitude and date.
|
|
70
|
+
|
|
71
|
+
Parameters
|
|
72
|
+
----------
|
|
73
|
+
lon : float
|
|
74
|
+
Longitude.
|
|
75
|
+
date : datetime.datetime
|
|
76
|
+
Date and time.
|
|
77
|
+
|
|
78
|
+
Returns
|
|
79
|
+
-------
|
|
80
|
+
float
|
|
81
|
+
Local time bug in hours.
|
|
82
|
+
"""
|
|
50
83
|
delta = date - datetime.datetime(date.year, date.month, date.day)
|
|
51
84
|
hours_since_midnight = delta.days + delta.seconds / 86400.0 # * 24 is missing
|
|
52
85
|
return (lon / 360.0 * 24.0 + hours_since_midnight) % 24
|
|
53
86
|
|
|
54
87
|
|
|
55
|
-
def cos_local_time_bug(lon, date):
|
|
88
|
+
def cos_local_time_bug(lon: float, date: datetime.datetime) -> float:
|
|
89
|
+
"""Calculate the cosine of the local time bug.
|
|
90
|
+
|
|
91
|
+
Parameters
|
|
92
|
+
----------
|
|
93
|
+
lon : float
|
|
94
|
+
Longitude.
|
|
95
|
+
date : datetime.datetime
|
|
96
|
+
Date and time.
|
|
97
|
+
|
|
98
|
+
Returns
|
|
99
|
+
-------
|
|
100
|
+
float
|
|
101
|
+
Cosine of the local time bug.
|
|
102
|
+
"""
|
|
56
103
|
radians = local_time_bug(lon, date) / 24 * np.pi * 2
|
|
57
104
|
return np.cos(radians)
|
|
58
105
|
|
|
59
106
|
|
|
60
|
-
def find(config, name):
|
|
107
|
+
def find(config: Union[dict, list], name: str) -> Any:
|
|
108
|
+
"""Recursively search for a key in a nested dictionary or list.
|
|
109
|
+
|
|
110
|
+
Parameters
|
|
111
|
+
----------
|
|
112
|
+
config : dict or list
|
|
113
|
+
The configuration to search.
|
|
114
|
+
name : str
|
|
115
|
+
The key to search for.
|
|
116
|
+
|
|
117
|
+
Returns
|
|
118
|
+
-------
|
|
119
|
+
Any
|
|
120
|
+
The value associated with the key, or None if not found.
|
|
121
|
+
"""
|
|
61
122
|
if isinstance(config, dict):
|
|
62
123
|
if name in config:
|
|
63
124
|
return config[name]
|
|
@@ -77,7 +138,22 @@ def find(config, name):
|
|
|
77
138
|
|
|
78
139
|
|
|
79
140
|
class Version:
|
|
80
|
-
|
|
141
|
+
"""Represents a version of a dataset."""
|
|
142
|
+
|
|
143
|
+
def __init__(self, path: str, zarr: Any, metadata: dict, version: semantic_version.Version) -> None:
|
|
144
|
+
"""Initialize the Version object.
|
|
145
|
+
|
|
146
|
+
Parameters
|
|
147
|
+
----------
|
|
148
|
+
path : str
|
|
149
|
+
Path to the dataset.
|
|
150
|
+
zarr : Any
|
|
151
|
+
Zarr object.
|
|
152
|
+
metadata : dict
|
|
153
|
+
Metadata of the dataset.
|
|
154
|
+
version : semantic_version.Version
|
|
155
|
+
Version of the dataset.
|
|
156
|
+
"""
|
|
81
157
|
self.path = path
|
|
82
158
|
self.zarr = zarr
|
|
83
159
|
self.metadata = metadata
|
|
@@ -85,69 +161,91 @@ class Version:
|
|
|
85
161
|
self.dataset = None
|
|
86
162
|
self.dataset = open_dataset(self.path)
|
|
87
163
|
|
|
88
|
-
def describe(self):
|
|
164
|
+
def describe(self) -> None:
|
|
165
|
+
"""Print a description of the dataset."""
|
|
89
166
|
print(f"📦 Path : {self.path}")
|
|
90
167
|
print(f"🔢 Format version: {self.version}")
|
|
91
168
|
|
|
92
169
|
@property
|
|
93
|
-
def name_to_index(self):
|
|
170
|
+
def name_to_index(self) -> Dict[str, int]:
|
|
171
|
+
"""Get a mapping of variable names to their indices."""
|
|
94
172
|
return find(self.metadata, "name_to_index")
|
|
95
173
|
|
|
96
174
|
@property
|
|
97
|
-
def longitudes(self):
|
|
175
|
+
def longitudes(self) -> NDArray[Any]:
|
|
176
|
+
"""Get the longitudes of the dataset."""
|
|
98
177
|
try:
|
|
99
178
|
return self.zarr.longitudes[:]
|
|
100
179
|
except (KeyError, AttributeError):
|
|
101
180
|
return self.zarr.longitude[:]
|
|
102
181
|
|
|
103
182
|
@property
|
|
104
|
-
def data(self):
|
|
183
|
+
def data(self) -> Any:
|
|
184
|
+
"""Get the data of the dataset."""
|
|
105
185
|
try:
|
|
106
186
|
return self.zarr.data
|
|
107
187
|
except AttributeError:
|
|
108
188
|
return self.zarr
|
|
109
189
|
|
|
110
190
|
@property
|
|
111
|
-
def first_date(self):
|
|
191
|
+
def first_date(self) -> datetime.datetime:
|
|
192
|
+
"""Get the first date of the dataset."""
|
|
112
193
|
return datetime.datetime.fromisoformat(self.metadata["first_date"])
|
|
113
194
|
|
|
114
195
|
@property
|
|
115
|
-
def last_date(self):
|
|
196
|
+
def last_date(self) -> datetime.datetime:
|
|
197
|
+
"""Get the last date of the dataset."""
|
|
116
198
|
return datetime.datetime.fromisoformat(self.metadata["last_date"])
|
|
117
199
|
|
|
118
200
|
@property
|
|
119
|
-
def frequency(self):
|
|
201
|
+
def frequency(self) -> str:
|
|
202
|
+
"""Get the frequency of the dataset."""
|
|
120
203
|
return self.metadata["frequency"]
|
|
121
204
|
|
|
122
205
|
@property
|
|
123
|
-
def resolution(self):
|
|
206
|
+
def resolution(self) -> str:
|
|
207
|
+
"""Get the resolution of the dataset."""
|
|
124
208
|
return self.metadata["resolution"]
|
|
125
209
|
|
|
126
210
|
@property
|
|
127
|
-
def field_shape(self):
|
|
211
|
+
def field_shape(self) -> Optional[tuple]:
|
|
212
|
+
"""Get the field shape of the dataset."""
|
|
128
213
|
return self.metadata.get("field_shape")
|
|
129
214
|
|
|
130
215
|
@property
|
|
131
|
-
def proj_string(self):
|
|
216
|
+
def proj_string(self) -> Optional[str]:
|
|
217
|
+
"""Get the projection string of the dataset."""
|
|
132
218
|
return self.metadata.get("proj_string")
|
|
133
219
|
|
|
134
220
|
@property
|
|
135
|
-
def shape(self):
|
|
221
|
+
def shape(self) -> Optional[tuple]:
|
|
222
|
+
"""Get the shape of the dataset."""
|
|
136
223
|
if self.data and hasattr(self.data, "shape"):
|
|
137
224
|
return self.data.shape
|
|
138
225
|
|
|
139
226
|
@property
|
|
140
|
-
def n_missing_dates(self):
|
|
227
|
+
def n_missing_dates(self) -> Optional[int]:
|
|
228
|
+
"""Get the number of missing dates in the dataset."""
|
|
141
229
|
if "missing_dates" in self.metadata:
|
|
142
230
|
return len(self.metadata["missing_dates"])
|
|
143
231
|
return None
|
|
144
232
|
|
|
145
233
|
@property
|
|
146
|
-
def uncompressed_data_size(self):
|
|
234
|
+
def uncompressed_data_size(self) -> Optional[int]:
|
|
235
|
+
"""Get the uncompressed data size of the dataset."""
|
|
147
236
|
if self.data and hasattr(self.data, "dtype") and hasattr(self.data, "size"):
|
|
148
237
|
return self.data.dtype.itemsize * self.data.size
|
|
149
238
|
|
|
150
|
-
def info(self, detailed, size):
|
|
239
|
+
def info(self, detailed: bool, size: bool) -> None:
|
|
240
|
+
"""Print detailed information about the dataset.
|
|
241
|
+
|
|
242
|
+
Parameters
|
|
243
|
+
----------
|
|
244
|
+
detailed : bool
|
|
245
|
+
Whether to print detailed information.
|
|
246
|
+
size : bool
|
|
247
|
+
Whether to print the size of the dataset.
|
|
248
|
+
"""
|
|
151
249
|
print()
|
|
152
250
|
print(f'📅 Start : {self.first_date.strftime("%Y-%m-%d %H:%M")}')
|
|
153
251
|
print(f'📅 End : {self.last_date.strftime("%Y-%m-%d %H:%M")}')
|
|
@@ -195,18 +293,28 @@ class Version:
|
|
|
195
293
|
print()
|
|
196
294
|
|
|
197
295
|
@property
|
|
198
|
-
def variables(self):
|
|
296
|
+
def variables(self) -> List[str]:
|
|
297
|
+
"""Get the list of variables in the dataset."""
|
|
199
298
|
return [v[0] for v in sorted(self.name_to_index.items(), key=lambda x: x[1])]
|
|
200
299
|
|
|
201
300
|
@property
|
|
202
|
-
def total_size(self):
|
|
301
|
+
def total_size(self) -> Optional[int]:
|
|
302
|
+
"""Get the total size of the dataset."""
|
|
203
303
|
return self.zarr.attrs.get("total_size")
|
|
204
304
|
|
|
205
305
|
@property
|
|
206
|
-
def total_number_of_files(self):
|
|
306
|
+
def total_number_of_files(self) -> Optional[int]:
|
|
307
|
+
"""Get the total number of files in the dataset."""
|
|
207
308
|
return self.zarr.attrs.get("total_number_of_files")
|
|
208
309
|
|
|
209
|
-
def print_sizes(self, size):
|
|
310
|
+
def print_sizes(self, size: bool) -> None:
|
|
311
|
+
"""Print the size and number of files in the dataset.
|
|
312
|
+
|
|
313
|
+
Parameters
|
|
314
|
+
----------
|
|
315
|
+
size : bool
|
|
316
|
+
Whether to compute and print the size.
|
|
317
|
+
"""
|
|
210
318
|
total_size = self.total_size
|
|
211
319
|
n = self.total_number_of_files
|
|
212
320
|
|
|
@@ -222,7 +330,8 @@ class Version:
|
|
|
222
330
|
print(f"📁 Files : {n:,}")
|
|
223
331
|
|
|
224
332
|
@property
|
|
225
|
-
def statistics(self):
|
|
333
|
+
def statistics(self) -> tuple[list, list, list, list]:
|
|
334
|
+
"""Get the statistics of the dataset."""
|
|
226
335
|
try:
|
|
227
336
|
if self.dataset is not None:
|
|
228
337
|
stats = self.dataset.statistics
|
|
@@ -231,31 +340,36 @@ class Version:
|
|
|
231
340
|
return [["-"] * len(self.variables)] * 4
|
|
232
341
|
|
|
233
342
|
@property
|
|
234
|
-
def statistics_ready(self):
|
|
343
|
+
def statistics_ready(self) -> bool:
|
|
344
|
+
"""Check if the statistics are ready."""
|
|
235
345
|
for d in reversed(self.metadata.get("history", [])):
|
|
236
346
|
if d["action"] == "compute_statistics_end":
|
|
237
347
|
return True
|
|
238
348
|
return False
|
|
239
349
|
|
|
240
350
|
@property
|
|
241
|
-
def statistics_started(self):
|
|
351
|
+
def statistics_started(self) -> Optional[datetime.datetime]:
|
|
352
|
+
"""Get the timestamp when statistics computation started."""
|
|
242
353
|
for d in reversed(self.metadata.get("history", [])):
|
|
243
354
|
if d["action"] == "compute_statistics_start":
|
|
244
355
|
return datetime.datetime.fromisoformat(d["timestamp"])
|
|
245
356
|
return None
|
|
246
357
|
|
|
247
358
|
@property
|
|
248
|
-
def build_flags(self):
|
|
359
|
+
def build_flags(self) -> Optional[NDArray[Any]]:
|
|
360
|
+
"""Get the build flags of the dataset."""
|
|
249
361
|
return self.zarr.get("_build_flags")
|
|
250
362
|
|
|
251
363
|
@cached_property
|
|
252
|
-
def copy_flags(self):
|
|
364
|
+
def copy_flags(self) -> Optional[NDArray[Any]]:
|
|
365
|
+
"""Get the copy flags of the dataset."""
|
|
253
366
|
if "_copy" not in self.zarr:
|
|
254
367
|
return None
|
|
255
368
|
return self.zarr["_copy"][:]
|
|
256
369
|
|
|
257
370
|
@property
|
|
258
|
-
def copy_in_progress(self):
|
|
371
|
+
def copy_in_progress(self) -> bool:
|
|
372
|
+
"""Check if a copy operation is in progress."""
|
|
259
373
|
if "_copy" not in self.zarr:
|
|
260
374
|
return False
|
|
261
375
|
|
|
@@ -267,10 +381,12 @@ class Version:
|
|
|
267
381
|
return not all(self.copy_flags)
|
|
268
382
|
|
|
269
383
|
@property
|
|
270
|
-
def build_lengths(self):
|
|
384
|
+
def build_lengths(self) -> Optional[NDArray]:
|
|
385
|
+
"""Get the build lengths of the dataset."""
|
|
271
386
|
return self.zarr.get("_build_lengths")
|
|
272
387
|
|
|
273
|
-
def progress(self):
|
|
388
|
+
def progress(self) -> None:
|
|
389
|
+
"""Print the progress of dataset initialization or copying."""
|
|
274
390
|
if self.copy_in_progress:
|
|
275
391
|
copy_flags = self.copy_flags
|
|
276
392
|
print("🪫 Dataset not ready, copy in progress.")
|
|
@@ -329,7 +445,8 @@ class Version:
|
|
|
329
445
|
else:
|
|
330
446
|
print("⏳ Statistics not ready.")
|
|
331
447
|
|
|
332
|
-
def brute_force_statistics(self):
|
|
448
|
+
def brute_force_statistics(self) -> None:
|
|
449
|
+
"""Compute and print statistics for the dataset."""
|
|
333
450
|
if self.dataset is None:
|
|
334
451
|
return
|
|
335
452
|
print("📊 Computing statistics...")
|
|
@@ -376,13 +493,17 @@ class Version:
|
|
|
376
493
|
|
|
377
494
|
|
|
378
495
|
class NoVersion(Version):
|
|
496
|
+
"""Represents a dataset with no version."""
|
|
497
|
+
|
|
379
498
|
@property
|
|
380
|
-
def first_date(self):
|
|
499
|
+
def first_date(self) -> datetime.datetime:
|
|
500
|
+
"""Get the first date of the dataset."""
|
|
381
501
|
monthly = find(self.metadata, "monthly")
|
|
382
502
|
return datetime.datetime.fromisoformat(monthly["start"])
|
|
383
503
|
|
|
384
504
|
@property
|
|
385
|
-
def last_date(self):
|
|
505
|
+
def last_date(self) -> datetime.datetime:
|
|
506
|
+
"""Get the last date of the dataset."""
|
|
386
507
|
monthly = find(self.metadata, "monthly")
|
|
387
508
|
time = max([int(t) for t in find(self.metadata["earthkit-data"], "time")])
|
|
388
509
|
assert isinstance(time, int), (time, type(time))
|
|
@@ -391,48 +512,67 @@ class NoVersion(Version):
|
|
|
391
512
|
return datetime.datetime.fromisoformat(monthly["stop"]) + datetime.timedelta(hours=time)
|
|
392
513
|
|
|
393
514
|
@property
|
|
394
|
-
def frequency(self):
|
|
515
|
+
def frequency(self) -> int:
|
|
516
|
+
"""Get the frequency of the dataset."""
|
|
395
517
|
time = find(self.metadata["earthkit-data"], "time")
|
|
396
518
|
return 24 // len(time)
|
|
397
519
|
|
|
398
520
|
@property
|
|
399
|
-
def statistics(self):
|
|
521
|
+
def statistics(self) -> tuple[list, list, list, list]:
|
|
522
|
+
"""Get the statistics of the dataset."""
|
|
400
523
|
stats = find(self.metadata, "statistics_by_index")
|
|
401
524
|
return stats["minimum"], stats["maximum"], stats["mean"], stats["stdev"]
|
|
402
525
|
|
|
403
526
|
@property
|
|
404
|
-
def statistics_ready(self):
|
|
527
|
+
def statistics_ready(self) -> bool:
|
|
528
|
+
"""Check if the statistics are ready."""
|
|
405
529
|
return find(self.metadata, "statistics_by_index") is not None
|
|
406
530
|
|
|
407
531
|
@property
|
|
408
|
-
def resolution(self):
|
|
532
|
+
def resolution(self) -> str:
|
|
533
|
+
"""Get the resolution of the dataset."""
|
|
409
534
|
return find(self.metadata, "grid")
|
|
410
535
|
|
|
411
|
-
def details(self):
|
|
536
|
+
def details(self) -> None:
|
|
537
|
+
"""Print details of the dataset."""
|
|
412
538
|
pass
|
|
413
539
|
|
|
414
|
-
def progress(self):
|
|
540
|
+
def progress(self) -> None:
|
|
541
|
+
"""Print the progress of dataset initialization or copying."""
|
|
415
542
|
pass
|
|
416
543
|
|
|
417
|
-
def ready(self):
|
|
544
|
+
def ready(self) -> bool:
|
|
545
|
+
"""Check if the dataset is ready.
|
|
546
|
+
|
|
547
|
+
Returns
|
|
548
|
+
-------
|
|
549
|
+
bool
|
|
550
|
+
True if the dataset is ready, False otherwise.
|
|
551
|
+
"""
|
|
418
552
|
return True
|
|
419
553
|
|
|
420
554
|
|
|
421
555
|
class Version0_4(Version):
|
|
422
|
-
|
|
556
|
+
"""Represents version 0.4 of a dataset."""
|
|
557
|
+
|
|
558
|
+
def details(self) -> None:
|
|
559
|
+
"""Print details of the dataset."""
|
|
423
560
|
pass
|
|
424
561
|
|
|
425
562
|
@property
|
|
426
|
-
def initialised(self):
|
|
563
|
+
def initialised(self) -> datetime.datetime:
|
|
564
|
+
"""Get the initialization timestamp of the dataset."""
|
|
427
565
|
return datetime.datetime.fromisoformat(self.metadata["creation_timestamp"])
|
|
428
566
|
|
|
429
|
-
def statistics_ready(self):
|
|
567
|
+
def statistics_ready(self) -> bool:
|
|
568
|
+
"""Check if the statistics are ready."""
|
|
430
569
|
if not self.ready():
|
|
431
570
|
return False
|
|
432
571
|
build_flags = self.zarr["_build_flags"]
|
|
433
572
|
return build_flags.attrs.get("_statistics_computed")
|
|
434
573
|
|
|
435
|
-
def ready(self):
|
|
574
|
+
def ready(self) -> bool:
|
|
575
|
+
"""Check if the dataset is ready."""
|
|
436
576
|
if "_build_flags" not in self.zarr:
|
|
437
577
|
return False
|
|
438
578
|
|
|
@@ -442,7 +582,20 @@ class Version0_4(Version):
|
|
|
442
582
|
|
|
443
583
|
return all(build_flags)
|
|
444
584
|
|
|
445
|
-
def _info(self, verbose, history, statistics, **kwargs):
|
|
585
|
+
def _info(self, verbose: bool, history: bool, statistics: bool, **kwargs: Any) -> None:
|
|
586
|
+
"""Print information about the dataset.
|
|
587
|
+
|
|
588
|
+
Parameters
|
|
589
|
+
----------
|
|
590
|
+
verbose : bool
|
|
591
|
+
Whether to print verbose information.
|
|
592
|
+
history : bool
|
|
593
|
+
Whether to print the history of the dataset.
|
|
594
|
+
statistics : bool
|
|
595
|
+
Whether to print statistics of the dataset.
|
|
596
|
+
**kwargs : Any
|
|
597
|
+
Additional keyword arguments.
|
|
598
|
+
"""
|
|
446
599
|
z = self.zarr
|
|
447
600
|
|
|
448
601
|
# for backward compatibility
|
|
@@ -467,8 +620,11 @@ class Version0_4(Version):
|
|
|
467
620
|
|
|
468
621
|
|
|
469
622
|
class Version0_6(Version):
|
|
623
|
+
"""Represents version 0.6 of a dataset."""
|
|
624
|
+
|
|
470
625
|
@property
|
|
471
|
-
def initialised(self):
|
|
626
|
+
def initialised(self) -> Optional[datetime.datetime]:
|
|
627
|
+
"""Get the initialization timestamp of the dataset."""
|
|
472
628
|
for record in self.metadata.get("history", []):
|
|
473
629
|
if record["action"] == "initialised":
|
|
474
630
|
return datetime.datetime.fromisoformat(record["timestamp"])
|
|
@@ -480,7 +636,8 @@ class Version0_6(Version):
|
|
|
480
636
|
|
|
481
637
|
return None
|
|
482
638
|
|
|
483
|
-
def details(self):
|
|
639
|
+
def details(self) -> None:
|
|
640
|
+
"""Print details of the dataset."""
|
|
484
641
|
print()
|
|
485
642
|
for d in self.metadata.get("history", []):
|
|
486
643
|
d = deepcopy(d)
|
|
@@ -493,7 +650,8 @@ class Version0_6(Version):
|
|
|
493
650
|
print(f" {timestamp} : {action} ({versions}) {more}")
|
|
494
651
|
print()
|
|
495
652
|
|
|
496
|
-
def ready(self):
|
|
653
|
+
def ready(self) -> bool:
|
|
654
|
+
"""Check if the dataset is ready."""
|
|
497
655
|
if "_build_flags" not in self.zarr:
|
|
498
656
|
return False
|
|
499
657
|
|
|
@@ -501,20 +659,26 @@ class Version0_6(Version):
|
|
|
501
659
|
return all(build_flags)
|
|
502
660
|
|
|
503
661
|
@property
|
|
504
|
-
def name_to_index(self):
|
|
662
|
+
def name_to_index(self) -> Dict[str, int]:
|
|
663
|
+
"""Get a mapping of variable names to their indices."""
|
|
505
664
|
return {n: i for i, n in enumerate(self.metadata["variables"])}
|
|
506
665
|
|
|
507
666
|
@property
|
|
508
|
-
def variables(self):
|
|
667
|
+
def variables(self) -> List[str]:
|
|
668
|
+
"""Get the list of variables in the dataset."""
|
|
509
669
|
return self.metadata["variables"]
|
|
510
670
|
|
|
511
671
|
@property
|
|
512
|
-
def variables_metadata(self):
|
|
672
|
+
def variables_metadata(self) -> dict:
|
|
673
|
+
"""Get the metadata for the variables."""
|
|
513
674
|
return self.metadata.get("variables_metadata", {})
|
|
514
675
|
|
|
515
676
|
|
|
516
677
|
class Version0_12(Version0_6):
|
|
517
|
-
|
|
678
|
+
"""Represents version 0.12 of a dataset."""
|
|
679
|
+
|
|
680
|
+
def details(self) -> None:
|
|
681
|
+
"""Print details of the dataset."""
|
|
518
682
|
print()
|
|
519
683
|
for d in self.metadata.get("history", []):
|
|
520
684
|
d = deepcopy(d)
|
|
@@ -528,24 +692,30 @@ class Version0_12(Version0_6):
|
|
|
528
692
|
print()
|
|
529
693
|
|
|
530
694
|
@property
|
|
531
|
-
def first_date(self):
|
|
695
|
+
def first_date(self) -> datetime.datetime:
|
|
696
|
+
"""Get the first date of the dataset."""
|
|
532
697
|
return datetime.datetime.fromisoformat(self.metadata["start_date"])
|
|
533
698
|
|
|
534
699
|
@property
|
|
535
|
-
def last_date(self):
|
|
700
|
+
def last_date(self) -> datetime.datetime:
|
|
701
|
+
"""Get the last date of the dataset."""
|
|
536
702
|
return datetime.datetime.fromisoformat(self.metadata["end_date"])
|
|
537
703
|
|
|
538
704
|
|
|
539
705
|
class Version0_13(Version0_12):
|
|
706
|
+
"""Represents version 0.13 of a dataset."""
|
|
707
|
+
|
|
540
708
|
@property
|
|
541
|
-
def build_flags(self):
|
|
709
|
+
def build_flags(self) -> Optional[NDArray]:
|
|
710
|
+
"""Get the build flags for the dataset."""
|
|
542
711
|
if "_build" not in self.zarr:
|
|
543
712
|
return None
|
|
544
713
|
build = self.zarr["_build"]
|
|
545
714
|
return build.get("flags")
|
|
546
715
|
|
|
547
716
|
@property
|
|
548
|
-
def build_lengths(self):
|
|
717
|
+
def build_lengths(self) -> Optional[NDArray]:
|
|
718
|
+
"""Get the build lengths for the dataset."""
|
|
549
719
|
if "_build" not in self.zarr:
|
|
550
720
|
return None
|
|
551
721
|
build = self.zarr["_build"]
|
|
@@ -562,9 +732,16 @@ VERSIONS = {
|
|
|
562
732
|
|
|
563
733
|
|
|
564
734
|
class InspectZarr(Command):
|
|
565
|
-
"""
|
|
735
|
+
"""Command to inspect a zarr dataset."""
|
|
736
|
+
|
|
737
|
+
def add_arguments(self, command_parser: Any) -> None:
|
|
738
|
+
"""Add arguments to the command parser.
|
|
566
739
|
|
|
567
|
-
|
|
740
|
+
Parameters
|
|
741
|
+
----------
|
|
742
|
+
command_parser : Any
|
|
743
|
+
The command parser.
|
|
744
|
+
"""
|
|
568
745
|
command_parser.add_argument("path", metavar="DATASET")
|
|
569
746
|
command_parser.add_argument("--detailed", action="store_true")
|
|
570
747
|
|
|
@@ -572,10 +749,42 @@ class InspectZarr(Command):
|
|
|
572
749
|
command_parser.add_argument("--statistics", action="store_true")
|
|
573
750
|
command_parser.add_argument("--size", action="store_true", help="Print size")
|
|
574
751
|
|
|
575
|
-
def run(self, args):
|
|
752
|
+
def run(self, args: Any) -> None:
|
|
753
|
+
"""Run the command.
|
|
754
|
+
|
|
755
|
+
Parameters
|
|
756
|
+
----------
|
|
757
|
+
args : Any
|
|
758
|
+
The command arguments.
|
|
759
|
+
"""
|
|
576
760
|
self.inspect_zarr(**vars(args))
|
|
577
761
|
|
|
578
|
-
def inspect_zarr(
|
|
762
|
+
def inspect_zarr(
|
|
763
|
+
self,
|
|
764
|
+
path: str,
|
|
765
|
+
progress: bool = False,
|
|
766
|
+
statistics: bool = False,
|
|
767
|
+
detailed: bool = False,
|
|
768
|
+
size: bool = False,
|
|
769
|
+
**kwargs: Any,
|
|
770
|
+
) -> None:
|
|
771
|
+
"""Inspect a zarr dataset.
|
|
772
|
+
|
|
773
|
+
Parameters
|
|
774
|
+
----------
|
|
775
|
+
path : str
|
|
776
|
+
Path to the dataset.
|
|
777
|
+
progress : bool, optional
|
|
778
|
+
Whether to print progress, by default False.
|
|
779
|
+
statistics : bool, optional
|
|
780
|
+
Whether to compute and print statistics, by default False.
|
|
781
|
+
detailed : bool, optional
|
|
782
|
+
Whether to print detailed information, by default False.
|
|
783
|
+
size : bool, optional
|
|
784
|
+
Whether to print the size of the dataset, by default False.
|
|
785
|
+
**kwargs : Any
|
|
786
|
+
Additional keyword arguments.
|
|
787
|
+
"""
|
|
579
788
|
version = self._info(path)
|
|
580
789
|
|
|
581
790
|
dotted_line()
|
|
@@ -596,7 +805,19 @@ class InspectZarr(Command):
|
|
|
596
805
|
print(type(version))
|
|
597
806
|
raise
|
|
598
807
|
|
|
599
|
-
def _info(self, path):
|
|
808
|
+
def _info(self, path: str) -> Version:
|
|
809
|
+
"""Get version information of the dataset.
|
|
810
|
+
|
|
811
|
+
Parameters
|
|
812
|
+
----------
|
|
813
|
+
path : str
|
|
814
|
+
Path to the dataset.
|
|
815
|
+
|
|
816
|
+
Returns
|
|
817
|
+
-------
|
|
818
|
+
Version
|
|
819
|
+
The version object of the dataset.
|
|
820
|
+
"""
|
|
600
821
|
z = open_zarr(zarr_lookup(path))
|
|
601
822
|
|
|
602
823
|
metadata = dict(z.attrs)
|