anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
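Most of the file moves above relocate the builtin sources and filters from anemoi/datasets/create/functions/ to anemoi/datasets/create/sources/ and anemoi/datasets/create/filters/. The sketch below is a hedged illustration of what that relocation can mean for import paths; it assumes the importable module names mirror the wheel's file layout listed above (rename is just one of the relocated filter modules), which this diff itself does not guarantee.

# Hedged sketch only: module paths are inferred from the file list above.
try:
    # 0.5.17 layout: filters sit directly under anemoi.datasets.create.filters
    from anemoi.datasets.create.filters import rename
except ImportError:
    # 0.5.16 layout: the same module lived under create.functions.filters
    from anemoi.datasets.create.functions.filters import rename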
anemoi/datasets/data/stores.py
CHANGED
@@ -8,18 +8,29 @@
 # nor does it submit to any jurisdiction.


+import datetime
 import logging
 import os
 import warnings
 from functools import cached_property
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Set
+from typing import Union
 from urllib.parse import urlparse

 import numpy as np
 import zarr
 from anemoi.utils.dates import frequency_to_timedelta
+from numpy.typing import NDArray

 from . import MissingDateError
 from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import Shape
+from .dataset import TupleIndex
 from .debug import DEBUG_ZARR_LOADING
 from .debug import Node
 from .debug import Source
@@ -31,28 +42,34 @@ LOG = logging.getLogger(__name__)


 class ReadOnlyStore(zarr.storage.BaseStore):
-    def __delitem__(self, key):
+    """A base class for read-only stores."""
+
+    def __delitem__(self, key: str) -> None:
+        """Prevent deletion of items."""
         raise NotImplementedError()

-    def __setitem__(self, key, value):
+    def __setitem__(self, key: str, value: bytes) -> None:
+        """Prevent setting of items."""
         raise NotImplementedError()

-    def __len__(self):
+    def __len__(self) -> int:
+        """Return the number of items in the store."""
         raise NotImplementedError()

-    def __iter__(self):
+    def __iter__(self) -> iter:
+        """Return an iterator over the store."""
         raise NotImplementedError()


 class HTTPStore(ReadOnlyStore):
-    """
-    does not play well with fork() and multiprocessing.
-    """
+    """A read-only store for HTTP(S) resources."""

-    def __init__(self, url):
+    def __init__(self, url: str) -> None:
+        """Initialize the HTTPStore with a URL."""
         self.url = url

-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> bytes:
+        """Retrieve an item from the store."""
         import requests

         r = requests.get(self.url + "/" + key)
@@ -65,18 +82,22 @@ class HTTPStore(ReadOnlyStore):


 class S3Store(ReadOnlyStore):
+    """A read-only store for S3 resources."""
+
     """We write our own S3Store because the one used by zarr (s3fs)
     does not play well with fork(). We also get to control the s3 client
     options using the anemoi configs.
     """

-    def __init__(self, url, region=None):
+    def __init__(self, url: str, region: Optional[str] = None) -> None:
+        """Initialize the S3Store with a URL and optional region."""
         from anemoi.utils.remote.s3 import s3_client

         _, _, self.bucket, self.key = url.split("/", 3)
         self.s3 = s3_client(self.bucket, region=region)

-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> bytes:
+        """Retrieve an item from the store."""
         try:
             response = self.s3.get_object(Bucket=self.bucket, Key=self.key + "/" + key)
         except self.s3.exceptions.NoSuchKey:
@@ -87,13 +108,19 @@ class S3Store(ReadOnlyStore):

 class PlanetaryComputerStore(ReadOnlyStore):
     """We write our own Store to access catalogs on Planetary Computer,
-    as it requires some extra
+    as it requires some extra arguments to use xr.open_zarr.
     """

-    def __init__(self, data_catalog_id):
+    def __init__(self, data_catalog_id: str) -> None:
+        """Initialize the PlanetaryComputerStore with a data catalog ID.
+
+        Parameters
+        ----------
+        data_catalog_id : str
+            The data catalog ID.
+        """
         self.data_catalog_id = data_catalog_id

-    def __getitem__(self):
         import planetary_computer
         import pystac_client

@@ -117,34 +144,44 @@ class PlanetaryComputerStore(ReadOnlyStore):
             **asset.extra_fields["xarray:open_kwargs"],
         }

-        return store
+        self.store = store
+
+    def __getitem__(self, key: str) -> bytes:
+        """Retrieve an item from the store."""
+        raise NotImplementedError()


 class DebugStore(ReadOnlyStore):
     """A store to debug the zarr loading."""

-    def __init__(self, store):
+    def __init__(self, store: ReadOnlyStore) -> None:
+        """Initialize the DebugStore with another store."""
         assert not isinstance(store, DebugStore)
         self.store = store

-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> bytes:
+        """Retrieve an item from the store and print debug information."""
         # print()
         print("GET", key, self)
         # traceback.print_stack(file=sys.stdout)
         return self.store[key]

-    def __len__(self):
+    def __len__(self) -> int:
+        """Return the number of items in the store."""
         return len(self.store)

-    def __iter__(self):
+    def __iter__(self) -> iter:
+        """Return an iterator over the store."""
         warnings.warn("DebugStore: iterating over the store")
         return iter(self.store)

-    def __contains__(self, key):
+    def __contains__(self, key: str) -> bool:
+        """Check if the store contains a key."""
         return key in self.store


-def name_to_zarr_store(path_or_url):
+def name_to_zarr_store(path_or_url: str) -> ReadOnlyStore:
+    """Convert a path or URL to a zarr store."""
     store = path_or_url

     if store.startswith("s3://"):
@@ -158,14 +195,15 @@ def name_to_zarr_store(path_or_url):
         store = S3Store(s3_url, region=bits[2])
     elif store.startswith("https://planetarycomputer.microsoft.com/"):
         data_catalog_id = store.rsplit("/", 1)[-1]
-        store = PlanetaryComputerStore(data_catalog_id)
+        store = PlanetaryComputerStore(data_catalog_id).store
     else:
         store = HTTPStore(store)

     return store


-def open_zarr(path, dont_fail=False, cache=None):
+def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hierarchy.Group:
+    """Open a zarr store from a path."""
     try:
         store = name_to_zarr_store(path)

@@ -193,7 +231,8 @@ def open_zarr(path, dont_fail=False, cache=None):
 class Zarr(Dataset):
     """A zarr dataset."""

-    def __init__(self, path):
+    def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
+        """Initialize the Zarr dataset with a path or zarr group."""
         if isinstance(path, zarr.hierarchy.Group):
             self.was_zarr = True
             self.path = str(id(path))
@@ -205,23 +244,32 @@ class Zarr(Dataset):

         # This seems to speed up the reading of the data a lot
         self.data = self.z.data
-        self.missing = set()
+        self._missing = set()
+
+    @property
+    def missing(self) -> Set[int]:
+        """Return the missing dates of the dataset."""
+        return self._missing

     @classmethod
-    def from_name(cls, name):
+    def from_name(cls, name: str) -> "Zarr":
+        """Create a Zarr dataset from a name."""
         if name.endswith(".zip") or name.endswith(".zarr"):
             return Zarr(name)
         return Zarr(zarr_lookup(name))

-    def __len__(self):
+    def __len__(self) -> int:
+        """Return the length of the dataset."""
         return self.data.shape[0]

     @debug_indexing
     @expand_list_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Retrieve an item from the dataset."""
         return self.data[n]

-    def _unwind(self, index, rest, shape, axis, axes):
+    def _unwind(self, index: Union[int, slice, list, tuple], rest: list, shape: tuple, axis: int, axes: list) -> iter:
+        """Unwind the index for multi-dimensional indexing."""
         if not isinstance(index, (int, slice, list, tuple)):
             try:
                 # NumPy arrays, TensorFlow tensors, etc.
@@ -244,23 +292,28 @@ class Zarr(Dataset):
             yield (index,) + n

     @cached_property
-    def chunks(self):
+    def chunks(self) -> TupleIndex:
+        """Return the chunks of the dataset."""
         return self.z.data.chunks

     @cached_property
-    def shape(self):
+    def shape(self) -> Shape:
+        """Return the shape of the dataset."""
         return self.data.shape

     @cached_property
-    def dtype(self):
+    def dtype(self) -> np.dtype:
+        """Return the data type of the dataset."""
         return self.z.data.dtype

     @cached_property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Return the dates of the dataset."""
         return self.z.dates[:]  # Convert to numpy

     @property
-    def latitudes(self):
+    def latitudes(self) -> NDArray[Any]:
+        """Return the latitudes of the dataset."""
         try:
             return self.z.latitudes[:]
         except AttributeError:
@@ -268,7 +321,8 @@ class Zarr(Dataset):
             return self.z.latitude[:]

     @property
-    def longitudes(self):
+    def longitudes(self) -> NDArray[Any]:
+        """Return the longitudes of the dataset."""
         try:
             return self.z.longitudes[:]
         except AttributeError:
@@ -276,7 +330,8 @@ class Zarr(Dataset):
             return self.z.longitude[:]

     @property
-    def statistics(self):
+    def statistics(self) -> Dict[str, NDArray[Any]]:
+        """Return the statistics of the dataset."""
         return dict(
             mean=self.z.mean[:],
             stdev=self.z.stdev[:],
@@ -284,7 +339,8 @@ class Zarr(Dataset):
             minimum=self.z.minimum[:],
         )

-    def statistics_tendencies(self, delta=None):
+    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+        """Return the statistical tendencies of the dataset."""
         if delta is None:
             delta = self.frequency
         if isinstance(delta, int):
@@ -295,7 +351,7 @@ class Zarr(Dataset):
             delta = frequency_to_timedelta(delta)
         delta = frequency_to_string(delta)

-        def func(k):
+        def func(k: str) -> str:
             return f"statistics_tendencies_{delta}_{k}"

         return dict(
@@ -306,11 +362,13 @@ class Zarr(Dataset):
         )

     @property
-    def resolution(self):
+    def resolution(self) -> str:
+        """Return the resolution of the dataset."""
         return self.z.attrs["resolution"]

     @property
-    def field_shape(self):
+    def field_shape(self) -> tuple:
+        """Return the field shape of the dataset."""
         try:
             return tuple(self.z.attrs["field_shape"])
         except KeyError:
@@ -318,7 +376,8 @@ class Zarr(Dataset):
             return (self.shape[-1],)

     @property
-    def frequency(self):
+    def frequency(self) -> datetime.timedelta:
+        """Return the frequency of the dataset."""
         try:
             return frequency_to_timedelta(self.z.attrs["frequency"])
         except KeyError:
@@ -327,13 +386,15 @@ class Zarr(Dataset):
             return dates[1].astype(object) - dates[0].astype(object)

     @property
-    def name_to_index(self):
+    def name_to_index(self) -> Dict[str, int]:
+        """Return the name to index mapping of the dataset."""
         if "variables" in self.z.attrs:
             return {n: i for i, n in enumerate(self.z.attrs["variables"])}
         return self.z.attrs["name_to_index"]

     @property
-    def variables(self):
+    def variables(self) -> List[str]:
+        """Return the variables of the dataset."""
         return [
             k
             for k, v in sorted(
@@ -343,23 +404,28 @@ class Zarr(Dataset):
         ]

     @cached_property
-    def constant_fields(self):
+    def constant_fields(self) -> List[str]:
+        """Return the constant fields of the dataset."""
         result = self.z.attrs.get("constant_fields")
         if result is None:
             LOG.warning("No 'constant_fields' attribute in %r, computing them", self)
         return self.computed_constant_fields()

     @property
-    def variables_metadata(self):
+    def variables_metadata(self) -> Dict[str, Any]:
+        """Return the metadata of the variables."""
         return self.z.attrs.get("variables_metadata", {})

-    def __repr__(self):
+    def __repr__(self) -> str:
+        """Return the string representation of the dataset."""
         return self.path

-    def end_of_statistics_date(self):
+    def end_of_statistics_date(self) -> np.datetime64:
+        """Return the end date of the statistics."""
         return self.dates[-1]

-    def metadata_specific(self):
+    def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+        """Return the specific metadata of the dataset."""
         return super().metadata_specific(
             attrs=dict(self.z.attrs),
             chunks=self.chunks,
@@ -367,46 +433,60 @@ class Zarr(Dataset):
             path=self.path,
         )

-    def source(self, index):
+    def source(self, index: int) -> Source:
+        """Return the source of the dataset."""
         return Source(self, index, info=self.path)

-    def mutate(self):
+    def mutate(self) -> Dataset:
+        """Mutate the dataset if it has missing dates."""
         if len(self.z.attrs.get("missing_dates", [])):
             LOG.warning(f"Dataset {self} has missing dates")
             return ZarrWithMissingDates(self.z if self.was_zarr else self.path)
         return self

-    def tree(self):
+    def tree(self) -> Node:
+        """Return the tree representation of the dataset."""
         return Node(self, [], path=self.path)

-    def get_dataset_names(self, names):
+    def get_dataset_names(self, names: Set[str]) -> None:
+        """Get the names of the datasets."""
         name, _ = os.path.splitext(os.path.basename(self.path))
         names.add(name)

-    def collect_supporting_arrays(self, collected, *path):
+    def collect_supporting_arrays(self, collected: set, *path: str) -> None:
+        """Collect supporting arrays."""
         pass

-    def collect_input_sources(self, collected):
+    def collect_input_sources(self, collected: set) -> None:
+        """Collect input sources."""
         pass


 class ZarrWithMissingDates(Zarr):
     """A zarr dataset with missing dates."""

-    def __init__(self, path):
+    def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
+        """Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
         super().__init__(path)

         missing_dates = self.z.attrs.get("missing_dates", [])
         missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
         self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
-        self.missing = set(self.missing_to_dates)
+        self._missing = set(self.missing_to_dates)
+
+    @property
+    def missing(self) -> Set[int]:
+        """Return the missing dates of the dataset."""
+        return self._missing

-    def mutate(self):
+    def mutate(self) -> Dataset:
+        """Mutate the dataset."""
         return self

     @debug_indexing
     @expand_list_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Retrieve an item from the dataset."""
         if isinstance(n, int):
             if n in self.missing:
                 self._report_missing(n)
@@ -437,24 +517,29 @@ class ZarrWithMissingDates(Zarr):
                     self._report_missing(list(common)[0])
                 return self.data[n]

+            raise TypeError(f"Unsupported index {n} {type(n)}, {first} {type(first)}")
+
         raise TypeError(f"Unsupported index {n} {type(n)}")

-    def _report_missing(self, n):
+    def _report_missing(self, n: int) -> None:
+        """Report a missing date."""
         raise MissingDateError(f"Date {self.missing_to_dates[n]} is missing (index={n})")

-    def tree(self):
+    def tree(self) -> Node:
+        """Return the tree representation of the dataset."""
         return Node(self, [], path=self.path, missing=sorted(self.missing))

     @property
-    def label(self):
+    def label(self) -> str:
+        """Return the label of the dataset."""
         return "zarr*"


 QUIET = set()


-def zarr_lookup(name, fail=True):
-
+def zarr_lookup(name: str, fail: bool = True) -> Optional[str]:
+    """Look up a zarr dataset by name."""
     if name.endswith(".zarr") or name.endswith(".zip"):
         return name

@@ -464,7 +549,7 @@ def zarr_lookup(name, fail=True):
         if name not in QUIET:
             LOG.info("Opening `%s` as `%s`", name, config["named"][name])
             QUIET.add(name)
-        return config["named"][name]
+        return str(config["named"][name])

     tried = []
     for location in config["path"]: