anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +22 -25
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +3 -6
- anemoi/datasets/create/filter.py +1 -2
- anemoi/datasets/create/input/__init__.py +1 -2
- anemoi/datasets/create/input/action.py +3 -5
- anemoi/datasets/create/input/concat.py +5 -8
- anemoi/datasets/create/input/context.py +3 -6
- anemoi/datasets/create/input/data_sources.py +5 -8
- anemoi/datasets/create/input/empty.py +1 -2
- anemoi/datasets/create/input/filter.py +2 -3
- anemoi/datasets/create/input/function.py +1 -2
- anemoi/datasets/create/input/join.py +4 -5
- anemoi/datasets/create/input/misc.py +4 -6
- anemoi/datasets/create/input/repeated_dates.py +13 -18
- anemoi/datasets/create/input/result.py +29 -33
- anemoi/datasets/create/input/step.py +4 -8
- anemoi/datasets/create/input/template.py +3 -4
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +47 -52
- anemoi/datasets/create/sources/accumulations2.py +4 -8
- anemoi/datasets/create/sources/constants.py +1 -3
- anemoi/datasets/create/sources/empty.py +1 -2
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +1 -2
- anemoi/datasets/create/sources/grib.py +6 -10
- anemoi/datasets/create/sources/grib_index.py +13 -15
- anemoi/datasets/create/sources/hindcasts.py +2 -5
- anemoi/datasets/create/sources/legacy.py +1 -1
- anemoi/datasets/create/sources/mars.py +17 -21
- anemoi/datasets/create/sources/netcdf.py +1 -2
- anemoi/datasets/create/sources/opendap.py +1 -3
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +8 -11
- anemoi/datasets/create/sources/source.py +3 -6
- anemoi/datasets/create/sources/tendencies.py +2 -5
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +12 -13
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +16 -12
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +1 -2
- anemoi/datasets/create/sources/zenodo.py +3 -5
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +1 -2
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +42 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -16
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +16 -21
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +13 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/grids.py +5 -9
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +4 -4
- anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
- anemoi/datasets/utils/__init__.py +0 -8
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
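Most of the per-module diffs below follow one pattern: annotations written with the capitalized `typing` generics (`Dict`, `List`, `Set`, `Tuple`, `Optional`, `Union`, `Callable`, `Iterator`, `Sequence`) are rewritten with built-in generics (PEP 585) and the `X | Y` union syntax (PEP 604), with `Callable`, `Iterator` and `Sequence` now imported from `collections.abc`. A minimal before/after sketch of that style (the function name and body here are made up for illustration and are not code from the package):

    # Illustrative sketch of the annotation modernization seen throughout this release.
    from __future__ import annotations  # not required on Python >= 3.10, shown for portability

    # Old style (removed in 0.5.27):
    #   from typing import Dict, List, Optional
    #   def missing_dates(dates: List[str], cache: Optional[Dict[str, int]] = None) -> Optional[int]: ...

    # New style (built-in generics and PEP 604 unions):
    def missing_dates(dates: list[str], cache: dict[str, int] | None = None) -> int | None:
        """Return the number of missing dates, or None if nothing was checked."""
        if not dates:
            return None
        cache = cache or {}
        return sum(1 for d in dates if d not in cache)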
anemoi/datasets/data/stores.py
CHANGED
@@ -15,11 +15,6 @@ import tempfile
 import warnings
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set
-from typing import Union
 from urllib.parse import urlparse

 import numpy as np
@@ -90,7 +85,7 @@ class S3Store(ReadOnlyStore):
     options using the anemoi configs.
     """

-    def __init__(self, url: str, region:
+    def __init__(self, url: str, region: str | None = None) -> None:
         """Initialize the S3Store with a URL and optional region."""
         from anemoi.utils.remote.s3 import s3_client

@@ -199,7 +194,7 @@ def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hie
 class Zarr(Dataset):
     """A zarr dataset."""

-    def __init__(self, path:
+    def __init__(self, path: str | zarr.hierarchy.Group) -> None:
         """Initialize the Zarr dataset with a path or zarr group."""
         if isinstance(path, zarr.hierarchy.Group):
             self.was_zarr = True
@@ -215,7 +210,7 @@ class Zarr(Dataset):
         self._missing = set()

     @property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Return the missing dates of the dataset."""
         return self._missing

@@ -236,7 +231,7 @@ class Zarr(Dataset):
         """Retrieve an item from the dataset."""
         return self.data[n]

-    def _unwind(self, index:
+    def _unwind(self, index: int | slice | list | tuple, rest: list, shape: tuple, axis: int, axes: list) -> iter:
         """Unwind the index for multi-dimensional indexing."""
         if not isinstance(index, (int, slice, list, tuple)):
             try:
@@ -298,7 +293,7 @@ class Zarr(Dataset):
         return self.z.longitude[:]

     @property
-    def statistics(self) ->
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Return the statistics of the dataset."""
         return dict(
             mean=self.z.mean[:],
@@ -307,7 +302,7 @@ class Zarr(Dataset):
             minimum=self.z.minimum[:],
         )

-    def statistics_tendencies(self, delta:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Return the statistical tendencies of the dataset."""
         if delta is None:
             delta = self.frequency
@@ -354,14 +349,14 @@ class Zarr(Dataset):
         return dates[1].astype(object) - dates[0].astype(object)

     @property
-    def name_to_index(self) ->
+    def name_to_index(self) -> dict[str, int]:
         """Return the name to index mapping of the dataset."""
         if "variables" in self.z.attrs:
             return {n: i for i, n in enumerate(self.z.attrs["variables"])}
         return self.z.attrs["name_to_index"]

     @property
-    def variables(self) ->
+    def variables(self) -> list[str]:
         """Return the variables of the dataset."""
         return [
             k
@@ -372,7 +367,7 @@ class Zarr(Dataset):
         ]

     @cached_property
-    def constant_fields(self) ->
+    def constant_fields(self) -> list[str]:
         """Return the constant fields of the dataset."""
         result = self.z.attrs.get("constant_fields")
         if result is None:
@@ -380,7 +375,7 @@ class Zarr(Dataset):
         return self.computed_constant_fields()

     @property
-    def variables_metadata(self) ->
+    def variables_metadata(self) -> dict[str, Any]:
         """Return the metadata of the variables."""
         return self.z.attrs.get("variables_metadata", {})

@@ -392,7 +387,7 @@ class Zarr(Dataset):
         """Return the end date of the statistics."""
         return self.dates[-1]

-    def metadata_specific(self, **kwargs: Any) ->
+    def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
         """Return the specific metadata of the dataset."""
         return super().metadata_specific(
             attrs=dict(self.z.attrs),
@@ -416,7 +411,7 @@ class Zarr(Dataset):
         """Return the tree representation of the dataset."""
         return Node(self, [], path=self.path)

-    def get_dataset_names(self, names:
+    def get_dataset_names(self, names: set[str]) -> None:
         """Get the names of the datasets."""
         name, _ = os.path.splitext(os.path.basename(self.path))
         names.add(name)
@@ -433,17 +428,17 @@ class Zarr(Dataset):
 class ZarrWithMissingDates(Zarr):
     """A zarr dataset with missing dates."""

-    def __init__(self, path:
+    def __init__(self, path: str | zarr.hierarchy.Group) -> None:
         """Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
         super().__init__(path)

         missing_dates = self.z.attrs.get("missing_dates", [])
-        missing_dates =
+        missing_dates = {np.datetime64(x, "s") for x in missing_dates}
         self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
         self._missing = set(self.missing_to_dates)

     @property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Return the missing dates of the dataset."""
         return self._missing

@@ -506,7 +501,7 @@ class ZarrWithMissingDates(Zarr):
 QUIET = set()


-def zarr_lookup(name: str, fail: bool = True) ->
+def zarr_lookup(name: str, fail: bool = True) -> str | None:
     """Look up a zarr dataset by name."""

     config = load_config()["datasets"]
anemoi/datasets/data/subset.py
CHANGED
@@ -10,12 +10,9 @@

 import datetime
 import logging
+from collections.abc import Sequence
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Sequence
-from typing import Set
 from typing import Union

 import numpy as np
@@ -95,7 +92,7 @@ def _end(a: int, b: int, dates: NDArray[np.datetime64]) -> int:
     return b


-def _combine_reasons(reason1:
+def _combine_reasons(reason1: dict[str, Any], reason2: dict[str, Any], dates: NDArray[np.datetime64]) -> dict[str, Any]:
     """Combine two reason dictionaries.

     Parameters:
@@ -126,7 +123,7 @@ class Subset(Forwards):
         reason (Dict[str, Any]): Dictionary of reasons.
     """

-    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason:
+    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason: dict[str, Any]) -> None:
         """Initialize the Subset.

         Parameters:
@@ -140,8 +137,8 @@ class Subset(Forwards):
             dataset = dataset.dataset

         self.dataset: Dataset = dataset
-        self.indices:
-        self.reason:
+        self.indices: list[int] = list(indices)
+        self.reason: dict[str, Any] = {k: v for k, v in reason.items() if v is not None}

         # Forward other properties to the super dataset
         super().__init__(dataset)
@@ -274,10 +271,10 @@ class Subset(Forwards):
         return f"Subset({self.dataset},{self.dates[0]}...{self.dates[-1]}/{self.frequency})"

     @cached_property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Get the missing indices of the subset."""
         missing = self.dataset.missing
-        result:
+        result: set[int] = set()
         for j, i in enumerate(self.indices):
             if i in missing:
                 result.add(j)
@@ -291,7 +288,7 @@ class Subset(Forwards):
         """
         return Node(self, [self.dataset.tree()], **self.reason)

-    def forwards_subclass_metadata_specific(self) ->
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the forwards subclass.

         Returns:
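For context on the `Subset.missing` hunk above: a subset re-indexes its parent dataset, so missing positions have to be translated from parent indices (`i`) to subset positions (`j`). A self-contained sketch of that remapping with made-up numbers (the real property reads `self.dataset.missing` and `self.indices`):

    # Parent dataset positions that are missing.
    parent_missing: set[int] = {3, 7, 11}

    # Positions of the parent dataset that this subset keeps (what Subset.indices holds).
    indices: list[int] = [2, 3, 4, 7, 8]

    # Same loop as Subset.missing: j is the position within the subset.
    result: set[int] = set()
    for j, i in enumerate(indices):
        if i in parent_missing:
            result.add(j)

    print(result)  # {1, 3}: parent indices 3 and 7 map to subset positions 1 and 3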
anemoi/datasets/data/unchecked.py
CHANGED
@@ -10,14 +10,10 @@

 import datetime
 import logging
+from collections.abc import Callable
 from functools import cached_property
 from functools import wraps
 from typing import Any
-from typing import Callable
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set

 import numpy as np
 from numpy.typing import NDArray
@@ -163,13 +159,13 @@ class Unchecked(Combined):

     @check("check_same_variables")
     @property
-    def name_to_index(self) ->
+    def name_to_index(self) -> dict[str, int]:
         """Get the mapping of variable names to their indices."""
         raise NotImplementedError()

     @check("check_same_variables")
     @property
-    def variables(self) ->
+    def variables(self) -> list[str]:
         """Get the list of variables in the dataset."""
         raise NotImplementedError()

@@ -181,12 +177,12 @@ class Unchecked(Combined):

     @check("check_same_variables")
     @property
-    def statistics(self) ->
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Get the statistics of the dataset."""
         raise NotImplementedError()

     @check("check_same_variables")
-    def statistics_tendencies(self, delta:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Get the statistics tendencies of the dataset.

         Parameters
@@ -207,9 +203,9 @@ class Unchecked(Combined):
         raise NotImplementedError()

     @cached_property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Get the missing data indices."""
-        result:
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result
anemoi/datasets/data/xy.py
CHANGED
@@ -11,10 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Set
-from typing import Tuple

 from .dataset import Dataset
 from .dataset import FullIndex
@@ -29,7 +25,7 @@ LOG = logging.getLogger(__name__)
 class ZipBase(Combined):
     """Base class for handling zipped datasets."""

-    def __init__(self, datasets:
+    def __init__(self, datasets: list[Any], check_compatibility: bool = True) -> None:
         """Initialize ZipBase with a list of datasets.

         Parameters
@@ -58,7 +54,7 @@ class ZipBase(Combined):
         new_parents = [parent.clone(ds) for ds in self.datasets]
         return self.clone(new_parents)

-    def clone(self, datasets:
+    def clone(self, datasets: list[Any]) -> "ZipBase":
         """Clone the ZipBase with new datasets.

         Parameters
@@ -81,7 +77,11 @@ class ZipBase(Combined):
         Node
             Tree representation of the datasets.
         """
-        return Node(
+        return Node(
+            self,
+            [d.tree() for d in self.datasets],
+            check_compatibility=self._check_compatibility,
+        )

     def __len__(self) -> int:
         """Get the length of the smallest dataset.
@@ -93,7 +93,7 @@ class ZipBase(Combined):
         """
         return min(len(d) for d in self.datasets)

-    def __getitem__(self, n: FullIndex) ->
+    def __getitem__(self, n: FullIndex) -> tuple[Any, ...]:
         """Get the item at the specified index from all datasets.

         Parameters
@@ -145,55 +145,55 @@ class ZipBase(Combined):
         pass

     @cached_property
-    def missing(self) ->
+    def missing(self) -> set[int]:
         """Get the set of missing indices from all datasets."""
-        result:
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result

     @property
-    def shape(self) ->
+    def shape(self) -> tuple[Any, ...]:
         """Get the shape of all datasets."""
         return tuple(d.shape for d in self.datasets)

     @property
-    def field_shape(self) ->
+    def field_shape(self) -> tuple[Any, ...]:
         """Get the field shape of all datasets."""
         return tuple(d.shape for d in self.datasets)

     @property
-    def latitudes(self) ->
+    def latitudes(self) -> tuple[Any, ...]:
         """Get the latitudes of all datasets."""
         return tuple(d.latitudes for d in self.datasets)

     @property
-    def longitudes(self) ->
+    def longitudes(self) -> tuple[Any, ...]:
         """Get the longitudes of all datasets."""
         return tuple(d.longitudes for d in self.datasets)

     @property
-    def dtype(self) ->
+    def dtype(self) -> tuple[Any, ...]:
         """Get the data types of all datasets."""
         return tuple(d.dtype for d in self.datasets)

     @property
-    def grids(self) ->
+    def grids(self) -> tuple[Any, ...]:
         """Get the grids of all datasets."""
         return tuple(d.grids for d in self.datasets)

     @property
-    def statistics(self) ->
+    def statistics(self) -> tuple[Any, ...]:
         """Get the statistics of all datasets."""
         return tuple(d.statistics for d in self.datasets)

     @property
-    def resolution(self) ->
+    def resolution(self) -> tuple[Any, ...]:
         """Get the resolution of all datasets."""
         return tuple(d.resolution for d in self.datasets)

     @property
-    def name_to_index(self) ->
+    def name_to_index(self) -> tuple[Any, ...]:
         """Get the name to index mapping of all datasets."""
         return tuple(d.name_to_index for d in self.datasets)

@@ -210,6 +210,10 @@ class ZipBase(Combined):
         if self._check_compatibility:
             super().check_compatibility(d1, d2)

+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
+        """Get the metadata specific to the subclass."""
+        return {}
+

 class Zip(ZipBase):
     """Class for handling zipped datasets."""
@@ -223,7 +227,7 @@ class XY(ZipBase):
     pass


-def xy_factory(args:
+def xy_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> XY:
     """Factory function to create an XY instance.

     Parameters
@@ -256,7 +260,7 @@ def xy_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> XY:
     return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)


-def zip_factory(args:
+def zip_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Zip:
     """Factory function to create a Zip instance.

     Parameters
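The `ZipBase` hunks mostly change annotations and the `tree()`/metadata plumbing, but they also document the zip semantics: length is that of the shortest member, item access returns one item per member dataset, and per-dataset properties come back as tuples. A toy sketch of that behaviour under those assumptions (not the package's class, which also handles subsetting, missing dates and compatibility checks):

    from typing import Any


    class ZipSketch:
        """Toy stand-in for ZipBase: pair up several dataset-like sequences."""

        def __init__(self, datasets: list[Any]) -> None:
            self.datasets = datasets

        def __len__(self) -> int:
            # Length of the smallest dataset, as in ZipBase.__len__.
            return min(len(d) for d in self.datasets)

        def __getitem__(self, n: int) -> tuple[Any, ...]:
            # One item per member dataset.
            return tuple(d[n] for d in self.datasets)


    pair = ZipSketch([[10, 11, 12], ["a", "b"]])
    print(len(pair), pair[1])  # 2 (11, 'b')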
anemoi/datasets/dates/__init__.py
CHANGED
@@ -10,15 +10,10 @@

 import datetime
 import warnings
+from collections.abc import Iterator
 from functools import reduce
 from math import gcd
 from typing import Any
-from typing import Dict
-from typing import Iterator
-from typing import List
-from typing import Optional
-from typing import Tuple
-from typing import Union

 # from anemoi.utils.dates import as_datetime
 from anemoi.utils.dates import DateTimes
@@ -29,7 +24,7 @@ from anemoi.utils.hindcasts import HindcastDatesTimes
 from anemoi.utils.humanize import print_dates


-def extend(x:
+def extend(x: str | list[Any] | tuple[Any, ...]) -> Iterator[datetime.datetime]:
     """Extend a date range or list of dates into individual datetime objects.

     Args:
@@ -86,7 +81,7 @@ class DatesProvider:
         3
     """

-    def __init__(self, missing:
+    def __init__(self, missing: list[str | datetime.datetime] | None = None) -> None:
         """Initialize the DatesProvider with optional missing dates.

         Parameters
@@ -168,7 +163,7 @@ class ValuesDates(DatesProvider):
         **kwargs (Any): Additional arguments.
     """

-    def __init__(self, values:
+    def __init__(self, values: list[str | datetime.datetime], **kwargs: Any) -> None:
         """Initialize ValuesDates with a list of values.

         Args:
@@ -188,7 +183,7 @@ class ValuesDates(DatesProvider):
         """
         return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"

-    def as_dict(self) ->
+    def as_dict(self) -> dict[str, Any]:
         """Convert the ValuesDates instance to a dictionary.

         Returns
@@ -215,9 +210,9 @@ class StartEndDates(DatesProvider):

     def __init__(
         self,
-        start:
-        end:
-        frequency:
+        start: str | datetime.datetime,
+        end: str | datetime.datetime,
+        frequency: int | str = 1,
         **kwargs: Any,
     ) -> None:
         """Initialize StartEndDates with start, end, and frequency.
@@ -259,7 +254,7 @@ class StartEndDates(DatesProvider):

         super().__init__(missing=missing)

-    def as_dict(self) ->
+    def as_dict(self) -> dict[str, Any]:
         """Convert the StartEndDates instance to a dictionary.

         Returns
@@ -314,9 +309,9 @@ class HindcastsDates(DatesProvider):

     def __init__(
         self,
-        start:
-        end:
-        steps:
+        start: str | list[str],
+        end: str | list[str],
+        steps: list[int] = [0],
         years: int = 20,
         **kwargs: Any,
     ) -> None:
@@ -403,7 +398,7 @@ class HindcastsDates(DatesProvider):
         """
         return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"

-    def as_dict(self) ->
+    def as_dict(self) -> dict[str, Any]:
         """Convert the HindcastsDates instance to a dictionary.

         Returns
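The `StartEndDates` signature now reads `start: str | datetime.datetime`, `end: str | datetime.datetime`, `frequency: int | str = 1`. As an illustration only of what such a provider produces (the real class also parses string dates and frequencies and tracks missing dates), generating a range with an integer frequency in hours might look like:

    import datetime


    def date_range(start: datetime.datetime, end: datetime.datetime, frequency: int = 1) -> list[datetime.datetime]:
        """Dates from start to end inclusive, every `frequency` hours (illustrative helper, not the package API)."""
        step = datetime.timedelta(hours=frequency)
        dates = []
        current = start
        while current <= end:
            dates.append(current)
            current += step
        return dates


    dates = date_range(datetime.datetime(2020, 1, 1), datetime.datetime(2020, 1, 2), frequency=6)
    print(len(dates))  # 5: 00:00, 06:00, 12:00, 18:00 and 00:00 the next day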
anemoi/datasets/dates/groups.py
CHANGED
@@ -12,19 +12,16 @@ import datetime
 import itertools
 from abc import ABC
 from abc import abstractmethod
+from collections.abc import Callable
+from collections.abc import Iterator
 from functools import cached_property
 from typing import Any
-from typing import Callable
-from typing import Iterator
-from typing import List
-from typing import Tuple
-from typing import Union

 from anemoi.datasets.dates import DatesProvider
 from anemoi.datasets.dates import as_datetime


-def _shorten(dates:
+def _shorten(dates: list[datetime.datetime] | tuple[datetime.datetime, ...]) -> str | list[str]:
     """Shorten the list of dates for display.

     Args:
@@ -43,7 +40,7 @@ def _shorten(dates: Union[List[datetime.datetime], Tuple[datetime.datetime, ...]
 class GroupOfDates:
     """A class to represent a group of dates."""

-    def __init__(self, dates:
+    def __init__(self, dates: list[datetime.datetime], provider: DatesProvider, partial_ok: bool = False) -> None:
         assert isinstance(provider, DatesProvider), type(provider)
         assert isinstance(dates, list)

@@ -197,10 +194,10 @@ class Groups:
 class Filter:
     """A class to filter out missing dates."""

-    def __init__(self, missing:
-        self.missing =
+    def __init__(self, missing: list[datetime.datetime]) -> None:
+        self.missing = {as_datetime(m) for m in missing}

-    def __call__(self, dates:
+    def __call__(self, dates: list[datetime.datetime]) -> list[datetime.datetime]:
         """Filter out missing dates from the list of dates.

         Args:
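The `Filter` hunk shows its full logic: missing dates are normalized once into a set via `as_datetime`, and `__call__` drops them from an incoming list. A self-contained sketch of the same pattern, using `datetime.datetime` objects directly instead of anemoi's `as_datetime`:

    import datetime


    class MissingDatesFilter:
        """Drop known-missing dates from a list, as in groups.Filter (illustrative stand-in)."""

        def __init__(self, missing: list[datetime.datetime]) -> None:
            self.missing = set(missing)

        def __call__(self, dates: list[datetime.datetime]) -> list[datetime.datetime]:
            return [d for d in dates if d not in self.missing]


    f = MissingDatesFilter([datetime.datetime(2020, 1, 1, 6)])
    kept = f([datetime.datetime(2020, 1, 1, 0), datetime.datetime(2020, 1, 1, 6)])
    print(kept)  # only 2020-01-01 00:00 remains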
anemoi/datasets/grids.py
CHANGED
@@ -11,10 +11,6 @@
 import base64
 import logging
 from typing import Any
-from typing import List
-from typing import Optional
-from typing import Tuple
-from typing import Union

 import numpy as np
 from numpy.typing import NDArray
@@ -94,7 +90,7 @@ def plot_mask(

 # TODO: Use the one from anemoi.utils.grids instead
 # from anemoi.utils.grids import ...
-def xyz_to_latlon(x: NDArray[Any], y: NDArray[Any], z: NDArray[Any]) ->
+def xyz_to_latlon(x: NDArray[Any], y: NDArray[Any], z: NDArray[Any]) -> tuple[NDArray[Any], NDArray[Any]]:
     """Convert Cartesian coordinates to latitude and longitude.

     Parameters
@@ -121,7 +117,7 @@ def xyz_to_latlon(x: NDArray[Any], y: NDArray[Any], z: NDArray[Any]) -> Tuple[ND
 # from anemoi.utils.grids import ...
 def latlon_to_xyz(
     lat: NDArray[Any], lon: NDArray[Any], radius: float = 1.0
-) ->
+) -> tuple[NDArray[Any], NDArray[Any], NDArray[Any]]:
     """Convert latitude and longitude to Cartesian coordinates.

     Parameters
@@ -272,8 +268,8 @@ def cutout_mask(
     global_lons: NDArray[Any],
     cropping_distance: float = 2.0,
     neighbours: int = 5,
-    min_distance_km:
-    plot:
+    min_distance_km: int | float | None = None,
+    plot: str | None = None,
 ) -> NDArray[Any]:
     """Return a mask for the points in [global_lats, global_lons] that are inside of [lats, lons].

@@ -465,7 +461,7 @@ def thinning_mask(
     return np.array([i for i in indices])


-def outline(lats: NDArray[Any], lons: NDArray[Any], neighbours: int = 5) ->
+def outline(lats: NDArray[Any], lons: NDArray[Any], neighbours: int = 5) -> list[int]:
     """Find the outline of the grid points.

     Parameters