anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/grib-index.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +22 -25
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +3 -6
- anemoi/datasets/create/filter.py +21 -24
- anemoi/datasets/create/input/__init__.py +1 -2
- anemoi/datasets/create/input/action.py +3 -5
- anemoi/datasets/create/input/concat.py +5 -8
- anemoi/datasets/create/input/context.py +3 -6
- anemoi/datasets/create/input/data_sources.py +5 -8
- anemoi/datasets/create/input/empty.py +1 -2
- anemoi/datasets/create/input/filter.py +2 -3
- anemoi/datasets/create/input/function.py +1 -2
- anemoi/datasets/create/input/join.py +4 -5
- anemoi/datasets/create/input/misc.py +4 -6
- anemoi/datasets/create/input/repeated_dates.py +13 -18
- anemoi/datasets/create/input/result.py +29 -33
- anemoi/datasets/create/input/step.py +6 -24
- anemoi/datasets/create/input/template.py +3 -4
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +47 -52
- anemoi/datasets/create/sources/accumulations2.py +4 -8
- anemoi/datasets/create/sources/constants.py +1 -3
- anemoi/datasets/create/sources/empty.py +1 -2
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +1 -2
- anemoi/datasets/create/sources/grib.py +6 -10
- anemoi/datasets/create/sources/grib_index.py +13 -15
- anemoi/datasets/create/sources/hindcasts.py +2 -5
- anemoi/datasets/create/sources/legacy.py +1 -1
- anemoi/datasets/create/sources/mars.py +17 -21
- anemoi/datasets/create/sources/netcdf.py +1 -2
- anemoi/datasets/create/sources/opendap.py +1 -3
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/planetary_computer.py +44 -0
- anemoi/datasets/create/sources/recentre.py +8 -11
- anemoi/datasets/create/sources/source.py +3 -6
- anemoi/datasets/create/sources/tendencies.py +2 -5
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
- anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
- anemoi/datasets/create/sources/xarray_support/field.py +17 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
- anemoi/datasets/create/sources/xarray_zarr.py +1 -2
- anemoi/datasets/create/sources/zenodo.py +3 -5
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +2 -74
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +1 -2
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +52 -23
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +42 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +30 -28
- anemoi/datasets/data/grids.py +12 -16
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +23 -77
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +13 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/grids.py +11 -12
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
- anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
- anemoi/datasets/create/filters/__init__.py +0 -33
- anemoi/datasets/create/filters/empty.py +0 -37
- anemoi/datasets/create/filters/legacy.py +0 -93
- anemoi/datasets/create/filters/noop.py +0 -37
- anemoi/datasets/create/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
- anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
- anemoi/datasets/create/filters/rename.py +0 -205
- anemoi/datasets/create/filters/rotate_winds.py +0 -105
- anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
- anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
- anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
- anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
- anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
- anemoi/datasets/create/filters/sum.py +0 -68
- anemoi/datasets/create/filters/transform.py +0 -51
- anemoi/datasets/create/filters/unrotate_winds.py +0 -105
- anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
- anemoi/datasets/create/filters/wz_to_w.py +0 -98
- anemoi/datasets/utils/__init__.py +0 -8
- anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/testing.py
CHANGED

@@ -1,76 +1,4 @@
-# (C) Copyright 2025- Anemoi contributors.
-#
-# This software is licensed under the terms of the Apache Licence Version 2.0
-# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
-#
-# In applying this licence, ECMWF does not waive the privileges and immunities
-# granted to it by virtue of its status as an intergovernmental organisation
-# nor does it submit to any jurisdiction.
-
-import tempfile
-from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Union
-
-import yaml
-
-from anemoi.datasets.create import creator_factory
-
-
 class TestingContext:
-
-
-
-def create_dataset(
-    *,
-    config: Union[str, Dict[str, Any]],
-    output: Optional[str],
-    delta: Optional[List[str]] = None,
-    is_test: bool = False,
-) -> str:
-    """Create a dataset based on the provided configuration.
-
-    Parameters
-    ----------
-    config : Union[str, Dict[str, Any]]
-        The configuration for the dataset. Can be a path to a YAML file or a dictionary.
-    output : Optional[str]
-        The output path for the dataset. If None, a temporary directory will be created.
-    delta : Optional[List[str]], optional
-        List of delta for secondary statistics, by default None.
-    is_test : bool, optional
-        Flag indicating if the dataset creation is for testing purposes, by default False.
+    """A context for testing plugins."""
 
-    Returns
-    -------
-    str
-        The path to the created dataset.
-    """
-    if isinstance(config, dict):
-        temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml")
-        yaml.dump(config, temp_file)
-        config = temp_file.name
-
-    if output is None:
-        output = tempfile.mkdtemp(suffix=".zarr")
-
-    creator_factory("init", config=config, path=output, overwrite=True, test=is_test).run()
-    creator_factory("load", path=output).run()
-    creator_factory("finalise", path=output).run()
-    creator_factory("patch", path=output).run()
-
-    if delta is not None:
-        creator_factory("init_additions", path=output, delta=delta).run()
-        creator_factory("run_additions", path=output, delta=delta).run()
-        creator_factory("finalise_additions", path=output, delta=delta).run()
-
-    creator_factory("cleanup", path=output).run()
-
-    if delta is not None:
-        creator_factory("cleanup", path=output, delta=delta).run()
-
-    creator_factory("verify", path=output).run()
-
-    return output
+    pass
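The removed create_dataset helper is a useful record of the creation pipeline ordering: init, load, finalise, patch, optional statistics additions, cleanup, verify. A minimal sketch of driving the same pipeline directly through creator_factory, assuming the step names are unchanged in 0.5.27 (the recipe and output paths below are hypothetical):

    from anemoi.datasets.create import creator_factory

    path = "dataset.zarr"  # hypothetical output store
    # Each factory call returns a creator object whose run() executes one step.
    creator_factory("init", config="recipe.yaml", path=path, overwrite=True).run()
    creator_factory("load", path=path).run()
    creator_factory("finalise", path=path).run()
    creator_factory("patch", path=path).run()
    creator_factory("cleanup", path=path).run()
    creator_factory("verify", path=path).run()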
anemoi/datasets/create/typing.py
CHANGED

anemoi/datasets/create/utils.py
CHANGED

@@ -13,7 +13,6 @@ import os
 import warnings
 from contextlib import contextmanager
 from typing import Any
-from typing import Union
 
 import numpy as np
 from earthkit.data import settings

@@ -97,7 +96,7 @@ def to_datetime(*args: Any, **kwargs: Any) -> datetime.datetime:
     return to_datetime_(*args, **kwargs)
 
 
-def make_list_int(value: Union[str, list, tuple, int]) -> list[int]:
+def make_list_int(value: str | list | tuple | int) -> list[int]:
     """Convert a string, list, tuple, or integer to a list of integers.
 
     Parameters
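The make_list_int change is typical of this release: typing.Union, Optional, List, Set, Dict, and Tuple annotations are replaced with PEP 604 unions and PEP 585 builtin generics across the codebase. A before/after sketch (the old spelling here is inferred from the removed Union import, so treat it as illustrative):

    from typing import Union

    # 0.5.25 style: typing-module generics
    def make_list_int_old(value: Union[str, list, tuple, int]) -> list[int]: ...

    # 0.5.27 style: no typing import needed (Python >= 3.10)
    def make_list_int_new(value: str | list | tuple | int) -> list[int]: ...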
anemoi/datasets/create/zarr.py
CHANGED

@@ -11,7 +11,6 @@ import datetime
 import logging
 import shutil
 from typing import Any
-from typing import Optional
 
 import numpy as np
 import zarr

@@ -120,7 +119,7 @@
     flags = None
     z = None
 
-    def __init__(self, path: str, synchronizer_path: Optional[str] = None, use_threads: bool = False):
+    def __init__(self, path: str, synchronizer_path: str | None = None, use_threads: bool = False):
         """Initialize the ZarrBuiltRegistry.
 
         Parameters

@@ -154,6 +153,12 @@
         except FileNotFoundError:
             pass
 
+        _build = self.zarr_path + "/_build"
+        try:
+            shutil.rmtree(_build)
+        except FileNotFoundError:
+            pass
+
     def _open_write(self) -> zarr.Group:
         """Open the Zarr store in write mode."""
         import zarr
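The added block extends ZarrBuiltRegistry cleanup: besides the synchronizer path, it now also removes the registry's _build directory inside the Zarr store, silently skipping it when it was never created. The same idiom as a standalone sketch (the store path is hypothetical):

    import shutil

    def remove_if_present(path: str) -> None:
        # Delete a directory tree; a missing directory is not an error.
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            pass

    remove_if_present("dataset.zarr/_build")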
anemoi/datasets/data/__init__.py
CHANGED

@@ -8,9 +8,9 @@
 # nor does it submit to any jurisdiction.
 
 import logging
+import os
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Set
 
 # from .dataset import FullIndex
 # from .dataset import Shape

@@ -82,6 +82,9 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
     Dataset
         The opened dataset.
     """
+
+    trace = int(os.environ.get("ANEMOI_DATASETS_TRACE", 0))
+
     # That will get rid of OmegaConf objects
 
     args, kwargs = _convert(args), _convert(kwargs)

@@ -90,22 +93,28 @@
     ds = ds.mutate()
     ds.arguments = {"args": args, "kwargs": kwargs}
     ds._check()
+
+    if trace:
+        from anemoi.datasets.testing import Trace
+
+        ds = Trace(ds)
+
     return ds
 
 
-def save_dataset(
+def save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
     """Open a dataset and save it to disk.
 
     Parameters
     ----------
-
-
+    dataset : Dataset
+        anemoi-dataset opened from python to save to Zarr store
     zarr_path : str
         Path to store the obtained anemoi dataset to disk.
     n_workers : int
         Number of workers to use for parallel processing. If none, sequential processing will be performed.
     """
-    _save_dataset(
+    _save_dataset(dataset, zarr_path, n_workers)

@@ -124,6 +133,6 @@ def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
         The list of dataset names.
     """
     ds = _open_dataset(*args, **kwargs)
-    names: Set[str] = set()
+    names: set[str] = set()
     ds.get_dataset_names(names)
     return sorted(names)
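open_dataset now reads the ANEMOI_DATASETS_TRACE environment variable at call time and, when it is set to a non-zero integer, wraps the returned dataset in the Trace helper from anemoi.datasets.testing (extended by +93 lines in this release, per the file list above). A minimal sketch (the store path is hypothetical):

    import os

    os.environ["ANEMOI_DATASETS_TRACE"] = "1"  # must be set before the call

    from anemoi.datasets import open_dataset

    ds = open_dataset("dataset.zarr")  # ds is now wrapped in Trace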
anemoi/datasets/data/complement.py
CHANGED

@@ -7,18 +7,13 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-
 import datetime
 import logging
 from abc import abstractmethod
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set
-from typing import Tuple
 
+import numpy as np
 from numpy.typing import NDArray
 
 from ..grids import nearest_grid_points

@@ -85,29 +80,32 @@ class Complement(Combined):
         for v in self._source.variables:
             if v not in self._target.variables:
                 self._variables.append(v)
+        LOG.info(f"The following variables will be complemented: {self._variables}")
 
         if not self._variables:
             raise ValueError("Augment: no missing variables")
 
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Returns the list of variables to be added to the target dataset."""
         return self._variables
 
     @property
-    def statistics(self) -> Dict[str, NDArray[Any]]:
-
-
-
-
-
+    def statistics(self) -> dict[str, NDArray[Any]]:
+        datasets = [self._source, self._target]
+        return {
+            k: [d.statistics[k][d.name_to_index[i]] for d in datasets for i in d.variables if i in self.variables]
+            for k in datasets[0].statistics
+        }
+
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         index = [self._source.name_to_index[v] for v in self._variables]
         if delta is None:
             delta = self.frequency
         return {k: v[index] for k, v in self._source.statistics_tendencies(delta).items()}
 
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Returns a dictionary mapping variable names to their indices."""
         return {v: i for i, v in enumerate(self.variables)}

@@ -118,9 +116,13 @@ class Complement(Combined):
         return (shape[0], len(self._variables)) + shape[2:]
 
     @property
-    def variables_metadata(self) -> Dict[str, Any]:
+    def variables_metadata(self) -> dict[str, Any]:
         """Returns the metadata of the variables to be added to the target dataset."""
-
+        # Merge the two dicts first
+        all_meta = {**self._source.variables_metadata, **self._target.variables_metadata}
+
+        # Filter to keep only desired variables
+        return {k: v for k, v in all_meta.items() if k in self._variables}
 
     def check_same_variables(self, d1: Dataset, d2: Dataset) -> None:
         """Checks if the variables in two datasets are the same.

@@ -135,7 +137,7 @@ class Complement(Combined):
         pass
 
     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Returns the set of missing indices in the source and target datasets."""
         missing = self._source.missing.copy()
         missing = missing | self._target.missing

@@ -231,7 +233,7 @@ class ComplementNone(Complement):
 class ComplementNearest(Complement):
     """A class to complement a target dataset with variables from a source dataset using nearest neighbor interpolation."""
 
-    def __init__(self, target: Any, source: Any, max_distance: float = None) -> None:
+    def __init__(self, target: Any, source: Any, max_distance: float = None, k: int = 1) -> None:
         """Initializes the ComplementNearest class.
 
         Parameters

@@ -242,17 +244,25 @@ class ComplementNearest(Complement):
             The source dataset.
         max_distance : float, optional
             The maximum distance for nearest neighbor interpolation, default is None.
+        k : int, optional
+            The number of k closest neighbors to consider for interpolation
         """
         super().__init__(target, source)
 
-        self._nearest_grid_points = nearest_grid_points(
+        self.k = k
+        self._distances, self._nearest_grid_points = nearest_grid_points(
             self._source.latitudes,
             self._source.longitudes,
             self._target.latitudes,
             self._target.longitudes,
             max_distance=max_distance,
+            k=k,
         )
 
+        if k == 1:
+            self._distances = np.expand_dims(self._distances, axis=1)
+            self._nearest_grid_points = np.expand_dims(self._nearest_grid_points, axis=1)
+
     def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
         """Checks the compatibility of two datasets for nearest neighbor interpolation.
 

@@ -285,12 +295,24 @@ class ComplementNearest(Complement):
         source_data = self._source[index[0], source_index, index[2], ...]
         target_data = source_data[..., self._nearest_grid_points]
 
-
+        epsilon = 1e-8  # prevent division by zero
+        weights = 1.0 / (self._distances + epsilon)
+        weights = weights.astype(target_data.dtype)
+        weights /= weights.sum(axis=1, keepdims=True)  # normalize
+
+        # Reshape weights to broadcast correctly
+        # Add leading singleton dimensions so it matches target_data shape
+        while weights.ndim < target_data.ndim:
+            weights = np.expand_dims(weights, axis=0)
+
+        # Compute weighted average along the last dimension
+        final_point = np.sum(target_data * weights, axis=-1)
+        result = final_point[..., index[3]]
 
         return apply_index_to_slices_changes(result, changes)
 
 
-def complement_factory(args: Tuple, kwargs: dict) -> Dataset:
+def complement_factory(args: tuple, kwargs: dict) -> Dataset:
     """Factory function to create a Complement instance based on the provided arguments.
 
     Parameters

@@ -330,6 +352,13 @@ def complement_factory(args: Tuple, kwargs: dict) -> Dataset:
         "nearest": ComplementNearest,
     }[interpolation]
 
-
+    if interpolation == "nearest":
+        k = kwargs.pop("k", "1")
+        complement = Class(target=target, source=source, k=k)._subset(**kwargs)
+
+    else:
+        complement = Class(target=target, source=source)._subset(**kwargs)
+
+    joined = _open_dataset([target, complement])
 
-    return _open_dataset(
+    return _open_dataset(joined, reorder=sorted(joined.variables))
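ComplementNearest now interpolates with inverse-distance weights over the k nearest source grid points instead of copying a single nearest neighbour; with k=1 the distance and index arrays are expanded so the same weighting code path applies and the old behaviour is reproduced. A standalone numpy illustration of the weighting, with synthetic numbers:

    import numpy as np

    distances = np.array([[0.0, 1.0, 2.0, 4.0]])            # shape (n_points, k)
    neighbour_values = np.array([[10.0, 20.0, 30.0, 40.0]])

    epsilon = 1e-8                                          # guards zero distances
    weights = 1.0 / (distances + epsilon)
    weights /= weights.sum(axis=1, keepdims=True)           # weights sum to 1

    interpolated = np.sum(neighbour_values * weights, axis=-1)
    print(interpolated)  # ~[10.0]: the zero-distance neighbour dominates

Note that complement_factory pops k with a string default ("1"), so the integer comparison k == 1 in ComplementNearest only holds when the caller passes an integer explicitly. Opening a complemented dataset might look like the following sketch (keyword names taken from the factory above; the exact open_dataset spelling should be checked against the package documentation):

    from anemoi.datasets import open_dataset

    ds = open_dataset(
        complement="target.zarr",  # hypothetical dataset to be completed
        source="source.zarr",      # hypothetical dataset supplying the variables
        interpolation="nearest",
        k=4,                       # new in 0.5.27: average over the 4 nearest points
    )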
anemoi/datasets/data/concat.py
CHANGED

@@ -11,9 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import List
-from typing import Set
-from typing import Tuple
 
 import numpy as np
 from anemoi.utils.dates import frequency_to_timedelta

@@ -123,12 +120,12 @@ class ConcatMixin:
         return np.concatenate(result)
 
     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Returns the set of missing indices in the concatenated datasets."""
-        result: Set[int] = set()
+        result: set[int] = set()
         offset = 0
         for d in self.datasets:
-            result = result |
+            result = result | {m + offset for m in d.missing}
             offset += len(d)
         return result
 

@@ -195,7 +192,7 @@ class Concat(ConcatMixin, Combined):
         return Node(self, [d.tree() for d in self.datasets])
 
     @classmethod
-    def check_dataset_compatibility(cls, datasets: List[Any], fill_missing_gaps: bool = False) -> List[Any]:
+    def check_dataset_compatibility(cls, datasets: list[Any], fill_missing_gaps: bool = False) -> list[Any]:
         """Checks the compatibility of the datasets for concatenation and fills missing gaps if required.
 
         Parameters

@@ -259,7 +256,7 @@ class Concat(ConcatMixin, Combined):
         return {}
 
 
-def concat_factory(args: Tuple, kwargs: dict) -> Concat:
+def concat_factory(args: tuple[Any, ...], kwargs: dict) -> Concat:
     """Factory function to create a Concat object.
 
     Parameters
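ConcatMixin.missing shifts each dataset's missing-sample indices by the combined length of the datasets that precede it, so the indices remain valid in the concatenated index space. A synthetic sketch of that bookkeeping:

    # Two hypothetical datasets: 10 samples with gaps at 2 and 5,
    # then 8 samples with gaps at 0 and 7.
    datasets = [
        {"length": 10, "missing": {2, 5}},
        {"length": 8, "missing": {0, 7}},
    ]

    result: set[int] = set()
    offset = 0
    for d in datasets:
        result = result | {m + offset for m in d["missing"]}
        offset += d["length"]

    print(sorted(result))  # [2, 5, 10, 17]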