anemoi-datasets 0.3.10__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +9 -9
- anemoi/datasets/commands/scan.py +4 -4
- anemoi/datasets/compute/recentre.py +14 -9
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +6 -27
- anemoi/datasets/create/functions/__init__.py +3 -3
- anemoi/datasets/create/functions/filters/empty.py +4 -4
- anemoi/datasets/create/functions/filters/rename.py +14 -6
- anemoi/datasets/create/functions/filters/rotate_winds.py +16 -60
- anemoi/datasets/create/functions/filters/unrotate_winds.py +14 -64
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +38 -56
- anemoi/datasets/create/functions/sources/constants.py +11 -4
- anemoi/datasets/create/functions/sources/empty.py +2 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -3
- anemoi/datasets/create/functions/sources/grib.py +8 -4
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -364
- anemoi/datasets/create/functions/sources/mars.py +57 -26
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -3
- anemoi/datasets/create/functions/sources/tendencies.py +7 -7
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +309 -191
- anemoi/datasets/create/loaders.py +155 -77
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +51 -17
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +5 -52
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/dataset.py +4 -4
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +37 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +24 -8
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.3.10.dist-info/RECORD +0 -73
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
--- a/anemoi/datasets/create/functions/sources/tendencies.py
+++ b/anemoi/datasets/create/functions/sources/tendencies.py
@@ -9,10 +9,10 @@
 import datetime
 from collections import defaultdict
 
-from
-from
+from earthkit.data.core.temporary import temp_file
+from earthkit.data.readers.grib.output import new_grib_output
 
-from anemoi.datasets.create.functions import
+from anemoi.datasets.create.functions import assert_is_fieldlist
 from anemoi.datasets.create.utils import to_datetime_list
 
 
@@ -36,7 +36,7 @@ def normalise_time_delta(t):
 def group_by_field(ds):
     d = defaultdict(list)
     for field in ds.order_by("valid_datetime"):
-        m = field.
+        m = field.metadata(namespace="mars")
         for k in ("date", "time", "step"):
             m.pop(k, None)
         keys = tuple(m.items())
@@ -103,10 +103,10 @@ def tendencies(dates, time_increment, **kwargs):
 
     out.close()
 
-    from
+    from earthkit.data import from_source
 
-    ds =
-
+    ds = from_source("file", path)
+    assert_is_fieldlist(ds)
     # save a reference to the tmp file so it is deleted
     # only when the dataset is not used anymore
     ds._tmp = tmp
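
A note on the `group_by_field` change above: it now reads each field's MARS metadata through earthkit-data's `field.metadata(namespace="mars")` and groups fields by that metadata with `date`, `time` and `step` removed. A minimal, self-contained sketch of that grouping key, using plain dicts to stand in for the per-field metadata (the keys and values below are illustrative, not taken from the package):

```python
from collections import defaultdict

# Stand-ins for field.metadata(namespace="mars"); the real code calls this on
# earthkit-data GRIB fields. The keys/values below are illustrative only.
fields = [
    {"param": "2t", "levtype": "sfc", "date": 20240101, "time": 0, "step": 0},
    {"param": "2t", "levtype": "sfc", "date": 20240101, "time": 0, "step": 6},
    {"param": "msl", "levtype": "sfc", "date": 20240101, "time": 0, "step": 0},
]

d = defaultdict(list)
for m in fields:
    m = dict(m)  # work on a copy
    for k in ("date", "time", "step"):
        m.pop(k, None)  # fields are grouped regardless of base date/time/step
    keys = tuple(m.items())
    d[keys].append(m)

for keys, group in d.items():
    print(keys, "->", len(group))
# (('param', '2t'), ('levtype', 'sfc')) -> 2
# (('param', 'msl'), ('levtype', 'sfc')) -> 1
```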

--- /dev/null
+++ b/anemoi/datasets/create/functions/sources/xarray/__init__.py
@@ -0,0 +1,73 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import logging
+
+from earthkit.data.core.fieldlist import MultiFieldList
+
+from anemoi.datasets.data.stores import name_to_zarr_store
+
+from .. import iterate_patterns
+from .fieldlist import XarrayFieldList
+
+LOG = logging.getLogger(__name__)
+
+
+def check(what, ds, paths, **kwargs):
+    count = 1
+    for k, v in kwargs.items():
+        if isinstance(v, (tuple, list)):
+            count *= len(v)
+
+    if len(ds) != count:
+        raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")
+
+
+def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs):
+    import xarray as xr
+
+    """
+    We manage the S3 client ourselve, bypassing fsspec and s3fs layers, because sometimes something on the stack
+    zarr/fsspec/s3fs/boto3 (?) seem to flags files as missing when they actually are not (maybe when S3 reports some sort of
+    connection error). In that case, Zarr will silently fill the chunks that could not be downloaded with NaNs.
+    See https://github.com/pydata/xarray/issues/8842
+
+    We have seen this bug triggered when we run many clients in parallel, for example, when we create a new dataset using `xarray-zarr`.
+    """
+
+    context.trace(emoji, dataset, options)
+
+    if isinstance(dataset, str) and ".zarr" in dataset:
+        data = xr.open_zarr(name_to_zarr_store(dataset), **options)
+    else:
+        data = xr.open_dataset(dataset, **options)
+
+    fs = XarrayFieldList.from_xarray(data, flavour)
+    result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])
+
+    if len(result) == 0:
+        LOG.warning(f"No data found for {dataset} and dates {dates}")
+        LOG.warning(f"Options: {options}")
+        LOG.warning(data)
+
+    return result
+
+
+def load_many(emoji, context, dates, pattern, **kwargs):
+
+    result = []
+
+    for path, dates in iterate_patterns(pattern, dates, **kwargs):
+        result.append(load_one(emoji, context, dates, path, **kwargs))
+
+    return MultiFieldList(result)
+
+
+def execute(context, dates, url, *args, **kwargs):
+    return load_many("🌐", context, dates, url, *args, **kwargs)
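
The `check` helper above encodes the expectation that a request returns one field per combination of list-valued keys, so the expected count is the product of their lengths. The same rule, restated as a standalone function (the name `expected_count` is just for illustration):

```python
def expected_count(**kwargs):
    # Every list- or tuple-valued request key multiplies the number of
    # fields we expect back; scalar keys do not change the count.
    count = 1
    for v in kwargs.values():
        if isinstance(v, (tuple, list)):
            count *= len(v)
    return count

print(expected_count(param=["2t", "msl"], level=[500, 850, 1000]))  # 6
print(expected_count(param="2t", date=20240101))                    # 1
```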

--- /dev/null
+++ b/anemoi/datasets/create/functions/sources/xarray/coordinates.py
@@ -0,0 +1,234 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import datetime
+import logging
+
+import numpy as np
+from earthkit.data.utils.dates import to_datetime
+
+LOG = logging.getLogger(__name__)
+
+
+def is_scalar(variable):
+    shape = variable.shape
+    if shape == (1,):
+        return True
+    if len(shape) == 0:
+        return True
+    return False
+
+
+def extract_single_value(variable):
+    shape = variable.shape
+    if np.issubdtype(variable.values.dtype, np.datetime64):
+        if len(shape) == 0:
+            return to_datetime(variable.values)  # Convert to python datetime
+        assert False, (shape, variable.values[:2])
+
+    if np.issubdtype(variable.values.dtype, np.timedelta64):
+        if len(shape) == 0:
+            # Convert to python timedelta64
+            return datetime.timedelta(seconds=variable.values.astype("timedelta64[s]").astype(int).item())
+        assert False, (shape, variable.values)
+
+    if shape == (1,):
+        return variable.values[0]
+
+    if len(shape) == 0:
+        return variable.values.item()
+
+    assert False, (shape, variable.values)
+
+
+class Coordinate:
+    is_grid = False
+    is_dim = True
+    is_lat = False
+    is_lon = False
+    is_time = False
+    is_step = False
+    is_date = False
+
+    def __init__(self, variable):
+        self.variable = variable
+        self.scalar = is_scalar(variable)
+        self.kwargs = {}  # Used when creating a new coordinate (reduced method)
+
+    def __len__(self):
+        return 1 if self.scalar else len(self.variable)
+
+    def __repr__(self):
+        return "%s[name=%s,values=%s]" % (
+            self.__class__.__name__,
+            self.variable.name,
+            self.variable.values if self.scalar else len(self),
+        )
+
+    def reduced(self, i):
+        """Create a new coordinate with a single value
+
+        Parameters
+        ----------
+        i : int
+            the index of the value to select
+
+        Returns
+        -------
+        Coordinate
+            the new coordinate
+        """
+        return self.__class__(
+            self.variable.isel({self.variable.dims[0]: i}),
+            **self.kwargs,
+        )
+
+    def index(self, value):
+        """Return the index of the value in the coordinate
+
+        Parameters
+        ----------
+        value : Any
+            The value to search for
+
+        Returns
+        -------
+        int or None
+            The index of the value in the coordinate or None if not found
+        """
+
+        if isinstance(value, (list, tuple)):
+            if len(value) == 1:
+                return self._index_single(value)
+            else:
+                return self._index_multiple(value)
+        return self._index_single(value)
+
+    def _index_single(self, value):
+
+        values = self.variable.values
+
+        # Assume the array is sorted
+        index = np.searchsorted(values, value)
+
+        if index < len(values) and values[index] == value:
+            return index
+
+        # If not found, we need to check if the value is in the array
+
+        index = np.where(values == value)[0]
+        if len(index) > 0:
+            return index[0]
+
+        return None
+
+    def _index_multiple(self, value):
+
+        values = self.variable.values
+
+        # Assume the array is sorted
+
+        index = np.searchsorted(values, value)
+        index = index[index < len(values)]
+
+        if np.all(values[index] == value):
+            return index
+
+        # If not found, we need to check if the value is in the array
+
+        index = np.where(np.isin(values, value))[0]
+
+        # We could also return incomplete matches
+        if len(index) == len(value):
+            return index
+
+        return None
+
+    @property
+    def name(self):
+        return self.variable.name
+
+    def normalise(self, value):
+        # Subclasses to format values that will be added to the field metadata
+        return value
+
+    @property
+    def single_value(self):
+        return extract_single_value(self.variable)
+
+
+class TimeCoordinate(Coordinate):
+    is_time = True
+    mars_names = ("valid_datetime",)
+
+    def index(self, time):
+        return super().index(np.datetime64(time))
+
+
+class DateCoordinate(Coordinate):
+    is_date = True
+    mars_names = ("date",)
+
+    def index(self, date):
+        return super().index(np.datetime64(date))
+
+
+class StepCoordinate(Coordinate):
+    is_step = True
+    mars_names = ("step",)
+
+
+class LevelCoordinate(Coordinate):
+    mars_names = ("level", "levelist")
+
+    def __init__(self, variable, levtype):
+        super().__init__(variable)
+        self.levtype = levtype
+        # kwargs is used when creating a new coordinate (reduced method)
+        self.kwargs = {"levtype": levtype}
+
+    def normalise(self, value):
+        # Some netcdf have pressue levels in float
+        if int(value) == value:
+            return int(value)
+        return value
+
+
+class EnsembleCoordinate(Coordinate):
+    mars_names = ("number",)
+
+
+class LongitudeCoordinate(Coordinate):
+    is_grid = True
+    is_lon = True
+    mars_names = ("longitude",)
+
+
+class LatitudeCoordinate(Coordinate):
+    is_grid = True
+    is_lat = True
+    mars_names = ("latitude",)
+
+
+class XCoordinate(Coordinate):
+    is_grid = True
+    mars_names = ("x",)
+
+
+class YCoordinate(Coordinate):
+    is_grid = True
+    mars_names = ("y",)
+
+
+class ScalarCoordinate(Coordinate):
+    is_grid = False
+
+    @property
+    def mars_names(self):
+        return (self.variable.name,)
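
`Coordinate._index_single` above first tries a sorted-array lookup with `np.searchsorted` and then falls back to an exact `np.where` scan, so coordinates that are not stored in ascending order (for example pressure levels listed from 1000 hPa down) are still found. A standalone sketch of the same lookup on a plain NumPy array (the sample `levels` values are illustrative):

```python
import numpy as np


def index_single(values, value):
    # Fast path: assume the coordinate array is sorted.
    index = np.searchsorted(values, value)
    if index < len(values) and values[index] == value:
        return int(index)
    # Fallback: the array may not be sorted, look for an exact match.
    index = np.where(values == value)[0]
    if len(index) > 0:
        return int(index[0])
    return None


levels = np.array([1000, 850, 500, 300])  # stored in decreasing order
print(index_single(levels, 500))  # 2 (found by the fallback scan)
print(index_single(levels, 700))  # None (value not present)
```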

--- /dev/null
+++ b/anemoi/datasets/create/functions/sources/xarray/field.py
@@ -0,0 +1,109 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import logging
+
+from earthkit.data.core.fieldlist import Field
+from earthkit.data.core.fieldlist import math
+
+from .coordinates import extract_single_value
+from .coordinates import is_scalar
+from .metadata import XArrayMetadata
+
+LOG = logging.getLogger(__name__)
+
+
+class EmptyFieldList:
+    def __len__(self):
+        return 0
+
+    def __getitem__(self, i):
+        raise IndexError(i)
+
+    def __repr__(self) -> str:
+        return "EmptyFieldList()"
+
+
+class XArrayField(Field):
+
+    def __init__(self, owner, selection):
+        """Create a new XArrayField object.
+
+        Parameters
+        ----------
+        owner : Variable
+            The variable that owns this field.
+        selection : XArrayDataArray
+            A 2D sub-selection of the variable's underlying array.
+            This is actually a nD object, but the first dimensions are always 1.
+            The other two dimensions are latitude and longitude.
+        """
+        super().__init__(owner.array_backend)
+
+        self.owner = owner
+        self.selection = selection
+
+        # Copy the metadata from the owner
+        self._md = owner._metadata.copy()
+
+        for coord_name, coord_value in self.selection.coords.items():
+            if is_scalar(coord_value):
+                # Extract the single value from the scalar dimension
+                # and store it in the metadata
+                coordinate = owner.by_name[coord_name]
+                self._md[coord_name] = coordinate.normalise(extract_single_value(coord_value))
+
+        # print(values.ndim, values.shape, selection.dims)
+        # By now, the only dimensions should be latitude and longitude
+        self._shape = tuple(list(self.selection.shape)[-2:])
+        if math.prod(self._shape) != math.prod(self.selection.shape):
+            print(self.selection.ndim, self.selection.shape)
+            print(self.selection)
+            raise ValueError("Invalid shape for selection")
+
+    @property
+    def shape(self):
+        return self._shape
+
+    def to_numpy(self, flatten=False, dtype=None):
+        values = self.selection.values
+
+        assert dtype is None
+        if flatten:
+            return values.flatten()
+        return values.reshape(self.shape)
+
+    def _make_metadata(self):
+        return XArrayMetadata(self, self.owner.mapping)
+
+    def grid_points(self):
+        return self.owner.grid_points()
+
+    @property
+    def resolution(self):
+        return None
+
+    @property
+    def grid_mapping(self):
+        return self.owner.grid_mapping
+
+    @property
+    def latitudes(self):
+        return self.owner.latitudes
+
+    @property
+    def longitudes(self):
+        return self.owner.longitudes
+
+    @property
+    def forecast_reference_time(self):
+        return self.owner.forecast_reference_time
+
+    def __repr__(self):
+        return repr(self._metadata)
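
`XArrayField` above keeps only the trailing two dimensions of the selection as the field shape and requires every leading dimension to have size 1; `to_numpy` then either flattens the values or reshapes them to that 2D grid. A NumPy-only sketch of those two steps (the array sizes are illustrative):

```python
import math

import numpy as np

# A selection as described in XArrayField.__init__: all leading dimensions
# have size 1, the last two are the grid (here 3 latitudes x 4 longitudes).
selection = np.arange(12.0).reshape(1, 1, 3, 4)

shape = tuple(list(selection.shape)[-2:])              # (3, 4)
assert math.prod(shape) == math.prod(selection.shape)  # leading dims are all 1

print(selection.reshape(shape).shape)  # (3, 4) -> to_numpy(flatten=False)
print(selection.flatten().shape)       # (12,)  -> to_numpy(flatten=True)
```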

--- /dev/null
+++ b/anemoi/datasets/create/functions/sources/xarray/fieldlist.py
@@ -0,0 +1,171 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import json
+import logging
+
+import yaml
+from earthkit.data.core.fieldlist import FieldList
+
+from .coordinates import is_scalar as is_scalar
+from .field import EmptyFieldList
+from .flavour import CoordinateGuesser
+from .metadata import XArrayMetadata as XArrayMetadata
+from .time import Time
+from .variable import FilteredVariable
+from .variable import Variable
+
+LOG = logging.getLogger(__name__)
+
+
+class XarrayFieldList(FieldList):
+    def __init__(self, ds, variables):
+        self.ds = ds
+        self.variables = variables.copy()
+        self.total_length = sum(v.length for v in variables)
+
+    def __repr__(self):
+        return f"XarrayFieldList({self.total_length})"
+
+    def __len__(self):
+        return self.total_length
+
+    def __getitem__(self, i):
+        k = i
+
+        if i < 0:
+            i = self.total_length + i
+
+        for v in self.variables:
+            if i < v.length:
+                return v[i]
+            i -= v.length
+
+        raise IndexError(k)
+
+    @classmethod
+    def from_xarray(cls, ds, flavour=None):
+        variables = []
+
+        if isinstance(flavour, str):
+            with open(flavour) as f:
+                if flavour.endswith(".yaml") or flavour.endswith(".yml"):
+                    flavour = yaml.safe_load(f)
+                else:
+                    flavour = json.load(f)
+
+        guess = CoordinateGuesser.from_flavour(ds, flavour)
+
+        skip = set()
+
+        def _skip_attr(v, attr_name):
+            attr_val = getattr(v, attr_name, "")
+            if isinstance(attr_val, str):
+                skip.update(attr_val.split(" "))
+
+        for name in ds.data_vars:
+            v = ds[name]
+            _skip_attr(v, "coordinates")
+            _skip_attr(v, "bounds")
+            _skip_attr(v, "grid_mapping")
+
+        # Select only geographical variables
+        for name in ds.data_vars:
+
+            if name in skip:
+                continue
+
+            v = ds[name]
+            coordinates = []
+
+            for coord in v.coords:
+
+                c = guess.guess(ds[coord], coord)
+                assert c, f"Could not guess coordinate for {coord}"
+                if coord not in v.dims:
+                    c.is_dim = False
+                coordinates.append(c)
+
+            grid_coords = sum(1 for c in coordinates if c.is_grid and c.is_dim)
+            assert grid_coords <= 2
+
+            if grid_coords < 2:
+                continue
+
+            variables.append(
+                Variable(
+                    ds=ds,
+                    var=v,
+                    coordinates=coordinates,
+                    grid=guess.grid(coordinates),
+                    time=Time.from_coordinates(coordinates),
+                    metadata={},
+                )
+            )
+
+        return cls(ds, variables)
+
+    def sel(self, **kwargs):
+        """Override the FieldList's sel method
+
+        Returns
+        -------
+        FieldList
+            The new FieldList
+
+        The algorithm is as follows:
+        1 - Use the kwargs to select the variables that match the selection (`param` or `variable`)
+        2 - For each variable, use the remaining kwargs to select the coordinates (`level`, `number`, ...)
+        3 - Some mars like keys, like `date`, `time`, `step` are not found in the coordinates,
+            but added to the metadata of the selected fields. A example is `step` that is added to the
+            metadata of the field. Step 2 may return a variable that contain all the fields that
+            verify at the same `valid_datetime`, with different base `date` and `time` and a different `step`.
+            So we get an extra chance to filter the fields by the metadata.
+        """
+
+        variables = []
+        count = 0
+
+        for v in self.variables:
+
+            v.update_metadata_mapping(kwargs)
+
+            # First, select matching variables
+            # This will consume 'param' or 'variable' from kwargs
+            # and return the rest
+            match, rest = v.match(**kwargs)
+
+            if match:
+                count += 1
+                missing = {}
+
+                # Select from the variable's coordinates (time, level, number, ....)
+                # This may return a new variable with a isel() slice of the selection
+                # or None if the selection is not possible. In this case, missing is updated
+                # with the values of kwargs (rest) that are not relevant for this variable
+                v = v.sel(missing, **rest)
+                if missing:
+                    if v is not None:
+                        # The remaining kwargs are passed used to create a FilteredVariable
+                        # that will select 2D slices based on their metadata
+                        v = FilteredVariable(v, **missing)
+                    else:
+                        LOG.warning(f"Variable {v} has missing coordinates: {missing}")
+
+                if v is not None:
+                    variables.append(v)
+
+        if count == 0:
+            LOG.warning("No variable found for %s", kwargs)
+            LOG.warning("Variables: %s", sorted([v.name for v in self.variables]))
+
+        if not variables:
+            return EmptyFieldList()
+
+        return self.__class__(self.ds, variables)
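
`XarrayFieldList.__getitem__` above presents all variables as one flat list of fields: a single index (negative indices wrap around the total) is walked through the per-variable field counts until the owning variable is found. A standalone sketch of that lookup, with hypothetical variable names and counts:

```python
# Hypothetical per-variable field counts; in the package they come from
# Variable.length.
lengths = [("2t", 4), ("msl", 2), ("z", 6)]
total = sum(n for _, n in lengths)


def locate(i):
    k = i
    if i < 0:
        i = total + i  # wrap negative indices around the total length
    for name, n in lengths:
        if i < n:
            return name, i  # which variable owns the field, and its local index
        i -= n
    raise IndexError(k)


print(locate(0))   # ('2t', 0)
print(locate(5))   # ('msl', 1)
print(locate(-1))  # ('z', 5)
```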