anemoi-datasets 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +3 -3
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +5 -26
- anemoi/datasets/create/functions/filters/rename.py +9 -1
- anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +11 -41
- anemoi/datasets/create/functions/sources/constants.py +3 -0
- anemoi/datasets/create/functions/sources/grib.py +4 -0
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -377
- anemoi/datasets/create/functions/sources/mars.py +53 -22
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +290 -172
- anemoi/datasets/create/loaders.py +120 -71
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +49 -16
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +0 -48
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +29 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +18 -3
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.4.0.dist-info/RECORD +0 -73
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
import datetime
|
|
10
|
-
from copy import deepcopy
|
|
11
10
|
|
|
12
11
|
from anemoi.utils.humanize import did_you_mean
|
|
13
12
|
from earthkit.data import from_source
|
|
@@ -43,25 +42,27 @@ def normalise_time_delta(t):
|
|
|
43
42
|
return t
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def _expand_mars_request(request, date, date_key="date"):
|
|
45
|
+
def _expand_mars_request(request, date, request_already_using_valid_datetime=False, date_key="date"):
|
|
47
46
|
requests = []
|
|
48
47
|
step = to_list(request.get("step", [0]))
|
|
49
48
|
for s in step:
|
|
50
|
-
r =
|
|
51
|
-
|
|
52
|
-
if
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
49
|
+
r = request.copy()
|
|
50
|
+
|
|
51
|
+
if not request_already_using_valid_datetime:
|
|
52
|
+
|
|
53
|
+
if isinstance(s, str) and "-" in s:
|
|
54
|
+
assert s.count("-") == 1, s
|
|
55
|
+
# this takes care of the cases where the step is a period such as 0-24 or 12-24
|
|
56
|
+
hours = int(str(s).split("-")[-1])
|
|
57
|
+
|
|
58
|
+
base = date - datetime.timedelta(hours=hours)
|
|
59
|
+
r.update(
|
|
60
|
+
{
|
|
61
|
+
date_key: base.strftime("%Y%m%d"),
|
|
62
|
+
"time": base.strftime("%H%M"),
|
|
63
|
+
"step": s,
|
|
64
|
+
}
|
|
65
|
+
)
|
|
65
66
|
|
|
66
67
|
for pproc in ("grid", "rotation", "frame", "area", "bitmap", "resol"):
|
|
67
68
|
if pproc in r:
|
|
@@ -73,13 +74,18 @@ def _expand_mars_request(request, date, date_key="date"):
|
|
|
73
74
|
return requests
|
|
74
75
|
|
|
75
76
|
|
|
76
|
-
def factorise_requests(dates, *requests, date_key="date"):
|
|
77
|
+
def factorise_requests(dates, *requests, request_already_using_valid_datetime=False, date_key="date"):
|
|
77
78
|
updates = []
|
|
78
79
|
for req in requests:
|
|
79
80
|
# req = normalise_request(req)
|
|
80
81
|
|
|
81
82
|
for d in dates:
|
|
82
|
-
updates += _expand_mars_request(
|
|
83
|
+
updates += _expand_mars_request(
|
|
84
|
+
req,
|
|
85
|
+
date=d,
|
|
86
|
+
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
87
|
+
date_key=date_key,
|
|
88
|
+
)
|
|
83
89
|
|
|
84
90
|
compressed = Availability(updates)
|
|
85
91
|
for r in compressed.iterate():
|
|
@@ -171,11 +177,32 @@ MARS_KEYS = [
|
|
|
171
177
|
]
|
|
172
178
|
|
|
173
179
|
|
|
174
|
-
def mars(context, dates, *requests, date_key="date", **kwargs):
|
|
180
|
+
def mars(context, dates, *requests, request_already_using_valid_datetime=False, date_key="date", **kwargs):
|
|
175
181
|
if not requests:
|
|
176
182
|
requests = [kwargs]
|
|
177
183
|
|
|
178
|
-
|
|
184
|
+
for r in requests:
|
|
185
|
+
# check for "Norway bug" where yaml transforms 'no' into False, etc.
|
|
186
|
+
for p in r.get("param", []):
|
|
187
|
+
if p is False:
|
|
188
|
+
raise ValueError(
|
|
189
|
+
"'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
|
|
190
|
+
)
|
|
191
|
+
if p is None:
|
|
192
|
+
raise ValueError(
|
|
193
|
+
"'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
|
|
194
|
+
)
|
|
195
|
+
if p is True:
|
|
196
|
+
raise ValueError(
|
|
197
|
+
"'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
requests = factorise_requests(
|
|
201
|
+
dates,
|
|
202
|
+
*requests,
|
|
203
|
+
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
204
|
+
date_key=date_key,
|
|
205
|
+
)
|
|
179
206
|
ds = from_source("empty")
|
|
180
207
|
for r in requests:
|
|
181
208
|
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
@@ -191,7 +218,11 @@ def mars(context, dates, *requests, date_key="date", **kwargs):
|
|
|
191
218
|
raise ValueError(
|
|
192
219
|
f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
|
|
193
220
|
)
|
|
194
|
-
|
|
221
|
+
try:
|
|
222
|
+
ds = ds + from_source("mars", **r)
|
|
223
|
+
except Exception as e:
|
|
224
|
+
if "File is empty:" not in str(e):
|
|
225
|
+
raise
|
|
195
226
|
return ds
|
|
196
227
|
|
|
197
228
|
|
|
@@ -7,66 +7,8 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
-
import
|
|
11
|
-
|
|
12
|
-
from earthkit.data import from_source
|
|
13
|
-
from earthkit.data.utils.patterns import Pattern
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _expand(paths):
|
|
17
|
-
for path in paths:
|
|
18
|
-
if path.startswith("file://"):
|
|
19
|
-
path = path[7:]
|
|
20
|
-
|
|
21
|
-
if path.startswith("http://"):
|
|
22
|
-
yield path
|
|
23
|
-
continue
|
|
24
|
-
|
|
25
|
-
if path.startswith("https://"):
|
|
26
|
-
yield path
|
|
27
|
-
continue
|
|
28
|
-
|
|
29
|
-
for p in glob.glob(path):
|
|
30
|
-
yield p
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def check(what, ds, paths, **kwargs):
|
|
34
|
-
count = 1
|
|
35
|
-
for k, v in kwargs.items():
|
|
36
|
-
if isinstance(v, (tuple, list)):
|
|
37
|
-
count *= len(v)
|
|
38
|
-
|
|
39
|
-
if len(ds) != count:
|
|
40
|
-
raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def load_netcdfs(emoji, what, context, dates, path, *args, **kwargs):
|
|
44
|
-
given_paths = path if isinstance(path, list) else [path]
|
|
45
|
-
|
|
46
|
-
dates = [d.isoformat() for d in dates]
|
|
47
|
-
ds = from_source("empty")
|
|
48
|
-
|
|
49
|
-
for path in given_paths:
|
|
50
|
-
paths = Pattern(path, ignore_missing_keys=True).substitute(*args, date=dates, **kwargs)
|
|
51
|
-
|
|
52
|
-
levels = kwargs.get("level", kwargs.get("levelist"))
|
|
53
|
-
|
|
54
|
-
for path in _expand(paths):
|
|
55
|
-
context.trace(emoji, what.upper(), path)
|
|
56
|
-
s = from_source("opendap", path)
|
|
57
|
-
s = s.sel(
|
|
58
|
-
valid_datetime=dates,
|
|
59
|
-
param=kwargs["param"],
|
|
60
|
-
step=kwargs.get("step", 0),
|
|
61
|
-
)
|
|
62
|
-
if levels:
|
|
63
|
-
s = s.sel(levelist=levels)
|
|
64
|
-
ds = ds + s
|
|
65
|
-
|
|
66
|
-
check(what, ds, given_paths, valid_datetime=dates, **kwargs)
|
|
67
|
-
|
|
68
|
-
return ds
|
|
10
|
+
from .xarray import load_many
|
|
69
11
|
|
|
70
12
|
|
|
71
13
|
def execute(context, dates, path, *args, **kwargs):
|
|
72
|
-
return
|
|
14
|
+
return load_many("📁", context, dates, path, *args, **kwargs)
|
|
@@ -7,8 +7,9 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
from .xarray import load_many
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def execute(context, dates, url, *args, **kwargs):
|
|
14
|
-
return
|
|
15
|
+
return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
from earthkit.data.core.fieldlist import MultiFieldList
|
|
13
|
+
|
|
14
|
+
from anemoi.datasets.data.stores import name_to_zarr_store
|
|
15
|
+
|
|
16
|
+
from .. import iterate_patterns
|
|
17
|
+
from .fieldlist import XarrayFieldList
|
|
18
|
+
|
|
19
|
+
LOG = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def check(what, ds, paths, **kwargs):
|
|
23
|
+
count = 1
|
|
24
|
+
for k, v in kwargs.items():
|
|
25
|
+
if isinstance(v, (tuple, list)):
|
|
26
|
+
count *= len(v)
|
|
27
|
+
|
|
28
|
+
if len(ds) != count:
|
|
29
|
+
raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs):
|
|
33
|
+
import xarray as xr
|
|
34
|
+
|
|
35
|
+
"""
|
|
36
|
+
We manage the S3 client ourselve, bypassing fsspec and s3fs layers, because sometimes something on the stack
|
|
37
|
+
zarr/fsspec/s3fs/boto3 (?) seem to flags files as missing when they actually are not (maybe when S3 reports some sort of
|
|
38
|
+
connection error). In that case, Zarr will silently fill the chunks that could not be downloaded with NaNs.
|
|
39
|
+
See https://github.com/pydata/xarray/issues/8842
|
|
40
|
+
|
|
41
|
+
We have seen this bug triggered when we run many clients in parallel, for example, when we create a new dataset using `xarray-zarr`.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
context.trace(emoji, dataset, options)
|
|
45
|
+
|
|
46
|
+
if isinstance(dataset, str) and ".zarr" in dataset:
|
|
47
|
+
data = xr.open_zarr(name_to_zarr_store(dataset), **options)
|
|
48
|
+
else:
|
|
49
|
+
data = xr.open_dataset(dataset, **options)
|
|
50
|
+
|
|
51
|
+
fs = XarrayFieldList.from_xarray(data, flavour)
|
|
52
|
+
result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])
|
|
53
|
+
|
|
54
|
+
if len(result) == 0:
|
|
55
|
+
LOG.warning(f"No data found for {dataset} and dates {dates}")
|
|
56
|
+
LOG.warning(f"Options: {options}")
|
|
57
|
+
LOG.warning(data)
|
|
58
|
+
|
|
59
|
+
return result
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def load_many(emoji, context, dates, pattern, **kwargs):
|
|
63
|
+
|
|
64
|
+
result = []
|
|
65
|
+
|
|
66
|
+
for path, dates in iterate_patterns(pattern, dates, **kwargs):
|
|
67
|
+
result.append(load_one(emoji, context, dates, path, **kwargs))
|
|
68
|
+
|
|
69
|
+
return MultiFieldList(result)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def execute(context, dates, url, *args, **kwargs):
|
|
73
|
+
return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
import datetime
|
|
11
|
+
import logging
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from earthkit.data.utils.dates import to_datetime
|
|
15
|
+
|
|
16
|
+
LOG = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def is_scalar(variable):
|
|
20
|
+
shape = variable.shape
|
|
21
|
+
if shape == (1,):
|
|
22
|
+
return True
|
|
23
|
+
if len(shape) == 0:
|
|
24
|
+
return True
|
|
25
|
+
return False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_single_value(variable):
|
|
29
|
+
shape = variable.shape
|
|
30
|
+
if np.issubdtype(variable.values.dtype, np.datetime64):
|
|
31
|
+
if len(shape) == 0:
|
|
32
|
+
return to_datetime(variable.values) # Convert to python datetime
|
|
33
|
+
assert False, (shape, variable.values[:2])
|
|
34
|
+
|
|
35
|
+
if np.issubdtype(variable.values.dtype, np.timedelta64):
|
|
36
|
+
if len(shape) == 0:
|
|
37
|
+
# Convert to python timedelta64
|
|
38
|
+
return datetime.timedelta(seconds=variable.values.astype("timedelta64[s]").astype(int).item())
|
|
39
|
+
assert False, (shape, variable.values)
|
|
40
|
+
|
|
41
|
+
if shape == (1,):
|
|
42
|
+
return variable.values[0]
|
|
43
|
+
|
|
44
|
+
if len(shape) == 0:
|
|
45
|
+
return variable.values.item()
|
|
46
|
+
|
|
47
|
+
assert False, (shape, variable.values)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class Coordinate:
|
|
51
|
+
is_grid = False
|
|
52
|
+
is_dim = True
|
|
53
|
+
is_lat = False
|
|
54
|
+
is_lon = False
|
|
55
|
+
is_time = False
|
|
56
|
+
is_step = False
|
|
57
|
+
is_date = False
|
|
58
|
+
|
|
59
|
+
def __init__(self, variable):
|
|
60
|
+
self.variable = variable
|
|
61
|
+
self.scalar = is_scalar(variable)
|
|
62
|
+
self.kwargs = {} # Used when creating a new coordinate (reduced method)
|
|
63
|
+
|
|
64
|
+
def __len__(self):
|
|
65
|
+
return 1 if self.scalar else len(self.variable)
|
|
66
|
+
|
|
67
|
+
def __repr__(self):
|
|
68
|
+
return "%s[name=%s,values=%s]" % (
|
|
69
|
+
self.__class__.__name__,
|
|
70
|
+
self.variable.name,
|
|
71
|
+
self.variable.values if self.scalar else len(self),
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
def reduced(self, i):
|
|
75
|
+
"""Create a new coordinate with a single value
|
|
76
|
+
|
|
77
|
+
Parameters
|
|
78
|
+
----------
|
|
79
|
+
i : int
|
|
80
|
+
the index of the value to select
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
Coordinate
|
|
85
|
+
the new coordinate
|
|
86
|
+
"""
|
|
87
|
+
return self.__class__(
|
|
88
|
+
self.variable.isel({self.variable.dims[0]: i}),
|
|
89
|
+
**self.kwargs,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
def index(self, value):
|
|
93
|
+
"""Return the index of the value in the coordinate
|
|
94
|
+
|
|
95
|
+
Parameters
|
|
96
|
+
----------
|
|
97
|
+
value : Any
|
|
98
|
+
The value to search for
|
|
99
|
+
|
|
100
|
+
Returns
|
|
101
|
+
-------
|
|
102
|
+
int or None
|
|
103
|
+
The index of the value in the coordinate or None if not found
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
if isinstance(value, (list, tuple)):
|
|
107
|
+
if len(value) == 1:
|
|
108
|
+
return self._index_single(value)
|
|
109
|
+
else:
|
|
110
|
+
return self._index_multiple(value)
|
|
111
|
+
return self._index_single(value)
|
|
112
|
+
|
|
113
|
+
def _index_single(self, value):
|
|
114
|
+
|
|
115
|
+
values = self.variable.values
|
|
116
|
+
|
|
117
|
+
# Assume the array is sorted
|
|
118
|
+
index = np.searchsorted(values, value)
|
|
119
|
+
|
|
120
|
+
if index < len(values) and values[index] == value:
|
|
121
|
+
return index
|
|
122
|
+
|
|
123
|
+
# If not found, we need to check if the value is in the array
|
|
124
|
+
|
|
125
|
+
index = np.where(values == value)[0]
|
|
126
|
+
if len(index) > 0:
|
|
127
|
+
return index[0]
|
|
128
|
+
|
|
129
|
+
return None
|
|
130
|
+
|
|
131
|
+
def _index_multiple(self, value):
|
|
132
|
+
|
|
133
|
+
values = self.variable.values
|
|
134
|
+
|
|
135
|
+
# Assume the array is sorted
|
|
136
|
+
|
|
137
|
+
index = np.searchsorted(values, value)
|
|
138
|
+
index = index[index < len(values)]
|
|
139
|
+
|
|
140
|
+
if np.all(values[index] == value):
|
|
141
|
+
return index
|
|
142
|
+
|
|
143
|
+
# If not found, we need to check if the value is in the array
|
|
144
|
+
|
|
145
|
+
index = np.where(np.isin(values, value))[0]
|
|
146
|
+
|
|
147
|
+
# We could also return incomplete matches
|
|
148
|
+
if len(index) == len(value):
|
|
149
|
+
return index
|
|
150
|
+
|
|
151
|
+
return None
|
|
152
|
+
|
|
153
|
+
@property
|
|
154
|
+
def name(self):
|
|
155
|
+
return self.variable.name
|
|
156
|
+
|
|
157
|
+
def normalise(self, value):
|
|
158
|
+
# Subclasses to format values that will be added to the field metadata
|
|
159
|
+
return value
|
|
160
|
+
|
|
161
|
+
@property
|
|
162
|
+
def single_value(self):
|
|
163
|
+
return extract_single_value(self.variable)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class TimeCoordinate(Coordinate):
|
|
167
|
+
is_time = True
|
|
168
|
+
mars_names = ("valid_datetime",)
|
|
169
|
+
|
|
170
|
+
def index(self, time):
|
|
171
|
+
return super().index(np.datetime64(time))
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class DateCoordinate(Coordinate):
|
|
175
|
+
is_date = True
|
|
176
|
+
mars_names = ("date",)
|
|
177
|
+
|
|
178
|
+
def index(self, date):
|
|
179
|
+
return super().index(np.datetime64(date))
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class StepCoordinate(Coordinate):
|
|
183
|
+
is_step = True
|
|
184
|
+
mars_names = ("step",)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class LevelCoordinate(Coordinate):
|
|
188
|
+
mars_names = ("level", "levelist")
|
|
189
|
+
|
|
190
|
+
def __init__(self, variable, levtype):
|
|
191
|
+
super().__init__(variable)
|
|
192
|
+
self.levtype = levtype
|
|
193
|
+
# kwargs is used when creating a new coordinate (reduced method)
|
|
194
|
+
self.kwargs = {"levtype": levtype}
|
|
195
|
+
|
|
196
|
+
def normalise(self, value):
|
|
197
|
+
# Some netcdf have pressue levels in float
|
|
198
|
+
if int(value) == value:
|
|
199
|
+
return int(value)
|
|
200
|
+
return value
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class EnsembleCoordinate(Coordinate):
|
|
204
|
+
mars_names = ("number",)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class LongitudeCoordinate(Coordinate):
|
|
208
|
+
is_grid = True
|
|
209
|
+
is_lon = True
|
|
210
|
+
mars_names = ("longitude",)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class LatitudeCoordinate(Coordinate):
|
|
214
|
+
is_grid = True
|
|
215
|
+
is_lat = True
|
|
216
|
+
mars_names = ("latitude",)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class XCoordinate(Coordinate):
|
|
220
|
+
is_grid = True
|
|
221
|
+
mars_names = ("x",)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class YCoordinate(Coordinate):
|
|
225
|
+
is_grid = True
|
|
226
|
+
mars_names = ("y",)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class ScalarCoordinate(Coordinate):
|
|
230
|
+
is_grid = False
|
|
231
|
+
|
|
232
|
+
@property
|
|
233
|
+
def mars_names(self):
|
|
234
|
+
return (self.variable.name,)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
from earthkit.data.core.fieldlist import Field
|
|
13
|
+
from earthkit.data.core.fieldlist import math
|
|
14
|
+
|
|
15
|
+
from .coordinates import extract_single_value
|
|
16
|
+
from .coordinates import is_scalar
|
|
17
|
+
from .metadata import XArrayMetadata
|
|
18
|
+
|
|
19
|
+
LOG = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EmptyFieldList:
|
|
23
|
+
def __len__(self):
|
|
24
|
+
return 0
|
|
25
|
+
|
|
26
|
+
def __getitem__(self, i):
|
|
27
|
+
raise IndexError(i)
|
|
28
|
+
|
|
29
|
+
def __repr__(self) -> str:
|
|
30
|
+
return "EmptyFieldList()"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class XArrayField(Field):
|
|
34
|
+
|
|
35
|
+
def __init__(self, owner, selection):
|
|
36
|
+
"""Create a new XArrayField object.
|
|
37
|
+
|
|
38
|
+
Parameters
|
|
39
|
+
----------
|
|
40
|
+
owner : Variable
|
|
41
|
+
The variable that owns this field.
|
|
42
|
+
selection : XArrayDataArray
|
|
43
|
+
A 2D sub-selection of the variable's underlying array.
|
|
44
|
+
This is actually a nD object, but the first dimensions are always 1.
|
|
45
|
+
The other two dimensions are latitude and longitude.
|
|
46
|
+
"""
|
|
47
|
+
super().__init__(owner.array_backend)
|
|
48
|
+
|
|
49
|
+
self.owner = owner
|
|
50
|
+
self.selection = selection
|
|
51
|
+
|
|
52
|
+
# Copy the metadata from the owner
|
|
53
|
+
self._md = owner._metadata.copy()
|
|
54
|
+
|
|
55
|
+
for coord_name, coord_value in self.selection.coords.items():
|
|
56
|
+
if is_scalar(coord_value):
|
|
57
|
+
# Extract the single value from the scalar dimension
|
|
58
|
+
# and store it in the metadata
|
|
59
|
+
coordinate = owner.by_name[coord_name]
|
|
60
|
+
self._md[coord_name] = coordinate.normalise(extract_single_value(coord_value))
|
|
61
|
+
|
|
62
|
+
# print(values.ndim, values.shape, selection.dims)
|
|
63
|
+
# By now, the only dimensions should be latitude and longitude
|
|
64
|
+
self._shape = tuple(list(self.selection.shape)[-2:])
|
|
65
|
+
if math.prod(self._shape) != math.prod(self.selection.shape):
|
|
66
|
+
print(self.selection.ndim, self.selection.shape)
|
|
67
|
+
print(self.selection)
|
|
68
|
+
raise ValueError("Invalid shape for selection")
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def shape(self):
|
|
72
|
+
return self._shape
|
|
73
|
+
|
|
74
|
+
def to_numpy(self, flatten=False, dtype=None):
|
|
75
|
+
values = self.selection.values
|
|
76
|
+
|
|
77
|
+
assert dtype is None
|
|
78
|
+
if flatten:
|
|
79
|
+
return values.flatten()
|
|
80
|
+
return values.reshape(self.shape)
|
|
81
|
+
|
|
82
|
+
def _make_metadata(self):
|
|
83
|
+
return XArrayMetadata(self, self.owner.mapping)
|
|
84
|
+
|
|
85
|
+
def grid_points(self):
|
|
86
|
+
return self.owner.grid_points()
|
|
87
|
+
|
|
88
|
+
@property
|
|
89
|
+
def resolution(self):
|
|
90
|
+
return None
|
|
91
|
+
|
|
92
|
+
@property
|
|
93
|
+
def grid_mapping(self):
|
|
94
|
+
return self.owner.grid_mapping
|
|
95
|
+
|
|
96
|
+
@property
|
|
97
|
+
def latitudes(self):
|
|
98
|
+
return self.owner.latitudes
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def longitudes(self):
|
|
102
|
+
return self.owner.longitudes
|
|
103
|
+
|
|
104
|
+
@property
|
|
105
|
+
def forecast_reference_time(self):
|
|
106
|
+
return self.owner.forecast_reference_time
|
|
107
|
+
|
|
108
|
+
def __repr__(self):
|
|
109
|
+
return repr(self._metadata)
|