anemoi-datasets 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +3 -3
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +5 -26
- anemoi/datasets/create/functions/filters/rename.py +9 -1
- anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +11 -41
- anemoi/datasets/create/functions/sources/constants.py +3 -0
- anemoi/datasets/create/functions/sources/grib.py +4 -0
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -377
- anemoi/datasets/create/functions/sources/mars.py +53 -22
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +290 -172
- anemoi/datasets/create/loaders.py +120 -71
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +49 -16
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +0 -48
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +29 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +18 -3
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.4.0.dist-info/RECORD +0 -73
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import datetime
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Time:
    """Base class of the strategies that fill ``date``/``time``/``step`` metadata."""

    @classmethod
    def from_coordinates(cls, coordinates):
        """Pick the time-handling strategy matching the given coordinates.

        The choice depends only on how many coordinates are flagged
        ``is_date``, ``is_time`` and ``is_step``.

        Parameters
        ----------
        coordinates : iterable
            Coordinate objects exposing ``is_time``, ``is_step`` and ``is_date``.

        Returns
        -------
        Time
            An instance of the subclass handling that coordinate layout.

        Raises
        ------
        NotImplementedError
            If the combination of coordinates is not supported.
        """
        # NOTE: these local names appear verbatim in the error message below.
        time_coordinate = [c for c in coordinates if c.is_time]
        step_coordinate = [c for c in coordinates if c.is_step]
        date_coordinate = [c for c in coordinates if c.is_date]

        # (number of date, time, step coordinates)
        layout = (len(date_coordinate), len(time_coordinate), len(step_coordinate))

        if layout == (0, 0, 0):
            return Constant()

        if layout == (0, 1, 0):
            return Analysis()

        if layout == (0, 1, 1):
            return ForecasstFromValidTimeAndStep(step_coordinate[0])

        if layout == (1, 1, 0):
            return ForecastFromValidTimeAndBaseTime(date_coordinate[0])

        if layout == (1, 0, 1):
            return ForecastFromBaseTimeAndDate(date_coordinate[0], step_coordinate[0])

        raise NotImplementedError(f"{date_coordinate=} {time_coordinate=} {step_coordinate=}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Constant(Time):
    """Time handling selected when no date, time or step coordinate is found."""

    def fill_time_metadata(self, time, metadata):
        """Write ``date`` and ``time`` formatted from ``time``; ``step`` is always 0.

        Parameters
        ----------
        time : object
            Value providing ``strftime`` (presumably a ``datetime``).
        metadata : dict
            Mapping updated in place.
        """
        for key, fmt in (("date", "%Y%m%d"), ("time", "%H%M")):
            metadata[key] = time.strftime(fmt)
        metadata["step"] = 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Analysis(Time):
    """Time handling selected when only a single time coordinate is found."""

    def fill_time_metadata(self, time, metadata):
        """Write ``date`` and ``time`` formatted from ``time``; ``step`` is always 0.

        Parameters
        ----------
        time : object
            Value providing ``strftime`` (presumably a ``datetime``).
        metadata : dict
            Mapping updated in place.
        """
        for key, fmt in (("date", "%Y%m%d"), ("time", "%H%M")):
            metadata[key] = time.strftime(fmt)
        metadata["step"] = 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ForecasstFromValidTimeAndStep(Time):
    """Forecast described by a time and a step; the base time is derived from them.

    The spelling of the class name is kept as it is because
    ``Time.from_coordinates`` refers to it under this name.
    """

    def __init__(self, step_coordinate):
        # Only the name of the step variable is needed to look it up later
        self.step_name = step_coordinate.variable.name

    def fill_time_metadata(self, time, metadata):
        """Replace the step entry of ``metadata`` by ``date``/``time``/``step``.

        The entry stored under the step variable name is removed from
        ``metadata``; it must be a ``datetime.timedelta`` made of whole hours.
        ``date`` and ``time`` describe ``time`` minus that step.
        """
        lead = metadata.pop(self.step_name)
        assert isinstance(lead, datetime.timedelta)
        base = time - lead

        hours = lead.total_seconds() / 3600
        whole_hours = int(hours)
        assert whole_hours == hours

        metadata["date"] = base.strftime("%Y%m%d")
        metadata["time"] = base.strftime("%H%M")
        metadata["step"] = whole_hours
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class ForecastFromValidTimeAndBaseTime(Time):
    """Forecast described by a time and a base-date coordinate; the step is derived."""

    def __init__(self, date_coordinate):
        self.date_coordinate = date_coordinate

    def fill_time_metadata(self, time, metadata):
        """Write ``date``/``time`` from the base date and ``step`` as whole hours.

        Parameters
        ----------
        time : object
            Time of the field (presumably a ``datetime``).
        metadata : dict
            Mapping updated in place.
        """
        # The arithmetic must use the coordinate's value, not the coordinate
        # object itself.
        # NOTE(review): assumes ``single_value`` is datetime-like, as the
        # ``strftime`` calls below already require - confirm.
        base = self.date_coordinate.single_value

        step = time - base

        hours = step.total_seconds() / 3600
        assert int(hours) == hours

        metadata["date"] = base.strftime("%Y%m%d")
        metadata["time"] = base.strftime("%H%M")
        metadata["step"] = int(hours)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class ForecastFromBaseTimeAndDate(Time):
    """Forecast described by a date coordinate and a step coordinate."""

    def __init__(self, date_coordinate, step_coordinate):
        self.date_coordinate = date_coordinate
        self.step_coordinate = step_coordinate

    def fill_time_metadata(self, time, metadata):
        """Write ``date``/``time`` from ``time`` and ``step`` as whole hours.

        The step is read from the ``metadata`` entry named after the step
        coordinate; that entry must provide ``total_seconds()`` and amount to
        a whole number of hours.
        """
        for key, fmt in (("date", "%Y%m%d"), ("time", "%H%M")):
            metadata[key] = time.strftime(fmt)

        lead = metadata[self.step_coordinate.name]
        hours = lead.total_seconds() / 3600
        whole_hours = int(hours)
        assert whole_hours == hours
        metadata["step"] = whole_hours
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import math
|
|
12
|
+
from functools import cached_property
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from earthkit.data.utils.array import ensure_backend
|
|
16
|
+
|
|
17
|
+
from anemoi.datasets.create.functions.sources.xarray.metadata import MDMapping
|
|
18
|
+
|
|
19
|
+
from .field import XArrayField
|
|
20
|
+
|
|
21
|
+
LOG = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Variable:
    """A dataset variable exposed as a flat, indexable sequence of fields."""

    def __init__(self, *, ds, var, coordinates, grid, time, metadata, mapping=None, array_backend=None):
        self.ds = ds
        self.var = var
        self.grid = grid
        self.time = time
        self.coordinates = coordinates
        self._mapping = mapping

        # Work on a private copy so the caller's metadata is left untouched
        self._metadata = metadata.copy()
        self._metadata.update({"variable": var.name})

        # Coordinates that are iterated over: non-scalar dimensions that are
        # not part of the grid
        indexed = [c for c in coordinates if c.is_dim and not c.scalar and not c.is_grid]

        self.shape = tuple(len(c.variable) for c in indexed)
        self.names = {c.variable.name: c for c in indexed}
        self.by_name = {c.variable.name: c for c in coordinates}

        # Number of fields: one per combination of the indexed coordinates
        self.length = math.prod(self.shape)
        self.array_backend = ensure_backend(array_backend)

    def update_metadata_mapping(self, kwargs):
        """Rebuild the metadata mapping from the keys of ``kwargs``.

        ``param`` is mapped to ``"variable"``. Any other key found in the
        ``mars_names`` of a coordinate maps all of that coordinate's
        ``mars_names`` to the coordinate's variable name; only the first
        matching coordinate is used.
        """
        result = {}

        for key in kwargs:
            if key == "param":
                result[key] = "variable"
                continue

            for coordinate in self.coordinates:
                if key in coordinate.mars_names:
                    for mars_name in coordinate.mars_names:
                        result[mars_name] = coordinate.variable.name
                    break

        self._mapping = MDMapping(result)

    @property
    def name(self):
        """Name of the underlying variable."""
        return self.var.name

    def __len__(self):
        return self.length

    @property
    def grid_mapping(self):
        """Attributes of the dataset variable named by ``grid_mapping``, or None."""
        target = self.var.attrs.get("grid_mapping", None)
        return None if target is None else self.ds[target].attrs

    def grid_points(self):
        """Delegate to the grid's ``grid_points()``."""
        return self.grid.grid_points()

    @property
    def latitudes(self):
        return self.grid.latitudes

    @property
    def longitudes(self):
        return self.grid.longitudes

    def __repr__(self):
        return "Variable[name=%s,coordinates=%s,metadata=%s]" % (
            self.var.name,
            self.coordinates,
            self._metadata,
        )

    def __getitem__(self, i):
        """
        Get a 2D field from the variable
        """
        if i >= self.length:
            raise IndexError(i)

        # Turn the flat index into one index per iterated coordinate
        indices = np.unravel_index(i, self.shape)
        selection = dict(zip(self.names, indices))
        return XArrayField(self, self.var.isel(selection))

    @property
    def mapping(self):
        return self._mapping

    def sel(self, missing, **kwargs):
        """Narrow the variable one criterion at a time.

        Parameters
        ----------
        missing : dict
            Receives the criteria whose key does not name a coordinate of
            this variable.
        **kwargs
            Selection criteria, passed through the mapping's ``from_user``.

        Returns
        -------
        Variable or None
            ``self`` when nothing is left to select, a narrowed variable
            otherwise, or None when a requested value is not found in its
            coordinate.
        """
        if not kwargs:
            return self

        kwargs = self._mapping.from_user(kwargs)

        key, value = kwargs.popitem()
        coordinate = self.by_name.get(key)

        if coordinate is None:
            # Not a coordinate of this variable: record it and carry on
            missing[key] = value
            return self.sel(missing, **kwargs)

        position = coordinate.index(value)
        if position is None:
            LOG.warning(f"Could not find {key}={value} in {coordinate}")
            return None

        reduced = [x.reduced(position) if coordinate is x else x for x in self.coordinates]

        metadata = self._metadata.copy()
        metadata.update({key: value})

        narrowed = Variable(
            ds=self.ds,
            var=self.var.isel({key: position}),
            coordinates=reduced,
            grid=self.grid,
            time=self.time,
            metadata=metadata,
            mapping=self.mapping,
        )

        # Apply the remaining criteria to the narrowed variable
        return narrowed.sel(missing, **kwargs)

    def match(self, **kwargs):
        """Tell whether this variable is among the requested ones.

        Returns
        -------
        tuple
            ``(True, remaining)`` when no ``variable`` criterion is present
            or when it names this variable, ``remaining`` being the criteria
            without ``variable``; ``(False, None)`` otherwise.
        """
        kwargs = self._mapping.from_user(kwargs)

        if "variable" not in kwargs:
            return True, kwargs

        wanted = kwargs.pop("variable")
        if not isinstance(wanted, (list, tuple)):
            wanted = [wanted]

        if self.var.name not in wanted:
            return False, None

        return True, kwargs
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class FilteredVariable:
    """View of a variable limited to the fields matching some metadata values."""

    def __init__(self, variable, **kwargs):
        self.variable = variable
        self.kwargs = kwargs

    @cached_property
    def fields(self):
        """Filter the fields of a variable based on metadata.

        A field is kept when, for every requested key, its metadata value
        (None when absent) equals the requested value. The result is
        computed once and cached.

        Returns
        -------
        list
            A list of fields that match the metadata.
        """
        criteria = self.kwargs.items()

        selected = []
        for field in self.variable:
            if all(field.metadata(key, default=None) == value for key, value in criteria):
                selected.append(field)
        return selected

    @property
    def length(self):
        """Number of matching fields."""
        return len(self.fields)

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        if i >= self.length:
            raise IndexError(i)
        return self.fields[i]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from earthkit.data.core.fieldlist import MultiFieldList
|
|
12
|
+
|
|
13
|
+
from . import iterate_patterns
|
|
14
|
+
from .xarray import load_one
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def load_many(emoji, context, dates, pattern, options, **kwargs):
    """Load the fields of every reference matched by ``pattern``.

    Each path produced by ``iterate_patterns`` is passed as the ``fo``
    storage option and opened with ``load_one`` through the ``reference://``
    URL. Defaults are filled in for the engine (``zarr``), ``consolidated``
    (False), ``remote_protocol`` (``s3``) and ``remote_options`` (anonymous
    access) unless the caller provides them.

    Parameters
    ----------
    emoji : str
        Forwarded to ``load_one``.
    context : object
        Forwarded to ``load_one``.
    dates : object
        Dates forwarded to ``iterate_patterns``.
    pattern : str
        Pattern forwarded to ``iterate_patterns``.
    options : dict or None
        Options forwarded to ``load_one``. The mapping and its nested
        ``backend_kwargs`` / ``storage_options`` are not modified.
    **kwargs
        Forwarded to both ``iterate_patterns`` and ``load_one``.

    Returns
    -------
    MultiFieldList
        The results of all ``load_one`` calls.
    """
    # Copy every level that receives defaults: a shallow copy of ``options``
    # alone would leak the defaults and the last ``fo`` into the caller's
    # nested dictionaries.
    options = dict(options) if options is not None else {}
    options.setdefault("engine", "zarr")

    backend_kwargs = dict(options.get("backend_kwargs", {}))
    backend_kwargs.setdefault("consolidated", False)

    storage_options = dict(backend_kwargs.get("storage_options", {}))
    storage_options.setdefault("remote_protocol", "s3")
    storage_options.setdefault("remote_options", {"anon": True})

    result = []

    for path, path_dates in iterate_patterns(pattern, dates, **kwargs):
        # Fresh dictionaries per reference, so one call never sees the
        # ``fo`` of another
        path_options = {
            **options,
            "backend_kwargs": {
                **backend_kwargs,
                "storage_options": {**storage_options, "fo": path},
            },
        }

        result.append(load_one(emoji, context, path_dates, "reference://", options=path_options, **kwargs))

    return MultiFieldList(result)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def execute(context, dates, json, options=None, **kwargs):
    """Source entry point: load the fields of the references matching ``json``.

    Thin wrapper around ``load_many``; ``json`` is used as its pattern.
    """
    return load_many("🧱", context, dates, pattern=json, options=options, **kwargs)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from .xarray import load_many
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def execute(context, dates, url, *args, **kwargs):
    """Source entry point: load the fields found at ``url``.

    Thin wrapper around ``load_many``; every extra argument is forwarded
    unchanged.
    """
    fields = load_many("🇿", context, dates, url, *args, **kwargs)
    return fields
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from earthkit.data.core.fieldlist import MultiFieldList
|
|
12
|
+
from earthkit.data.sources.url import download_and_cache
|
|
13
|
+
|
|
14
|
+
from . import iterate_patterns
|
|
15
|
+
from .xarray import load_one
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def execute(context, dates, record_id, file_key, *args, **kwargs):
    """Source entry point: load fields from the files of a Zenodo record.

    The record description is fetched from the Zenodo API. Every file key
    produced by ``iterate_patterns`` that exists in the record is downloaded
    (with caching) and opened with ``load_one``.

    Parameters
    ----------
    context : object
        Forwarded to ``load_one``.
    dates : object
        Dates forwarded to ``iterate_patterns``.
    record_id : str or int
        Identifier of the Zenodo record.
    file_key : str
        Pattern of the file keys to load, expanded by ``iterate_patterns``.
    *args
        Accepted but not used.
    **kwargs
        Forwarded to both ``iterate_patterns`` and ``load_one``.

    Returns
    -------
    MultiFieldList
        The fields of all the files that were found.

    Raises
    ------
    requests.HTTPError
        If the Zenodo API answers with an error status.
    requests.Timeout
        If the Zenodo API does not answer in time.
    """
    import requests

    URLPATTERN = "https://zenodo.org/api/records/{record_id}"
    record_url = URLPATTERN.format(record_id=record_id)

    # Without a timeout, requests waits forever on an unresponsive server
    response = requests.get(record_url, timeout=30)
    response.raise_for_status()
    record = response.json()

    # File key -> download link, for every file of the record
    urls = {entry["key"]: entry["links"]["self"] for entry in record["files"]}

    result = []

    for key, key_dates in iterate_patterns(file_key, dates, **kwargs):
        # Keys that are not part of the record are skipped
        if key not in urls:
            continue

        path = download_and_cache(urls[key])
        result.append(load_one("?", context, key_dates, path, options={}, flavour=None, **kwargs))

    return MultiFieldList(result)
|