anemoi-datasets 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/cleanup.py +44 -0
- anemoi/datasets/commands/create.py +50 -20
- anemoi/datasets/commands/finalise-additions.py +45 -0
- anemoi/datasets/commands/finalise.py +39 -0
- anemoi/datasets/commands/init-additions.py +45 -0
- anemoi/datasets/commands/init.py +67 -0
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/load-additions.py +47 -0
- anemoi/datasets/commands/load.py +47 -0
- anemoi/datasets/commands/patch.py +39 -0
- anemoi/datasets/create/__init__.py +961 -146
- anemoi/datasets/create/check.py +5 -3
- anemoi/datasets/create/config.py +53 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +1 -1
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
- anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
- anemoi/datasets/create/functions/sources/xarray/metadata.py +27 -29
- anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
- anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
- anemoi/datasets/create/input.py +23 -22
- anemoi/datasets/create/statistics/__init__.py +39 -23
- anemoi/datasets/create/utils.py +3 -2
- anemoi/datasets/data/__init__.py +1 -0
- anemoi/datasets/data/concat.py +46 -2
- anemoi/datasets/data/dataset.py +109 -34
- anemoi/datasets/data/forwards.py +17 -8
- anemoi/datasets/data/grids.py +17 -3
- anemoi/datasets/data/interpolate.py +133 -0
- anemoi/datasets/data/misc.py +56 -66
- anemoi/datasets/data/missing.py +240 -0
- anemoi/datasets/data/select.py +7 -1
- anemoi/datasets/data/stores.py +3 -3
- anemoi/datasets/data/subset.py +47 -5
- anemoi/datasets/data/unchecked.py +20 -22
- anemoi/datasets/data/xy.py +125 -0
- anemoi/datasets/dates/__init__.py +13 -66
- anemoi/datasets/dates/groups.py +2 -2
- anemoi/datasets/grids.py +66 -48
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/METADATA +5 -5
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/RECORD +47 -37
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/loaders.py +0 -936
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/check.py
CHANGED
|
@@ -12,6 +12,7 @@ import re
|
|
|
12
12
|
import warnings
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
15
|
+
from anemoi.utils.dates import frequency_to_string
|
|
15
16
|
|
|
16
17
|
LOG = logging.getLogger(__name__)
|
|
17
18
|
|
|
@@ -56,10 +57,11 @@ class DatasetName:
|
|
|
56
57
|
raise ValueError(self.error_message)
|
|
57
58
|
|
|
58
59
|
def _parse(self, name):
|
|
59
|
-
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)
|
|
60
|
+
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)?$"
|
|
60
61
|
match = re.match(pattern, name)
|
|
61
62
|
|
|
62
|
-
|
|
63
|
+
if not match:
|
|
64
|
+
raise ValueError(f"the dataset name '{name}' does not follow naming convention. Does not match {pattern}")
|
|
63
65
|
|
|
64
66
|
parsed = {}
|
|
65
67
|
if match:
|
|
@@ -105,7 +107,7 @@ class DatasetName:
|
|
|
105
107
|
def check_frequency(self, frequency):
|
|
106
108
|
if frequency is None:
|
|
107
109
|
return
|
|
108
|
-
frequency_str =
|
|
110
|
+
frequency_str = frequency_to_string(frequency)
|
|
109
111
|
self._check_missing("frequency", frequency_str)
|
|
110
112
|
self._check_mismatch("frequency", frequency_str)
|
|
111
113
|
|
anemoi/datasets/create/config.py
CHANGED
|
@@ -16,6 +16,8 @@ from anemoi.utils.config import DotDict
|
|
|
16
16
|
from anemoi.utils.config import load_any_dict_format
|
|
17
17
|
from earthkit.data.core.order import normalize_order_by
|
|
18
18
|
|
|
19
|
+
from anemoi.datasets.dates.groups import Groups
|
|
20
|
+
|
|
19
21
|
LOG = logging.getLogger(__name__)
|
|
20
22
|
|
|
21
23
|
|
|
@@ -153,6 +155,8 @@ class LoadersConfig(Config):
|
|
|
153
155
|
raise ValueError("statistics_end is not supported anymore. Use 'statistics:end:' instead")
|
|
154
156
|
|
|
155
157
|
self.setdefault("statistics", Config())
|
|
158
|
+
if "allow_nans" not in self.statistics:
|
|
159
|
+
self.statistics.allow_nans = []
|
|
156
160
|
|
|
157
161
|
check_dict_value_and_set(self.output, "flatten_grid", True)
|
|
158
162
|
check_dict_value_and_set(self.output, "ensemble_dimension", 2)
|
|
@@ -207,8 +211,49 @@ def _prepare_serialisation(o):
|
|
|
207
211
|
return str(o)
|
|
208
212
|
|
|
209
213
|
|
|
210
|
-
def
|
|
214
|
+
def set_to_test_mode(cfg):
|
|
215
|
+
NUMBER_OF_DATES = 4
|
|
216
|
+
|
|
217
|
+
dates = cfg["dates"]
|
|
218
|
+
LOG.warn(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
|
|
219
|
+
groups = Groups(**LoadersConfig(cfg).dates)
|
|
220
|
+
dates = groups.dates
|
|
221
|
+
cfg["dates"] = dict(
|
|
222
|
+
start=dates[0],
|
|
223
|
+
end=dates[NUMBER_OF_DATES - 1],
|
|
224
|
+
frequency=dates.frequency,
|
|
225
|
+
group_by=NUMBER_OF_DATES,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
def set_element_to_test(obj):
|
|
229
|
+
if isinstance(obj, (list, tuple)):
|
|
230
|
+
for v in obj:
|
|
231
|
+
set_element_to_test(v)
|
|
232
|
+
return
|
|
233
|
+
if isinstance(obj, (dict, DotDict)):
|
|
234
|
+
if "grid" in obj:
|
|
235
|
+
previous = obj["grid"]
|
|
236
|
+
obj["grid"] = "20./20."
|
|
237
|
+
LOG.warn(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
|
|
238
|
+
if "number" in obj:
|
|
239
|
+
if isinstance(obj["number"], (list, tuple)):
|
|
240
|
+
previous = obj["number"]
|
|
241
|
+
obj["number"] = previous[0:3]
|
|
242
|
+
LOG.warn(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
|
|
243
|
+
for k, v in obj.items():
|
|
244
|
+
set_element_to_test(v)
|
|
245
|
+
if "constants" in obj:
|
|
246
|
+
constants = obj["constants"]
|
|
247
|
+
if "param" in constants and isinstance(constants["param"], list):
|
|
248
|
+
constants["param"] = ["cos_latitude"]
|
|
249
|
+
|
|
250
|
+
set_element_to_test(cfg)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def loader_config(config, is_test=False):
|
|
211
254
|
config = Config(config)
|
|
255
|
+
if is_test:
|
|
256
|
+
set_to_test_mode(config)
|
|
212
257
|
obj = LoadersConfig(config)
|
|
213
258
|
|
|
214
259
|
# yaml round trip to check that serialisation works as expected
|
|
@@ -216,7 +261,13 @@ def loader_config(config):
|
|
|
216
261
|
copy = yaml.load(yaml.dump(copy), Loader=yaml.SafeLoader)
|
|
217
262
|
copy = Config(copy)
|
|
218
263
|
copy = LoadersConfig(config)
|
|
219
|
-
|
|
264
|
+
|
|
265
|
+
a = yaml.dump(obj)
|
|
266
|
+
b = yaml.dump(copy)
|
|
267
|
+
if a != b:
|
|
268
|
+
print(a)
|
|
269
|
+
print(b)
|
|
270
|
+
raise ValueError("Serialisation failed")
|
|
220
271
|
|
|
221
272
|
return copy
|
|
222
273
|
|
|
@@ -52,9 +52,19 @@ def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs)
|
|
|
52
52
|
result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])
|
|
53
53
|
|
|
54
54
|
if len(result) == 0:
|
|
55
|
-
LOG.warning(f"No data found for {dataset} and dates {dates}")
|
|
55
|
+
LOG.warning(f"No data found for {dataset} and dates {dates} and {kwargs}")
|
|
56
56
|
LOG.warning(f"Options: {options}")
|
|
57
|
-
|
|
57
|
+
|
|
58
|
+
for i, k in enumerate(fs):
|
|
59
|
+
a = ["valid_datetime", k.metadata("valid_datetime", default=None)]
|
|
60
|
+
for n in kwargs.keys():
|
|
61
|
+
a.extend([n, k.metadata(n, default=None)])
|
|
62
|
+
print([str(x) for x in a])
|
|
63
|
+
|
|
64
|
+
if i > 16:
|
|
65
|
+
break
|
|
66
|
+
|
|
67
|
+
# LOG.warning(data)
|
|
58
68
|
|
|
59
69
|
return result
|
|
60
70
|
|
|
@@ -55,6 +55,7 @@ class Coordinate:
|
|
|
55
55
|
is_time = False
|
|
56
56
|
is_step = False
|
|
57
57
|
is_date = False
|
|
58
|
+
is_member = False
|
|
58
59
|
|
|
59
60
|
def __init__(self, variable):
|
|
60
61
|
self.variable = variable
|
|
@@ -201,8 +202,14 @@ class LevelCoordinate(Coordinate):
|
|
|
201
202
|
|
|
202
203
|
|
|
203
204
|
class EnsembleCoordinate(Coordinate):
|
|
205
|
+
is_member = True
|
|
204
206
|
mars_names = ("number",)
|
|
205
207
|
|
|
208
|
+
def normalise(self, value):
|
|
209
|
+
if int(value) == value:
|
|
210
|
+
return int(value)
|
|
211
|
+
return value
|
|
212
|
+
|
|
206
213
|
|
|
207
214
|
class LongitudeCoordinate(Coordinate):
|
|
208
215
|
is_grid = True
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
from .coordinates import DateCoordinate
|
|
12
|
+
from .coordinates import EnsembleCoordinate
|
|
12
13
|
from .coordinates import LatitudeCoordinate
|
|
13
14
|
from .coordinates import LevelCoordinate
|
|
14
15
|
from .coordinates import LongitudeCoordinate
|
|
@@ -135,6 +136,17 @@ class CoordinateGuesser:
|
|
|
135
136
|
if d is not None:
|
|
136
137
|
return d
|
|
137
138
|
|
|
139
|
+
d = self._is_number(
|
|
140
|
+
c,
|
|
141
|
+
axis=axis,
|
|
142
|
+
name=name,
|
|
143
|
+
long_name=long_name,
|
|
144
|
+
standard_name=standard_name,
|
|
145
|
+
units=units,
|
|
146
|
+
)
|
|
147
|
+
if d is not None:
|
|
148
|
+
return d
|
|
149
|
+
|
|
138
150
|
if c.shape in ((1,), tuple()):
|
|
139
151
|
return ScalarCoordinate(c)
|
|
140
152
|
|
|
@@ -249,9 +261,13 @@ class DefaultCoordinateGuesser(CoordinateGuesser):
|
|
|
249
261
|
if standard_name == "depth":
|
|
250
262
|
return LevelCoordinate(c, "depth")
|
|
251
263
|
|
|
252
|
-
if name == "
|
|
264
|
+
if name == "vertical" and units == "hPa":
|
|
253
265
|
return LevelCoordinate(c, "pl")
|
|
254
266
|
|
|
267
|
+
def _is_number(self, c, *, axis, name, long_name, standard_name, units):
|
|
268
|
+
if name in ("realization", "number"):
|
|
269
|
+
return EnsembleCoordinate(c)
|
|
270
|
+
|
|
255
271
|
|
|
256
272
|
class FlavourCoordinateGuesser(CoordinateGuesser):
|
|
257
273
|
def __init__(self, ds, flavour):
|
|
@@ -328,3 +344,7 @@ class FlavourCoordinateGuesser(CoordinateGuesser):
|
|
|
328
344
|
return self.flavour["levtype"]
|
|
329
345
|
|
|
330
346
|
raise NotImplementedError(f"levtype for {c=}")
|
|
347
|
+
|
|
348
|
+
def _is_number(self, c, *, axis, name, long_name, standard_name, units):
|
|
349
|
+
if self._match(c, "number", locals()):
|
|
350
|
+
return DateCoordinate(c)
|
|
@@ -10,29 +10,37 @@
|
|
|
10
10
|
import logging
|
|
11
11
|
from functools import cached_property
|
|
12
12
|
|
|
13
|
+
from anemoi.utils.dates import as_datetime
|
|
13
14
|
from earthkit.data.core.geography import Geography
|
|
14
15
|
from earthkit.data.core.metadata import RawMetadata
|
|
15
|
-
from earthkit.data.utils.dates import to_datetime
|
|
16
16
|
from earthkit.data.utils.projections import Projection
|
|
17
17
|
|
|
18
18
|
LOG = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class
|
|
21
|
+
class _MDMapping:
|
|
22
22
|
|
|
23
|
-
def __init__(self,
|
|
24
|
-
self.
|
|
23
|
+
def __init__(self, variable):
|
|
24
|
+
self.variable = variable
|
|
25
|
+
self.time = variable.time
|
|
26
|
+
self.mapping = dict(param="variable")
|
|
27
|
+
for c in variable.coordinates:
|
|
28
|
+
for v in c.mars_names:
|
|
29
|
+
assert v not in self.mapping, f"Duplicate key '{v}' in {c}"
|
|
30
|
+
self.mapping[v] = c.variable.name
|
|
25
31
|
|
|
26
|
-
def
|
|
27
|
-
|
|
28
|
-
return self.user_to_internal.get(kwargs, kwargs)
|
|
29
|
-
return {self.user_to_internal.get(k, k): v for k, v in kwargs.items()}
|
|
32
|
+
def _from_user(self, key):
|
|
33
|
+
return self.mapping.get(key, key)
|
|
30
34
|
|
|
31
|
-
def
|
|
32
|
-
|
|
35
|
+
def from_user(self, kwargs):
|
|
36
|
+
print("from_user", kwargs, self)
|
|
37
|
+
return {self._from_user(k): v for k, v in kwargs.items()}
|
|
33
38
|
|
|
34
39
|
def __repr__(self):
|
|
35
|
-
return f"MDMapping({self.
|
|
40
|
+
return f"MDMapping({self.mapping})"
|
|
41
|
+
|
|
42
|
+
def fill_time_metadata(self, field, md):
|
|
43
|
+
md["valid_datetime"] = as_datetime(self.variable.time.fill_time_metadata(field._md, md)).isoformat()
|
|
36
44
|
|
|
37
45
|
|
|
38
46
|
class XArrayMetadata(RawMetadata):
|
|
@@ -40,23 +48,11 @@ class XArrayMetadata(RawMetadata):
|
|
|
40
48
|
NAMESPACES = ["default", "mars"]
|
|
41
49
|
MARS_KEYS = ["param", "step", "levelist", "levtype", "number", "date", "time"]
|
|
42
50
|
|
|
43
|
-
def __init__(self, field
|
|
51
|
+
def __init__(self, field):
|
|
44
52
|
self._field = field
|
|
45
53
|
md = field._md.copy()
|
|
46
|
-
|
|
47
|
-
self._mapping
|
|
48
|
-
if mapping is None:
|
|
49
|
-
time_coord = [c for c in field.owner.coordinates if c.is_time]
|
|
50
|
-
if len(time_coord) == 1:
|
|
51
|
-
time_key = time_coord[0].name
|
|
52
|
-
else:
|
|
53
|
-
time_key = "time"
|
|
54
|
-
else:
|
|
55
|
-
time_key = mapping.from_user("valid_datetime")
|
|
56
|
-
self._time = to_datetime(md.pop(time_key))
|
|
57
|
-
self._field.owner.time.fill_time_metadata(self._time, md)
|
|
58
|
-
md["valid_datetime"] = self._time.isoformat()
|
|
59
|
-
|
|
54
|
+
self._mapping = _MDMapping(field.owner)
|
|
55
|
+
self._mapping.fill_time_metadata(field, md)
|
|
60
56
|
super().__init__(md)
|
|
61
57
|
|
|
62
58
|
@cached_property
|
|
@@ -88,10 +84,13 @@ class XArrayMetadata(RawMetadata):
|
|
|
88
84
|
return self._field.forecast_reference_time
|
|
89
85
|
|
|
90
86
|
def _valid_datetime(self):
|
|
91
|
-
return self.
|
|
87
|
+
return self._get("valid_datetime")
|
|
92
88
|
|
|
93
89
|
def _get(self, key, **kwargs):
|
|
94
90
|
|
|
91
|
+
if key in self._d:
|
|
92
|
+
return self._d[key]
|
|
93
|
+
|
|
95
94
|
if key.startswith("mars."):
|
|
96
95
|
key = key[5:]
|
|
97
96
|
if key not in self.MARS_KEYS:
|
|
@@ -100,8 +99,7 @@ class XArrayMetadata(RawMetadata):
|
|
|
100
99
|
else:
|
|
101
100
|
return kwargs.get("default", None)
|
|
102
101
|
|
|
103
|
-
|
|
104
|
-
key = self._mapping.from_user(key)
|
|
102
|
+
key = self._mapping._from_user(key)
|
|
105
103
|
|
|
106
104
|
return super()._get(key, **kwargs)
|
|
107
105
|
|
|
@@ -10,8 +10,11 @@
|
|
|
10
10
|
|
|
11
11
|
import datetime
|
|
12
12
|
|
|
13
|
+
from anemoi.utils.dates import as_datetime
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
class Time:
|
|
17
|
+
|
|
15
18
|
@classmethod
|
|
16
19
|
def from_coordinates(cls, coordinates):
|
|
17
20
|
time_coordinate = [c for c in coordinates if c.is_time]
|
|
@@ -19,16 +22,16 @@ class Time:
|
|
|
19
22
|
date_coordinate = [c for c in coordinates if c.is_date]
|
|
20
23
|
|
|
21
24
|
if len(date_coordinate) == 0 and len(time_coordinate) == 1 and len(step_coordinate) == 1:
|
|
22
|
-
return
|
|
25
|
+
return ForecastFromValidTimeAndStep(time_coordinate[0], step_coordinate[0])
|
|
23
26
|
|
|
24
27
|
if len(date_coordinate) == 0 and len(time_coordinate) == 1 and len(step_coordinate) == 0:
|
|
25
|
-
return Analysis()
|
|
28
|
+
return Analysis(time_coordinate[0])
|
|
26
29
|
|
|
27
30
|
if len(date_coordinate) == 0 and len(time_coordinate) == 0 and len(step_coordinate) == 0:
|
|
28
31
|
return Constant()
|
|
29
32
|
|
|
30
33
|
if len(date_coordinate) == 1 and len(time_coordinate) == 1 and len(step_coordinate) == 0:
|
|
31
|
-
return ForecastFromValidTimeAndBaseTime(date_coordinate[0])
|
|
34
|
+
return ForecastFromValidTimeAndBaseTime(date_coordinate[0], time_coordinate[0])
|
|
32
35
|
|
|
33
36
|
if len(date_coordinate) == 1 and len(time_coordinate) == 0 and len(step_coordinate) == 1:
|
|
34
37
|
return ForecastFromBaseTimeAndDate(date_coordinate[0], step_coordinate[0])
|
|
@@ -38,61 +41,91 @@ class Time:
|
|
|
38
41
|
|
|
39
42
|
class Constant(Time):
|
|
40
43
|
|
|
41
|
-
def fill_time_metadata(self,
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
metadata["
|
|
44
|
+
def fill_time_metadata(self, coords_values, metadata):
|
|
45
|
+
raise NotImplementedError("Constant time not implemented")
|
|
46
|
+
# print("Constant", coords_values, metadata)
|
|
47
|
+
# metadata["date"] = time.strftime("%Y%m%d")
|
|
48
|
+
# metadata["time"] = time.strftime("%H%M")
|
|
49
|
+
# metadata["step"] = 0
|
|
45
50
|
|
|
46
51
|
|
|
47
52
|
class Analysis(Time):
|
|
48
53
|
|
|
49
|
-
def
|
|
50
|
-
|
|
51
|
-
|
|
54
|
+
def __init__(self, time_coordinate):
|
|
55
|
+
self.time_coordinate_name = time_coordinate.variable.name
|
|
56
|
+
|
|
57
|
+
def fill_time_metadata(self, coords_values, metadata):
|
|
58
|
+
valid_datetime = coords_values[self.time_coordinate_name]
|
|
59
|
+
|
|
60
|
+
metadata["date"] = as_datetime(valid_datetime).strftime("%Y%m%d")
|
|
61
|
+
metadata["time"] = as_datetime(valid_datetime).strftime("%H%M")
|
|
52
62
|
metadata["step"] = 0
|
|
53
63
|
|
|
64
|
+
return valid_datetime
|
|
54
65
|
|
|
55
|
-
class ForecasstFromValidTimeAndStep(Time):
|
|
56
|
-
def __init__(self, step_coordinate):
|
|
57
|
-
self.step_name = step_coordinate.variable.name
|
|
58
66
|
|
|
59
|
-
|
|
60
|
-
|
|
67
|
+
class ForecastFromValidTimeAndStep(Time):
|
|
68
|
+
|
|
69
|
+
def __init__(self, time_coordinate, step_coordinate):
|
|
70
|
+
self.time_coordinate_name = time_coordinate.variable.name
|
|
71
|
+
self.step_coordinate_name = step_coordinate.variable.name
|
|
72
|
+
|
|
73
|
+
def fill_time_metadata(self, coords_values, metadata):
|
|
74
|
+
valid_datetime = coords_values[self.time_coordinate_name]
|
|
75
|
+
step = coords_values[self.step_coordinate_name]
|
|
76
|
+
|
|
61
77
|
assert isinstance(step, datetime.timedelta)
|
|
62
|
-
|
|
78
|
+
base_datetime = valid_datetime - step
|
|
63
79
|
|
|
64
80
|
hours = step.total_seconds() / 3600
|
|
65
81
|
assert int(hours) == hours
|
|
66
82
|
|
|
67
|
-
metadata["date"] =
|
|
68
|
-
metadata["time"] =
|
|
83
|
+
metadata["date"] = as_datetime(base_datetime).strftime("%Y%m%d")
|
|
84
|
+
metadata["time"] = as_datetime(base_datetime).strftime("%H%M")
|
|
69
85
|
metadata["step"] = int(hours)
|
|
86
|
+
return valid_datetime
|
|
70
87
|
|
|
71
88
|
|
|
72
89
|
class ForecastFromValidTimeAndBaseTime(Time):
|
|
73
|
-
def __init__(self, date_coordinate):
|
|
74
|
-
self.date_coordinate = date_coordinate
|
|
75
90
|
|
|
76
|
-
def
|
|
91
|
+
def __init__(self, date_coordinate, time_coordinate):
|
|
92
|
+
self.date_coordinate.name = date_coordinate.name
|
|
93
|
+
self.time_coordinate.name = time_coordinate.name
|
|
94
|
+
|
|
95
|
+
def fill_time_metadata(self, coords_values, metadata):
|
|
96
|
+
valid_datetime = coords_values[self.time_coordinate_name]
|
|
97
|
+
base_datetime = coords_values[self.date_coordinate_name]
|
|
77
98
|
|
|
78
|
-
step =
|
|
99
|
+
step = valid_datetime - base_datetime
|
|
79
100
|
|
|
80
101
|
hours = step.total_seconds() / 3600
|
|
81
102
|
assert int(hours) == hours
|
|
82
103
|
|
|
83
|
-
metadata["date"] =
|
|
84
|
-
metadata["time"] =
|
|
104
|
+
metadata["date"] = as_datetime(base_datetime).strftime("%Y%m%d")
|
|
105
|
+
metadata["time"] = as_datetime(base_datetime).strftime("%H%M")
|
|
85
106
|
metadata["step"] = int(hours)
|
|
86
107
|
|
|
108
|
+
return valid_datetime
|
|
109
|
+
|
|
87
110
|
|
|
88
111
|
class ForecastFromBaseTimeAndDate(Time):
|
|
112
|
+
|
|
89
113
|
def __init__(self, date_coordinate, step_coordinate):
|
|
90
|
-
self.
|
|
91
|
-
self.
|
|
114
|
+
self.date_coordinate_name = date_coordinate.name
|
|
115
|
+
self.step_coordinate_name = step_coordinate.name
|
|
116
|
+
|
|
117
|
+
def fill_time_metadata(self, coords_values, metadata):
|
|
118
|
+
|
|
119
|
+
date = coords_values[self.date_coordinate_name]
|
|
120
|
+
step = coords_values[self.step_coordinate_name]
|
|
121
|
+
assert isinstance(step, datetime.timedelta)
|
|
122
|
+
|
|
123
|
+
metadata["date"] = as_datetime(date).strftime("%Y%m%d")
|
|
124
|
+
metadata["time"] = as_datetime(date).strftime("%H%M")
|
|
125
|
+
|
|
126
|
+
hours = step.total_seconds() / 3600
|
|
92
127
|
|
|
93
|
-
def fill_time_metadata(self, time, metadata):
|
|
94
|
-
metadata["date"] = time.strftime("%Y%m%d")
|
|
95
|
-
metadata["time"] = time.strftime("%H%M")
|
|
96
|
-
hours = metadata[self.step_coordinate.name].total_seconds() / 3600
|
|
97
128
|
assert int(hours) == hours
|
|
98
129
|
metadata["step"] = int(hours)
|
|
130
|
+
|
|
131
|
+
return date + step
|
|
@@ -14,34 +14,32 @@ from functools import cached_property
|
|
|
14
14
|
import numpy as np
|
|
15
15
|
from earthkit.data.utils.array import ensure_backend
|
|
16
16
|
|
|
17
|
-
from anemoi.datasets.create.functions.sources.xarray.metadata import MDMapping
|
|
18
|
-
|
|
19
17
|
from .field import XArrayField
|
|
20
18
|
|
|
21
19
|
LOG = logging.getLogger(__name__)
|
|
22
20
|
|
|
23
21
|
|
|
24
22
|
class Variable:
|
|
25
|
-
def __init__(
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
*,
|
|
26
|
+
ds,
|
|
27
|
+
var,
|
|
28
|
+
coordinates,
|
|
29
|
+
grid,
|
|
30
|
+
time,
|
|
31
|
+
metadata,
|
|
32
|
+
array_backend=None,
|
|
33
|
+
):
|
|
26
34
|
self.ds = ds
|
|
27
35
|
self.var = var
|
|
28
36
|
|
|
29
37
|
self.grid = grid
|
|
30
38
|
self.coordinates = coordinates
|
|
31
39
|
|
|
32
|
-
# print("Variable", var.name)
|
|
33
|
-
# for c in coordinates:
|
|
34
|
-
# print(" ", c)
|
|
35
|
-
|
|
36
40
|
self._metadata = metadata.copy()
|
|
37
|
-
# self._metadata.update(var.attrs)
|
|
38
41
|
self._metadata.update({"variable": var.name})
|
|
39
42
|
|
|
40
|
-
# self._metadata.setdefault("level", None)
|
|
41
|
-
# self._metadata.setdefault("number", 0)
|
|
42
|
-
# self._metadata.setdefault("levtype", "sfc")
|
|
43
|
-
self._mapping = mapping
|
|
44
|
-
|
|
45
43
|
self.time = time
|
|
46
44
|
|
|
47
45
|
self.shape = tuple(len(c.variable) for c in coordinates if c.is_dim and not c.scalar and not c.is_grid)
|
|
@@ -51,23 +49,6 @@ class Variable:
|
|
|
51
49
|
self.length = math.prod(self.shape)
|
|
52
50
|
self.array_backend = ensure_backend(array_backend)
|
|
53
51
|
|
|
54
|
-
def update_metadata_mapping(self, kwargs):
|
|
55
|
-
|
|
56
|
-
result = {}
|
|
57
|
-
|
|
58
|
-
for k, v in kwargs.items():
|
|
59
|
-
if k == "param":
|
|
60
|
-
result[k] = "variable"
|
|
61
|
-
continue
|
|
62
|
-
|
|
63
|
-
for c in self.coordinates:
|
|
64
|
-
if k in c.mars_names:
|
|
65
|
-
for v in c.mars_names:
|
|
66
|
-
result[v] = c.variable.name
|
|
67
|
-
break
|
|
68
|
-
|
|
69
|
-
self._mapping = MDMapping(result)
|
|
70
|
-
|
|
71
52
|
@property
|
|
72
53
|
def name(self):
|
|
73
54
|
return self.var.name
|
|
@@ -111,17 +92,11 @@ class Variable:
|
|
|
111
92
|
kwargs = {k: v for k, v in zip(self.names, coords)}
|
|
112
93
|
return XArrayField(self, self.var.isel(kwargs))
|
|
113
94
|
|
|
114
|
-
@property
|
|
115
|
-
def mapping(self):
|
|
116
|
-
return self._mapping
|
|
117
|
-
|
|
118
95
|
def sel(self, missing, **kwargs):
|
|
119
96
|
|
|
120
97
|
if not kwargs:
|
|
121
98
|
return self
|
|
122
99
|
|
|
123
|
-
kwargs = self._mapping.from_user(kwargs)
|
|
124
|
-
|
|
125
100
|
k, v = kwargs.popitem()
|
|
126
101
|
|
|
127
102
|
c = self.by_name.get(k)
|
|
@@ -147,13 +122,15 @@ class Variable:
|
|
|
147
122
|
grid=self.grid,
|
|
148
123
|
time=self.time,
|
|
149
124
|
metadata=metadata,
|
|
150
|
-
mapping=self.mapping,
|
|
151
125
|
)
|
|
152
126
|
|
|
153
127
|
return variable.sel(missing, **kwargs)
|
|
154
128
|
|
|
155
129
|
def match(self, **kwargs):
|
|
156
|
-
|
|
130
|
+
|
|
131
|
+
if "param" in kwargs:
|
|
132
|
+
assert "variable" not in kwargs
|
|
133
|
+
kwargs["variable"] = kwargs.pop("param")
|
|
157
134
|
|
|
158
135
|
if "variable" in kwargs:
|
|
159
136
|
name = kwargs.pop("variable")
|
anemoi/datasets/create/input.py
CHANGED
|
@@ -106,30 +106,32 @@ def _data_request(data):
|
|
|
106
106
|
area = grid = None
|
|
107
107
|
|
|
108
108
|
for field in data:
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
if date is None:
|
|
113
|
-
date = field.datetime()["valid_time"]
|
|
114
|
-
|
|
115
|
-
if field.datetime()["valid_time"] != date:
|
|
116
|
-
continue
|
|
109
|
+
try:
|
|
110
|
+
if date is None:
|
|
111
|
+
date = field.datetime()["valid_time"]
|
|
117
112
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
levtype = as_mars.get("levtype", "sfc")
|
|
121
|
-
param = as_mars["param"]
|
|
122
|
-
levelist = as_mars.get("levelist", None)
|
|
123
|
-
area = field.mars_area
|
|
124
|
-
grid = field.mars_grid
|
|
113
|
+
if field.datetime()["valid_time"] != date:
|
|
114
|
+
continue
|
|
125
115
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
116
|
+
as_mars = field.metadata(namespace="mars")
|
|
117
|
+
if not as_mars:
|
|
118
|
+
continue
|
|
119
|
+
step = as_mars.get("step")
|
|
120
|
+
levtype = as_mars.get("levtype", "sfc")
|
|
121
|
+
param = as_mars["param"]
|
|
122
|
+
levelist = as_mars.get("levelist", None)
|
|
123
|
+
area = field.mars_area
|
|
124
|
+
grid = field.mars_grid
|
|
125
|
+
|
|
126
|
+
if levelist is None:
|
|
127
|
+
params_levels[levtype].add(param)
|
|
128
|
+
else:
|
|
129
|
+
params_levels[levtype].add((param, levelist))
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
131
|
+
if step:
|
|
132
|
+
params_steps[levtype].add((param, step))
|
|
133
|
+
except Exception:
|
|
134
|
+
LOG.error(f"Error in retrieving metadata (cannot build data request info) for {field}", exc_info=True)
|
|
133
135
|
|
|
134
136
|
def sort(old_dic):
|
|
135
137
|
new_dic = {}
|
|
@@ -288,7 +290,6 @@ class Result:
|
|
|
288
290
|
names += list(a.keys())
|
|
289
291
|
|
|
290
292
|
print(f"Building a {len(names)}D hypercube using", names)
|
|
291
|
-
|
|
292
293
|
ds = ds.order_by(*args, remapping=remapping, patches=patches)
|
|
293
294
|
user_coords = ds.unique_values(*names, remapping=remapping, patches=patches, progress_bar=False)
|
|
294
295
|
|