anemoi-datasets 0.4.4-py3-none-any.whl → 0.5.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/cleanup.py +44 -0
- anemoi/datasets/commands/create.py +52 -21
- anemoi/datasets/commands/finalise-additions.py +45 -0
- anemoi/datasets/commands/finalise.py +39 -0
- anemoi/datasets/commands/init-additions.py +45 -0
- anemoi/datasets/commands/init.py +67 -0
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/load-additions.py +47 -0
- anemoi/datasets/commands/load.py +47 -0
- anemoi/datasets/commands/patch.py +39 -0
- anemoi/datasets/create/__init__.py +959 -146
- anemoi/datasets/create/check.py +5 -3
- anemoi/datasets/create/config.py +54 -2
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/grib.py +86 -1
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +8 -2
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
- anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
- anemoi/datasets/create/functions/sources/xarray/metadata.py +40 -40
- anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
- anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
- anemoi/datasets/create/input.py +62 -39
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/statistics/__init__.py +39 -23
- anemoi/datasets/create/utils.py +6 -2
- anemoi/datasets/data/__init__.py +1 -0
- anemoi/datasets/data/concat.py +46 -2
- anemoi/datasets/data/dataset.py +119 -34
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/forwards.py +17 -8
- anemoi/datasets/data/grids.py +17 -3
- anemoi/datasets/data/interpolate.py +133 -0
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/misc.py +56 -66
- anemoi/datasets/data/missing.py +240 -0
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/select.py +7 -1
- anemoi/datasets/data/stores.py +23 -10
- anemoi/datasets/data/subset.py +47 -5
- anemoi/datasets/data/unchecked.py +20 -22
- anemoi/datasets/data/xy.py +125 -0
- anemoi/datasets/dates/__init__.py +124 -95
- anemoi/datasets/dates/groups.py +85 -20
- anemoi/datasets/grids.py +66 -48
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +8 -17
- anemoi_datasets-0.5.0.dist-info/RECORD +105 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/loaders.py +0 -936
- anemoi_datasets-0.4.4.dist-info/RECORD +0 -86
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/dataset.py
CHANGED

@@ -5,24 +5,41 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.

+import datetime
+import json
 import logging
 import os
+import pprint
 import warnings
 from functools import cached_property

+from anemoi.utils.dates import frequency_to_seconds
+from anemoi.utils.dates import frequency_to_string
+from anemoi.utils.dates import frequency_to_timedelta
+
 LOG = logging.getLogger(__name__)


 class Dataset:
     arguments = {}

+    def mutate(self) -> "Dataset":
+        """
+        Give an opportunity to a subclass to return a new Dataset
+        object of a different class, if needed.
+        """
+        return self
+
+    def swap_with_parent(self, parent):
+        return parent
+
     @cached_property
     def _len(self):
         return len(self)

     def _subset(self, **kwargs):
         if not kwargs:
-            return self
+            return self.mutate()

         if "start" in kwargs or "end" in kwargs:
             start = kwargs.pop("start", None)
@@ -30,37 +47,58 @@ class Dataset:

             from .subset import Subset

-            return
+            return (
+                Subset(self, self._dates_to_indices(start, end), dict(start=start, end=end))._subset(**kwargs).mutate()
+            )

         if "frequency" in kwargs:
             from .subset import Subset

+            if "interpolate_frequency" in kwargs:
+                raise ValueError("Cannot use both `frequency` and `interpolate_frequency`")
+
             frequency = kwargs.pop("frequency")
-            return
+            return (
+                Subset(self, self._frequency_to_indices(frequency), dict(frequency=frequency))
+                ._subset(**kwargs)
+                .mutate()
+            )
+
+        if "interpolate_frequency" in kwargs:
+            from .interpolate import InterpolateFrequency
+
+            interpolate_frequency = kwargs.pop("interpolate_frequency")
+            return InterpolateFrequency(self, interpolate_frequency)._subset(**kwargs).mutate()

         if "select" in kwargs:
             from .select import Select

             select = kwargs.pop("select")
-            return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs)
+            return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs).mutate()

         if "drop" in kwargs:
             from .select import Select

             drop = kwargs.pop("drop")
-            return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs)
+            return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs).mutate()

         if "reorder" in kwargs:
             from .select import Select

             reorder = kwargs.pop("reorder")
-            return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs)
+            return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs).mutate()

         if "rename" in kwargs:
             from .select import Rename

             rename = kwargs.pop("rename")
-            return Rename(self, rename)._subset(**kwargs)
+            return Rename(self, rename)._subset(**kwargs).mutate()
+
+        if "rescale" in kwargs:
+            from .rescale import Rescale
+
+            rescale = kwargs.pop("rescale")
+            return Rescale(self, rescale)._subset(**kwargs).mutate()

         if "statistics" in kwargs:
             from ..data import open_dataset
@@ -68,20 +106,38 @@ class Dataset:

             statistics = kwargs.pop("statistics")

-            return Statistics(self, open_dataset(statistics))._subset(**kwargs)
+            return Statistics(self, open_dataset(statistics))._subset(**kwargs).mutate()

         if "thinning" in kwargs:
             from .masked import Thinning

             thinning = kwargs.pop("thinning")
             method = kwargs.pop("method", "every-nth")
-            return Thinning(self, thinning, method)._subset(**kwargs)
+            return Thinning(self, thinning, method)._subset(**kwargs).mutate()

         if "area" in kwargs:
             from .masked import Cropping

             bbox = kwargs.pop("area")
-            return Cropping(self, bbox)._subset(**kwargs)
+            return Cropping(self, bbox)._subset(**kwargs).mutate()
+
+        if "missing_dates" in kwargs:
+            from .missing import MissingDates
+
+            missing_dates = kwargs.pop("missing_dates")
+            return MissingDates(self, missing_dates)._subset(**kwargs).mutate()
+
+        if "skip_missing_dates" in kwargs:
+            from .missing import SkipMissingDates
+
+            if "expected_access" not in kwargs:
+                raise ValueError("`expected_access` is required with `skip_missing_dates`")
+
+            skip_missing_dates = kwargs.pop("skip_missing_dates")
+            expected_access = kwargs.pop("expected_access")
+
+            if skip_missing_dates:
+                return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()

         # Keep last
         if "shuffle" in kwargs:
@@ -90,15 +146,14 @@ class Dataset:
             shuffle = kwargs.pop("shuffle")

             if shuffle:
-                return Subset(self, self._shuffle_indices(), dict(shuffle=True))._subset(**kwargs)
+                return Subset(self, self._shuffle_indices(), dict(shuffle=True))._subset(**kwargs).mutate()

         raise NotImplementedError("Unsupported arguments: " + ", ".join(kwargs))

     def _frequency_to_indices(self, frequency):
-        from .misc import _frequency_to_hours

-        requested_frequency =
-        dataset_frequency =
+        requested_frequency = frequency_to_seconds(frequency)
+        dataset_frequency = frequency_to_seconds(self.frequency)
         assert requested_frequency % dataset_frequency == 0
         # Question: where do we start? first date, or first date that is a multiple of the frequency?
         step = requested_frequency // dataset_frequency
@@ -171,37 +226,71 @@ class Dataset:
         import anemoi

         def tidy(v):
-            if isinstance(v, (list, tuple)):
+            if isinstance(v, (list, tuple, set)):
                 return [tidy(i) for i in v]
             if isinstance(v, dict):
                 return {k: tidy(v) for k, v in v.items()}
             if isinstance(v, str) and v.startswith("/"):
                 return os.path.basename(v)
+            if isinstance(v, datetime.datetime):
+                return v.isoformat()
+            if isinstance(v, datetime.date):
+                return v.isoformat()
+            if isinstance(v, datetime.timedelta):
+                return frequency_to_string(v)
+
+            if isinstance(v, Dataset):
+                # That can happen in the `arguments`
+                # if a dataset is passed as an argument
+                return repr(v)
+
+            if isinstance(v, slice):
+                return (v.start, v.stop, v.step)
+
             return v

-
-
-
-
-
-
-
-
-
-
-        )
+        md = dict(
+            version=anemoi.datasets.__version__,
+            arguments=self.arguments,
+            **self.dataset_metadata(),
+        )
+
+        try:
+            return json.loads(json.dumps(tidy(md)))
+        except Exception:
+            LOG.exception("Failed to serialize metadata")
+            pprint.pprint(md)
+
+            raise
+
+    @property
+    def start_date(self):
+        return self.dates[0]
+
+    @property
+    def end_date(self):
+        return self.dates[-1]
+
+    def dataset_metadata(self):
+        return dict(
+            specific=self.metadata_specific(),
+            frequency=self.frequency,
+            variables=self.variables,
+            shape=self.shape,
+            start_date=self.start_date.astype(str),
+            end_date=self.end_date.astype(str),
         )

     def metadata_specific(self, **kwargs):
         action = self.__class__.__name__.lower()
-        assert isinstance(self.frequency,
+        # assert isinstance(self.frequency, datetime.timedelta), (self.frequency, self, action)
         return dict(
             action=action,
             variables=self.variables,
             shape=self.shape,
-            frequency=self.frequency,
-            start_date=self.
-            end_date=self.
+            frequency=frequency_to_string(frequency_to_timedelta(self.frequency)),
+            start_date=self.start_date.astype(str),
+            end_date=self.end_date.astype(str),
             **kwargs,
         )

@@ -220,10 +309,6 @@ class Dataset:
             if n.startswith("_") and not n.startswith("__"):
                 warnings.warn(f"Private method {n} is overriden in {ds.__class__.__name__}")

-        # for n in ('metadata_specific', 'tree', 'source'):
-        #     if n not in overriden:
-        #         warnings.warn(f"Method {n} is not overriden in {ds.__class__.__name__}")
-
     def _repr_html_(self):
         return self.tree().html()

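Note: the `_subset` dispatcher above backs the keyword arguments of `open_dataset`, so the new branches correspond to new user-facing options in 0.5.0. A minimal usage sketch, assuming the keywords are passed straight through `open_dataset` as the dispatcher suggests; the dataset path, variable names and `expected_access` value are hypothetical:

from anemoi.datasets import open_dataset

# Hypothetical dataset path and variable names; the keywords map onto the
# branches added to Dataset._subset above.
ds = open_dataset(
    "dataset-o96-2020-2021-6h-v1.zarr",
    select=["2t", "10u", "10v"],   # "select" branch
    frequency="12h",               # "frequency" branch (sub-sampling)
)

# New in 0.5.0: linear interpolation in time instead of sub-sampling.
# Combining it with `frequency` raises the ValueError added above.
hourly = open_dataset("dataset-o96-2020-2021-6h-v1.zarr", interpolate_frequency="1h")

# New in 0.5.0: skip samples whose dates are missing; `expected_access`
# describes the window of indices a sample reads (value shown is illustrative).
clean = open_dataset(
    "dataset-o96-2020-2021-6h-v1.zarr",
    skip_missing_dates=True,
    expected_access=slice(0, 2),
)
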
anemoi/datasets/data/debug.py
CHANGED

@@ -209,10 +209,14 @@ def _debug_indexing(method):
     return wrapper


+def _identity(x):
+    return x
+
+
 if DEBUG_ZARR_INDEXING:
     debug_indexing = _debug_indexing
 else:
-    debug_indexing =
+    debug_indexing = _identity


 def debug_zarr_loading(on_off):

anemoi/datasets/data/forwards.py
CHANGED

@@ -23,7 +23,7 @@ LOG = logging.getLogger(__name__)

 class Forwards(Dataset):
     def __init__(self, forward):
-        self.forward = forward
+        self.forward = forward.mutate()

     def __len__(self):
         return len(self.forward)
@@ -118,6 +118,9 @@ class Combined(Forwards):
         # Forward most properties to the first dataset
         super().__init__(datasets[0])

+    def mutate(self):
+        return self
+
     def check_same_resolution(self, d1, d2):
         if d1.resolution != d2.resolution:
             raise ValueError(f"Incompatible resolutions: {d1.resolution} and {d2.resolution} ({d1} {d2})")
@@ -187,14 +190,9 @@ class Combined(Forwards):
             **kwargs,
         )

-    @
+    @property
     def missing(self):
-
-        result = set()
-        for d in self.datasets:
-            result.update(offset + m for m in d.missing)
-            offset += len(d)
-        return result
+        raise NotImplementedError("missing() not implemented for Combined")

     def get_dataset_names(self, names):
         for d in self.datasets:
@@ -249,3 +247,14 @@ class GivenAxis(Combined):
             return self._get_slice(n)

         return np.concatenate([d[n] for d in self.datasets], axis=self.axis - 1)
+
+    @cached_property
+    def missing(self):
+        offset = 0
+        result = set()
+        for d in self.datasets:
+            print("--->", d.missing, d)
+            result.update(offset + m for m in d.missing)
+            if self.axis == 0:  # Advance if axis is time
+                offset += len(d)
+        return result

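Note: the `missing` property that moved from `Combined` to `GivenAxis` merges the missing-index sets of the member datasets, shifting each set only when the datasets are concatenated along the time axis. A standalone illustration of that offset arithmetic, using plain sets rather than anemoi objects:

# Two datasets concatenated along time: 10 and 8 samples, each with its own
# set of missing sample indices.
parts = [(10, {2, 5}), (8, {0, 7})]

offset = 0
merged = set()
for length, missing in parts:
    merged.update(offset + m for m in missing)
    offset += length  # advance the offset only because the concatenation is along time

print(sorted(merged))  # [2, 5, 10, 17]
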
anemoi/datasets/data/grids.py
CHANGED

@@ -128,7 +128,7 @@ class Grids(GridsBase):


 class Cutout(GridsBase):
-    def __init__(self, datasets, axis):
+    def __init__(self, datasets, axis, min_distance_km=None, cropping_distance=2.0, neighbours=5, plot=False):
         from anemoi.datasets.grids import cutout_mask

         super().__init__(datasets, axis)
@@ -144,7 +144,10 @@ class Cutout(GridsBase):
             self.lam.longitudes,
             self.globe.latitudes,
             self.globe.longitudes,
-
+            plot=plot,
+            min_distance_km=min_distance_km,
+            cropping_distance=cropping_distance,
+            neighbours=neighbours,
         )
         assert len(self.mask) == self.globe.shape[3], (
             len(self.mask),
@@ -229,6 +232,10 @@ def cutout_factory(args, kwargs):

     cutout = kwargs.pop("cutout")
     axis = kwargs.pop("axis", 3)
+    plot = kwargs.pop("plot", None)
+    min_distance_km = kwargs.pop("min_distance_km", None)
+    cropping_distance = kwargs.pop("cropping_distance", 2.0)
+    neighbours = kwargs.pop("neighbours", 5)

     assert len(args) == 0
     assert isinstance(cutout, (list, tuple))
@@ -236,4 +243,11 @@ def cutout_factory(args, kwargs):
     datasets = [_open(e) for e in cutout]
     datasets, kwargs = _auto_adjust(datasets, kwargs)

-    return Cutout(
+    return Cutout(
+        datasets,
+        axis=axis,
+        neighbours=neighbours,
+        min_distance_km=min_distance_km,
+        cropping_distance=cropping_distance,
+        plot=plot,
+    )._subset(**kwargs)

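Note: `cutout_factory` receives the keyword arguments of `open_dataset`, so the new options are user-facing. A sketch of how they might be passed, assuming the `cutout` form of `open_dataset`; the dataset names are placeholders and the values simply restate the defaults popped above:

from anemoi.datasets import open_dataset

# Placeholder dataset names; the extra keywords are forwarded unchanged to
# anemoi.datasets.grids.cutout_mask via the Cutout constructor shown above.
ds = open_dataset(
    cutout=["lam-dataset.zarr", "global-dataset.zarr"],
    min_distance_km=None,
    cropping_distance=2.0,
    neighbours=5,
)
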
anemoi/datasets/data/interpolate.py
ADDED

@@ -0,0 +1,133 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import logging
+from functools import cached_property
+
+import numpy as np
+from anemoi.utils.dates import frequency_to_timedelta
+
+from .debug import Node
+from .debug import debug_indexing
+from .forwards import Forwards
+from .indexing import apply_index_to_slices_changes
+from .indexing import expand_list_indexing
+from .indexing import index_to_slices
+from .indexing import update_tuple
+
+LOG = logging.getLogger(__name__)
+
+
+class InterpolateFrequency(Forwards):
+
+    def __init__(self, dataset, frequency):
+        super().__init__(dataset)
+        self._frequency = frequency_to_timedelta(frequency)
+
+        self.seconds = self._frequency.total_seconds()
+        other_seconds = dataset.frequency.total_seconds()
+
+        self.seconds = int(self.seconds)
+        assert self.seconds == self._frequency.total_seconds()
+
+        other_seconds = int(other_seconds)
+        assert other_seconds == dataset.frequency.total_seconds()
+
+        if self.seconds >= other_seconds:
+            raise ValueError(
+                f"Interpolate frequency {self._frequency} must be more frequent than dataset frequency {dataset.frequency}"
+            )
+
+        if other_seconds % self.seconds != 0:
+            raise ValueError(
+                f"Interpolate frequency {self._frequency} must be a multiple of the dataset frequency {dataset.frequency}"
+            )
+
+        self.ratio = other_seconds // self.seconds
+        self.alphas = np.linspace(0, 1, self.ratio + 1)
+        self.other_len = len(dataset)
+
+    @debug_indexing
+    @expand_list_indexing
+    def _get_tuple(self, index):
+        index, changes = index_to_slices(index, self.shape)
+        index, previous = update_tuple(index, 0, slice(None))
+        result = self._get_slice(previous)
+        return apply_index_to_slices_changes(result[index], changes)
+
+    def _get_slice(self, s):
+        return np.stack([self[i] for i in range(*s.indices(self._len))])
+
+    @debug_indexing
+    def __getitem__(self, n):
+        if isinstance(n, tuple):
+            return self._get_tuple(n)
+
+        if isinstance(n, slice):
+            return self._get_slice(n)
+
+        if n < 0:
+            n += self._len
+
+        if n == self._len - 1:
+            # Special case for the last element
+            return self.forward[-1]
+
+        i = n // self.ratio
+        x = n % self.ratio
+
+        if x == 0:
+            # No interpolation needed
+            return self.forward[i]
+
+        alpha = self.alphas[x]
+
+        assert 0 < alpha < 1, alpha
+        return self.forward[i] * (1 - alpha) + self.forward[i + 1] * alpha
+
+    def __len__(self):
+        return (self.other_len - 1) * self.ratio + 1
+
+    @property
+    def frequency(self):
+        return self._frequency
+
+    @cached_property
+    def dates(self):
+        result = []
+        deltas = [np.timedelta64(self.seconds * i, "s") for i in range(self.ratio)]
+        for d in self.forward.dates[:-1]:
+            for i in deltas:
+                result.append(d + i)
+        result.append(self.forward.dates[-1])
+        return np.array(result)
+
+    @property
+    def shape(self):
+        return (self._len,) + self.forward.shape[1:]
+
+    def tree(self):
+        return Node(self, [self.forward.tree()], frequency=self.frequency)
+
+    @cached_property
+    def missing(self):
+        result = []
+        j = 0
+        for i in range(self.other_len):
+            missing = i in self.forward.missing
+            for _ in range(self.ratio):
+                if missing:
+                    result.append(j)
+                j += 1
+
+        result = set(x for x in result if x < self._len)
+        return result
+
+    def subclass_metadata_specific(self):
+        return {
+            # "frequency": frequency_to_string(self._frequency),
+        }

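Note: the index arithmetic in `InterpolateFrequency.__getitem__` and `__len__` works out as in the standalone sketch below, here for a 6h dataset interpolated to 1h; this only mirrors the logic above and does not import anemoi:

import numpy as np

ratio = 6          # dataset frequency (6h) // requested frequency (1h)
other_len = 3      # number of native 6h samples
alphas = np.linspace(0, 1, ratio + 1)

new_len = (other_len - 1) * ratio + 1   # 13 hourly samples over the same period

for n in range(new_len):
    i, x = divmod(n, ratio)
    if x == 0:
        print(n, "-> native sample", i)  # no interpolation needed
    else:
        # linear blend between the two neighbouring native samples
        print(n, f"-> {1 - alphas[x]:.2f} * sample[{i}] + {alphas[x]:.2f} * sample[{i + 1}]")
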
anemoi/datasets/data/masked.py
CHANGED

@@ -112,5 +112,5 @@ class Cropping(Masked):
     def tree(self):
         return Node(self, [self.forward.tree()], area=self.area)

-    def
-        return
+    def subclass_metadata_specific(self):
+        return dict(area=self.area)