anemoi-datasets 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/cleanup.py +44 -0
- anemoi/datasets/commands/create.py +50 -20
- anemoi/datasets/commands/finalise-additions.py +45 -0
- anemoi/datasets/commands/finalise.py +39 -0
- anemoi/datasets/commands/init-additions.py +45 -0
- anemoi/datasets/commands/init.py +67 -0
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/load-additions.py +47 -0
- anemoi/datasets/commands/load.py +47 -0
- anemoi/datasets/commands/patch.py +39 -0
- anemoi/datasets/compute/recentre.py +1 -1
- anemoi/datasets/create/__init__.py +961 -146
- anemoi/datasets/create/check.py +5 -3
- anemoi/datasets/create/config.py +53 -2
- anemoi/datasets/create/functions/sources/accumulations.py +6 -22
- anemoi/datasets/create/functions/sources/hindcasts.py +27 -12
- anemoi/datasets/create/functions/sources/tendencies.py +1 -1
- anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +1 -1
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
- anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
- anemoi/datasets/create/functions/sources/xarray/metadata.py +27 -29
- anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
- anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
- anemoi/datasets/create/input.py +62 -25
- anemoi/datasets/create/statistics/__init__.py +39 -23
- anemoi/datasets/create/utils.py +3 -2
- anemoi/datasets/data/__init__.py +1 -0
- anemoi/datasets/data/concat.py +46 -2
- anemoi/datasets/data/dataset.py +109 -34
- anemoi/datasets/data/forwards.py +17 -8
- anemoi/datasets/data/grids.py +17 -3
- anemoi/datasets/data/interpolate.py +133 -0
- anemoi/datasets/data/misc.py +56 -66
- anemoi/datasets/data/missing.py +240 -0
- anemoi/datasets/data/select.py +7 -1
- anemoi/datasets/data/stores.py +3 -3
- anemoi/datasets/data/subset.py +47 -5
- anemoi/datasets/data/unchecked.py +20 -22
- anemoi/datasets/data/xy.py +125 -0
- anemoi/datasets/dates/__init__.py +33 -20
- anemoi/datasets/dates/groups.py +2 -2
- anemoi/datasets/grids.py +66 -48
- {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/METADATA +5 -5
- {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/RECORD +51 -41
- {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/loaders.py +0 -924
- {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
|
|
2
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
4
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
|
+
# nor does it submit to any jurisdiction.
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from functools import cached_property
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from anemoi.datasets.create.utils import to_datetime
|
|
14
|
+
from anemoi.datasets.data import MissingDateError
|
|
15
|
+
|
|
16
|
+
from .debug import Node
|
|
17
|
+
from .debug import debug_indexing
|
|
18
|
+
from .forwards import Forwards
|
|
19
|
+
from .indexing import expand_list_indexing
|
|
20
|
+
from .indexing import update_tuple
|
|
21
|
+
|
|
22
|
+
LOG = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MissingDates(Forwards):
    """Wrap a dataset and force some of its dates to be reported as missing.

    Any access that touches an index flagged as missing (either here or in
    the wrapped dataset) raises ``MissingDateError``; every other access is
    forwarded unchanged to the wrapped dataset.
    """

    # TODO: Use that class instead of ZarrMissing

    def __init__(self, dataset, missing_dates):
        """Record which indices of ``dataset`` must be treated as missing.

        Parameters
        ----------
        dataset :
            The dataset to wrap.
        missing_dates : iterable
            Each entry is either an ``int`` (an index into ``dataset.dates``)
            or a value accepted by ``to_datetime`` that is matched against
            ``dataset.dates``.

        Raises
        ------
        AssertionError
            If no in-range index ends up flagged as missing.
        """
        super().__init__(dataset)
        self.missing_dates = []

        self._missing = set()

        other = []
        for date in missing_dates:
            if isinstance(date, int):
                self._missing.add(date)
                self.missing_dates.append(dataset.dates[date])
            else:
                date = to_datetime(date)
                other.append(date)

        if other:
            # Resolve the dates given by value to their position in the dataset.
            for i, date in enumerate(dataset.dates):
                if date in other:
                    self._missing.add(i)
                    self.missing_dates.append(date)

        # Only indices inside the wrapped dataset are kept.
        n = self.forward._len
        self._missing = set(i for i in self._missing if 0 <= i < n)
        self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)

        assert len(self._missing), "No dates to force missing"

    @cached_property
    def missing(self):
        """Indices forced missing here, plus those missing in the wrapped dataset."""
        return self._missing.union(self.forward.missing)

    def _missing_hits(self, index):
        """Return the missing indices selected by an int or slice, or None if unsupported."""
        # NOTE(review): negative ints are tested as-is against the set of
        # missing indices (as before) -- confirm whether they need normalising.
        if isinstance(index, int):
            return {index} & self.missing
        if isinstance(index, slice):
            return set(range(*index.indices(len(self)))) & self.missing
        return None

    @debug_indexing
    @expand_list_indexing
    def __getitem__(self, n):
        """Forward ``n`` to the wrapped dataset unless it selects a missing date.

        ``n`` may be an int, a slice, or a tuple whose first element is an
        int, a slice, or a list/tuple of indices.

        Raises
        ------
        MissingDateError
            If the selection includes a missing index; the lowest offending
            index is the one reported.
        TypeError
            For any other kind of index.
        """
        if isinstance(n, tuple):
            first = n[0]
            if isinstance(first, (list, tuple)):
                hits = set(first) & self.missing
            else:
                hits = self._missing_hits(first)
        else:
            hits = self._missing_hits(n)

        if hits is None:
            raise TypeError(f"Unsupported index {n} {type(n)}")

        if hits:
            # Report the earliest offending index so the error is deterministic.
            self._report_missing(min(hits))

        return self.forward[n]

    def _report_missing(self, n):
        """Raise ``MissingDateError`` for index ``n``."""
        raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")

    @property
    def reason(self):
        """Keyword description of this wrapper, used by ``tree``."""
        return {"missing_dates": self.missing_dates}

    def tree(self):
        """Return the debug node for this wrapper, with the wrapped dataset as child."""
        return Node(self, [self.forward.tree()], **self.reason)

    def subclass_metadata_specific(self):
        """Return the metadata entries specific to this wrapper."""
        return {"missing_dates": self.missing_dates}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class SkipMissingDates(Forwards):
    """Expose only the access windows of a dataset that contain no missing date.

    Item ``n`` of this dataset is a tuple with one entry per position of the
    ``expected_access`` pattern, taken from the ``n``-th window of the wrapped
    dataset that is complete and free of missing indices.
    """

    def __init__(self, dataset, expected_access):
        """Pre-compute the usable windows.

        Parameters
        ----------
        dataset :
            The dataset to wrap.
        expected_access : int or slice
            The access pattern relative to a window start. An int ``k`` is
            shorthand for ``slice(0, k)``.

        Raises
        ------
        AssertionError
            If ``expected_access`` is neither an int nor a slice.
        """
        super().__init__(dataset)

        # if isinstance(expected_access, (tuple, list)):
        #     expected_access = slice(*expected_access)

        if isinstance(expected_access, int):
            expected_access = slice(0, expected_access)

        assert isinstance(expected_access, slice), f"Expected access must be a slice, got {expected_access}"

        expected_access = slice(*expected_access.indices(dataset._len))
        missing = dataset.missing.copy()

        # Number of positions in one complete window. Counting the range is
        # exact even when the step does not divide (stop - start), which a
        # plain floor division gets wrong.
        size = len(range(expected_access.start, expected_access.stop, expected_access.step))
        indices = []

        for i in range(dataset._len):
            s = slice(expected_access.start + i, expected_access.stop + i, expected_access.step)
            p = set(range(*s.indices(dataset._len)))

            # Skip windows touching a missing index ...
            if p.intersection(missing):
                continue

            # ... and windows truncated by the end of the dataset.
            if len(p) != size:
                continue

            indices.append(tuple(sorted(p)))

        self.expected_access = expected_access
        self.indices = indices

    def __len__(self):
        """Number of usable windows."""
        return len(self.indices)

    @property
    def start_date(self):
        return self.forward.start_date

    @property
    def end_date(self):
        return self.forward.end_date

    @property
    def dates(self):
        raise NotImplementedError("SkipMissingDates.dates")

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, index):
        """Handle a tuple index whose first element is an int or a slice."""

        def _get_one(n):
            # Fetch every position of window ``n``, re-using the remaining
            # components of ``index`` for each position.
            result = []
            for i in self.indices[n]:
                s, _ = update_tuple(index, 0, i)
                result.append(self.forward[s])

            return tuple(result)

        first = index[0]
        if isinstance(first, int):
            return _get_one(first)

        assert isinstance(first, slice), f"SkipMissingDates._get_tuple {index}"

        values = [_get_one(i) for i in range(*first.indices(self._len))]

        # Regroup per window position and stack across the selected windows.
        return tuple(np.stack(column) for column in zip(*values))

    @debug_indexing
    def _get_slice(self, s):
        """Handle a slice of windows: one stacked array per window position."""
        values = [self[i] for i in range(*s.indices(self._len))]
        return tuple(np.stack(column) for column in zip(*values))

    @debug_indexing
    def __getitem__(self, n):
        """Return window ``n`` as a tuple; tuples and slices are dispatched to helpers."""
        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self._get_slice(n)

        return tuple(self.forward[i] for i in self.indices[n])

    @property
    def frequency(self):
        return self.forward.frequency

    def tree(self):
        """Return the debug node for this wrapper, with the wrapped dataset as child."""
        return Node(self, [self.forward.tree()], expected_access=self.expected_access)

    def subclass_metadata_specific(self):
        """Return the metadata entries specific to this wrapper."""
        return {"expected_access": self.expected_access}
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class MissingDataset(Forwards):
    """A stand-in dataset in which every date between ``start`` and ``end`` is missing.

    The dates run from ``start`` to ``end`` (inclusive) in steps of the
    wrapped dataset's ``frequency``; any item access raises
    ``MissingDateError``.
    """

    def __init__(self, dataset, start, end):
        super().__init__(dataset)
        self.start, self.end = start, end

        # Enumerate start, start + frequency, ... up to and including end.
        timeline = []
        current = start
        while current <= end:
            timeline.append(current)
            current += dataset.frequency

        self._dates = np.array(timeline, dtype="datetime64")
        # Every index is flagged as missing.
        self._missing = set(range(len(timeline)))

    def __len__(self):
        return len(self._dates)

    @property
    def dates(self):
        return self._dates

    @property
    def missing(self):
        return self._missing

    def __getitem__(self, n):
        # Nothing is ever available from this dataset.
        raise MissingDateError(f"Date {self.dates[n]} is missing (index={n})")

    def tree(self):
        children = [self.forward.tree()]
        return Node(self, children, start=self.start, end=self.end)

    def subclass_metadata_specific(self):
        return dict(start=self.start, end=self.end)
|
anemoi/datasets/data/select.py
CHANGED
|
@@ -40,6 +40,12 @@ class Select(Forwards):
|
|
|
40
40
|
# Forward other properties to the main dataset
|
|
41
41
|
super().__init__(dataset)
|
|
42
42
|
|
|
43
|
+
def clone(self, dataset):
|
|
44
|
+
return self.__class__(dataset, self.indices, self.reason).mutate()
|
|
45
|
+
|
|
46
|
+
def mutate(self):
|
|
47
|
+
return self.forward.swap_with_parent(parent=self)
|
|
48
|
+
|
|
43
49
|
@debug_indexing
|
|
44
50
|
@expand_list_indexing
|
|
45
51
|
def _get_tuple(self, index):
|
|
@@ -101,7 +107,7 @@ class Rename(Forwards):
|
|
|
101
107
|
def __init__(self, dataset, rename):
|
|
102
108
|
super().__init__(dataset)
|
|
103
109
|
for n in rename:
|
|
104
|
-
assert n in dataset.variables
|
|
110
|
+
assert n in dataset.variables, n
|
|
105
111
|
self._variables = [rename.get(v, v) for v in dataset.variables]
|
|
106
112
|
self.rename = rename
|
|
107
113
|
|
anemoi/datasets/data/stores.py
CHANGED
|
@@ -13,6 +13,7 @@ from urllib.parse import urlparse
|
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
15
15
|
import zarr
|
|
16
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
16
17
|
|
|
17
18
|
from . import MissingDateError
|
|
18
19
|
from .dataset import Dataset
|
|
@@ -268,12 +269,11 @@ class Zarr(Dataset):
|
|
|
268
269
|
@property
|
|
269
270
|
def frequency(self):
|
|
270
271
|
try:
|
|
271
|
-
return self.z.attrs["frequency"]
|
|
272
|
+
return frequency_to_timedelta(self.z.attrs["frequency"])
|
|
272
273
|
except KeyError:
|
|
273
274
|
LOG.warning("No 'frequency' in %r, computing from 'dates'", self)
|
|
274
275
|
dates = self.dates
|
|
275
|
-
|
|
276
|
-
return int(delta.total_seconds() / 3600)
|
|
276
|
+
return dates[1].astype(object) - dates[0].astype(object)
|
|
277
277
|
|
|
278
278
|
@property
|
|
279
279
|
def name_to_index(self):
|
anemoi/datasets/data/subset.py
CHANGED
|
@@ -9,6 +9,7 @@ import logging
|
|
|
9
9
|
from functools import cached_property
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
12
13
|
|
|
13
14
|
from .debug import Node
|
|
14
15
|
from .debug import Source
|
|
@@ -23,13 +24,51 @@ from .indexing import update_tuple
|
|
|
23
24
|
LOG = logging.getLogger(__name__)
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
def _default(a, b, dates):
    """Fallback combiner: keep both values, in order, as a two-item list."""
    return [a, b]


def _start(a, b, dates):
    """Combine two ``start`` values: return the one whose first date is greater.

    Both values are resolved with ``as_first_date`` against ``dates``;
    on a tie ``a`` is returned.
    """
    from .misc import as_first_date

    first_a = as_first_date(a, dates)
    first_b = as_first_date(b, dates)
    return b if first_a < first_b else a


def _end(a, b, dates):
    """Combine two ``end`` values: return the one whose last date is smaller.

    Both values are resolved with ``as_last_date`` against ``dates``;
    on a tie ``b`` is returned.
    """
    from .misc import as_last_date

    last_a = as_last_date(a, dates)
    last_b = as_last_date(b, dates)
    return a if last_a < last_b else b


# Explicit registry of per-key combiners. Looking the function up by name in
# ``globals()`` would let an arbitrary reason key resolve to any unrelated
# underscore-prefixed module global; keys not listed here use ``_default``.
_REASON_COMBINERS = {
    "start": _start,
    "end": _end,
}


def _combine_reasons(reason1, reason2, dates):
    """Merge two ``reason`` dictionaries into a new one.

    Keys present in only one dictionary are copied as they are. Keys present
    in both are merged with the combiner registered for that key, or with
    ``_default`` when there is none. ``reason1`` is not modified.

    Parameters
    ----------
    reason1, reason2 : dict
        The reasons to merge; values of ``reason1`` are passed first.
    dates :
        Passed through to the combiners.

    Returns
    -------
    dict
        The merged reasons.
    """
    reason = reason1.copy()
    for k, v in reason2.items():
        if k in reason:
            combine = _REASON_COMBINERS.get(k, _default)
            reason[k] = combine(reason[k], v, dates)
        else:
            reason[k] = v
    return reason
|
|
63
|
+
|
|
64
|
+
|
|
26
65
|
class Subset(Forwards):
|
|
27
66
|
"""Select a subset of the dates."""
|
|
28
67
|
|
|
29
68
|
def __init__(self, dataset, indices, reason):
|
|
30
69
|
while isinstance(dataset, Subset):
|
|
31
70
|
indices = [dataset.indices[i] for i in indices]
|
|
32
|
-
reason =
|
|
71
|
+
reason = _combine_reasons(reason, dataset.reason, dataset.dates)
|
|
33
72
|
dataset = dataset.dataset
|
|
34
73
|
|
|
35
74
|
self.dataset = dataset
|
|
@@ -39,6 +78,12 @@ class Subset(Forwards):
|
|
|
39
78
|
# Forward other properties to the super dataset
|
|
40
79
|
super().__init__(dataset)
|
|
41
80
|
|
|
81
|
+
def clone(self, dataset):
|
|
82
|
+
return self.__class__(dataset, self.indices, self.reason).mutate()
|
|
83
|
+
|
|
84
|
+
def mutate(self):
|
|
85
|
+
return self.forward.swap_with_parent(parent=self)
|
|
86
|
+
|
|
42
87
|
@debug_indexing
|
|
43
88
|
def __getitem__(self, n):
|
|
44
89
|
if isinstance(n, tuple):
|
|
@@ -66,10 +111,8 @@ class Subset(Forwards):
|
|
|
66
111
|
@expand_list_indexing
|
|
67
112
|
def _get_tuple(self, n):
|
|
68
113
|
index, changes = index_to_slices(n, self.shape)
|
|
69
|
-
# print('INDEX', index, changes)
|
|
70
114
|
indices = [self.indices[i] for i in range(*index[0].indices(self._len))]
|
|
71
115
|
indices = make_slice_or_index_from_list_or_tuple(indices)
|
|
72
|
-
# print('INDICES', indices)
|
|
73
116
|
index, _ = update_tuple(index, 0, indices)
|
|
74
117
|
result = self.dataset[index]
|
|
75
118
|
result = apply_index_to_slices_changes(result, changes)
|
|
@@ -89,8 +132,7 @@ class Subset(Forwards):
|
|
|
89
132
|
@cached_property
|
|
90
133
|
def frequency(self):
|
|
91
134
|
dates = self.dates
|
|
92
|
-
|
|
93
|
-
return int(delta.total_seconds() / 3600)
|
|
135
|
+
return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
|
|
94
136
|
|
|
95
137
|
def source(self, index):
|
|
96
138
|
return Source(self, index, self.forward.source(index))
|
|
@@ -104,22 +104,29 @@ class Unchecked(Combined):
|
|
|
104
104
|
def shape(self):
|
|
105
105
|
raise NotImplementedError()
|
|
106
106
|
|
|
107
|
-
@property
|
|
108
|
-
def
|
|
109
|
-
|
|
107
|
+
# @property
|
|
108
|
+
# def field_shape(self):
|
|
109
|
+
# return tuple(d.shape for d in self.datasets)
|
|
110
110
|
|
|
111
|
-
@property
|
|
112
|
-
def
|
|
113
|
-
|
|
111
|
+
# @property
|
|
112
|
+
# def latitudes(self):
|
|
113
|
+
# return tuple(d.latitudes for d in self.datasets)
|
|
114
114
|
|
|
115
|
+
# @property
|
|
116
|
+
# def longitudes(self):
|
|
117
|
+
# return tuple(d.longitudes for d in self.datasets)
|
|
115
118
|
|
|
116
|
-
|
|
119
|
+
# @property
|
|
120
|
+
# def statistics(self):
|
|
121
|
+
# return tuple(d.statistics for d in self.datasets)
|
|
117
122
|
|
|
118
|
-
|
|
119
|
-
|
|
123
|
+
# @property
|
|
124
|
+
# def resolution(self):
|
|
125
|
+
# return tuple(d.resolution for d in self.datasets)
|
|
120
126
|
|
|
121
|
-
|
|
122
|
-
|
|
127
|
+
# @property
|
|
128
|
+
# def name_to_index(self):
|
|
129
|
+
# return tuple(d.name_to_index for d in self.datasets)
|
|
123
130
|
|
|
124
131
|
@cached_property
|
|
125
132
|
def missing(self):
|
|
@@ -142,17 +149,8 @@ class Chain(ConcatMixin, Unchecked):
|
|
|
142
149
|
def dates(self):
|
|
143
150
|
raise NotImplementedError()
|
|
144
151
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
zip = kwargs.pop("zip")
|
|
149
|
-
assert len(args) == 0
|
|
150
|
-
assert isinstance(zip, (list, tuple))
|
|
151
|
-
|
|
152
|
-
datasets = [_open(e) for e in zip]
|
|
153
|
-
datasets, kwargs = _auto_adjust(datasets, kwargs)
|
|
154
|
-
|
|
155
|
-
return Zip(datasets)._subset(**kwargs)
|
|
152
|
+
def dataset_metadata(self):
|
|
153
|
+
return {"multiple": [d.dataset_metadata() for d in self.datasets]}
|
|
156
154
|
|
|
157
155
|
|
|
158
156
|
def chain_factory(args, kwargs):
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
|
|
2
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
4
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
|
+
# nor does it submit to any jurisdiction.
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from functools import cached_property
|
|
10
|
+
|
|
11
|
+
from .debug import Node
|
|
12
|
+
from .forwards import Combined
|
|
13
|
+
from .misc import _auto_adjust
|
|
14
|
+
from .misc import _open
|
|
15
|
+
|
|
16
|
+
LOG = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ZipBase(Combined):
    """Combine several datasets item-wise.

    Item ``n`` is the tuple made of item ``n`` of every dataset, and most
    properties return a tuple with one entry per dataset.
    """

    def swap_with_parent(self, parent):
        """Clone ``parent`` onto each dataset and re-combine the clones."""
        new_parents = [parent.clone(ds) for ds in self.datasets]
        return self.clone(new_parents)

    def clone(self, datasets):
        """Build a new instance of the same class over ``datasets``."""
        return self.__class__(datasets)

    def tree(self):
        """Return the debug node for this combination, one child per dataset."""
        return Node(self, [d.tree() for d in self.datasets])

    def __len__(self):
        """Length of the shortest dataset."""
        return min(len(d) for d in self.datasets)

    def __getitem__(self, n):
        """Return item ``n`` of every dataset, as a tuple."""
        return tuple(d[n] for d in self.datasets)

    # The three checks below are deliberately no-ops.
    # NOTE(review): presumably they disable compatibility checks performed by
    # ``Combined`` -- confirm against that class.

    def check_same_resolution(self, d1, d2):
        pass

    def check_same_grid(self, d1, d2):
        pass

    def check_same_variables(self, d1, d2):
        pass

    @cached_property
    def missing(self):
        """Union of the missing indices of all datasets."""
        result = set()
        for d in self.datasets:
            result |= d.missing
        return result

    @property
    def shape(self):
        return tuple(d.shape for d in self.datasets)

    @property
    def field_shape(self):
        # Forward ``field_shape`` (not ``shape``), like every sibling property
        # forwards the attribute of the same name.
        return tuple(d.field_shape for d in self.datasets)

    @property
    def latitudes(self):
        return tuple(d.latitudes for d in self.datasets)

    @property
    def longitudes(self):
        return tuple(d.longitudes for d in self.datasets)

    @property
    def dtype(self):
        return tuple(d.dtype for d in self.datasets)

    @property
    def grids(self):
        return tuple(d.grids for d in self.datasets)

    @property
    def statistics(self):
        return tuple(d.statistics for d in self.datasets)

    @property
    def resolution(self):
        return tuple(d.resolution for d in self.datasets)

    @property
    def name_to_index(self):
        return tuple(d.name_to_index for d in self.datasets)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Zip(ZipBase):
    """Item-wise combination of datasets, as built by ``zip_factory``."""
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class XY(ZipBase):
    """Item-wise combination of exactly two datasets, as built by ``xy_factory``."""
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def xy_factory(args, kwargs):
    """Build an ``XY`` dataset from the ``xy`` (or ``x`` and ``y``) entries of ``kwargs``.

    The consumed entries are popped from ``kwargs``; whatever remains after
    ``_auto_adjust`` is passed to ``_subset``. ``args`` must be empty and
    exactly two datasets must be given.
    """
    # Either a ready-made pair under "xy", or the two halves under "x" and "y".
    xy = kwargs.pop("xy") if "xy" in kwargs else [kwargs.pop("x"), kwargs.pop("y")]

    assert len(args) == 0
    assert isinstance(xy, (list, tuple))

    opened = [_open(entry) for entry in xy]
    opened, kwargs = _auto_adjust(opened, kwargs)

    assert len(opened) == 2

    return XY(opened)._subset(**kwargs)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def zip_factory(args, kwargs):
    """Build a ``Zip`` dataset from the ``zip`` entry of ``kwargs``.

    The ``zip`` entry is popped from ``kwargs``; whatever remains after
    ``_auto_adjust`` is passed to ``_subset``. ``args`` must be empty.
    """
    entries = kwargs.pop("zip")
    assert len(args) == 0
    assert isinstance(entries, (list, tuple))

    opened = [_open(entry) for entry in entries]
    opened, kwargs = _auto_adjust(opened, kwargs)

    return Zip(opened)._subset(**kwargs)
|
|
@@ -9,25 +9,31 @@
|
|
|
9
9
|
import datetime
|
|
10
10
|
import warnings
|
|
11
11
|
|
|
12
|
+
# from anemoi.utils.dates import as_datetime
|
|
13
|
+
from anemoi.utils.dates import as_datetime
|
|
14
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
15
|
+
from anemoi.utils.humanize import print_dates
|
|
12
16
|
|
|
13
|
-
def no_time_zone(date):
|
|
14
|
-
return date.replace(tzinfo=None)
|
|
15
17
|
|
|
18
|
+
def extend(x):
    """Flatten a (possibly nested) specification of dates into datetimes.

    Parameters
    ----------
    x :
        A list or tuple (flattened recursively), a ``"start/end/step"``
        string (expanded to every date from ``start`` to ``end`` inclusive),
        or any single value accepted by ``as_datetime``.

    Yields
    ------
    The dates, as returned by ``as_datetime``.
    """

    if isinstance(x, (list, tuple)):
        for y in x:
            yield from extend(y)
        return

    if isinstance(x, str):
        if "/" in x:
            start, end, step = x.split("/")
            start = as_datetime(start)
            end = as_datetime(end)
            step = frequency_to_timedelta(step)
            while start <= end:
                yield start
                # ``step`` is already a timedelta: add it directly. Wrapping it
                # in ``datetime.timedelta(hours=...)`` raises TypeError.
                start += step
            return

    yield as_datetime(x)
|
|
31
37
|
|
|
32
38
|
|
|
33
39
|
class Dates:
|
|
@@ -59,7 +65,7 @@ class Dates:
|
|
|
59
65
|
def __init__(self, missing=None):
|
|
60
66
|
if not missing:
|
|
61
67
|
missing = []
|
|
62
|
-
self.missing =
|
|
68
|
+
self.missing = list(extend(missing))
|
|
63
69
|
if set(self.missing) - set(self.values):
|
|
64
70
|
warnings.warn(f"Missing dates {self.missing} not in list.")
|
|
65
71
|
|
|
@@ -85,7 +91,7 @@ class Dates:
|
|
|
85
91
|
|
|
86
92
|
class ValuesDates(Dates):
|
|
87
93
|
def __init__(self, values, **kwargs):
|
|
88
|
-
self.values = sorted([
|
|
94
|
+
self.values = sorted([as_datetime(_) for _ in values])
|
|
89
95
|
super().__init__(**kwargs)
|
|
90
96
|
|
|
91
97
|
def __repr__(self):
|
|
@@ -97,7 +103,8 @@ class ValuesDates(Dates):
|
|
|
97
103
|
|
|
98
104
|
class StartEndDates(Dates):
|
|
99
105
|
def __init__(self, start, end, frequency=1, months=None, **kwargs):
|
|
100
|
-
frequency =
|
|
106
|
+
frequency = frequency_to_timedelta(frequency)
|
|
107
|
+
assert isinstance(frequency, datetime.timedelta), frequency
|
|
101
108
|
|
|
102
109
|
def _(x):
|
|
103
110
|
if isinstance(x, str):
|
|
@@ -113,13 +120,13 @@ class StartEndDates(Dates):
|
|
|
113
120
|
if isinstance(end, datetime.date) and not isinstance(end, datetime.datetime):
|
|
114
121
|
end = datetime.datetime(end.year, end.month, end.day)
|
|
115
122
|
|
|
116
|
-
start =
|
|
117
|
-
end =
|
|
123
|
+
start = as_datetime(start)
|
|
124
|
+
end = as_datetime(end)
|
|
118
125
|
|
|
119
126
|
# if end <= start:
|
|
120
127
|
# raise ValueError(f"End date {end} must be after start date {start}")
|
|
121
128
|
|
|
122
|
-
increment =
|
|
129
|
+
increment = frequency
|
|
123
130
|
|
|
124
131
|
self.start = start
|
|
125
132
|
self.end = end
|
|
@@ -145,3 +152,9 @@ class StartEndDates(Dates):
|
|
|
145
152
|
"end": self.end.isoformat(),
|
|
146
153
|
"frequency": f"{self.frequency}h",
|
|
147
154
|
}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
if __name__ == "__main__":
|
|
158
|
+
print_dates([datetime.datetime(2023, 1, 1, 0, 0)])
|
|
159
|
+
s = StartEndDates(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=1)
|
|
160
|
+
print_dates(list(s))
|
anemoi/datasets/dates/groups.py
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import itertools
|
|
10
10
|
|
|
11
11
|
from anemoi.datasets.dates import Dates
|
|
12
|
-
from anemoi.datasets.dates import
|
|
12
|
+
from anemoi.datasets.dates import as_datetime
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class Groups:
|
|
@@ -67,7 +67,7 @@ class Groups:
|
|
|
67
67
|
|
|
68
68
|
class Filter:
|
|
69
69
|
def __init__(self, missing):
|
|
70
|
-
self.missing = [
|
|
70
|
+
self.missing = [as_datetime(m) for m in missing]
|
|
71
71
|
|
|
72
72
|
def __call__(self, dates):
|
|
73
73
|
return [d for d in dates if d not in self.missing]
|