anemoi-datasets 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff compares two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- anemoi/datasets/__main__.py +7 -51
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/__init__.py +5 -59
- anemoi/datasets/commands/copy.py +141 -83
- anemoi/datasets/commands/create.py +14 -3
- anemoi/datasets/commands/inspect/__init__.py +1 -5
- anemoi/datasets/compute/{perturbations.py → recentre.py} +24 -23
- anemoi/datasets/create/__init__.py +3 -0
- anemoi/datasets/create/config.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +7 -3
- anemoi/datasets/create/functions/sources/hindcasts.py +437 -0
- anemoi/datasets/create/functions/sources/mars.py +13 -7
- anemoi/datasets/create/functions/sources/{perturbations.py → recentre.py} +5 -5
- anemoi/datasets/create/input.py +0 -5
- anemoi/datasets/create/loaders.py +36 -0
- anemoi/datasets/create/persistent.py +1 -3
- anemoi/datasets/create/statistics/__init__.py +7 -17
- anemoi/datasets/create/statistics/summary.py +1 -4
- anemoi/datasets/create/writer.py +4 -3
- anemoi/datasets/data/indexing.py +1 -3
- anemoi/datasets/data/stores.py +2 -6
- anemoi/datasets/data/unchecked.py +1 -6
- anemoi/datasets/grids.py +2 -2
- {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/METADATA +30 -21
- {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/RECORD +29 -28
- {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/config.py
CHANGED
@@ -154,10 +154,16 @@ class LoadersConfig(Config):
         self.setdefault("build", Config())
         self.build.setdefault("group_by", "monthly")
         self.build.setdefault("use_grib_paramid", False)
+        self.build.setdefault("variable_naming", "default")
+        variable_naming = dict(
+            param="{param}",
+            param_levelist="{param}_{levelist}",
+            default="{param}_{levelist}",
+        ).get(self.build.variable_naming, self.build.variable_naming)
 
         self.setdefault("output", Config())
         self.output.setdefault("order_by", ["valid_datetime", "param_level", "number"])
-        self.output.setdefault("remapping", Config(param_level="{param}_{levelist}"))
+        self.output.setdefault("remapping", Config(param_level=variable_naming))
         self.output.setdefault("statistics", "param_level")
         self.output.setdefault("chunking", Config(dates=1, ensembles=1))
         self.output.setdefault("dtype", "float32")
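The effect of the new option: `build.variable_naming` selects the template used for variable names via the output remapping, with `param`, `param_levelist`, and `default` as presets and any other string taken as a custom template. A minimal standalone sketch of the lookup (the helper name is illustrative, not part of the package):

    def resolve_variable_naming(setting):
        # Presets mirror the dict in the diff; unknown values pass through
        # unchanged, so a custom template such as "{param}_{levelist}_{number}"
        # can be supplied directly.
        presets = {
            "param": "{param}",
            "param_levelist": "{param}_{levelist}",
            "default": "{param}_{levelist}",
        }
        return presets.get(setting, setting)

    assert resolve_variable_naming("param") == "{param}"
    assert resolve_variable_naming("default") == "{param}_{levelist}"
    assert resolve_variable_naming("{shortName}") == "{shortName}"  # custom template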
anemoi/datasets/create/functions/sources/accumulations.py
CHANGED

@@ -7,6 +7,7 @@
 # nor does it submit to any jurisdiction.
 #
 import datetime
+import logging
 import warnings
 from copy import deepcopy
 
@@ -18,7 +19,7 @@ from climetlab.utils.availability import Availability
 
 from anemoi.datasets.create.utils import to_datetime_list
 
-
+LOG = logging.getLogger(__name__)
 
 
 def member(field):
@@ -73,7 +74,10 @@ class Accumulation:
     def write(self, template):
 
         assert self.startStep != self.endStep, (self.startStep, self.endStep)
-
+        if np.all(self.values < 0):
+            LOG.warning(
+                f"Negative values when computing accumutation for {self.param} ({self.date} {self.time}): min={np.amin(self.values)} max={np.amax(self.values)}"
+            )
 
         self.out.write(
             self.values,
@@ -395,6 +399,7 @@ def accumulations(context, dates, **request):
 
     KWARGS = {
        ("od", "oper"): dict(patch=scda),
+       ("od", "elda"): dict(base_times=(6, 18)),
        ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
        ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
    }
@@ -431,6 +436,5 @@ if __name__ == "__main__":
     dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
     dates = to_datetime_list(dates)
 
-    DEBUG = True
     for f in accumulations(None, dates, **config):
         print(f, f.to_numpy().mean())
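The new `("od", "elda")` entry extends the table that maps a MARS (class, stream) pair to accumulation settings, so the ELDA stream is fetched from the 06/18 UTC base times. A standalone sketch of how that lookup behaves (`scda` is stubbed here; in the module it is a request-patching function):

    def scda(request):
        return request  # stub for the module's request-patching function

    KWARGS = {
        ("od", "oper"): dict(patch=scda),
        ("od", "elda"): dict(base_times=(6, 18)),  # new in 0.3.0
        ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
        ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
    }

    def settings_for(klass, stream):
        # Unlisted pairs fall back to the defaults of the accumulation computation.
        return KWARGS.get((klass, stream), {})

    assert settings_for("od", "elda") == {"base_times": (6, 18)}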
anemoi/datasets/create/functions/sources/hindcasts.py
ADDED

@@ -0,0 +1,437 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+import datetime
+import warnings
+from copy import deepcopy
+
+import climetlab as cml
+import numpy as np
+from climetlab.core.temporary import temp_file
+from climetlab.readers.grib.output import new_grib_output
+from climetlab.utils.availability import Availability
+
+from anemoi.datasets.create.functions.sources.mars import mars
+
+DEBUG = True
+
+
+def member(field):
+    # Bug in eccodes has number=0 randomly
+    number = field.metadata("number")
+    if number is None:
+        number = 0
+    return number
+
+
+class Accumulation:
+    def __init__(self, out, /, param, date, time, number, step, frequency, **kwargs):
+        self.out = out
+        self.param = param
+        self.date = date
+        self.time = time
+        self.steps = step
+        self.number = number
+        self.values = None
+        self.seen = set()
+        self.startStep = None
+        self.endStep = None
+        self.done = False
+        self.frequency = frequency
+        self._check = None
+
+    @property
+    def key(self):
+        return (self.param, self.date, self.time, self.steps, self.number)
+
+    def check(self, field):
+        if self._check is None:
+            self._check = field.as_mars()
+
+            assert self.param == field.metadata("param"), (self.param, field.metadata("param"))
+            assert self.date == field.metadata("date"), (self.date, field.metadata("date"))
+            assert self.time == field.metadata("time"), (self.time, field.metadata("time"))
+            assert self.number == member(field), (self.number, member(field))
+
+            return
+
+        mars = field.as_mars()
+        keys1 = sorted(self._check.keys())
+        keys2 = sorted(mars.keys())
+
+        assert keys1 == keys2, (keys1, keys2)
+
+        for k in keys1:
+            if k not in ("step",):
+                assert self._check[k] == mars[k], (k, self._check[k], mars[k])
+
+    def write(self, template):
+
+        assert self.startStep != self.endStep, (self.startStep, self.endStep)
+        assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
+
+        self.out.write(
+            self.values,
+            template=template,
+            stepType="accum",
+            startStep=self.startStep,
+            endStep=self.endStep,
+        )
+        self.values = None
+        self.done = True
+
+    def add(self, field, values):
+
+        self.check(field)
+
+        step = field.metadata("step")
+        if step not in self.steps:
+            return
+
+        if not np.all(values >= 0):
+            warnings.warn(f"Negative values for {field}: {np.amin(values)} {np.amax(values)}")
+
+        assert not self.done, (self.key, step)
+        assert step not in self.seen, (self.key, step)
+
+        startStep = field.metadata("startStep")
+        endStep = field.metadata("endStep")
+
+        if self.buggy_steps and startStep == endStep:
+            startStep = 0
+
+        assert step == endStep, (startStep, endStep, step)
+
+        self.compute(values, startStep, endStep)
+
+        self.seen.add(step)
+
+        if len(self.seen) == len(self.steps):
+            self.write(template=field)
+
+    @classmethod
+    def mars_date_time_steps(cls, dates, step1, step2, frequency, base_times, adjust_step):
+
+        # assert step1 > 0, (step1, step2, frequency)
+
+        for valid_date in dates:
+            base_date = valid_date - datetime.timedelta(hours=step2)
+            add_step = 0
+            if base_date.hour not in base_times:
+                if not adjust_step:
+                    raise ValueError(
+                        f"Cannot find a base time in {base_times} that validates on {valid_date.isoformat()} for step={step2}"
+                    )
+
+                while base_date.hour not in base_times:
+                    # print(f'{base_date=}, {base_times=}, {add_step=} {frequency=}')
+                    base_date -= datetime.timedelta(hours=1)
+                    add_step += 1
+
+            yield cls._mars_date_time_step(base_date, step1, step2, add_step, frequency)
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({self.key})"
+
+
+class AccumulationFromStart(Accumulation):
+    buggy_steps = True
+
+    def compute(self, values, startStep, endStep):
+
+        assert startStep == 0, startStep
+
+        if self.values is None:
+
+            self.values = np.copy(values)
+            self.startStep = 0
+            self.endStep = endStep
+
+        else:
+            assert endStep != self.endStep, (self.endStep, endStep)
+
+            if endStep > self.endStep:
+                # assert endStep - self.endStep == self.stepping, (self.endStep, endStep, self.stepping)
+                self.values = values - self.values
+                self.startStep = self.endStep
+                self.endStep = endStep
+            else:
+                # assert self.endStep - endStep == self.stepping, (self.endStep, endStep, self.stepping)
+                self.values = self.values - values
+                self.startStep = endStep
+
+            if not np.all(self.values >= 0):
+                warnings.warn(f"Negative values for {self.param}: {np.amin(self.values)} {np.amax(self.values)}")
+                self.values = np.maximum(self.values, 0)
+
+    @classmethod
+    def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
+        assert not frequency, frequency
+
+        steps = (step1 + add_step, step2 + add_step)
+        if steps[0] == 0:
+            steps = (steps[1],)
+
+        return (
+            base_date.year * 10000 + base_date.month * 100 + base_date.day,
+            base_date.hour * 100 + base_date.minute,
+            steps,
+        )
+
+
+class AccumulationFromLastStep(Accumulation):
+    buggy_steps = False
+
+    def compute(self, values, startStep, endStep):
+
+        assert endStep - startStep == self.frequency, (startStep, endStep, self.frequency)
+
+        if self.startStep is None:
+            self.startStep = startStep
+        else:
+            self.startStep = min(self.startStep, startStep)
+
+        if self.endStep is None:
+            self.endStep = endStep
+        else:
+            self.endStep = max(self.endStep, endStep)
+
+        if self.values is None:
+            self.values = np.zeros_like(values)
+
+        self.values += values
+
+    @classmethod
+    def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
+        assert frequency > 0, frequency
+        # assert step1 > 0, (step1, step2, frequency, add_step, base_date)
+
+        steps = []
+        for step in range(step1 + frequency, step2 + frequency, frequency):
+            steps.append(step + add_step)
+        return (
+            base_date.year * 10000 + base_date.month * 100 + base_date.day,
+            base_date.hour * 100 + base_date.minute,
+            tuple(steps),
+        )
+
+
+def identity(x):
+    return x
+
+
+def compute_accumulations(
+    dates,
+    request,
+    user_accumulation_period=6,
+    data_accumulation_period=None,
+    patch=identity,
+    base_times=None,
+):
+    adjust_step = isinstance(user_accumulation_period, int)
+
+    if not isinstance(user_accumulation_period, (list, tuple)):
+        user_accumulation_period = (0, user_accumulation_period)
+
+    assert len(user_accumulation_period) == 2, user_accumulation_period
+    step1, step2 = user_accumulation_period
+    assert step1 < step2, user_accumulation_period
+
+    if base_times is None:
+        base_times = [0, 6, 12, 18]
+
+    base_times = [t // 100 if t > 100 else t for t in base_times]
+
+    AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep
+
+    mars_date_time_steps = AccumulationClass.mars_date_time_steps(
+        dates,
+        step1,
+        step2,
+        data_accumulation_period,
+        base_times,
+        adjust_step,
+    )
+
+    request = deepcopy(request)
+
+    param = request["param"]
+    if not isinstance(param, (list, tuple)):
+        param = [param]
+
+    number = request.get("number", [0])
+    assert isinstance(number, (list, tuple))
+
+    frequency = data_accumulation_period
+
+    type_ = request.get("type", "an")
+    if type_ == "an":
+        type_ = "fc"
+
+    request.update({"type": type_, "levtype": "sfc"})
+
+    tmp = temp_file()
+    path = tmp.path
+    out = new_grib_output(path)
+
+    requests = []
+
+    accumulations = {}
+
+    for date, time, steps in mars_date_time_steps:
+        for p in param:
+            for n in number:
+                requests.append(
+                    patch(
+                        {
+                            "param": p,
+                            "date": date,
+                            "time": time,
+                            "step": sorted(steps),
+                            "number": n,
+                        }
+                    )
+                )
+
+    compressed = Availability(requests)
+    ds = cml.load_source("empty")
+    for r in compressed.iterate():
+        request.update(r)
+        print("🌧️", request)
+        ds = ds + cml.load_source("mars", **request)
+
+    accumulations = {}
+    for a in [AccumulationClass(out, frequency=frequency, **r) for r in requests]:
+        for s in a.steps:
+            key = (a.param, a.date, a.time, s, a.number)
+            accumulations.setdefault(key, []).append(a)
+
+    for field in ds:
+        key = (
+            field.metadata("param"),
+            field.metadata("date"),
+            field.metadata("time"),
+            field.metadata("step"),
+            member(field),
+        )
+        values = field.values  # optimisation
+        assert accumulations[key], key
+        for a in accumulations[key]:
+            a.add(field, values)
+
+    for acc in accumulations.values():
+        for a in acc:
+            assert a.done, (a.key, a.seen, a.steps)
+
+    out.close()
+
+    ds = cml.load_source("file", path)
+
+    assert len(ds) / len(param) / len(number) == len(dates), (
+        len(ds),
+        len(param),
+        len(dates),
+    )
+    ds._tmp = tmp
+
+    return ds
+
+
+def to_list(x):
+    if isinstance(x, (list, tuple)):
+        return x
+    return [x]
+
+
+def normalise_time_to_hours(r):
+    r = deepcopy(r)
+    if "time" not in r:
+        return r
+
+    times = []
+    for t in to_list(r["time"]):
+        assert len(t) == 4, r
+        assert t.endswith("00"), r
+        times.append(int(t) // 100)
+    r["time"] = tuple(times)
+    return r
+
+
+def normalise_number(r):
+    if "number" not in r:
+        return r
+    number = r["number"]
+    number = to_list(number)
+
+    if len(number) > 4 and (number[1] == "to" and number[3] == "by"):
+        return list(range(int(number[0]), int(number[2]) + 1, int(number[4])))
+
+    if len(number) > 2 and number[1] == "to":
+        return list(range(int(number[0]), int(number[2]) + 1))
+
+    r["number"] = number
+    return r
+
+
+class HindcastCompute:
+    def __init__(self, base_times, available_steps, request):
+        self.base_times = base_times
+        self.available_steps = available_steps
+        self.request = request
+
+    def compute_hindcast(self, date):
+        for step in self.available_steps:
+            start_date = date - datetime.timedelta(hours=step)
+            hours = start_date.hour
+            if hours in self.base_times:
+                r = deepcopy(self.request)
+                r["date"] = start_date
+                r["time"] = f"{start_date.hour:02d}00"
+                r["step"] = step
+                return r
+        raise ValueError(
+            f"Cannot find data for {self.request} for {date} (base_times={self.base_times}, available_steps={self.available_steps})"
+        )
+
+
+def use_reference_year(reference_year, request):
+    request = deepcopy(request)
+    hdate = request.pop("date")
+    date = datetime.datetime(reference_year, hdate.month, hdate.day)
+    request.update(date=date.strftime("%Y-%m-%d"), hdate=hdate.strftime("%Y-%m-%d"))
+    return request
+
+
+def hindcasts(context, dates, **request):
+    request["param"] = to_list(request["param"])
+    request["step"] = to_list(request["step"])
+    request["step"] = [int(_) for _ in request["step"]]
+
+    if request.get("stream") == "enfh" and "base_times" not in request:
+        request["base_times"] = [0]
+
+    available_steps = request.pop("step")
+    available_steps = to_list(available_steps)
+
+    base_times = request.pop("base_times")
+
+    reference_year = request.pop("reference_year")
+
+    context.trace("H️", f"hindcast {request} {base_times} {available_steps} {reference_year}")
+
+    c = HindcastCompute(base_times, available_steps, request)
+    requests = []
+    for d in dates:
+        req = c.compute_hindcast(d)
+        req = use_reference_year(reference_year, req)
+
+        requests.append(req)
+    return mars(context, dates, *requests, date_key="hdate")
+
+
+execute = hindcasts
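The heart of the new source is the base-date search in `HindcastCompute.compute_hindcast`: for each requested valid date, it walks the available steps until `valid_date - step` lands on an allowed base time, after which `use_reference_year` moves the original date to `hdate` and writes the reference year under `date`. A standalone sketch of the search (illustrative, not the packaged class):

    import datetime

    def find_base(valid_date, base_times, available_steps):
        # Try each forecast step until the implied run time is an allowed base time.
        for step in available_steps:
            start = valid_date - datetime.timedelta(hours=step)
            if start.hour in base_times:
                return start, step
        raise ValueError(f"No base time in {base_times} reaches {valid_date}")

    # A valid time of 06Z with runs only at 00Z resolves to the 00Z run at step 6.
    base, step = find_base(datetime.datetime(2022, 1, 2, 6), base_times=[0], available_steps=[6, 12])
    assert (base.hour, step) == (0, 6)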
anemoi/datasets/create/functions/sources/mars.py
CHANGED

@@ -42,15 +42,21 @@ def normalise_time_delta(t):
     return t
 
 
-def _expand_mars_request(request, date):
+def _expand_mars_request(request, date, date_key="date"):
     requests = []
     step = to_list(request.get("step", [0]))
     for s in step:
         r = deepcopy(request)
-        base = date - datetime.timedelta(hours=int(s))
+
+        if isinstance(s, str) and "-" in s:
+            assert s.count("-") == 1, s
+        # this takes care of the cases where the step is a period such as 0-24 or 12-24
+        hours = int(str(s).split("-")[-1])
+
+        base = date - datetime.timedelta(hours=hours)
         r.update(
             {
-                "date": base.strftime("%Y%m%d"),
+                date_key: base.strftime("%Y%m%d"),
                 "time": base.strftime("%H%M"),
                 "step": s,
             }
@@ -66,13 +72,13 @@ def _expand_mars_request(request, date):
     return requests
 
 
-def factorise_requests(dates, *requests):
+def factorise_requests(dates, *requests, date_key="date"):
     updates = []
     for req in requests:
         # req = normalise_request(req)
 
         for d in dates:
-            updates += _expand_mars_request(req, date=d)
+            updates += _expand_mars_request(req, date=d, date_key=date_key)
 
     compressed = Availability(updates)
     for r in compressed.iterate():
@@ -96,11 +102,11 @@ def use_grib_paramid(r):
     return r
 
 
-def mars(context, dates, *requests, **kwargs):
+def mars(context, dates, *requests, date_key="date", **kwargs):
     if not requests:
         requests = [kwargs]
 
-    requests = factorise_requests(dates, *requests)
+    requests = factorise_requests(dates, *requests, date_key=date_key)
     ds = load_source("empty")
     for r in requests:
         r = {k: v for k, v in r.items() if v != ("-",)}
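The `date_key` parameter threads from `mars()` through `factorise_requests()` into `_expand_mars_request()`, so the computed base date can be written under a caller-chosen key; the hindcasts source above passes `date_key="hdate"`. A minimal sketch of the expansion (an illustrative helper, not the module's function):

    import datetime
    from copy import deepcopy

    def expand(request, date, date_key="date"):
        r = deepcopy(request)
        r[date_key] = date.strftime("%Y%m%d")  # "hdate" for hindcast requests
        r["time"] = date.strftime("%H%M")
        return r

    req = expand({"param": "2t"}, datetime.datetime(2022, 1, 1), date_key="hdate")
    assert req["hdate"] == "20220101" and "date" not in req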
anemoi/datasets/create/functions/sources/{perturbations.py → recentre.py}
RENAMED

@@ -8,7 +8,7 @@
 #
 from copy import deepcopy
 
-from anemoi.datasets.compute.
+from anemoi.datasets.compute.recentre import recentre as _recentre
 
 from .mars import mars
 
@@ -50,10 +50,10 @@ def load_if_needed(context, dates, dict_or_dataset):
     return dict_or_dataset
 
 
-def
+def recentre(context, dates, members, centre, alpha=1.0, remapping={}, patches={}):
     members = load_if_needed(context, dates, members)
-
-    return
+    centre = load_if_needed(context, dates, centre)
+    return _recentre(members=members, centre=centre, alpha=alpha)
 
 
-execute =
+execute = recentre
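For intuition: recentring an ensemble re-attaches the members' perturbations to a new centre field. A common formulation, assumed here for illustration rather than taken from the package's `compute/recentre.py`, is `member' = centre + alpha * (member - mean(members))`:

    import numpy as np

    def recentre(members, centre, alpha=1.0):
        # Remove the current ensemble mean and add the new centre,
        # scaling the perturbations by alpha (illustrative sketch only).
        mean = members.mean(axis=0)
        return centre + alpha * (members - mean)

    members = np.array([[1.0, 2.0], [3.0, 4.0]])
    centre = np.array([10.0, 10.0])
    out = recentre(members, centre)
    assert np.allclose(out.mean(axis=0), centre)  # ensemble is now centred on `centre`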
anemoi/datasets/create/input.py
CHANGED
@@ -353,11 +353,6 @@ class Result(HasCoordsMixin):
         """Returns a dictionary with the parameters needed to retrieve the data."""
         return _data_request(self.datasource)
 
-    @property
-    def variables_with_nans(self):
-        print("❌❌HERE")
-        return
-
     def get_cube(self):
         trace("🧊", f"getting cube from {self.__class__.__name__}")
         ds = self.datasource
anemoi/datasets/create/loaders.py
CHANGED

@@ -25,6 +25,7 @@ from anemoi.datasets.dates.groups import Groups
 from .check import DatasetName
 from .check import check_data_values
 from .chunks import ChunkFilter
+from .config import DictObj
 from .config import build_output
 from .config import loader_config
 from .input import build_input
@@ -55,6 +56,8 @@ class GenericDatasetHandler:
         self.path = path
         self.kwargs = kwargs
         self.print = print
+        if "test" in kwargs:
+            self.test = kwargs["test"]
 
     @classmethod
     def from_config(cls, *, config, path, print=print, **kwargs):
@@ -157,7 +160,35 @@ class InitialiserLoader(Loader):
 
         self.tmp_statistics.delete()
 
+        if self.test:
+
+            def test_dates(cfg, n=4):
+                LOG.warn("Running in test mode. Changing the list of dates to use only 4.")
+                groups = Groups(**cfg)
+                dates = groups.dates
+                return dict(start=dates[0], end=dates[n - 1], frequency=dates.frequency, group_by=n)
+
+            self.main_config.dates = test_dates(self.main_config.dates)
+
+            def set_to_test_mode(obj):
+                if isinstance(obj, (list, tuple)):
+                    for v in obj:
+                        set_to_test_mode(v)
+                    return
+                if isinstance(obj, (dict, DictObj)):
+                    if "grid" in obj:
+                        obj["grid"] = "20./20."
+                        LOG.warn(f"Running in test mode. Setting grid to {obj['grid']}")
+                    if "number" in obj:
+                        obj["number"] = obj["number"][0:3]
+                        LOG.warn(f"Running in test mode. Setting number to {obj['number']}")
+                    for k, v in obj.items():
+                        set_to_test_mode(v)
+
+            set_to_test_mode(self.main_config)
+
         LOG.info(self.main_config.dates)
+
         self.groups = Groups(**self.main_config.dates)
 
         self.output = build_output(self.main_config.output, parent=self)
@@ -615,6 +646,11 @@ class GenericAdditions(GenericDatasetHandler):
         assert len(found) + len(missing) == len(self.dates), (len(found), len(missing), len(self.dates))
         assert found.union(missing) == set(self.dates), (found, missing, set(self.dates))
 
+        if len(ifound) < 2:
+            LOG.warn(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
+            self.tmp_storage.delete()
+            return
+
         mask = sorted(list(ifound))
         for k in ["minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             agg[k] = agg[k][mask, ...]
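The test mode added here shrinks a recipe before any data is loaded: the date list is cut to four dates, and a recursive walk coarsens the grid and truncates the ensemble numbers wherever they appear in the config. A simplified, runnable sketch of that walk (plain dicts stand in for `DictObj`):

    def set_to_test_mode(obj):
        # Recurse through lists and dicts, shrinking expensive parameters in place.
        if isinstance(obj, (list, tuple)):
            for v in obj:
                set_to_test_mode(v)
            return
        if isinstance(obj, dict):
            if "grid" in obj:
                obj["grid"] = "20./20."            # coarse grid for fast test runs
            if "number" in obj:
                obj["number"] = obj["number"][:3]  # keep only three ensemble members
            for v in obj.values():
                set_to_test_mode(v)

    config = {"input": [{"mars": {"grid": "0.25/0.25", "number": list(range(10))}}]}
    set_to_test_mode(config)
    assert config["input"][0]["mars"]["grid"] == "20./20."
    assert config["input"][0]["mars"]["number"] == [0, 1, 2]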
anemoi/datasets/create/persistent.py
CHANGED

@@ -28,9 +28,7 @@ class PersistentDict:
     # Used in parrallel, during data loading,
     # to write data in pickle files.
     def __init__(self, directory, create=True):
-        """dirname: str
-        The directory where the data will be stored.
-        """
+        """dirname: str The directory where the data will be stored."""
         self.dirname = directory
         self.name, self.ext = os.path.splitext(os.path.basename(self.dirname))
         if create: