anemoi-datasets 0.3.10__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +9 -9
- anemoi/datasets/commands/scan.py +4 -4
- anemoi/datasets/compute/recentre.py +14 -9
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +6 -27
- anemoi/datasets/create/functions/__init__.py +3 -3
- anemoi/datasets/create/functions/filters/empty.py +4 -4
- anemoi/datasets/create/functions/filters/rename.py +14 -6
- anemoi/datasets/create/functions/filters/rotate_winds.py +16 -60
- anemoi/datasets/create/functions/filters/unrotate_winds.py +14 -64
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +38 -56
- anemoi/datasets/create/functions/sources/constants.py +11 -4
- anemoi/datasets/create/functions/sources/empty.py +2 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -3
- anemoi/datasets/create/functions/sources/grib.py +8 -4
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -364
- anemoi/datasets/create/functions/sources/mars.py +57 -26
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -3
- anemoi/datasets/create/functions/sources/tendencies.py +7 -7
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +309 -191
- anemoi/datasets/create/loaders.py +155 -77
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +51 -17
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +5 -52
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/dataset.py +4 -4
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +37 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +24 -8
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.3.10.dist-info/RECORD +0 -73
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
|
@@ -7,21 +7,13 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
import datetime
|
|
10
|
-
import warnings
|
|
11
|
-
from copy import deepcopy
|
|
12
|
-
|
|
13
|
-
import climetlab as cml
|
|
14
|
-
import numpy as np
|
|
15
|
-
from climetlab.core.temporary import temp_file
|
|
16
|
-
from climetlab.readers.grib.output import new_grib_output
|
|
17
|
-
from climetlab.utils.availability import Availability
|
|
18
10
|
|
|
19
11
|
from anemoi.datasets.create.functions.sources.mars import mars
|
|
20
12
|
|
|
21
13
|
DEBUG = True
|
|
22
14
|
|
|
23
15
|
|
|
24
|
-
def
|
|
16
|
+
def _member(field):
|
|
25
17
|
# Bug in eccodes has number=0 randomly
|
|
26
18
|
number = field.metadata("number")
|
|
27
19
|
if number is None:
|
|
@@ -29,355 +21,12 @@ def member(field):
|
|
|
29
21
|
return number
|
|
30
22
|
|
|
31
23
|
|
|
32
|
-
|
|
33
|
-
def __init__(self, out, /, param, date, time, number, step, frequency, **kwargs):
|
|
34
|
-
self.out = out
|
|
35
|
-
self.param = param
|
|
36
|
-
self.date = date
|
|
37
|
-
self.time = time
|
|
38
|
-
self.steps = step
|
|
39
|
-
self.number = number
|
|
40
|
-
self.values = None
|
|
41
|
-
self.seen = set()
|
|
42
|
-
self.startStep = None
|
|
43
|
-
self.endStep = None
|
|
44
|
-
self.done = False
|
|
45
|
-
self.frequency = frequency
|
|
46
|
-
self._check = None
|
|
47
|
-
|
|
48
|
-
@property
|
|
49
|
-
def key(self):
|
|
50
|
-
return (self.param, self.date, self.time, self.steps, self.number)
|
|
51
|
-
|
|
52
|
-
def check(self, field):
|
|
53
|
-
if self._check is None:
|
|
54
|
-
self._check = field.as_mars()
|
|
55
|
-
|
|
56
|
-
assert self.param == field.metadata("param"), (self.param, field.metadata("param"))
|
|
57
|
-
assert self.date == field.metadata("date"), (self.date, field.metadata("date"))
|
|
58
|
-
assert self.time == field.metadata("time"), (self.time, field.metadata("time"))
|
|
59
|
-
assert self.number == member(field), (self.number, member(field))
|
|
60
|
-
|
|
61
|
-
return
|
|
62
|
-
|
|
63
|
-
mars = field.as_mars()
|
|
64
|
-
keys1 = sorted(self._check.keys())
|
|
65
|
-
keys2 = sorted(mars.keys())
|
|
66
|
-
|
|
67
|
-
assert keys1 == keys2, (keys1, keys2)
|
|
68
|
-
|
|
69
|
-
for k in keys1:
|
|
70
|
-
if k not in ("step",):
|
|
71
|
-
assert self._check[k] == mars[k], (k, self._check[k], mars[k])
|
|
72
|
-
|
|
73
|
-
def write(self, template):
|
|
74
|
-
|
|
75
|
-
assert self.startStep != self.endStep, (self.startStep, self.endStep)
|
|
76
|
-
assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
|
|
77
|
-
|
|
78
|
-
self.out.write(
|
|
79
|
-
self.values,
|
|
80
|
-
template=template,
|
|
81
|
-
stepType="accum",
|
|
82
|
-
startStep=self.startStep,
|
|
83
|
-
endStep=self.endStep,
|
|
84
|
-
)
|
|
85
|
-
self.values = None
|
|
86
|
-
self.done = True
|
|
87
|
-
|
|
88
|
-
def add(self, field, values):
|
|
89
|
-
|
|
90
|
-
self.check(field)
|
|
91
|
-
|
|
92
|
-
step = field.metadata("step")
|
|
93
|
-
if step not in self.steps:
|
|
94
|
-
return
|
|
95
|
-
|
|
96
|
-
if not np.all(values >= 0):
|
|
97
|
-
warnings.warn(f"Negative values for {field}: {np.amin(values)} {np.amax(values)}")
|
|
98
|
-
|
|
99
|
-
assert not self.done, (self.key, step)
|
|
100
|
-
assert step not in self.seen, (self.key, step)
|
|
101
|
-
|
|
102
|
-
startStep = field.metadata("startStep")
|
|
103
|
-
endStep = field.metadata("endStep")
|
|
104
|
-
|
|
105
|
-
if self.buggy_steps and startStep == endStep:
|
|
106
|
-
startStep = 0
|
|
107
|
-
|
|
108
|
-
assert step == endStep, (startStep, endStep, step)
|
|
109
|
-
|
|
110
|
-
self.compute(values, startStep, endStep)
|
|
111
|
-
|
|
112
|
-
self.seen.add(step)
|
|
113
|
-
|
|
114
|
-
if len(self.seen) == len(self.steps):
|
|
115
|
-
self.write(template=field)
|
|
116
|
-
|
|
117
|
-
@classmethod
|
|
118
|
-
def mars_date_time_steps(cls, dates, step1, step2, frequency, base_times, adjust_step):
|
|
119
|
-
|
|
120
|
-
# assert step1 > 0, (step1, step2, frequency)
|
|
121
|
-
|
|
122
|
-
for valid_date in dates:
|
|
123
|
-
base_date = valid_date - datetime.timedelta(hours=step2)
|
|
124
|
-
add_step = 0
|
|
125
|
-
if base_date.hour not in base_times:
|
|
126
|
-
if not adjust_step:
|
|
127
|
-
raise ValueError(
|
|
128
|
-
f"Cannot find a base time in {base_times} that validates on {valid_date.isoformat()} for step={step2}"
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
while base_date.hour not in base_times:
|
|
132
|
-
# print(f'{base_date=}, {base_times=}, {add_step=} {frequency=}')
|
|
133
|
-
base_date -= datetime.timedelta(hours=1)
|
|
134
|
-
add_step += 1
|
|
135
|
-
|
|
136
|
-
yield cls._mars_date_time_step(base_date, step1, step2, add_step, frequency)
|
|
137
|
-
|
|
138
|
-
def __repr__(self) -> str:
|
|
139
|
-
return f"{self.__class__.__name__}({self.key})"
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
class AccumulationFromStart(Accumulation):
|
|
143
|
-
buggy_steps = True
|
|
144
|
-
|
|
145
|
-
def compute(self, values, startStep, endStep):
|
|
146
|
-
|
|
147
|
-
assert startStep == 0, startStep
|
|
148
|
-
|
|
149
|
-
if self.values is None:
|
|
150
|
-
|
|
151
|
-
self.values = np.copy(values)
|
|
152
|
-
self.startStep = 0
|
|
153
|
-
self.endStep = endStep
|
|
154
|
-
|
|
155
|
-
else:
|
|
156
|
-
assert endStep != self.endStep, (self.endStep, endStep)
|
|
157
|
-
|
|
158
|
-
if endStep > self.endStep:
|
|
159
|
-
# assert endStep - self.endStep == self.stepping, (self.endStep, endStep, self.stepping)
|
|
160
|
-
self.values = values - self.values
|
|
161
|
-
self.startStep = self.endStep
|
|
162
|
-
self.endStep = endStep
|
|
163
|
-
else:
|
|
164
|
-
# assert self.endStep - endStep == self.stepping, (self.endStep, endStep, self.stepping)
|
|
165
|
-
self.values = self.values - values
|
|
166
|
-
self.startStep = endStep
|
|
167
|
-
|
|
168
|
-
if not np.all(self.values >= 0):
|
|
169
|
-
warnings.warn(f"Negative values for {self.param}: {np.amin(self.values)} {np.amax(self.values)}")
|
|
170
|
-
self.values = np.maximum(self.values, 0)
|
|
171
|
-
|
|
172
|
-
@classmethod
|
|
173
|
-
def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
|
|
174
|
-
assert not frequency, frequency
|
|
175
|
-
|
|
176
|
-
steps = (step1 + add_step, step2 + add_step)
|
|
177
|
-
if steps[0] == 0:
|
|
178
|
-
steps = (steps[1],)
|
|
179
|
-
|
|
180
|
-
return (
|
|
181
|
-
base_date.year * 10000 + base_date.month * 100 + base_date.day,
|
|
182
|
-
base_date.hour * 100 + base_date.minute,
|
|
183
|
-
steps,
|
|
184
|
-
)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
class AccumulationFromLastStep(Accumulation):
|
|
188
|
-
buggy_steps = False
|
|
189
|
-
|
|
190
|
-
def compute(self, values, startStep, endStep):
|
|
191
|
-
|
|
192
|
-
assert endStep - startStep == self.frequency, (startStep, endStep, self.frequency)
|
|
193
|
-
|
|
194
|
-
if self.startStep is None:
|
|
195
|
-
self.startStep = startStep
|
|
196
|
-
else:
|
|
197
|
-
self.startStep = min(self.startStep, startStep)
|
|
198
|
-
|
|
199
|
-
if self.endStep is None:
|
|
200
|
-
self.endStep = endStep
|
|
201
|
-
else:
|
|
202
|
-
self.endStep = max(self.endStep, endStep)
|
|
203
|
-
|
|
204
|
-
if self.values is None:
|
|
205
|
-
self.values = np.zeros_like(values)
|
|
206
|
-
|
|
207
|
-
self.values += values
|
|
208
|
-
|
|
209
|
-
@classmethod
|
|
210
|
-
def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
|
|
211
|
-
assert frequency > 0, frequency
|
|
212
|
-
# assert step1 > 0, (step1, step2, frequency, add_step, base_date)
|
|
213
|
-
|
|
214
|
-
steps = []
|
|
215
|
-
for step in range(step1 + frequency, step2 + frequency, frequency):
|
|
216
|
-
steps.append(step + add_step)
|
|
217
|
-
return (
|
|
218
|
-
base_date.year * 10000 + base_date.month * 100 + base_date.day,
|
|
219
|
-
base_date.hour * 100 + base_date.minute,
|
|
220
|
-
tuple(steps),
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def identity(x):
|
|
225
|
-
return x
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def compute_accumulations(
|
|
229
|
-
dates,
|
|
230
|
-
request,
|
|
231
|
-
user_accumulation_period=6,
|
|
232
|
-
data_accumulation_period=None,
|
|
233
|
-
patch=identity,
|
|
234
|
-
base_times=None,
|
|
235
|
-
):
|
|
236
|
-
adjust_step = isinstance(user_accumulation_period, int)
|
|
237
|
-
|
|
238
|
-
if not isinstance(user_accumulation_period, (list, tuple)):
|
|
239
|
-
user_accumulation_period = (0, user_accumulation_period)
|
|
240
|
-
|
|
241
|
-
assert len(user_accumulation_period) == 2, user_accumulation_period
|
|
242
|
-
step1, step2 = user_accumulation_period
|
|
243
|
-
assert step1 < step2, user_accumulation_period
|
|
244
|
-
|
|
245
|
-
if base_times is None:
|
|
246
|
-
base_times = [0, 6, 12, 18]
|
|
247
|
-
|
|
248
|
-
base_times = [t // 100 if t > 100 else t for t in base_times]
|
|
249
|
-
|
|
250
|
-
AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep
|
|
251
|
-
|
|
252
|
-
mars_date_time_steps = AccumulationClass.mars_date_time_steps(
|
|
253
|
-
dates,
|
|
254
|
-
step1,
|
|
255
|
-
step2,
|
|
256
|
-
data_accumulation_period,
|
|
257
|
-
base_times,
|
|
258
|
-
adjust_step,
|
|
259
|
-
)
|
|
260
|
-
|
|
261
|
-
request = deepcopy(request)
|
|
262
|
-
|
|
263
|
-
param = request["param"]
|
|
264
|
-
if not isinstance(param, (list, tuple)):
|
|
265
|
-
param = [param]
|
|
266
|
-
|
|
267
|
-
number = request.get("number", [0])
|
|
268
|
-
assert isinstance(number, (list, tuple))
|
|
269
|
-
|
|
270
|
-
frequency = data_accumulation_period
|
|
271
|
-
|
|
272
|
-
type_ = request.get("type", "an")
|
|
273
|
-
if type_ == "an":
|
|
274
|
-
type_ = "fc"
|
|
275
|
-
|
|
276
|
-
request.update({"type": type_, "levtype": "sfc"})
|
|
277
|
-
|
|
278
|
-
tmp = temp_file()
|
|
279
|
-
path = tmp.path
|
|
280
|
-
out = new_grib_output(path)
|
|
281
|
-
|
|
282
|
-
requests = []
|
|
283
|
-
|
|
284
|
-
accumulations = {}
|
|
285
|
-
|
|
286
|
-
for date, time, steps in mars_date_time_steps:
|
|
287
|
-
for p in param:
|
|
288
|
-
for n in number:
|
|
289
|
-
requests.append(
|
|
290
|
-
patch(
|
|
291
|
-
{
|
|
292
|
-
"param": p,
|
|
293
|
-
"date": date,
|
|
294
|
-
"time": time,
|
|
295
|
-
"step": sorted(steps),
|
|
296
|
-
"number": n,
|
|
297
|
-
}
|
|
298
|
-
)
|
|
299
|
-
)
|
|
300
|
-
|
|
301
|
-
compressed = Availability(requests)
|
|
302
|
-
ds = cml.load_source("empty")
|
|
303
|
-
for r in compressed.iterate():
|
|
304
|
-
request.update(r)
|
|
305
|
-
print("🌧️", request)
|
|
306
|
-
ds = ds + cml.load_source("mars", **request)
|
|
307
|
-
|
|
308
|
-
accumulations = {}
|
|
309
|
-
for a in [AccumulationClass(out, frequency=frequency, **r) for r in requests]:
|
|
310
|
-
for s in a.steps:
|
|
311
|
-
key = (a.param, a.date, a.time, s, a.number)
|
|
312
|
-
accumulations.setdefault(key, []).append(a)
|
|
313
|
-
|
|
314
|
-
for field in ds:
|
|
315
|
-
key = (
|
|
316
|
-
field.metadata("param"),
|
|
317
|
-
field.metadata("date"),
|
|
318
|
-
field.metadata("time"),
|
|
319
|
-
field.metadata("step"),
|
|
320
|
-
member(field),
|
|
321
|
-
)
|
|
322
|
-
values = field.values # optimisation
|
|
323
|
-
assert accumulations[key], key
|
|
324
|
-
for a in accumulations[key]:
|
|
325
|
-
a.add(field, values)
|
|
326
|
-
|
|
327
|
-
for acc in accumulations.values():
|
|
328
|
-
for a in acc:
|
|
329
|
-
assert a.done, (a.key, a.seen, a.steps)
|
|
330
|
-
|
|
331
|
-
out.close()
|
|
332
|
-
|
|
333
|
-
ds = cml.load_source("file", path)
|
|
334
|
-
|
|
335
|
-
assert len(ds) / len(param) / len(number) == len(dates), (
|
|
336
|
-
len(ds),
|
|
337
|
-
len(param),
|
|
338
|
-
len(dates),
|
|
339
|
-
)
|
|
340
|
-
ds._tmp = tmp
|
|
341
|
-
|
|
342
|
-
return ds
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
def to_list(x):
|
|
24
|
+
def _to_list(x):
|
|
346
25
|
if isinstance(x, (list, tuple)):
|
|
347
26
|
return x
|
|
348
27
|
return [x]
|
|
349
28
|
|
|
350
29
|
|
|
351
|
-
def normalise_time_to_hours(r):
|
|
352
|
-
r = deepcopy(r)
|
|
353
|
-
if "time" not in r:
|
|
354
|
-
return r
|
|
355
|
-
|
|
356
|
-
times = []
|
|
357
|
-
for t in to_list(r["time"]):
|
|
358
|
-
assert len(t) == 4, r
|
|
359
|
-
assert t.endswith("00"), r
|
|
360
|
-
times.append(int(t) // 100)
|
|
361
|
-
r["time"] = tuple(times)
|
|
362
|
-
return r
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
def normalise_number(r):
|
|
366
|
-
if "number" not in r:
|
|
367
|
-
return r
|
|
368
|
-
number = r["number"]
|
|
369
|
-
number = to_list(number)
|
|
370
|
-
|
|
371
|
-
if len(number) > 4 and (number[1] == "to" and number[3] == "by"):
|
|
372
|
-
return list(range(int(number[0]), int(number[2]) + 1, int(number[4])))
|
|
373
|
-
|
|
374
|
-
if len(number) > 2 and number[1] == "to":
|
|
375
|
-
return list(range(int(number[0]), int(number[2]) + 1))
|
|
376
|
-
|
|
377
|
-
r["number"] = number
|
|
378
|
-
return r
|
|
379
|
-
|
|
380
|
-
|
|
381
30
|
class HindcastCompute:
|
|
382
31
|
def __init__(self, base_times, available_steps, request):
|
|
383
32
|
self.base_times = base_times
|
|
@@ -385,22 +34,34 @@ class HindcastCompute:
|
|
|
385
34
|
self.request = request
|
|
386
35
|
|
|
387
36
|
def compute_hindcast(self, date):
|
|
388
|
-
|
|
37
|
+
result = []
|
|
38
|
+
for step in sorted(self.available_steps): # Use the shortest step
|
|
389
39
|
start_date = date - datetime.timedelta(hours=step)
|
|
390
40
|
hours = start_date.hour
|
|
391
41
|
if hours in self.base_times:
|
|
392
|
-
r =
|
|
42
|
+
r = self.request.copy()
|
|
393
43
|
r["date"] = start_date
|
|
394
44
|
r["time"] = f"{start_date.hour:02d}00"
|
|
395
45
|
r["step"] = step
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
46
|
+
result.append(r)
|
|
47
|
+
|
|
48
|
+
if not result:
|
|
49
|
+
raise ValueError(
|
|
50
|
+
f"Cannot find data for {self.request} for {date} (base_times={self.base_times}, "
|
|
51
|
+
f"available_steps={self.available_steps})"
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
if len(result) > 1:
|
|
55
|
+
raise ValueError(
|
|
56
|
+
f"Multiple requests for {self.request} for {date} (base_times={self.base_times}, "
|
|
57
|
+
f"available_steps={self.available_steps})"
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
return result[0]
|
|
400
61
|
|
|
401
62
|
|
|
402
63
|
def use_reference_year(reference_year, request):
|
|
403
|
-
request =
|
|
64
|
+
request = request.copy()
|
|
404
65
|
hdate = request.pop("date")
|
|
405
66
|
date = datetime.datetime(reference_year, hdate.month, hdate.day)
|
|
406
67
|
request.update(date=date.strftime("%Y-%m-%d"), hdate=hdate.strftime("%Y-%m-%d"))
|
|
@@ -408,15 +69,15 @@ def use_reference_year(reference_year, request):
|
|
|
408
69
|
|
|
409
70
|
|
|
410
71
|
def hindcasts(context, dates, **request):
|
|
411
|
-
request["param"] =
|
|
412
|
-
request["step"] =
|
|
72
|
+
request["param"] = _to_list(request["param"])
|
|
73
|
+
request["step"] = _to_list(request["step"])
|
|
413
74
|
request["step"] = [int(_) for _ in request["step"]]
|
|
414
75
|
|
|
415
76
|
if request.get("stream") == "enfh" and "base_times" not in request:
|
|
416
77
|
request["base_times"] = [0]
|
|
417
78
|
|
|
418
79
|
available_steps = request.pop("step")
|
|
419
|
-
available_steps =
|
|
80
|
+
available_steps = _to_list(available_steps)
|
|
420
81
|
|
|
421
82
|
base_times = request.pop("base_times")
|
|
422
83
|
|
|
@@ -431,7 +92,14 @@ def hindcasts(context, dates, **request):
|
|
|
431
92
|
req = use_reference_year(reference_year, req)
|
|
432
93
|
|
|
433
94
|
requests.append(req)
|
|
434
|
-
|
|
95
|
+
|
|
96
|
+
return mars(
|
|
97
|
+
context,
|
|
98
|
+
dates,
|
|
99
|
+
*requests,
|
|
100
|
+
date_key="hdate",
|
|
101
|
+
request_already_using_valid_datetime=True,
|
|
102
|
+
)
|
|
435
103
|
|
|
436
104
|
|
|
437
105
|
execute = hindcasts
|
|
@@ -7,11 +7,10 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
import datetime
|
|
10
|
-
from copy import deepcopy
|
|
11
10
|
|
|
12
11
|
from anemoi.utils.humanize import did_you_mean
|
|
13
|
-
from
|
|
14
|
-
from
|
|
12
|
+
from earthkit.data import from_source
|
|
13
|
+
from earthkit.data.utils.availability import Availability
|
|
15
14
|
|
|
16
15
|
from anemoi.datasets.create.utils import to_datetime_list
|
|
17
16
|
|
|
@@ -43,25 +42,27 @@ def normalise_time_delta(t):
|
|
|
43
42
|
return t
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def _expand_mars_request(request, date, date_key="date"):
|
|
45
|
+
def _expand_mars_request(request, date, request_already_using_valid_datetime=False, date_key="date"):
|
|
47
46
|
requests = []
|
|
48
47
|
step = to_list(request.get("step", [0]))
|
|
49
48
|
for s in step:
|
|
50
|
-
r =
|
|
51
|
-
|
|
52
|
-
if
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
49
|
+
r = request.copy()
|
|
50
|
+
|
|
51
|
+
if not request_already_using_valid_datetime:
|
|
52
|
+
|
|
53
|
+
if isinstance(s, str) and "-" in s:
|
|
54
|
+
assert s.count("-") == 1, s
|
|
55
|
+
# this takes care of the cases where the step is a period such as 0-24 or 12-24
|
|
56
|
+
hours = int(str(s).split("-")[-1])
|
|
57
|
+
|
|
58
|
+
base = date - datetime.timedelta(hours=hours)
|
|
59
|
+
r.update(
|
|
60
|
+
{
|
|
61
|
+
date_key: base.strftime("%Y%m%d"),
|
|
62
|
+
"time": base.strftime("%H%M"),
|
|
63
|
+
"step": s,
|
|
64
|
+
}
|
|
65
|
+
)
|
|
65
66
|
|
|
66
67
|
for pproc in ("grid", "rotation", "frame", "area", "bitmap", "resol"):
|
|
67
68
|
if pproc in r:
|
|
@@ -73,13 +74,18 @@ def _expand_mars_request(request, date, date_key="date"):
|
|
|
73
74
|
return requests
|
|
74
75
|
|
|
75
76
|
|
|
76
|
-
def factorise_requests(dates, *requests, date_key="date"):
|
|
77
|
+
def factorise_requests(dates, *requests, request_already_using_valid_datetime=False, date_key="date"):
|
|
77
78
|
updates = []
|
|
78
79
|
for req in requests:
|
|
79
80
|
# req = normalise_request(req)
|
|
80
81
|
|
|
81
82
|
for d in dates:
|
|
82
|
-
updates += _expand_mars_request(
|
|
83
|
+
updates += _expand_mars_request(
|
|
84
|
+
req,
|
|
85
|
+
date=d,
|
|
86
|
+
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
87
|
+
date_key=date_key,
|
|
88
|
+
)
|
|
83
89
|
|
|
84
90
|
compressed = Availability(updates)
|
|
85
91
|
for r in compressed.iterate():
|
|
@@ -171,12 +177,33 @@ MARS_KEYS = [
|
|
|
171
177
|
]
|
|
172
178
|
|
|
173
179
|
|
|
174
|
-
def mars(context, dates, *requests, date_key="date", **kwargs):
|
|
180
|
+
def mars(context, dates, *requests, request_already_using_valid_datetime=False, date_key="date", **kwargs):
|
|
175
181
|
if not requests:
|
|
176
182
|
requests = [kwargs]
|
|
177
183
|
|
|
178
|
-
|
|
179
|
-
|
|
184
|
+
for r in requests:
|
|
185
|
+
# check for "Norway bug" where yaml transforms 'no' into False, etc.
|
|
186
|
+
for p in r.get("param", []):
|
|
187
|
+
if p is False:
|
|
188
|
+
raise ValueError(
|
|
189
|
+
"'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
|
|
190
|
+
)
|
|
191
|
+
if p is None:
|
|
192
|
+
raise ValueError(
|
|
193
|
+
"'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
|
|
194
|
+
)
|
|
195
|
+
if p is True:
|
|
196
|
+
raise ValueError(
|
|
197
|
+
"'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
requests = factorise_requests(
|
|
201
|
+
dates,
|
|
202
|
+
*requests,
|
|
203
|
+
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
204
|
+
date_key=date_key,
|
|
205
|
+
)
|
|
206
|
+
ds = from_source("empty")
|
|
180
207
|
for r in requests:
|
|
181
208
|
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
182
209
|
|
|
@@ -184,14 +211,18 @@ def mars(context, dates, *requests, date_key="date", **kwargs):
|
|
|
184
211
|
r = use_grib_paramid(r)
|
|
185
212
|
|
|
186
213
|
if DEBUG:
|
|
187
|
-
context.trace("✅", f"
|
|
214
|
+
context.trace("✅", f"from_source(mars, {r}")
|
|
188
215
|
|
|
189
216
|
for k, v in r.items():
|
|
190
217
|
if k not in MARS_KEYS:
|
|
191
218
|
raise ValueError(
|
|
192
219
|
f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
|
|
193
220
|
)
|
|
194
|
-
|
|
221
|
+
try:
|
|
222
|
+
ds = ds + from_source("mars", **r)
|
|
223
|
+
except Exception as e:
|
|
224
|
+
if "File is empty:" not in str(e):
|
|
225
|
+
raise
|
|
195
226
|
return ds
|
|
196
227
|
|
|
197
228
|
|
|
@@ -7,66 +7,8 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
-
import
|
|
11
|
-
|
|
12
|
-
from climetlab import load_source
|
|
13
|
-
from climetlab.utils.patterns import Pattern
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _expand(paths):
|
|
17
|
-
for path in paths:
|
|
18
|
-
if path.startswith("file://"):
|
|
19
|
-
path = path[7:]
|
|
20
|
-
|
|
21
|
-
if path.startswith("http://"):
|
|
22
|
-
yield path
|
|
23
|
-
continue
|
|
24
|
-
|
|
25
|
-
if path.startswith("https://"):
|
|
26
|
-
yield path
|
|
27
|
-
continue
|
|
28
|
-
|
|
29
|
-
for p in glob.glob(path):
|
|
30
|
-
yield p
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def check(what, ds, paths, **kwargs):
|
|
34
|
-
count = 1
|
|
35
|
-
for k, v in kwargs.items():
|
|
36
|
-
if isinstance(v, (tuple, list)):
|
|
37
|
-
count *= len(v)
|
|
38
|
-
|
|
39
|
-
if len(ds) != count:
|
|
40
|
-
raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def load_netcdfs(emoji, what, context, dates, path, *args, **kwargs):
|
|
44
|
-
given_paths = path if isinstance(path, list) else [path]
|
|
45
|
-
|
|
46
|
-
dates = [d.isoformat() for d in dates]
|
|
47
|
-
ds = load_source("empty")
|
|
48
|
-
|
|
49
|
-
for path in given_paths:
|
|
50
|
-
paths = Pattern(path, ignore_missing_keys=True).substitute(*args, date=dates, **kwargs)
|
|
51
|
-
|
|
52
|
-
levels = kwargs.get("level", kwargs.get("levelist"))
|
|
53
|
-
|
|
54
|
-
for path in _expand(paths):
|
|
55
|
-
context.trace(emoji, what.upper(), path)
|
|
56
|
-
s = load_source("opendap", path)
|
|
57
|
-
s = s.sel(
|
|
58
|
-
valid_datetime=dates,
|
|
59
|
-
param=kwargs["param"],
|
|
60
|
-
step=kwargs.get("step", 0),
|
|
61
|
-
)
|
|
62
|
-
if levels:
|
|
63
|
-
s = s.sel(levelist=levels)
|
|
64
|
-
ds = ds + s
|
|
65
|
-
|
|
66
|
-
check(what, ds, given_paths, valid_datetime=dates, **kwargs)
|
|
67
|
-
|
|
68
|
-
return ds
|
|
10
|
+
from .xarray import load_many
|
|
69
11
|
|
|
70
12
|
|
|
71
13
|
def execute(context, dates, path, *args, **kwargs):
|
|
72
|
-
return
|
|
14
|
+
return load_many("📁", context, dates, path, *args, **kwargs)
|
|
@@ -7,8 +7,9 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
from .xarray import load_many
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def execute(context, dates, url, *args, **kwargs):
|
|
14
|
-
return
|
|
15
|
+
return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
|
-
from
|
|
9
|
+
from earthkit.data import from_source
|
|
10
10
|
|
|
11
11
|
from anemoi.datasets.create.utils import to_datetime_list
|
|
12
12
|
|
|
@@ -15,12 +15,12 @@ DEBUG = True
|
|
|
15
15
|
|
|
16
16
|
def source(context, dates, **kwargs):
|
|
17
17
|
name = kwargs.pop("name")
|
|
18
|
-
context.trace("✅", f"
|
|
18
|
+
context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
|
|
19
19
|
if kwargs["date"] == "$from_dates":
|
|
20
20
|
kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
|
|
21
21
|
if kwargs["time"] == "$from_dates":
|
|
22
22
|
kwargs["time"] = list({d.strftime("%H%M") for d in dates})
|
|
23
|
-
return
|
|
23
|
+
return from_source(name, **kwargs)
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
execute = source
|