anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/create/__init__.py +46 -13
- anemoi/datasets/create/config.py +52 -53
- anemoi/datasets/create/input/__init__.py +43 -63
- anemoi/datasets/create/input/action.py +296 -236
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +2 -1
- anemoi/datasets/create/input/misc.py +0 -71
- anemoi/datasets/create/input/repeated_dates.py +0 -114
- anemoi/datasets/create/input/result/__init__.py +17 -0
- anemoi/datasets/create/input/{result.py → result/field.py} +10 -92
- anemoi/datasets/create/sources/accumulate.py +517 -0
- anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
- anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
- anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +149 -0
- anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -38
- anemoi/datasets/create/sources/empty.py +26 -22
- anemoi/datasets/create/sources/forcings.py +29 -28
- anemoi/datasets/create/sources/grib.py +92 -72
- anemoi/datasets/create/sources/grib_index.py +102 -54
- anemoi/datasets/create/sources/hindcasts.py +56 -55
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +159 -154
- anemoi/datasets/create/sources/netcdf.py +28 -24
- anemoi/datasets/create/sources/opendap.py +28 -24
- anemoi/datasets/create/sources/recentre.py +42 -41
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -48
- anemoi/datasets/create/sources/xarray_support/__init__.py +30 -24
- anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
- anemoi/datasets/create/sources/xarray_support/field.py +4 -4
- anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
- anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
- anemoi/datasets/create/sources/xarray_zarr.py +28 -24
- anemoi/datasets/create/sources/zenodo.py +43 -39
- anemoi/datasets/create/utils.py +0 -42
- anemoi/datasets/data/complement.py +26 -17
- anemoi/datasets/data/dataset.py +12 -0
- anemoi/datasets/data/grids.py +0 -152
- anemoi/datasets/data/masked.py +74 -13
- anemoi/datasets/data/missing.py +5 -0
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/stores.py +7 -9
- anemoi/datasets/dates/__init__.py +2 -0
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +1 -178
- anemoi/datasets/schemas/recipe.json +131 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/METADATA +9 -6
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/RECORD +59 -57
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/filter.py +0 -47
- anemoi/datasets/create/input/concat.py +0 -161
- anemoi/datasets/create/input/context.py +0 -86
- anemoi/datasets/create/input/empty.py +0 -53
- anemoi/datasets/create/input/filter.py +0 -117
- anemoi/datasets/create/input/function.py +0 -232
- anemoi/datasets/create/input/join.py +0 -129
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -173
- anemoi/datasets/create/input/template.py +0 -161
- anemoi/datasets/create/sources/accumulations.py +0 -1062
- anemoi/datasets/create/sources/accumulations2.py +0 -647
- anemoi/datasets/create/sources/tendencies.py +0 -198
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/top_level.txt +0 -0
|
@@ -16,9 +16,10 @@ from anemoi.utils.humanize import did_you_mean
|
|
|
16
16
|
from earthkit.data import from_source
|
|
17
17
|
from earthkit.data.utils.availability import Availability
|
|
18
18
|
|
|
19
|
-
from anemoi.datasets.create.
|
|
19
|
+
from anemoi.datasets.create.sources import source_registry
|
|
20
|
+
from anemoi.datasets.create.sources.accumulate import IntervalsDatesProvider
|
|
20
21
|
|
|
21
|
-
from .legacy import
|
|
22
|
+
from .legacy import LegacySource
|
|
22
23
|
|
|
23
24
|
DEBUG = False
|
|
24
25
|
|
|
@@ -145,7 +146,7 @@ def _expand_mars_request(
|
|
|
145
146
|
|
|
146
147
|
Parameters
|
|
147
148
|
----------
|
|
148
|
-
request :
|
|
149
|
+
request : dict[str, Any]
|
|
149
150
|
The input MARS request.
|
|
150
151
|
date : datetime.datetime
|
|
151
152
|
The date to be used in the request.
|
|
@@ -156,7 +157,7 @@ def _expand_mars_request(
|
|
|
156
157
|
|
|
157
158
|
Returns
|
|
158
159
|
-------
|
|
159
|
-
List[
|
|
160
|
+
List[dict[str, Any]]
|
|
160
161
|
A list of expanded MARS requests.
|
|
161
162
|
"""
|
|
162
163
|
requests = []
|
|
@@ -164,23 +165,26 @@ def _expand_mars_request(
|
|
|
164
165
|
user_step = to_list(expand_to_by(request.get("step", [0])))
|
|
165
166
|
user_time = None
|
|
166
167
|
user_date = None
|
|
167
|
-
|
|
168
168
|
if not request_already_using_valid_datetime:
|
|
169
|
-
user_time = request.get("
|
|
169
|
+
user_time = request.get("user_time")
|
|
170
170
|
if user_time is not None:
|
|
171
171
|
user_time = to_list(user_time)
|
|
172
172
|
user_time = [_normalise_time(t) for t in user_time]
|
|
173
173
|
|
|
174
174
|
user_date = request.get(date_key)
|
|
175
175
|
if user_date is not None:
|
|
176
|
-
|
|
176
|
+
if isinstance(user_date, int):
|
|
177
|
+
user_date = str(user_date)
|
|
178
|
+
elif isinstance(user_date, datetime.datetime):
|
|
179
|
+
user_date = user_date.strftime("%Y%m%d")
|
|
180
|
+
else:
|
|
181
|
+
raise ValueError(f"Invalid type for {user_date}")
|
|
177
182
|
user_date = re.compile("^{}$".format(user_date.replace("-", "").replace("?", ".")))
|
|
178
183
|
|
|
179
184
|
for step in user_step:
|
|
180
185
|
r = request.copy()
|
|
181
186
|
|
|
182
187
|
if not request_already_using_valid_datetime:
|
|
183
|
-
|
|
184
188
|
if isinstance(step, str) and "-" in step:
|
|
185
189
|
assert step.count("-") == 1, step
|
|
186
190
|
|
|
@@ -190,30 +194,27 @@ def _expand_mars_request(
|
|
|
190
194
|
base = date - datetime.timedelta(hours=hours)
|
|
191
195
|
r.update(
|
|
192
196
|
{
|
|
193
|
-
|
|
197
|
+
"date": base.strftime("%Y%m%d"),
|
|
194
198
|
"time": base.strftime("%H%M"),
|
|
195
199
|
"step": step,
|
|
196
200
|
}
|
|
197
201
|
)
|
|
198
|
-
|
|
199
202
|
for pproc in ("grid", "rotation", "frame", "area", "bitmap", "resol"):
|
|
200
203
|
if pproc in r:
|
|
201
204
|
if isinstance(r[pproc], (list, tuple)):
|
|
202
205
|
r[pproc] = "/".join(str(x) for x in r[pproc])
|
|
203
206
|
|
|
204
207
|
if user_date is not None:
|
|
205
|
-
if not user_date.match(r[
|
|
208
|
+
if not user_date.match(r["date"]):
|
|
206
209
|
continue
|
|
207
210
|
|
|
208
211
|
if user_time is not None:
|
|
209
|
-
#
|
|
212
|
+
# If time is provided by the user, we only keep the requests that match the time
|
|
210
213
|
if r["time"] not in user_time:
|
|
211
214
|
continue
|
|
212
215
|
|
|
213
216
|
requests.append(r)
|
|
214
217
|
|
|
215
|
-
# assert requests, requests
|
|
216
|
-
|
|
217
218
|
return requests
|
|
218
219
|
|
|
219
220
|
|
|
@@ -222,6 +223,7 @@ def factorise_requests(
|
|
|
222
223
|
*requests: dict[str, Any],
|
|
223
224
|
request_already_using_valid_datetime: bool = False,
|
|
224
225
|
date_key: str = "date",
|
|
226
|
+
no_date_here: bool = False,
|
|
225
227
|
) -> Generator[dict[str, Any], None, None]:
|
|
226
228
|
"""Factorizes the requests based on the given dates.
|
|
227
229
|
|
|
@@ -229,33 +231,42 @@ def factorise_requests(
|
|
|
229
231
|
----------
|
|
230
232
|
dates : List[datetime.datetime]
|
|
231
233
|
The list of dates to be used in the requests.
|
|
232
|
-
requests :
|
|
234
|
+
requests : List[dict[str, Any]]
|
|
233
235
|
The input requests to be factorized.
|
|
234
236
|
request_already_using_valid_datetime : bool, optional
|
|
235
237
|
Flag indicating if the requests already use valid datetime.
|
|
236
238
|
date_key : str, optional
|
|
237
239
|
The key for the date in the requests.
|
|
240
|
+
no_date_here : bool, optional
|
|
241
|
+
Flag indicating if there is no date in the "dates" list.
|
|
238
242
|
|
|
239
243
|
Returns
|
|
240
244
|
-------
|
|
241
|
-
Generator[
|
|
245
|
+
Generator[dict[str, Any], None, None]
|
|
242
246
|
Factorized requests.
|
|
243
247
|
"""
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
# req = normalise_request(req)
|
|
248
|
+
if isinstance(requests, tuple) and len(requests) == 1 and "requests" in requests[0]:
|
|
249
|
+
requests = requests[0]["requests"]
|
|
247
250
|
|
|
248
|
-
|
|
249
|
-
|
|
251
|
+
updates = []
|
|
252
|
+
for d in sorted(dates):
|
|
253
|
+
for req in requests:
|
|
254
|
+
if not no_date_here and (
|
|
255
|
+
("date" in req)
|
|
256
|
+
and ("time" in req)
|
|
257
|
+
and d.strftime("%Y%m%d%H%M") != (str(req["date"]) + str(req["time"]).zfill(4))
|
|
258
|
+
):
|
|
259
|
+
continue
|
|
260
|
+
new_req = _expand_mars_request(
|
|
250
261
|
req,
|
|
251
262
|
date=d,
|
|
252
263
|
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
253
|
-
date_key=
|
|
264
|
+
date_key="user_date",
|
|
254
265
|
)
|
|
266
|
+
updates += new_req
|
|
255
267
|
|
|
256
268
|
if not updates:
|
|
257
269
|
return
|
|
258
|
-
|
|
259
270
|
compressed = Availability(updates)
|
|
260
271
|
for r in compressed.iterate():
|
|
261
272
|
for k, v in r.items():
|
|
@@ -269,12 +280,12 @@ def use_grib_paramid(r: dict[str, Any]) -> dict[str, Any]:
|
|
|
269
280
|
|
|
270
281
|
Parameters
|
|
271
282
|
----------
|
|
272
|
-
r :
|
|
283
|
+
r : dict[str, Any]
|
|
273
284
|
The input request containing parameter short names.
|
|
274
285
|
|
|
275
286
|
Returns
|
|
276
287
|
-------
|
|
277
|
-
|
|
288
|
+
dict[str, Any]
|
|
278
289
|
The request with parameter IDs.
|
|
279
290
|
"""
|
|
280
291
|
from anemoi.utils.grib import shortname_to_paramid
|
|
@@ -358,135 +369,129 @@ MARS_KEYS = [
|
|
|
358
369
|
]
|
|
359
370
|
|
|
360
371
|
|
|
361
|
-
@
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
requests = [kwargs]
|
|
398
|
-
|
|
399
|
-
for r in requests:
|
|
400
|
-
param = r.get("param", [])
|
|
401
|
-
if not isinstance(param, (list, tuple)):
|
|
402
|
-
param = [param]
|
|
403
|
-
# check for "Norway bug" where yaml transforms 'no' into False, etc.
|
|
404
|
-
for p in param:
|
|
405
|
-
if p is False:
|
|
406
|
-
raise ValueError(
|
|
407
|
-
"'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
|
|
408
|
-
)
|
|
409
|
-
if p is None:
|
|
410
|
-
raise ValueError(
|
|
411
|
-
"'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
|
|
412
|
-
)
|
|
413
|
-
if p is True:
|
|
414
|
-
raise ValueError(
|
|
415
|
-
"'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
|
|
416
|
-
)
|
|
417
|
-
|
|
418
|
-
if len(dates) == 0: # When using `repeated_dates`
|
|
419
|
-
assert len(requests) == 1, requests
|
|
420
|
-
assert "date" in requests[0], requests[0]
|
|
421
|
-
if isinstance(requests[0]["date"], datetime.date):
|
|
422
|
-
requests[0]["date"] = requests[0]["date"].strftime("%Y%m%d")
|
|
423
|
-
else:
|
|
424
|
-
requests = factorise_requests(
|
|
425
|
-
dates,
|
|
426
|
-
*requests,
|
|
427
|
-
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
428
|
-
date_key=date_key,
|
|
429
|
-
)
|
|
430
|
-
|
|
431
|
-
requests = list(requests)
|
|
432
|
-
|
|
433
|
-
ds = from_source("empty")
|
|
434
|
-
context.trace("✅", f"{[str(d) for d in dates]}")
|
|
435
|
-
context.trace("✅", f"Will run {len(requests)} requests")
|
|
436
|
-
for r in requests:
|
|
437
|
-
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
438
|
-
context.trace("✅", f"mars {r}")
|
|
439
|
-
|
|
440
|
-
for r in requests:
|
|
441
|
-
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
442
|
-
|
|
443
|
-
if context.use_grib_paramid and "param" in r:
|
|
444
|
-
r = use_grib_paramid(r)
|
|
445
|
-
|
|
446
|
-
for k, v in r.items():
|
|
447
|
-
if k not in MARS_KEYS:
|
|
448
|
-
raise ValueError(
|
|
449
|
-
f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
|
|
450
|
-
)
|
|
451
|
-
try:
|
|
452
|
-
if use_cdsapi_dataset:
|
|
453
|
-
ds = ds + from_source("cds", use_cdsapi_dataset, r)
|
|
454
|
-
else:
|
|
455
|
-
ds = ds + from_source("mars", **r)
|
|
456
|
-
except Exception as e:
|
|
457
|
-
if "File is empty:" not in str(e):
|
|
458
|
-
raise
|
|
459
|
-
return ds
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
execute = mars
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
if __name__ == "__main__":
|
|
466
|
-
import yaml
|
|
467
|
-
|
|
468
|
-
config = yaml.safe_load(
|
|
372
|
+
@source_registry.register("mars")
|
|
373
|
+
class MarsSource(LegacySource):
|
|
374
|
+
|
|
375
|
+
@staticmethod
|
|
376
|
+
def _execute(
|
|
377
|
+
context: Any,
|
|
378
|
+
dates: list[datetime.datetime],
|
|
379
|
+
*requests: dict[str, Any],
|
|
380
|
+
request_already_using_valid_datetime: bool = False,
|
|
381
|
+
date_key: str = "date",
|
|
382
|
+
use_cdsapi_dataset: str | None = None,
|
|
383
|
+
**kwargs: Any,
|
|
384
|
+
) -> Any:
|
|
385
|
+
"""Executes MARS requests based on the given context, dates, and other parameters.
|
|
386
|
+
|
|
387
|
+
Parameters
|
|
388
|
+
----------
|
|
389
|
+
context : Any
|
|
390
|
+
The context for the requests.
|
|
391
|
+
dates : List[datetime.datetime]
|
|
392
|
+
The list of dates to be used in the requests.
|
|
393
|
+
requests : dict[str, Any]
|
|
394
|
+
The input requests to be executed.
|
|
395
|
+
request_already_using_valid_datetime : bool, optional
|
|
396
|
+
Flag indicating if the requests already use valid datetime.
|
|
397
|
+
date_key : str, optional
|
|
398
|
+
The key for the date in the requests.
|
|
399
|
+
use_cdsapi_dataset : Optional[str], optional
|
|
400
|
+
The dataset to be used with CDS API.
|
|
401
|
+
kwargs : Any
|
|
402
|
+
Additional keyword arguments for the requests.
|
|
403
|
+
|
|
404
|
+
Returns
|
|
405
|
+
-------
|
|
406
|
+
Any
|
|
407
|
+
The resulting dataset.
|
|
469
408
|
"""
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
409
|
+
if not requests:
|
|
410
|
+
requests = [kwargs]
|
|
411
|
+
|
|
412
|
+
for r in requests:
|
|
413
|
+
param = r.get("param", [])
|
|
414
|
+
if not isinstance(param, (list, tuple)):
|
|
415
|
+
param = [param]
|
|
416
|
+
# check for "Norway bug" where yaml transforms 'no' into False, etc.
|
|
417
|
+
for p in param:
|
|
418
|
+
if p is False:
|
|
419
|
+
raise ValueError(
|
|
420
|
+
"'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
|
|
421
|
+
)
|
|
422
|
+
if p is None:
|
|
423
|
+
raise ValueError(
|
|
424
|
+
"'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
|
|
425
|
+
)
|
|
426
|
+
if p is True:
|
|
427
|
+
raise ValueError(
|
|
428
|
+
"'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
if isinstance(dates, IntervalsDatesProvider):
|
|
432
|
+
# When using accumulate source
|
|
433
|
+
requests_ = []
|
|
434
|
+
for request in requests:
|
|
435
|
+
for d, interval in dates.intervals:
|
|
436
|
+
context.trace("🌧️", "interval:", interval)
|
|
437
|
+
_, r, _ = dates._adjust_request_to_interval(interval, request)
|
|
438
|
+
context.trace("🌧️", " adjusted request =", r)
|
|
439
|
+
requests_.append(r)
|
|
440
|
+
requests = requests_
|
|
441
|
+
context.trace("🌧️", f"Total requests: {len(requests)}")
|
|
442
|
+
requests = factorise_requests(
|
|
443
|
+
["no_date_here"],
|
|
444
|
+
*requests,
|
|
445
|
+
request_already_using_valid_datetime=True,
|
|
446
|
+
date_key=date_key,
|
|
447
|
+
no_date_here=True,
|
|
448
|
+
)
|
|
484
449
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
450
|
+
elif len(dates) == 0: # When using `repeated_dates`
|
|
451
|
+
assert len(requests) == 1, requests
|
|
452
|
+
assert "date" in requests[0], requests[0]
|
|
453
|
+
if isinstance(requests[0]["date"], datetime.date):
|
|
454
|
+
requests[0]["date"] = requests[0]["date"].strftime("%Y%m%d")
|
|
455
|
+
else:
|
|
456
|
+
requests = factorise_requests(
|
|
457
|
+
dates,
|
|
458
|
+
*requests,
|
|
459
|
+
request_already_using_valid_datetime=request_already_using_valid_datetime,
|
|
460
|
+
date_key=date_key,
|
|
461
|
+
)
|
|
489
462
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
463
|
+
requests = list(requests)
|
|
464
|
+
|
|
465
|
+
ds = from_source("empty")
|
|
466
|
+
context.trace("✅", f"{[str(d) for d in dates]}, {len(dates)}")
|
|
467
|
+
context.trace("✅", f"Will run {len(requests)} requests")
|
|
468
|
+
for r in requests:
|
|
469
|
+
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
470
|
+
context.trace("✅", f"mars {r}")
|
|
471
|
+
|
|
472
|
+
for r in requests:
|
|
473
|
+
r = {k: v for k, v in r.items() if v != ("-",)}
|
|
474
|
+
|
|
475
|
+
if context.use_grib_paramid and "param" in r:
|
|
476
|
+
r = use_grib_paramid(r)
|
|
477
|
+
|
|
478
|
+
for k, v in r.items():
|
|
479
|
+
if k not in MARS_KEYS:
|
|
480
|
+
raise ValueError(
|
|
481
|
+
f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
|
|
482
|
+
)
|
|
483
|
+
try:
|
|
484
|
+
if use_cdsapi_dataset:
|
|
485
|
+
ds = ds + from_source("cds", use_cdsapi_dataset, r)
|
|
486
|
+
else:
|
|
487
|
+
ds = ds + from_source("mars", **r)
|
|
488
|
+
except Exception as e:
|
|
489
|
+
if "File is empty:" not in str(e):
|
|
490
|
+
raise
|
|
491
|
+
return ds
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
# TODO: make clearer the interface between sources that use mars.
|
|
495
|
+
# Currently some sources use mars as a function rather than through the registry,
|
|
496
|
+
# e.g. accumulations, accumulations2, hindcasts, recentre, tendencies
|
|
497
|
+
mars = MarsSource._execute
|
|
@@ -12,30 +12,34 @@ from typing import Any
|
|
|
12
12
|
|
|
13
13
|
import earthkit.data as ekd
|
|
14
14
|
|
|
15
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
16
17
|
from .xarray import load_many
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
@
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
20
|
+
@source_registry.register("netcdf")
|
|
21
|
+
class NetCDFSource(LegacySource):
|
|
22
|
+
|
|
23
|
+
@staticmethod
|
|
24
|
+
def _execute(context: Any, dates: list[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
25
|
+
"""Execute the loading of multiple NetCDF files.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
context : object
|
|
30
|
+
The context in which the function is executed.
|
|
31
|
+
dates : list
|
|
32
|
+
List of dates for which data is to be loaded.
|
|
33
|
+
path : str
|
|
34
|
+
Path to the directory containing the NetCDF files.
|
|
35
|
+
*args : tuple
|
|
36
|
+
Additional positional arguments.
|
|
37
|
+
**kwargs : dict
|
|
38
|
+
Additional keyword arguments.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
object
|
|
43
|
+
The loaded data.
|
|
44
|
+
"""
|
|
45
|
+
return load_many("📁", context, dates, path, *args, **kwargs)
|
|
@@ -12,30 +12,34 @@ from typing import Any
|
|
|
12
12
|
|
|
13
13
|
import earthkit.data as ekd
|
|
14
14
|
|
|
15
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
16
17
|
from .xarray import load_many
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
@
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
20
|
+
@source_registry.register("opendap")
|
|
21
|
+
class OpenDAPSource(LegacySource):
|
|
22
|
+
|
|
23
|
+
@staticmethod
|
|
24
|
+
def _execute(context: dict[str, Any], dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
|
|
25
|
+
"""Execute the data loading process from an OpenDAP source.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
context : dict
|
|
30
|
+
The context in which the function is executed.
|
|
31
|
+
dates : list
|
|
32
|
+
List of dates for which data is to be loaded.
|
|
33
|
+
url : str
|
|
34
|
+
The URL of the OpenDAP source.
|
|
35
|
+
*args : tuple
|
|
36
|
+
Additional positional arguments.
|
|
37
|
+
**kwargs : dict
|
|
38
|
+
Additional keyword arguments.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
xarray.Dataset
|
|
43
|
+
The loaded dataset.
|
|
44
|
+
"""
|
|
45
|
+
return load_many("🌐", context, dates, url, *args, **kwargs)
|
|
@@ -12,7 +12,8 @@ from typing import Any
|
|
|
12
12
|
|
|
13
13
|
from anemoi.datasets.compute.recentre import recentre as _recentre
|
|
14
14
|
|
|
15
|
-
from .
|
|
15
|
+
from . import source_registry
|
|
16
|
+
from .legacy import LegacySource
|
|
16
17
|
from .mars import mars
|
|
17
18
|
|
|
18
19
|
|
|
@@ -105,43 +106,43 @@ def load_if_needed(context: Any, dates: Any, dict_or_dataset: dict | Any) -> Any
|
|
|
105
106
|
return dict_or_dataset
|
|
106
107
|
|
|
107
108
|
|
|
108
|
-
@
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
109
|
+
@source_registry.register("recentre")
|
|
110
|
+
class RecentreSource(LegacySource):
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _execute(
|
|
114
|
+
context: Any,
|
|
115
|
+
dates: Any,
|
|
116
|
+
members: dict | Any,
|
|
117
|
+
centre: dict | Any,
|
|
118
|
+
alpha: float = 1.0,
|
|
119
|
+
remapping: dict = {},
|
|
120
|
+
patches: dict = {},
|
|
121
|
+
) -> Any:
|
|
122
|
+
"""Recentres the members dataset using the centre dataset.
|
|
123
|
+
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
context : Any
|
|
127
|
+
The context for recentering.
|
|
128
|
+
dates : Any
|
|
129
|
+
The dates for recentering.
|
|
130
|
+
members : Union[dict, Any]
|
|
131
|
+
The members dataset or request dictionary.
|
|
132
|
+
centre : Union[dict, Any]
|
|
133
|
+
The centre dataset or request dictionary.
|
|
134
|
+
alpha : float, optional
|
|
135
|
+
The alpha value for recentering. Defaults to 1.0.
|
|
136
|
+
remapping : dict, optional
|
|
137
|
+
The remapping dictionary. Defaults to {}.
|
|
138
|
+
patches : dict, optional
|
|
139
|
+
The patches dictionary. Defaults to {}.
|
|
140
|
+
|
|
141
|
+
Returns
|
|
142
|
+
-------
|
|
143
|
+
Any
|
|
144
|
+
The recentred dataset.
|
|
145
|
+
"""
|
|
146
|
+
members = load_if_needed(context, dates, members)
|
|
147
|
+
centre = load_if_needed(context, dates, centre)
|
|
148
|
+
return _recentre(members=members, centre=centre, alpha=alpha)
|