anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/create/__init__.py +46 -13
- anemoi/datasets/create/config.py +52 -53
- anemoi/datasets/create/input/__init__.py +43 -63
- anemoi/datasets/create/input/action.py +296 -236
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +2 -1
- anemoi/datasets/create/input/misc.py +0 -71
- anemoi/datasets/create/input/repeated_dates.py +0 -114
- anemoi/datasets/create/input/result/__init__.py +17 -0
- anemoi/datasets/create/input/{result.py → result/field.py} +10 -92
- anemoi/datasets/create/sources/accumulate.py +517 -0
- anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
- anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
- anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +149 -0
- anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -38
- anemoi/datasets/create/sources/empty.py +26 -22
- anemoi/datasets/create/sources/forcings.py +29 -28
- anemoi/datasets/create/sources/grib.py +92 -72
- anemoi/datasets/create/sources/grib_index.py +102 -54
- anemoi/datasets/create/sources/hindcasts.py +56 -55
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +159 -154
- anemoi/datasets/create/sources/netcdf.py +28 -24
- anemoi/datasets/create/sources/opendap.py +28 -24
- anemoi/datasets/create/sources/recentre.py +42 -41
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -48
- anemoi/datasets/create/sources/xarray_support/__init__.py +30 -24
- anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
- anemoi/datasets/create/sources/xarray_support/field.py +4 -4
- anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
- anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
- anemoi/datasets/create/sources/xarray_zarr.py +28 -24
- anemoi/datasets/create/sources/zenodo.py +43 -39
- anemoi/datasets/create/utils.py +0 -42
- anemoi/datasets/data/complement.py +26 -17
- anemoi/datasets/data/dataset.py +12 -0
- anemoi/datasets/data/grids.py +0 -152
- anemoi/datasets/data/masked.py +74 -13
- anemoi/datasets/data/missing.py +5 -0
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/stores.py +7 -9
- anemoi/datasets/dates/__init__.py +2 -0
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +1 -178
- anemoi/datasets/schemas/recipe.json +131 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/METADATA +9 -6
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/RECORD +59 -57
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/filter.py +0 -47
- anemoi/datasets/create/input/concat.py +0 -161
- anemoi/datasets/create/input/context.py +0 -86
- anemoi/datasets/create/input/empty.py +0 -53
- anemoi/datasets/create/input/filter.py +0 -117
- anemoi/datasets/create/input/function.py +0 -232
- anemoi/datasets/create/input/join.py +0 -129
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -173
- anemoi/datasets/create/input/template.py +0 -161
- anemoi/datasets/create/sources/accumulations.py +0 -1062
- anemoi/datasets/create/sources/accumulations2.py +0 -647
- anemoi/datasets/create/sources/tendencies.py +0 -198
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/top_level.txt +0 -0
|
@@ -7,9 +7,12 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
10
12
|
import logging
|
|
11
13
|
import os
|
|
12
14
|
import sqlite3
|
|
15
|
+
from collections import defaultdict
|
|
13
16
|
from collections.abc import Iterator
|
|
14
17
|
from typing import Any
|
|
15
18
|
|
|
@@ -19,7 +22,8 @@ from anemoi.transform.flavour import RuleBasedFlavour
|
|
|
19
22
|
from cachetools import LRUCache
|
|
20
23
|
from earthkit.data.indexing.fieldlist import FieldArray
|
|
21
24
|
|
|
22
|
-
from .
|
|
25
|
+
from . import source_registry
|
|
26
|
+
from .legacy import LegacySource
|
|
23
27
|
|
|
24
28
|
LOG = logging.getLogger(__name__)
|
|
25
29
|
|
|
@@ -45,8 +49,8 @@ class GribIndex:
|
|
|
45
49
|
----------
|
|
46
50
|
database : str
|
|
47
51
|
Path to the SQLite database file.
|
|
48
|
-
keys : Optional[
|
|
49
|
-
|
|
52
|
+
keys : Optional[list[str] | str], optional
|
|
53
|
+
list of keys or a string of keys to use for indexing, by default None.
|
|
50
54
|
flavour : Optional[str], optional
|
|
51
55
|
Flavour configuration for mapping fields, by default None.
|
|
52
56
|
update : bool, optional
|
|
@@ -160,7 +164,7 @@ class GribIndex:
|
|
|
160
164
|
|
|
161
165
|
Returns
|
|
162
166
|
-------
|
|
163
|
-
|
|
167
|
+
list[str]
|
|
164
168
|
A list of metadata keys stored in the database.
|
|
165
169
|
"""
|
|
166
170
|
self.cursor.execute("SELECT key FROM metadata_keys")
|
|
@@ -228,7 +232,7 @@ class GribIndex:
|
|
|
228
232
|
|
|
229
233
|
Returns
|
|
230
234
|
-------
|
|
231
|
-
|
|
235
|
+
list[str]
|
|
232
236
|
A list of column names.
|
|
233
237
|
"""
|
|
234
238
|
if self._columns is not None:
|
|
@@ -244,8 +248,8 @@ class GribIndex:
|
|
|
244
248
|
|
|
245
249
|
Parameters
|
|
246
250
|
----------
|
|
247
|
-
columns :
|
|
248
|
-
|
|
251
|
+
columns : list[str]
|
|
252
|
+
list of column names to ensure in the table.
|
|
249
253
|
"""
|
|
250
254
|
assert self.update
|
|
251
255
|
|
|
@@ -363,7 +367,7 @@ class GribIndex:
|
|
|
363
367
|
|
|
364
368
|
Returns
|
|
365
369
|
-------
|
|
366
|
-
|
|
370
|
+
list[dict]
|
|
367
371
|
A list of GRIB2 parameter information.
|
|
368
372
|
"""
|
|
369
373
|
if ("grib2", paramId) in self.cache:
|
|
@@ -523,8 +527,8 @@ class GribIndex:
|
|
|
523
527
|
|
|
524
528
|
Parameters
|
|
525
529
|
----------
|
|
526
|
-
dates :
|
|
527
|
-
|
|
530
|
+
dates : list[Any]
|
|
531
|
+
list of dates to retrieve data for.
|
|
528
532
|
**kwargs : Any
|
|
529
533
|
Additional filtering criteria.
|
|
530
534
|
|
|
@@ -544,6 +548,9 @@ class GribIndex:
|
|
|
544
548
|
params = dates
|
|
545
549
|
|
|
546
550
|
for k, v in kwargs.items():
|
|
551
|
+
if k not in self._columns:
|
|
552
|
+
LOG.warning(f"Warning : {k} not in database columns, key discarded")
|
|
553
|
+
continue
|
|
547
554
|
if isinstance(v, list):
|
|
548
555
|
query += f" AND {k} IN ({', '.join('?' for _ in v)})"
|
|
549
556
|
params.extend([str(_) for _ in v])
|
|
@@ -551,11 +558,14 @@ class GribIndex:
|
|
|
551
558
|
query += f" AND {k} = ?"
|
|
552
559
|
params.append(str(v))
|
|
553
560
|
|
|
554
|
-
print("SELECT", query)
|
|
555
|
-
print("SELECT", params)
|
|
561
|
+
print("SELECT (query)", query)
|
|
562
|
+
print("SELECT (params)", params)
|
|
556
563
|
|
|
557
564
|
self.cursor.execute(query, params)
|
|
558
|
-
|
|
565
|
+
|
|
566
|
+
fetch = self.cursor.fetchall()
|
|
567
|
+
|
|
568
|
+
for path_id, offset, length in fetch:
|
|
559
569
|
if path_id in self.cache:
|
|
560
570
|
file = self.cache[path_id]
|
|
561
571
|
else:
|
|
@@ -569,44 +579,82 @@ class GribIndex:
|
|
|
569
579
|
yield data
|
|
570
580
|
|
|
571
581
|
|
|
572
|
-
@
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
flavour
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
582
|
+
@source_registry.register("grib-index")
class GribIndexSource(LegacySource):
    """Source registered under the name ``grib-index``.

    Fields are looked up through a ``GribIndex`` database and returned
    as a ``FieldArray``.
    """

    @staticmethod
    def _execute(
        context: Any,
        dates: list[Any],
        indexdb: str,
        flavour: str | None = None,
        **kwargs: Any,
    ) -> FieldArray:
        """Retrieve the GRIB fields matching ``dates`` and ``kwargs``.

        Parameters
        ----------
        context : Any
            The execution context; only its ``trace`` method is used here.
        dates : list[Any]
            Dates to retrieve data for. When the object exposes a
            ``date_to_intervals`` attribute, its ``intervals`` are iterated
            and one request is built per interval.
        indexdb : str
            Path to the GRIB index database.
        flavour : str | None, optional
            Flavour configuration passed to ``RuleBasedFlavour``,
            by default None.
        **kwargs : Any
            Additional filtering criteria forwarded to ``GribIndex.retrieve``.

        Returns
        -------
        FieldArray
            The retrieved fields, with the flavour applied when one is given.
        """
        database = GribIndex(indexdb)

        # Only wrap the configuration when one was supplied; None stays None.
        mapper = flavour if flavour is None else RuleBasedFlavour(flavour)

        if not hasattr(dates, "date_to_intervals"):
            # Normal case, without accumulate source
            pending = [(dates, kwargs)]
        else:
            # When using accumulate source
            pending = []
            for _date, interval in dates.intervals:
                context.trace("🌧️", "interval:", interval)
                valid_date, request, _ = dates._adjust_request_to_interval(interval, kwargs)
                context.trace("🌧️", " request =", request)
                pending.append(([valid_date], request))

        # Merge the dates of identical requests so each request is issued once.
        grouped = factorise(pending)
        context.trace("🌧️", f"number of (factorised) requests: {len(grouped)}")
        for valid_dates, request in grouped:
            context.trace("🌧️", f" dates: {valid_dates}, request: {request}")

        fields = []
        for valid_dates, request in grouped:
            for message in database.retrieve(valid_dates, **request):
                # Each retrieved message is decoded on its own; keep its first field.
                field = ekd.from_source("memory", message)[0]
                fields.append(mapper.apply(field) if mapper else field)

        return FieldArray(fields)
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def factorise(lst):
    """Factorise a list of (dates, request) tuples by merging dates with identical requests.

    Two requests are considered identical when their JSON serialisation
    with sorted keys is the same, so key order does not matter.

    Parameters
    ----------
    lst : list[tuple[list[Any], dict]]
        Pairs of ``(dates, request)``. ``request`` must be a dict that
        ``json.dumps`` can serialise; the dates must be hashable and
        mutually sortable.

    Returns
    -------
    list[tuple[list[Any], dict]]
        One ``(dates, request)`` pair per distinct request, in order of first
        appearance. ``dates`` is sorted and de-duplicated; ``request`` is the
        last dict seen for that group.
    """
    requests_by_key = {}
    dates_by_key = defaultdict(list)

    for dates, request in lst:
        assert isinstance(request, dict), type(request)
        # The canonical JSON text is used directly as the grouping key:
        # hashing it adds nothing and MD5 may be unavailable on restricted builds.
        key = json.dumps(request, sort_keys=True)
        requests_by_key[key] = request
        dates_by_key[key] += dates

    return [(sorted(set(dates)), requests_by_key[key]) for key, dates in dates_by_key.items()]
|
|
@@ -14,7 +14,8 @@ from earthkit.data.core.fieldlist import MultiFieldList
|
|
|
14
14
|
|
|
15
15
|
from anemoi.datasets.create.sources.mars import mars
|
|
16
16
|
|
|
17
|
-
from .
|
|
17
|
+
from . import source_registry
|
|
18
|
+
from .legacy import LegacySource
|
|
18
19
|
|
|
19
20
|
LOGGER = logging.getLogger(__name__)
|
|
20
21
|
|
|
@@ -37,57 +38,57 @@ def _to_list(x: list | tuple | Any) -> list[Any]:
|
|
|
37
38
|
return [x]
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
@
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
41
|
+
@source_registry.register("hindcasts")
class HindcastsSource(LegacySource):
    """Source registered under the name ``hindcasts``.

    One MARS request is built per requested date and all of them are
    delegated to ``mars``.
    """

    @staticmethod
    def _execute(context: Any, dates: list[Any], **request: Any) -> MultiFieldList:
        """Generate hindcast requests based on the provided dates and request parameters.

        Parameters
        ----------
        context : Any
            The context containing the dates provider and trace method.
            Its ``dates_provider`` must be a ``HindcastsDates`` instance.
        dates : list[Any]
            A list of dates for which to generate hindcast requests.
        **request : Any
            Additional request parameters. ``param`` is required; ``step``
            defaults to 0.

        Returns
        -------
        MultiFieldList
            A MultiFieldList containing the hindcast data, empty when
            ``dates`` is empty.
        """
        from anemoi.datasets.dates import HindcastsDates

        provider = context.dates_provider
        assert isinstance(provider, HindcastsDates)

        context.trace("H️", f"hindcasts {len(dates)=}")

        request["param"] = _to_list(request["param"])
        request["step"] = [int(_) for _ in _to_list(request.get("step", 0))]

        context.trace("H️", f"hindcast {request}")

        requests = []
        for d in dates:
            hindcast = provider.mapping[d]
            r = request.copy()
            r["hdate"] = hindcast.hdate.strftime("%Y-%m-%d")
            r["date"] = hindcast.refdate.strftime("%Y-%m-%d")
            r["time"] = hindcast.refdate.strftime("%H")
            # The provider's per-date step replaces the normalised list above,
            # which therefore only shows up in the trace.
            r["step"] = hindcast.step
            requests.append(r)

        if not requests:
            return MultiFieldList([])

        return mars(
            context,
            dates,
            *requests,
            date_key="hdate",
            request_already_using_valid_datetime=True,
        )
|
|
@@ -8,16 +8,13 @@
|
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
import inspect
|
|
12
11
|
import logging
|
|
13
|
-
import
|
|
14
|
-
from collections.abc import Callable
|
|
12
|
+
from abc import abstractmethod
|
|
15
13
|
from typing import Any
|
|
16
14
|
|
|
17
|
-
from anemoi.datasets.create.input.
|
|
15
|
+
from anemoi.datasets.create.input.context import Context
|
|
18
16
|
|
|
19
17
|
from ..source import Source
|
|
20
|
-
from . import source_registry
|
|
21
18
|
|
|
22
19
|
LOG = logging.getLogger(__name__)
|
|
23
20
|
|
|
@@ -27,7 +24,7 @@ class LegacySource(Source):
|
|
|
27
24
|
|
|
28
25
|
Parameters
|
|
29
26
|
----------
|
|
30
|
-
context :
|
|
27
|
+
context : Context
|
|
31
28
|
The context in which the source is created.
|
|
32
29
|
*args : tuple
|
|
33
30
|
Positional arguments.
|
|
@@ -35,64 +32,15 @@ class LegacySource(Source):
|
|
|
35
32
|
Keyword arguments.
|
|
36
33
|
"""
|
|
37
34
|
|
|
38
|
-
def __init__(self, context:
|
|
35
|
+
def __init__(self, context: Context, *args: Any, **kwargs: Any) -> None:
|
|
39
36
|
super().__init__(context, *args, **kwargs)
|
|
40
37
|
self.args = args
|
|
41
38
|
self.kwargs = kwargs
|
|
42
39
|
|
|
40
|
+
@staticmethod
|
|
41
|
+
@abstractmethod
|
|
42
|
+
def _execute(context, *args, **kwargs):
|
|
43
|
+
pass
|
|
43
44
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
Parameters
|
|
48
|
-
----------
|
|
49
|
-
name : str
|
|
50
|
-
The name of the legacy source.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
def __init__(self, name: str) -> None:
|
|
54
|
-
name, _ = os.path.splitext(os.path.basename(name))
|
|
55
|
-
self.name = name
|
|
56
|
-
|
|
57
|
-
def __call__(self, execute: Callable) -> Callable:
|
|
58
|
-
"""Call method to wrap the execute function.
|
|
59
|
-
|
|
60
|
-
Parameters
|
|
61
|
-
----------
|
|
62
|
-
execute : function
|
|
63
|
-
The execute function to be wrapped.
|
|
64
|
-
|
|
65
|
-
Returns
|
|
66
|
-
-------
|
|
67
|
-
function
|
|
68
|
-
The wrapped execute function.
|
|
69
|
-
"""
|
|
70
|
-
this = self
|
|
71
|
-
name = f"Legacy{self.name.title()}Source"
|
|
72
|
-
source = ".".join([execute.__module__, execute.__name__])
|
|
73
|
-
|
|
74
|
-
def execute_wrapper(self, dates) -> Any:
|
|
75
|
-
"""Wrapper method to call the execute function."""
|
|
76
|
-
|
|
77
|
-
args, kwargs = resolve(self.context, (self.args, self.kwargs))
|
|
78
|
-
|
|
79
|
-
try:
|
|
80
|
-
return execute(self.context, dates, *args, **kwargs)
|
|
81
|
-
except TypeError:
|
|
82
|
-
LOG.error(f"Error executing source {this.name} from {source}")
|
|
83
|
-
LOG.error(f"Function signature is: {inspect.signature(execute)}")
|
|
84
|
-
LOG.error(f"Arguments are: {args=}, {kwargs=}")
|
|
85
|
-
raise
|
|
86
|
-
|
|
87
|
-
klass = type(
|
|
88
|
-
name,
|
|
89
|
-
(LegacySource,),
|
|
90
|
-
{
|
|
91
|
-
"execute": execute_wrapper,
|
|
92
|
-
"_source": source,
|
|
93
|
-
},
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
source_registry.register(self.name)(klass)
|
|
97
|
-
|
|
98
|
-
return execute
|
|
45
|
+
def execute(self, dates: Any) -> Any:
|
|
46
|
+
return self._execute(self.context, dates, *self.args, **self.kwargs)
|