anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
|
@@ -10,14 +10,9 @@
|
|
|
10
10
|
import datetime
|
|
11
11
|
import logging
|
|
12
12
|
import warnings
|
|
13
|
+
from collections.abc import Generator
|
|
13
14
|
from copy import deepcopy
|
|
14
15
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import Generator
|
|
17
|
-
from typing import List
|
|
18
|
-
from typing import Optional
|
|
19
|
-
from typing import Tuple
|
|
20
|
-
from typing import Union
|
|
21
16
|
|
|
22
17
|
import earthkit.data as ekd
|
|
23
18
|
import numpy as np
|
|
@@ -25,12 +20,13 @@ from earthkit.data.core.temporary import temp_file
|
|
|
25
20
|
from earthkit.data.readers.grib.output import new_grib_output
|
|
26
21
|
from numpy.typing import NDArray
|
|
27
22
|
|
|
28
|
-
from anemoi.datasets.create.
|
|
23
|
+
from anemoi.datasets.create.sources import source_registry
|
|
29
24
|
|
|
30
|
-
from .legacy import
|
|
25
|
+
from .legacy import LegacySource
|
|
31
26
|
from .mars import mars
|
|
32
27
|
|
|
33
28
|
LOG = logging.getLogger(__name__)
|
|
29
|
+
MISSING_VALUE = 1e-38
|
|
34
30
|
|
|
35
31
|
|
|
36
32
|
def _member(field: Any) -> int:
|
|
@@ -66,10 +62,10 @@ class Accumulation:
|
|
|
66
62
|
date: int,
|
|
67
63
|
time: int,
|
|
68
64
|
number: int,
|
|
69
|
-
step:
|
|
65
|
+
step: list[int],
|
|
70
66
|
frequency: int,
|
|
71
|
-
accumulations_reset_frequency:
|
|
72
|
-
user_date:
|
|
67
|
+
accumulations_reset_frequency: int | None = None,
|
|
68
|
+
user_date: str | None = None,
|
|
73
69
|
**kwargs: Any,
|
|
74
70
|
) -> None:
|
|
75
71
|
"""Initialises an Accumulation instance.
|
|
@@ -103,10 +99,10 @@ class Accumulation:
|
|
|
103
99
|
self.time = time
|
|
104
100
|
self.steps = step
|
|
105
101
|
self.number = number
|
|
106
|
-
self.values:
|
|
102
|
+
self.values: NDArray[None] | None = None
|
|
107
103
|
self.seen = set()
|
|
108
|
-
self.startStep:
|
|
109
|
-
self.endStep:
|
|
104
|
+
self.startStep: int | None = None
|
|
105
|
+
self.endStep: int | None = None
|
|
110
106
|
self.done = False
|
|
111
107
|
self.frequency = frequency
|
|
112
108
|
self.accumulations_reset_frequency = accumulations_reset_frequency
|
|
@@ -114,7 +110,7 @@ class Accumulation:
|
|
|
114
110
|
self.user_date = user_date
|
|
115
111
|
|
|
116
112
|
@property
|
|
117
|
-
def key(self) ->
|
|
113
|
+
def key(self) -> tuple[str, int, int, list[int], int]:
|
|
118
114
|
"""Returns the key for the accumulation."""
|
|
119
115
|
return (self.param, self.date, self.time, self.steps, self.number)
|
|
120
116
|
|
|
@@ -173,6 +169,7 @@ class Accumulation:
|
|
|
173
169
|
# are used to store the end step
|
|
174
170
|
|
|
175
171
|
edition = template.metadata("edition")
|
|
172
|
+
assert np.all(self.values != MISSING_VALUE)
|
|
176
173
|
|
|
177
174
|
if edition == 1 and self.endStep > 254:
|
|
178
175
|
self.out.write(
|
|
@@ -181,6 +178,7 @@ class Accumulation:
|
|
|
181
178
|
stepType="instant",
|
|
182
179
|
step=self.endStep,
|
|
183
180
|
check_nans=True,
|
|
181
|
+
missing_value=MISSING_VALUE,
|
|
184
182
|
)
|
|
185
183
|
else:
|
|
186
184
|
self.out.write(
|
|
@@ -190,6 +188,7 @@ class Accumulation:
|
|
|
190
188
|
startStep=self.startStep,
|
|
191
189
|
endStep=self.endStep,
|
|
192
190
|
check_nans=True,
|
|
191
|
+
missing_value=MISSING_VALUE,
|
|
193
192
|
)
|
|
194
193
|
self.values = None
|
|
195
194
|
self.done = True
|
|
@@ -210,9 +209,6 @@ class Accumulation:
|
|
|
210
209
|
if step not in self.steps:
|
|
211
210
|
return
|
|
212
211
|
|
|
213
|
-
if not np.all(values >= 0):
|
|
214
|
-
warnings.warn(f"Negative values for {field}: {np.nanmin(values)} {np.nanmax(values)}")
|
|
215
|
-
|
|
216
212
|
assert not self.done, (self.key, step)
|
|
217
213
|
assert step not in self.seen, (self.key, step)
|
|
218
214
|
|
|
@@ -235,15 +231,15 @@ class Accumulation:
|
|
|
235
231
|
def mars_date_time_steps(
|
|
236
232
|
cls,
|
|
237
233
|
*,
|
|
238
|
-
dates:
|
|
234
|
+
dates: list[datetime.datetime],
|
|
239
235
|
step1: int,
|
|
240
236
|
step2: int,
|
|
241
|
-
frequency:
|
|
242
|
-
base_times:
|
|
237
|
+
frequency: int | None,
|
|
238
|
+
base_times: list[int],
|
|
243
239
|
adjust_step: bool,
|
|
244
|
-
accumulations_reset_frequency:
|
|
245
|
-
user_date:
|
|
246
|
-
) -> Generator[
|
|
240
|
+
accumulations_reset_frequency: int | None,
|
|
241
|
+
user_date: str | None,
|
|
242
|
+
) -> Generator[tuple[int, int, tuple[int, ...]], None, None]:
|
|
247
243
|
"""Generates MARS date-time steps.
|
|
248
244
|
|
|
249
245
|
Parameters
|
|
@@ -327,11 +323,11 @@ class Accumulation:
|
|
|
327
323
|
step1: int,
|
|
328
324
|
step2: int,
|
|
329
325
|
add_step: int,
|
|
330
|
-
frequency:
|
|
331
|
-
accumulations_reset_frequency:
|
|
332
|
-
user_date:
|
|
333
|
-
requested_date:
|
|
334
|
-
) ->
|
|
326
|
+
frequency: int | None,
|
|
327
|
+
accumulations_reset_frequency: int | None,
|
|
328
|
+
user_date: str | None,
|
|
329
|
+
requested_date: datetime.datetime | None = None,
|
|
330
|
+
) -> tuple[int, int, tuple[int, ...]]:
|
|
335
331
|
"""Generates a MARS date-time step.
|
|
336
332
|
|
|
337
333
|
Parameters
|
|
@@ -364,7 +360,7 @@ class Accumulation:
|
|
|
364
360
|
class AccumulationFromStart(Accumulation):
|
|
365
361
|
"""Class to handle data accumulation from the start of the forecast."""
|
|
366
362
|
|
|
367
|
-
def adjust_steps(self, startStep: int, endStep: int) ->
|
|
363
|
+
def adjust_steps(self, startStep: int, endStep: int) -> tuple[int, int]:
|
|
368
364
|
"""Adjusts the start and end steps.
|
|
369
365
|
|
|
370
366
|
Parameters
|
|
@@ -427,11 +423,11 @@ class AccumulationFromStart(Accumulation):
|
|
|
427
423
|
step1: int,
|
|
428
424
|
step2: int,
|
|
429
425
|
add_step: int,
|
|
430
|
-
frequency:
|
|
431
|
-
accumulations_reset_frequency:
|
|
432
|
-
user_date:
|
|
433
|
-
requested_date:
|
|
434
|
-
) ->
|
|
426
|
+
frequency: int | None,
|
|
427
|
+
accumulations_reset_frequency: int | None,
|
|
428
|
+
user_date: str | None,
|
|
429
|
+
requested_date: datetime.datetime | None = None,
|
|
430
|
+
) -> tuple[int, int, tuple[int, ...]]:
|
|
435
431
|
"""Generates a MARS date-time step.
|
|
436
432
|
|
|
437
433
|
Parameters
|
|
@@ -518,10 +514,10 @@ class AccumulationFromLastStep(Accumulation):
|
|
|
518
514
|
step2: int,
|
|
519
515
|
add_step: int,
|
|
520
516
|
frequency: int,
|
|
521
|
-
accumulations_reset_frequency:
|
|
522
|
-
user_date:
|
|
523
|
-
requested_date:
|
|
524
|
-
) ->
|
|
517
|
+
accumulations_reset_frequency: int | None,
|
|
518
|
+
user_date: str | None = None,
|
|
519
|
+
requested_date: datetime.datetime | None = None,
|
|
520
|
+
) -> tuple[int, int, tuple[int, ...]]:
|
|
525
521
|
"""Generates a MARS date-time step.
|
|
526
522
|
|
|
527
523
|
Parameters
|
|
@@ -568,7 +564,7 @@ class AccumulationFromLastStep(Accumulation):
|
|
|
568
564
|
class AccumulationFromLastReset(Accumulation):
|
|
569
565
|
"""Class to handle data accumulation from the last step of the forecast."""
|
|
570
566
|
|
|
571
|
-
def adjust_steps(self, startStep: int, endStep: int) ->
|
|
567
|
+
def adjust_steps(self, startStep: int, endStep: int) -> tuple[int, int]:
|
|
572
568
|
"""Adjusts the start and end steps.
|
|
573
569
|
|
|
574
570
|
Parameters
|
|
@@ -588,7 +584,7 @@ class AccumulationFromLastReset(Accumulation):
|
|
|
588
584
|
@classmethod
|
|
589
585
|
def _adjust_steps(
|
|
590
586
|
self, startStep: int, endStep: int, frequency: int, accumulations_reset_frequency: int
|
|
591
|
-
) ->
|
|
587
|
+
) -> tuple[int, int]:
|
|
592
588
|
"""Adjusts the start and end steps.
|
|
593
589
|
|
|
594
590
|
Parameters
|
|
@@ -620,7 +616,7 @@ class AccumulationFromLastReset(Accumulation):
|
|
|
620
616
|
base_date: datetime.datetime,
|
|
621
617
|
frequency: int,
|
|
622
618
|
accumulations_reset_frequency: int,
|
|
623
|
-
) ->
|
|
619
|
+
) -> tuple[int, int]:
|
|
624
620
|
"""Calculates the steps for accumulation.
|
|
625
621
|
|
|
626
622
|
Parameters
|
|
@@ -704,10 +700,10 @@ class AccumulationFromLastReset(Accumulation):
|
|
|
704
700
|
step2: int,
|
|
705
701
|
add_step: int,
|
|
706
702
|
frequency: int,
|
|
707
|
-
accumulations_reset_frequency:
|
|
708
|
-
user_date:
|
|
709
|
-
requested_date:
|
|
710
|
-
) ->
|
|
703
|
+
accumulations_reset_frequency: int | None,
|
|
704
|
+
user_date: str | None,
|
|
705
|
+
requested_date: datetime.datetime | None = None,
|
|
706
|
+
) -> tuple[int, int, tuple[int, ...]]:
|
|
711
707
|
"""Generates a MARS date-time step.
|
|
712
708
|
|
|
713
709
|
Parameters
|
|
@@ -776,15 +772,15 @@ def _identity(x: Any) -> Any:
|
|
|
776
772
|
|
|
777
773
|
def _compute_accumulations(
|
|
778
774
|
context: Any,
|
|
779
|
-
dates:
|
|
780
|
-
request:
|
|
781
|
-
user_accumulation_period:
|
|
782
|
-
data_accumulation_period:
|
|
783
|
-
accumulations_reset_frequency:
|
|
784
|
-
user_date:
|
|
775
|
+
dates: list[datetime.datetime],
|
|
776
|
+
request: dict[str, Any],
|
|
777
|
+
user_accumulation_period: int | tuple[int, int] = 6,
|
|
778
|
+
data_accumulation_period: int | None = None,
|
|
779
|
+
accumulations_reset_frequency: int | None = None,
|
|
780
|
+
user_date: str | None = None,
|
|
785
781
|
patch: Any = _identity,
|
|
786
|
-
base_times:
|
|
787
|
-
use_cdsapi_dataset:
|
|
782
|
+
base_times: list[int] | None = None,
|
|
783
|
+
use_cdsapi_dataset: str | None = None,
|
|
788
784
|
) -> Any:
|
|
789
785
|
"""Computes accumulations based on the provided parameters.
|
|
790
786
|
|
|
@@ -933,7 +929,7 @@ def _compute_accumulations(
|
|
|
933
929
|
return ds
|
|
934
930
|
|
|
935
931
|
|
|
936
|
-
def _to_list(x:
|
|
932
|
+
def _to_list(x: list[Any] | tuple[Any] | Any) -> list[Any]:
|
|
937
933
|
"""Converts the input to a list if it is not already a list or tuple.
|
|
938
934
|
|
|
939
935
|
Parameters
|
|
@@ -951,7 +947,7 @@ def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
|
|
|
951
947
|
return [x]
|
|
952
948
|
|
|
953
949
|
|
|
954
|
-
def _scda(request:
|
|
950
|
+
def _scda(request: dict[str, Any]) -> dict[str, Any]:
|
|
955
951
|
"""Modifies the request stream based on the time.
|
|
956
952
|
|
|
957
953
|
Parameters
|
|
@@ -971,97 +967,76 @@ def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
971
967
|
return request
|
|
972
968
|
|
|
973
969
|
|
|
974
|
-
@
|
|
975
|
-
|
|
976
|
-
context: Any, dates: List[datetime.datetime], use_cdsapi_dataset: Optional[str] = None, **request: Any
|
|
977
|
-
) -> Any:
|
|
978
|
-
"""Computes accumulations based on the provided context, dates, and request parameters.
|
|
979
|
-
|
|
980
|
-
Parameters
|
|
981
|
-
----------
|
|
982
|
-
context : Any
|
|
983
|
-
Context for the computation.
|
|
984
|
-
dates : List[datetime.datetime]
|
|
985
|
-
List of dates.
|
|
986
|
-
use_cdsapi_dataset : Optional[str], optional
|
|
987
|
-
CDSAPI dataset to use. Defaults to None.
|
|
988
|
-
**request : Any
|
|
989
|
-
Additional request parameters.
|
|
990
|
-
|
|
991
|
-
Returns
|
|
992
|
-
-------
|
|
993
|
-
Any
|
|
994
|
-
The computed accumulations.
|
|
995
|
-
"""
|
|
996
|
-
|
|
997
|
-
if (
|
|
998
|
-
request.get("class") == "ea"
|
|
999
|
-
and request.get("stream", "oper") == "oper"
|
|
1000
|
-
and request.get("accumulation_period") == 24
|
|
1001
|
-
):
|
|
1002
|
-
from .accumulations2 import accumulations as accumulations2
|
|
1003
|
-
|
|
1004
|
-
LOG.warning(
|
|
1005
|
-
"🧪️ Experimental features: Using accumulations2, because class=ea stream=oper and accumulation_period=24"
|
|
1006
|
-
)
|
|
1007
|
-
return accumulations2(context, dates, **request)
|
|
1008
|
-
|
|
1009
|
-
_to_list(request["param"])
|
|
1010
|
-
class_ = request.get("class", "od")
|
|
1011
|
-
stream = request.get("stream", "oper")
|
|
1012
|
-
|
|
1013
|
-
user_accumulation_period = request.pop("accumulation_period", 6)
|
|
1014
|
-
accumulations_reset_frequency = request.pop("accumulations_reset_frequency", None)
|
|
1015
|
-
user_date = request.pop("date", None)
|
|
1016
|
-
|
|
1017
|
-
# If `data_accumulation_period` is not set, this means that the accumulations are from the start
|
|
1018
|
-
# of the forecast.
|
|
1019
|
-
|
|
1020
|
-
KWARGS = {
|
|
1021
|
-
("od", "oper"): dict(patch=_scda),
|
|
1022
|
-
("od", "elda"): dict(base_times=(6, 18)),
|
|
1023
|
-
("od", "enfo"): dict(base_times=(0, 6, 12, 18)),
|
|
1024
|
-
("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
|
|
1025
|
-
("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
|
|
1026
|
-
("rr", "oper"): dict(base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
|
|
1027
|
-
("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
|
|
1028
|
-
}
|
|
1029
|
-
|
|
1030
|
-
kwargs = KWARGS.get((class_, stream), {})
|
|
1031
|
-
|
|
1032
|
-
context.trace("🌧️", f"accumulations {request} {user_accumulation_period} {kwargs}")
|
|
1033
|
-
|
|
1034
|
-
return _compute_accumulations(
|
|
1035
|
-
context,
|
|
1036
|
-
dates,
|
|
1037
|
-
request,
|
|
1038
|
-
user_accumulation_period=user_accumulation_period,
|
|
1039
|
-
accumulations_reset_frequency=accumulations_reset_frequency,
|
|
1040
|
-
use_cdsapi_dataset=use_cdsapi_dataset,
|
|
1041
|
-
user_date=user_date,
|
|
1042
|
-
**kwargs,
|
|
1043
|
-
)
|
|
970
|
+
@source_registry.register("accumulations")
|
|
971
|
+
class AccumulationsSource(LegacySource):
|
|
1044
972
|
|
|
973
|
+
@staticmethod
|
|
974
|
+
def _execute(
|
|
975
|
+
context: Any, dates: list[datetime.datetime], use_cdsapi_dataset: str | None = None, **request: Any
|
|
976
|
+
) -> Any:
|
|
977
|
+
"""Computes accumulations based on the provided context, dates, and request parameters.
|
|
1045
978
|
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
979
|
+
Parameters
|
|
980
|
+
----------
|
|
981
|
+
context : Any
|
|
982
|
+
Context for the computation.
|
|
983
|
+
dates : List[datetime.datetime]
|
|
984
|
+
List of dates.
|
|
985
|
+
use_cdsapi_dataset : Optional[str], optional
|
|
986
|
+
CDSAPI dataset to use. Defaults to None.
|
|
987
|
+
**request : Any
|
|
988
|
+
Additional request parameters.
|
|
1050
989
|
|
|
1051
|
-
|
|
990
|
+
Returns
|
|
991
|
+
-------
|
|
992
|
+
Any
|
|
993
|
+
The computed accumulations.
|
|
1052
994
|
"""
|
|
1053
|
-
class: ea
|
|
1054
|
-
expver: '0001'
|
|
1055
|
-
grid: 20./20.
|
|
1056
|
-
levtype: sfc
|
|
1057
|
-
# number: [0, 1]
|
|
1058
|
-
# stream: enda
|
|
1059
|
-
param: [cp, tp]
|
|
1060
|
-
# accumulation_period: 6h
|
|
1061
|
-
"""
|
|
1062
|
-
)
|
|
1063
|
-
dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
|
|
1064
|
-
dates = to_datetime_list(dates)
|
|
1065
995
|
|
|
1066
|
-
|
|
1067
|
-
|
|
996
|
+
if (
|
|
997
|
+
request.get("class") == "ea"
|
|
998
|
+
and request.get("stream", "oper") == "oper"
|
|
999
|
+
and request.get("accumulation_period") == 24
|
|
1000
|
+
):
|
|
1001
|
+
from .accumulations2 import Accumulations2Source
|
|
1002
|
+
|
|
1003
|
+
LOG.warning(
|
|
1004
|
+
"🧪️ Experimental features: Using accumulations2, because class=ea stream=oper and accumulation_period=24"
|
|
1005
|
+
)
|
|
1006
|
+
return Accumulations2Source._execute(context, dates, **request)
|
|
1007
|
+
|
|
1008
|
+
_to_list(request["param"])
|
|
1009
|
+
class_ = request.get("class", "od")
|
|
1010
|
+
stream = request.get("stream", "oper")
|
|
1011
|
+
|
|
1012
|
+
user_accumulation_period = request.pop("accumulation_period", 6)
|
|
1013
|
+
accumulations_reset_frequency = request.pop("accumulations_reset_frequency", None)
|
|
1014
|
+
user_date = request.pop("date", None)
|
|
1015
|
+
|
|
1016
|
+
# If `data_accumulation_period` is not set, this means that the accumulations are from the start
|
|
1017
|
+
# of the forecast.
|
|
1018
|
+
|
|
1019
|
+
KWARGS = {
|
|
1020
|
+
("od", "oper"): dict(patch=_scda),
|
|
1021
|
+
("od", "elda"): dict(base_times=(6, 18)),
|
|
1022
|
+
("od", "enfo"): dict(base_times=(0, 6, 12, 18)),
|
|
1023
|
+
("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
|
|
1024
|
+
("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
|
|
1025
|
+
("rr", "oper"): dict(base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
|
|
1026
|
+
("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
kwargs = KWARGS.get((class_, stream), {})
|
|
1030
|
+
|
|
1031
|
+
context.trace("🌧️", f"accumulations {request} {user_accumulation_period} {kwargs}")
|
|
1032
|
+
|
|
1033
|
+
return _compute_accumulations(
|
|
1034
|
+
context,
|
|
1035
|
+
dates,
|
|
1036
|
+
request,
|
|
1037
|
+
user_accumulation_period=user_accumulation_period,
|
|
1038
|
+
accumulations_reset_frequency=accumulations_reset_frequency,
|
|
1039
|
+
use_cdsapi_dataset=use_cdsapi_dataset,
|
|
1040
|
+
user_date=user_date,
|
|
1041
|
+
**kwargs,
|
|
1042
|
+
)
|
|
@@ -12,20 +12,16 @@ import logging
|
|
|
12
12
|
from abc import abstractmethod
|
|
13
13
|
from copy import deepcopy
|
|
14
14
|
from typing import Any
|
|
15
|
-
from typing import Dict
|
|
16
|
-
from typing import List
|
|
17
|
-
from typing import Tuple
|
|
18
|
-
from typing import Union
|
|
19
15
|
|
|
20
16
|
import earthkit.data as ekd
|
|
21
17
|
import numpy as np
|
|
22
18
|
from earthkit.data.core.temporary import temp_file
|
|
23
19
|
from earthkit.data.readers.grib.output import new_grib_output
|
|
24
20
|
|
|
21
|
+
from anemoi.datasets.create.sources import source_registry
|
|
25
22
|
from anemoi.datasets.create.sources.mars import mars
|
|
26
|
-
from anemoi.datasets.create.utils import to_datetime_list
|
|
27
23
|
|
|
28
|
-
from .legacy import
|
|
24
|
+
from .legacy import LegacySource
|
|
29
25
|
|
|
30
26
|
LOG = logging.getLogger(__name__)
|
|
31
27
|
|
|
@@ -477,8 +473,8 @@ class Accumulator:
|
|
|
477
473
|
|
|
478
474
|
def _compute_accumulations(
|
|
479
475
|
context: Any,
|
|
480
|
-
dates:
|
|
481
|
-
request:
|
|
476
|
+
dates: list[datetime.datetime],
|
|
477
|
+
request: dict[str, Any],
|
|
482
478
|
user_accumulation_period: datetime.timedelta,
|
|
483
479
|
# data_accumulation_period: Optional[int] = None,
|
|
484
480
|
# patch: Any = _identity,
|
|
@@ -565,7 +561,7 @@ def _compute_accumulations(
|
|
|
565
561
|
return ds
|
|
566
562
|
|
|
567
563
|
|
|
568
|
-
def _to_list(x:
|
|
564
|
+
def _to_list(x: list[Any] | tuple[Any] | Any) -> list[Any]:
|
|
569
565
|
"""Converts the input to a list if it is not already a list or tuple.
|
|
570
566
|
|
|
571
567
|
Parameters
|
|
@@ -583,7 +579,7 @@ def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
|
|
|
583
579
|
return [x]
|
|
584
580
|
|
|
585
581
|
|
|
586
|
-
def _scda(request:
|
|
582
|
+
def _scda(request: dict[str, Any]) -> dict[str, Any]:
|
|
587
583
|
"""Modifies the request stream based on the time.
|
|
588
584
|
|
|
589
585
|
Parameters
|
|
@@ -603,49 +599,20 @@ def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
603
599
|
return request
|
|
604
600
|
|
|
605
601
|
|
|
606
|
-
@
|
|
607
|
-
|
|
608
|
-
_to_list(request["param"])
|
|
609
|
-
user_accumulation_period = request.pop("accumulation_period", 6)
|
|
610
|
-
user_accumulation_period = datetime.timedelta(hours=user_accumulation_period)
|
|
602
|
+
@source_registry.register("accumulations2")
|
|
603
|
+
class Accumulations2Source(LegacySource):
|
|
611
604
|
|
|
612
|
-
|
|
605
|
+
@staticmethod
|
|
606
|
+
def _execute(context, dates, **request):
|
|
607
|
+
_to_list(request["param"])
|
|
608
|
+
user_accumulation_period = request.pop("accumulation_period", 6)
|
|
609
|
+
user_accumulation_period = datetime.timedelta(hours=user_accumulation_period)
|
|
613
610
|
|
|
614
|
-
|
|
615
|
-
context,
|
|
616
|
-
dates,
|
|
617
|
-
request,
|
|
618
|
-
user_accumulation_period=user_accumulation_period,
|
|
619
|
-
)
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
execute = accumulations
|
|
623
|
-
|
|
624
|
-
if __name__ == "__main__":
|
|
625
|
-
import yaml
|
|
626
|
-
|
|
627
|
-
config = yaml.safe_load(
|
|
628
|
-
"""
|
|
629
|
-
class: ea
|
|
630
|
-
expver: '0001'
|
|
631
|
-
grid: 20./20.
|
|
632
|
-
levtype: sfc
|
|
633
|
-
# number: [0, 1]
|
|
634
|
-
# stream: enda
|
|
635
|
-
param: [cp, tp]
|
|
636
|
-
# accumulation_period: 6h
|
|
637
|
-
accumulation_period: 2
|
|
638
|
-
"""
|
|
639
|
-
)
|
|
640
|
-
dates = yaml.safe_load("[2022-12-31 00:00, 2022-12-31 06:00]")
|
|
641
|
-
# dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
|
|
642
|
-
dates = to_datetime_list(dates)
|
|
643
|
-
|
|
644
|
-
class Context:
|
|
645
|
-
use_grib_paramid = True
|
|
611
|
+
context.trace("🌧️", f"accumulations {request} {user_accumulation_period}")
|
|
646
612
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
613
|
+
return _compute_accumulations(
|
|
614
|
+
context,
|
|
615
|
+
dates,
|
|
616
|
+
request,
|
|
617
|
+
user_accumulation_period=user_accumulation_period,
|
|
618
|
+
)
|
|
@@ -9,65 +9,69 @@
|
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
|
|
12
|
-
from .
|
|
12
|
+
from . import source_registry
|
|
13
|
+
from .legacy import LegacySource
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
@
|
|
16
|
-
|
|
17
|
-
import earthkit.data as ekd
|
|
16
|
+
@source_registry.register("anemoi_dataset")
|
|
17
|
+
class AnemoiDatasetSource(LegacySource):
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
@staticmethod
|
|
20
|
+
def _execute(context, dates, params=None, **kwargs):
|
|
21
|
+
import earthkit.data as ekd
|
|
20
22
|
|
|
21
|
-
|
|
22
|
-
# dates_to_index = {date: i for i, date in enumerate(ds.dates)}
|
|
23
|
+
from anemoi.datasets import open_dataset
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
idx = np.where(ds.dates == date)[0]
|
|
27
|
-
if len(idx) == 0:
|
|
28
|
-
continue
|
|
29
|
-
indices.append((int(idx[0]), date))
|
|
25
|
+
ds = open_dataset(**kwargs)
|
|
26
|
+
# dates_to_index = {date: i for i, date in enumerate(ds.dates)}
|
|
30
27
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
indices = []
|
|
29
|
+
for date in dates:
|
|
30
|
+
idx = np.where(ds.dates == date)[0]
|
|
31
|
+
if len(idx) == 0:
|
|
32
|
+
continue
|
|
33
|
+
indices.append((int(idx[0]), date))
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
params
|
|
35
|
+
vars = ds.variables
|
|
36
|
+
if params is None:
|
|
37
|
+
params = vars
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
if not isinstance(params, (list, tuple, set)):
|
|
40
|
+
params = [params]
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
longitudes = ds.longitudes
|
|
42
|
+
params = set(params)
|
|
43
|
+
results = []
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
ensemble = ds.shape[2] > 1
|
|
46
|
+
latitudes = ds.latitudes
|
|
47
|
+
longitudes = ds.longitudes
|
|
46
48
|
|
|
47
|
-
|
|
49
|
+
for idx, date in indices:
|
|
48
50
|
|
|
49
|
-
|
|
51
|
+
metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes)
|
|
50
52
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
for j, y in enumerate(ds[idx]):
|
|
54
|
+
|
|
55
|
+
param = vars[j]
|
|
56
|
+
if param not in params:
|
|
57
|
+
continue
|
|
54
58
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
# metadata['name'] = param
|
|
60
|
+
# metadata['param_level'] = param
|
|
61
|
+
metadata["param"] = param
|
|
58
62
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
63
|
+
for k, e in enumerate(y):
|
|
64
|
+
if ensemble:
|
|
65
|
+
metadata["number"] = k + 1
|
|
62
66
|
|
|
63
|
-
|
|
67
|
+
metadata["values"] = e
|
|
64
68
|
|
|
65
|
-
|
|
69
|
+
results.append(metadata.copy())
|
|
66
70
|
|
|
67
|
-
|
|
71
|
+
print(results[0].keys())
|
|
68
72
|
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
# "list-of-dicts" does support resolution
|
|
74
|
+
results = ekd.from_source("list-of-dicts", results)
|
|
71
75
|
|
|
72
|
-
|
|
73
|
-
|
|
76
|
+
# return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results])
|
|
77
|
+
return results
|