anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -10,14 +10,9 @@
10
10
  import datetime
11
11
  import logging
12
12
  import warnings
13
+ from collections.abc import Generator
13
14
  from copy import deepcopy
14
15
  from typing import Any
15
- from typing import Dict
16
- from typing import Generator
17
- from typing import List
18
- from typing import Optional
19
- from typing import Tuple
20
- from typing import Union
21
16
 
22
17
  import earthkit.data as ekd
23
18
  import numpy as np
@@ -25,12 +20,13 @@ from earthkit.data.core.temporary import temp_file
25
20
  from earthkit.data.readers.grib.output import new_grib_output
26
21
  from numpy.typing import NDArray
27
22
 
28
- from anemoi.datasets.create.utils import to_datetime_list
23
+ from anemoi.datasets.create.sources import source_registry
29
24
 
30
- from .legacy import legacy_source
25
+ from .legacy import LegacySource
31
26
  from .mars import mars
32
27
 
33
28
  LOG = logging.getLogger(__name__)
29
+ MISSING_VALUE = 1e-38
34
30
 
35
31
 
36
32
  def _member(field: Any) -> int:
@@ -66,10 +62,10 @@ class Accumulation:
66
62
  date: int,
67
63
  time: int,
68
64
  number: int,
69
- step: List[int],
65
+ step: list[int],
70
66
  frequency: int,
71
- accumulations_reset_frequency: Optional[int] = None,
72
- user_date: Optional[str] = None,
67
+ accumulations_reset_frequency: int | None = None,
68
+ user_date: str | None = None,
73
69
  **kwargs: Any,
74
70
  ) -> None:
75
71
  """Initialises an Accumulation instance.
@@ -103,10 +99,10 @@ class Accumulation:
103
99
  self.time = time
104
100
  self.steps = step
105
101
  self.number = number
106
- self.values: Optional[NDArray[None]] = None
102
+ self.values: NDArray[None] | None = None
107
103
  self.seen = set()
108
- self.startStep: Optional[int] = None
109
- self.endStep: Optional[int] = None
104
+ self.startStep: int | None = None
105
+ self.endStep: int | None = None
110
106
  self.done = False
111
107
  self.frequency = frequency
112
108
  self.accumulations_reset_frequency = accumulations_reset_frequency
@@ -114,7 +110,7 @@ class Accumulation:
114
110
  self.user_date = user_date
115
111
 
116
112
  @property
117
- def key(self) -> Tuple[str, int, int, List[int], int]:
113
+ def key(self) -> tuple[str, int, int, list[int], int]:
118
114
  """Returns the key for the accumulation."""
119
115
  return (self.param, self.date, self.time, self.steps, self.number)
120
116
 
@@ -173,6 +169,7 @@ class Accumulation:
173
169
  # are used to store the end step
174
170
 
175
171
  edition = template.metadata("edition")
172
+ assert np.all(self.values != MISSING_VALUE)
176
173
 
177
174
  if edition == 1 and self.endStep > 254:
178
175
  self.out.write(
@@ -181,6 +178,7 @@ class Accumulation:
181
178
  stepType="instant",
182
179
  step=self.endStep,
183
180
  check_nans=True,
181
+ missing_value=MISSING_VALUE,
184
182
  )
185
183
  else:
186
184
  self.out.write(
@@ -190,6 +188,7 @@ class Accumulation:
190
188
  startStep=self.startStep,
191
189
  endStep=self.endStep,
192
190
  check_nans=True,
191
+ missing_value=MISSING_VALUE,
193
192
  )
194
193
  self.values = None
195
194
  self.done = True
@@ -210,9 +209,6 @@ class Accumulation:
210
209
  if step not in self.steps:
211
210
  return
212
211
 
213
- if not np.all(values >= 0):
214
- warnings.warn(f"Negative values for {field}: {np.nanmin(values)} {np.nanmax(values)}")
215
-
216
212
  assert not self.done, (self.key, step)
217
213
  assert step not in self.seen, (self.key, step)
218
214
 
@@ -235,15 +231,15 @@ class Accumulation:
235
231
  def mars_date_time_steps(
236
232
  cls,
237
233
  *,
238
- dates: List[datetime.datetime],
234
+ dates: list[datetime.datetime],
239
235
  step1: int,
240
236
  step2: int,
241
- frequency: Optional[int],
242
- base_times: List[int],
237
+ frequency: int | None,
238
+ base_times: list[int],
243
239
  adjust_step: bool,
244
- accumulations_reset_frequency: Optional[int],
245
- user_date: Optional[str],
246
- ) -> Generator[Tuple[int, int, Tuple[int, ...]], None, None]:
240
+ accumulations_reset_frequency: int | None,
241
+ user_date: str | None,
242
+ ) -> Generator[tuple[int, int, tuple[int, ...]], None, None]:
247
243
  """Generates MARS date-time steps.
248
244
 
249
245
  Parameters
@@ -327,11 +323,11 @@ class Accumulation:
327
323
  step1: int,
328
324
  step2: int,
329
325
  add_step: int,
330
- frequency: Optional[int],
331
- accumulations_reset_frequency: Optional[int],
332
- user_date: Optional[str],
333
- requested_date: Optional[datetime.datetime] = None,
334
- ) -> Tuple[int, int, Tuple[int, ...]]:
326
+ frequency: int | None,
327
+ accumulations_reset_frequency: int | None,
328
+ user_date: str | None,
329
+ requested_date: datetime.datetime | None = None,
330
+ ) -> tuple[int, int, tuple[int, ...]]:
335
331
  """Generates a MARS date-time step.
336
332
 
337
333
  Parameters
@@ -364,7 +360,7 @@ class Accumulation:
364
360
  class AccumulationFromStart(Accumulation):
365
361
  """Class to handle data accumulation from the start of the forecast."""
366
362
 
367
- def adjust_steps(self, startStep: int, endStep: int) -> Tuple[int, int]:
363
+ def adjust_steps(self, startStep: int, endStep: int) -> tuple[int, int]:
368
364
  """Adjusts the start and end steps.
369
365
 
370
366
  Parameters
@@ -427,11 +423,11 @@ class AccumulationFromStart(Accumulation):
427
423
  step1: int,
428
424
  step2: int,
429
425
  add_step: int,
430
- frequency: Optional[int],
431
- accumulations_reset_frequency: Optional[int],
432
- user_date: Optional[str],
433
- requested_date: Optional[datetime.datetime] = None,
434
- ) -> Tuple[int, int, Tuple[int, ...]]:
426
+ frequency: int | None,
427
+ accumulations_reset_frequency: int | None,
428
+ user_date: str | None,
429
+ requested_date: datetime.datetime | None = None,
430
+ ) -> tuple[int, int, tuple[int, ...]]:
435
431
  """Generates a MARS date-time step.
436
432
 
437
433
  Parameters
@@ -518,10 +514,10 @@ class AccumulationFromLastStep(Accumulation):
518
514
  step2: int,
519
515
  add_step: int,
520
516
  frequency: int,
521
- accumulations_reset_frequency: Optional[int],
522
- user_date: Optional[str] = None,
523
- requested_date: Optional[datetime.datetime] = None,
524
- ) -> Tuple[int, int, Tuple[int, ...]]:
517
+ accumulations_reset_frequency: int | None,
518
+ user_date: str | None = None,
519
+ requested_date: datetime.datetime | None = None,
520
+ ) -> tuple[int, int, tuple[int, ...]]:
525
521
  """Generates a MARS date-time step.
526
522
 
527
523
  Parameters
@@ -568,7 +564,7 @@ class AccumulationFromLastStep(Accumulation):
568
564
  class AccumulationFromLastReset(Accumulation):
569
565
  """Class to handle data accumulation from the last step of the forecast."""
570
566
 
571
- def adjust_steps(self, startStep: int, endStep: int) -> Tuple[int, int]:
567
+ def adjust_steps(self, startStep: int, endStep: int) -> tuple[int, int]:
572
568
  """Adjusts the start and end steps.
573
569
 
574
570
  Parameters
@@ -588,7 +584,7 @@ class AccumulationFromLastReset(Accumulation):
588
584
  @classmethod
589
585
  def _adjust_steps(
590
586
  self, startStep: int, endStep: int, frequency: int, accumulations_reset_frequency: int
591
- ) -> Tuple[int, int]:
587
+ ) -> tuple[int, int]:
592
588
  """Adjusts the start and end steps.
593
589
 
594
590
  Parameters
@@ -620,7 +616,7 @@ class AccumulationFromLastReset(Accumulation):
620
616
  base_date: datetime.datetime,
621
617
  frequency: int,
622
618
  accumulations_reset_frequency: int,
623
- ) -> Tuple[int, int]:
619
+ ) -> tuple[int, int]:
624
620
  """Calculates the steps for accumulation.
625
621
 
626
622
  Parameters
@@ -704,10 +700,10 @@ class AccumulationFromLastReset(Accumulation):
704
700
  step2: int,
705
701
  add_step: int,
706
702
  frequency: int,
707
- accumulations_reset_frequency: Optional[int],
708
- user_date: Optional[str],
709
- requested_date: Optional[datetime.datetime] = None,
710
- ) -> Tuple[int, int, Tuple[int, ...]]:
703
+ accumulations_reset_frequency: int | None,
704
+ user_date: str | None,
705
+ requested_date: datetime.datetime | None = None,
706
+ ) -> tuple[int, int, tuple[int, ...]]:
711
707
  """Generates a MARS date-time step.
712
708
 
713
709
  Parameters
@@ -776,15 +772,15 @@ def _identity(x: Any) -> Any:
776
772
 
777
773
  def _compute_accumulations(
778
774
  context: Any,
779
- dates: List[datetime.datetime],
780
- request: Dict[str, Any],
781
- user_accumulation_period: Union[int, Tuple[int, int]] = 6,
782
- data_accumulation_period: Optional[int] = None,
783
- accumulations_reset_frequency: Optional[int] = None,
784
- user_date: Optional[str] = None,
775
+ dates: list[datetime.datetime],
776
+ request: dict[str, Any],
777
+ user_accumulation_period: int | tuple[int, int] = 6,
778
+ data_accumulation_period: int | None = None,
779
+ accumulations_reset_frequency: int | None = None,
780
+ user_date: str | None = None,
785
781
  patch: Any = _identity,
786
- base_times: Optional[List[int]] = None,
787
- use_cdsapi_dataset: Optional[str] = None,
782
+ base_times: list[int] | None = None,
783
+ use_cdsapi_dataset: str | None = None,
788
784
  ) -> Any:
789
785
  """Computes accumulations based on the provided parameters.
790
786
 
@@ -933,7 +929,7 @@ def _compute_accumulations(
933
929
  return ds
934
930
 
935
931
 
936
- def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
932
+ def _to_list(x: list[Any] | tuple[Any] | Any) -> list[Any]:
937
933
  """Converts the input to a list if it is not already a list or tuple.
938
934
 
939
935
  Parameters
@@ -951,7 +947,7 @@ def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
951
947
  return [x]
952
948
 
953
949
 
954
- def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
950
+ def _scda(request: dict[str, Any]) -> dict[str, Any]:
955
951
  """Modifies the request stream based on the time.
956
952
 
957
953
  Parameters
@@ -971,97 +967,76 @@ def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
971
967
  return request
972
968
 
973
969
 
974
- @legacy_source(__file__)
975
- def accumulations(
976
- context: Any, dates: List[datetime.datetime], use_cdsapi_dataset: Optional[str] = None, **request: Any
977
- ) -> Any:
978
- """Computes accumulations based on the provided context, dates, and request parameters.
979
-
980
- Parameters
981
- ----------
982
- context : Any
983
- Context for the computation.
984
- dates : List[datetime.datetime]
985
- List of dates.
986
- use_cdsapi_dataset : Optional[str], optional
987
- CDSAPI dataset to use. Defaults to None.
988
- **request : Any
989
- Additional request parameters.
990
-
991
- Returns
992
- -------
993
- Any
994
- The computed accumulations.
995
- """
996
-
997
- if (
998
- request.get("class") == "ea"
999
- and request.get("stream", "oper") == "oper"
1000
- and request.get("accumulation_period") == 24
1001
- ):
1002
- from .accumulations2 import accumulations as accumulations2
1003
-
1004
- LOG.warning(
1005
- "🧪️ Experimental features: Using accumulations2, because class=ea stream=oper and accumulation_period=24"
1006
- )
1007
- return accumulations2(context, dates, **request)
1008
-
1009
- _to_list(request["param"])
1010
- class_ = request.get("class", "od")
1011
- stream = request.get("stream", "oper")
1012
-
1013
- user_accumulation_period = request.pop("accumulation_period", 6)
1014
- accumulations_reset_frequency = request.pop("accumulations_reset_frequency", None)
1015
- user_date = request.pop("date", None)
1016
-
1017
- # If `data_accumulation_period` is not set, this means that the accumulations are from the start
1018
- # of the forecast.
1019
-
1020
- KWARGS = {
1021
- ("od", "oper"): dict(patch=_scda),
1022
- ("od", "elda"): dict(base_times=(6, 18)),
1023
- ("od", "enfo"): dict(base_times=(0, 6, 12, 18)),
1024
- ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
1025
- ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
1026
- ("rr", "oper"): dict(base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
1027
- ("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
1028
- }
1029
-
1030
- kwargs = KWARGS.get((class_, stream), {})
1031
-
1032
- context.trace("🌧️", f"accumulations {request} {user_accumulation_period} {kwargs}")
1033
-
1034
- return _compute_accumulations(
1035
- context,
1036
- dates,
1037
- request,
1038
- user_accumulation_period=user_accumulation_period,
1039
- accumulations_reset_frequency=accumulations_reset_frequency,
1040
- use_cdsapi_dataset=use_cdsapi_dataset,
1041
- user_date=user_date,
1042
- **kwargs,
1043
- )
970
+ @source_registry.register("accumulations")
971
+ class AccumulationsSource(LegacySource):
1044
972
 
973
+ @staticmethod
974
+ def _execute(
975
+ context: Any, dates: list[datetime.datetime], use_cdsapi_dataset: str | None = None, **request: Any
976
+ ) -> Any:
977
+ """Computes accumulations based on the provided context, dates, and request parameters.
1045
978
 
1046
- execute = accumulations
1047
-
1048
- if __name__ == "__main__":
1049
- import yaml
979
+ Parameters
980
+ ----------
981
+ context : Any
982
+ Context for the computation.
983
+ dates : List[datetime.datetime]
984
+ List of dates.
985
+ use_cdsapi_dataset : Optional[str], optional
986
+ CDSAPI dataset to use. Defaults to None.
987
+ **request : Any
988
+ Additional request parameters.
1050
989
 
1051
- config = yaml.safe_load(
990
+ Returns
991
+ -------
992
+ Any
993
+ The computed accumulations.
1052
994
  """
1053
- class: ea
1054
- expver: '0001'
1055
- grid: 20./20.
1056
- levtype: sfc
1057
- # number: [0, 1]
1058
- # stream: enda
1059
- param: [cp, tp]
1060
- # accumulation_period: 6h
1061
- """
1062
- )
1063
- dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
1064
- dates = to_datetime_list(dates)
1065
995
 
1066
- for f in accumulations(None, dates, **config):
1067
- print(f, f.to_numpy().mean())
996
+ if (
997
+ request.get("class") == "ea"
998
+ and request.get("stream", "oper") == "oper"
999
+ and request.get("accumulation_period") == 24
1000
+ ):
1001
+ from .accumulations2 import Accumulations2Source
1002
+
1003
+ LOG.warning(
1004
+ "🧪️ Experimental features: Using accumulations2, because class=ea stream=oper and accumulation_period=24"
1005
+ )
1006
+ return Accumulations2Source._execute(context, dates, **request)
1007
+
1008
+ _to_list(request["param"])
1009
+ class_ = request.get("class", "od")
1010
+ stream = request.get("stream", "oper")
1011
+
1012
+ user_accumulation_period = request.pop("accumulation_period", 6)
1013
+ accumulations_reset_frequency = request.pop("accumulations_reset_frequency", None)
1014
+ user_date = request.pop("date", None)
1015
+
1016
+ # If `data_accumulation_period` is not set, this means that the accumulations are from the start
1017
+ # of the forecast.
1018
+
1019
+ KWARGS = {
1020
+ ("od", "oper"): dict(patch=_scda),
1021
+ ("od", "elda"): dict(base_times=(6, 18)),
1022
+ ("od", "enfo"): dict(base_times=(0, 6, 12, 18)),
1023
+ ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
1024
+ ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
1025
+ ("rr", "oper"): dict(base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
1026
+ ("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
1027
+ }
1028
+
1029
+ kwargs = KWARGS.get((class_, stream), {})
1030
+
1031
+ context.trace("🌧️", f"accumulations {request} {user_accumulation_period} {kwargs}")
1032
+
1033
+ return _compute_accumulations(
1034
+ context,
1035
+ dates,
1036
+ request,
1037
+ user_accumulation_period=user_accumulation_period,
1038
+ accumulations_reset_frequency=accumulations_reset_frequency,
1039
+ use_cdsapi_dataset=use_cdsapi_dataset,
1040
+ user_date=user_date,
1041
+ **kwargs,
1042
+ )
@@ -12,20 +12,16 @@ import logging
12
12
  from abc import abstractmethod
13
13
  from copy import deepcopy
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Tuple
18
- from typing import Union
19
15
 
20
16
  import earthkit.data as ekd
21
17
  import numpy as np
22
18
  from earthkit.data.core.temporary import temp_file
23
19
  from earthkit.data.readers.grib.output import new_grib_output
24
20
 
21
+ from anemoi.datasets.create.sources import source_registry
25
22
  from anemoi.datasets.create.sources.mars import mars
26
- from anemoi.datasets.create.utils import to_datetime_list
27
23
 
28
- from .legacy import legacy_source
24
+ from .legacy import LegacySource
29
25
 
30
26
  LOG = logging.getLogger(__name__)
31
27
 
@@ -477,8 +473,8 @@ class Accumulator:
477
473
 
478
474
  def _compute_accumulations(
479
475
  context: Any,
480
- dates: List[datetime.datetime],
481
- request: Dict[str, Any],
476
+ dates: list[datetime.datetime],
477
+ request: dict[str, Any],
482
478
  user_accumulation_period: datetime.timedelta,
483
479
  # data_accumulation_period: Optional[int] = None,
484
480
  # patch: Any = _identity,
@@ -565,7 +561,7 @@ def _compute_accumulations(
565
561
  return ds
566
562
 
567
563
 
568
- def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
564
+ def _to_list(x: list[Any] | tuple[Any] | Any) -> list[Any]:
569
565
  """Converts the input to a list if it is not already a list or tuple.
570
566
 
571
567
  Parameters
@@ -583,7 +579,7 @@ def _to_list(x: Union[List[Any], Tuple[Any], Any]) -> List[Any]:
583
579
  return [x]
584
580
 
585
581
 
586
- def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
582
+ def _scda(request: dict[str, Any]) -> dict[str, Any]:
587
583
  """Modifies the request stream based on the time.
588
584
 
589
585
  Parameters
@@ -603,49 +599,20 @@ def _scda(request: Dict[str, Any]) -> Dict[str, Any]:
603
599
  return request
604
600
 
605
601
 
606
- @legacy_source(__file__)
607
- def accumulations(context, dates, **request):
608
- _to_list(request["param"])
609
- user_accumulation_period = request.pop("accumulation_period", 6)
610
- user_accumulation_period = datetime.timedelta(hours=user_accumulation_period)
602
+ @source_registry.register("accumulations2")
603
+ class Accumulations2Source(LegacySource):
611
604
 
612
- context.trace("🌧️", f"accumulations {request} {user_accumulation_period}")
605
+ @staticmethod
606
+ def _execute(context, dates, **request):
607
+ _to_list(request["param"])
608
+ user_accumulation_period = request.pop("accumulation_period", 6)
609
+ user_accumulation_period = datetime.timedelta(hours=user_accumulation_period)
613
610
 
614
- return _compute_accumulations(
615
- context,
616
- dates,
617
- request,
618
- user_accumulation_period=user_accumulation_period,
619
- )
620
-
621
-
622
- execute = accumulations
623
-
624
- if __name__ == "__main__":
625
- import yaml
626
-
627
- config = yaml.safe_load(
628
- """
629
- class: ea
630
- expver: '0001'
631
- grid: 20./20.
632
- levtype: sfc
633
- # number: [0, 1]
634
- # stream: enda
635
- param: [cp, tp]
636
- # accumulation_period: 6h
637
- accumulation_period: 2
638
- """
639
- )
640
- dates = yaml.safe_load("[2022-12-31 00:00, 2022-12-31 06:00]")
641
- # dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
642
- dates = to_datetime_list(dates)
643
-
644
- class Context:
645
- use_grib_paramid = True
611
+ context.trace("🌧️", f"accumulations {request} {user_accumulation_period}")
646
612
 
647
- def trace(self, *args):
648
- print(*args)
649
-
650
- for f in accumulations(Context, dates, **config):
651
- print(f, f.to_numpy().mean())
613
+ return _compute_accumulations(
614
+ context,
615
+ dates,
616
+ request,
617
+ user_accumulation_period=user_accumulation_period,
618
+ )
@@ -9,65 +9,69 @@
9
9
 
10
10
  import numpy as np
11
11
 
12
- from .legacy import legacy_source
12
+ from . import source_registry
13
+ from .legacy import LegacySource
13
14
 
14
15
 
15
- @legacy_source(__file__)
16
- def execute(context, dates, params=None, **kwargs):
17
- import earthkit.data as ekd
16
+ @source_registry.register("anemoi_dataset")
17
+ class AnemoiDatasetSource(LegacySource):
18
18
 
19
- from anemoi.datasets import open_dataset
19
+ @staticmethod
20
+ def _execute(context, dates, params=None, **kwargs):
21
+ import earthkit.data as ekd
20
22
 
21
- ds = open_dataset(**kwargs)
22
- # dates_to_index = {date: i for i, date in enumerate(ds.dates)}
23
+ from anemoi.datasets import open_dataset
23
24
 
24
- indices = []
25
- for date in dates:
26
- idx = np.where(ds.dates == date)[0]
27
- if len(idx) == 0:
28
- continue
29
- indices.append((int(idx[0]), date))
25
+ ds = open_dataset(**kwargs)
26
+ # dates_to_index = {date: i for i, date in enumerate(ds.dates)}
30
27
 
31
- vars = ds.variables
32
- if params is None:
33
- params = vars
28
+ indices = []
29
+ for date in dates:
30
+ idx = np.where(ds.dates == date)[0]
31
+ if len(idx) == 0:
32
+ continue
33
+ indices.append((int(idx[0]), date))
34
34
 
35
- if not isinstance(params, (list, tuple, set)):
36
- params = [params]
35
+ vars = ds.variables
36
+ if params is None:
37
+ params = vars
37
38
 
38
- params = set(params)
39
- results = []
39
+ if not isinstance(params, (list, tuple, set)):
40
+ params = [params]
40
41
 
41
- ensemble = ds.shape[2] > 1
42
- latitudes = ds.latitudes
43
- longitudes = ds.longitudes
42
+ params = set(params)
43
+ results = []
44
44
 
45
- for idx, date in indices:
45
+ ensemble = ds.shape[2] > 1
46
+ latitudes = ds.latitudes
47
+ longitudes = ds.longitudes
46
48
 
47
- metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes)
49
+ for idx, date in indices:
48
50
 
49
- for j, y in enumerate(ds[idx]):
51
+ metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes)
50
52
 
51
- param = vars[j]
52
- if param not in params:
53
- continue
53
+ for j, y in enumerate(ds[idx]):
54
+
55
+ param = vars[j]
56
+ if param not in params:
57
+ continue
54
58
 
55
- # metadata['name'] = param
56
- # metadata['param_level'] = param
57
- metadata["param"] = param
59
+ # metadata['name'] = param
60
+ # metadata['param_level'] = param
61
+ metadata["param"] = param
58
62
 
59
- for k, e in enumerate(y):
60
- if ensemble:
61
- metadata["number"] = k + 1
63
+ for k, e in enumerate(y):
64
+ if ensemble:
65
+ metadata["number"] = k + 1
62
66
 
63
- metadata["values"] = e
67
+ metadata["values"] = e
64
68
 
65
- results.append(metadata.copy())
69
+ results.append(metadata.copy())
66
70
 
67
- print(results[0].keys())
71
+ print(results[0].keys())
68
72
 
69
- # "list-of-dicts" does support resolution
70
- results = ekd.from_source("list-of-dicts", results)
73
+ # "list-of-dicts" does support resolution
74
+ results = ekd.from_source("list-of-dicts", results)
71
75
 
72
- # return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results])
73
- return results
76
+ # return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results])
77
+ return results