anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -10,10 +10,8 @@
10
10
  import logging
11
11
  import os
12
12
  import sqlite3
13
+ from collections.abc import Iterator
13
14
  from typing import Any
14
- from typing import Iterator
15
- from typing import List
16
- from typing import Optional
17
15
 
18
16
  import earthkit.data as ekd
19
17
  import tqdm
@@ -21,7 +19,8 @@ from anemoi.transform.flavour import RuleBasedFlavour
21
19
  from cachetools import LRUCache
22
20
  from earthkit.data.indexing.fieldlist import FieldArray
23
21
 
24
- from .legacy import legacy_source
22
+ from . import source_registry
23
+ from .legacy import LegacySource
25
24
 
26
25
  LOG = logging.getLogger(__name__)
27
26
 
@@ -36,8 +35,8 @@ class GribIndex:
36
35
  self,
37
36
  database: str,
38
37
  *,
39
- keys: Optional[List[str] | str] = None,
40
- flavour: Optional[str] = None,
38
+ keys: list[str] | str | None = None,
39
+ flavour: str | None = None,
41
40
  update: bool = False,
42
41
  overwrite: bool = False,
43
42
  ) -> None:
@@ -157,7 +156,7 @@ class GribIndex:
157
156
  """Commit the current transaction to the database."""
158
157
  self.conn.commit()
159
158
 
160
- def _get_metadata_keys(self) -> List[str]:
159
+ def _get_metadata_keys(self) -> list[str]:
161
160
  """Retrieve the metadata keys from the database.
162
161
 
163
162
  Returns
@@ -225,7 +224,7 @@ class GribIndex:
225
224
  LOG.info(f"Path: {self._get_path(existing_record[1])}")
226
225
  raise
227
226
 
228
- def _all_columns(self) -> List[str]:
227
+ def _all_columns(self) -> list[str]:
229
228
  """Retrieve all column names from the grib_index table.
230
229
 
231
230
  Returns
@@ -241,7 +240,7 @@ class GribIndex:
241
240
  self._columns = [col for col in columns if not col.startswith("_")]
242
241
  return self._columns
243
242
 
244
- def _ensure_columns(self, columns: List[str]) -> None:
243
+ def _ensure_columns(self, columns: list[str]) -> None:
245
244
  """Add missing columns to the grib_index table.
246
245
 
247
246
  Parameters
@@ -324,7 +323,7 @@ class GribIndex:
324
323
 
325
324
  self._commit()
326
325
 
327
- def _paramdb(self, category: int, discipline: int) -> Optional[dict]:
326
+ def _paramdb(self, category: int, discipline: int) -> dict | None:
328
327
  """Fetch parameter information from the parameter database.
329
328
 
330
329
  Parameters
@@ -355,7 +354,7 @@ class GribIndex:
355
354
  except Exception as e:
356
355
  LOG.warning(f"Failed to fetch information from parameter database: {e}")
357
356
 
358
- def _param_grib2_info(self, paramId: int) -> List[dict]:
357
+ def _param_grib2_info(self, paramId: int) -> list[dict]:
359
358
  """Fetch GRIB2 parameter information for a given parameter ID.
360
359
 
361
360
  Parameters
@@ -383,7 +382,7 @@ class GribIndex:
383
382
  LOG.warning(f"Failed to fetch information from parameter database: {e}")
384
383
  return []
385
384
 
386
- def _param_id_info(self, paramId: int) -> Optional[dict]:
385
+ def _param_id_info(self, paramId: int) -> dict | None:
387
386
  """Fetch detailed information for a given parameter ID.
388
387
 
389
388
  Parameters
@@ -412,7 +411,7 @@ class GribIndex:
412
411
 
413
412
  return None
414
413
 
415
- def _param_id_unit(self, unitId: int) -> Optional[dict]:
414
+ def _param_id_unit(self, unitId: int) -> dict | None:
416
415
  """Fetch unit information for a given unit ID.
417
416
 
418
417
  Parameters
@@ -520,7 +519,7 @@ class GribIndex:
520
519
  raise ValueError(f"No path found for path_id {path_id}")
521
520
  return row[0]
522
521
 
523
- def retrieve(self, dates: List[Any], **kwargs: Any) -> Iterator[Any]:
522
+ def retrieve(self, dates: list[Any], **kwargs: Any) -> Iterator[Any]:
524
523
  """Retrieve GRIB data from the database.
525
524
 
526
525
  Parameters
@@ -571,44 +570,47 @@ class GribIndex:
571
570
  yield data
572
571
 
573
572
 
574
- @legacy_source(__file__)
575
- def execute(
576
- context: Any,
577
- dates: List[Any],
578
- indexdb: str,
579
- flavour: Optional[str] = None,
580
- **kwargs: Any,
581
- ) -> FieldArray:
582
- """Execute the GRIB data retrieval process.
583
-
584
- Parameters
585
- ----------
586
- context : Any
587
- The execution context.
588
- dates : List[Any]
589
- List of dates to retrieve data for.
590
- indexdb : str
591
- Path to the GRIB index database.
592
- flavour : Optional[str], optional
593
- Flavour configuration for mapping fields, by default None.
594
- **kwargs : Any
595
- Additional filtering criteria.
596
-
597
- Returns
598
- -------
599
- FieldArray
600
- An array of retrieved GRIB fields.
601
- """
602
- index = GribIndex(indexdb)
603
- result = []
604
-
605
- if flavour is not None:
606
- flavour = RuleBasedFlavour(flavour)
607
-
608
- for grib in index.retrieve(dates, **kwargs):
609
- field = ekd.from_source("memory", grib)[0]
610
- if flavour:
611
- field = flavour.apply(field)
612
- result.append(field)
613
-
614
- return FieldArray(result)
573
+ @source_registry.register("grib_index")
574
+ class GribIndexSource(LegacySource):
575
+
576
+ @staticmethod
577
+ def _execute(
578
+ context: Any,
579
+ dates: list[Any],
580
+ indexdb: str,
581
+ flavour: str | None = None,
582
+ **kwargs: Any,
583
+ ) -> FieldArray:
584
+ """Execute the GRIB data retrieval process.
585
+
586
+ Parameters
587
+ ----------
588
+ context : Any
589
+ The execution context.
590
+ dates : List[Any]
591
+ List of dates to retrieve data for.
592
+ indexdb : str
593
+ Path to the GRIB index database.
594
+ flavour : Optional[str], optional
595
+ Flavour configuration for mapping fields, by default None.
596
+ **kwargs : Any
597
+ Additional filtering criteria.
598
+
599
+ Returns
600
+ -------
601
+ FieldArray
602
+ An array of retrieved GRIB fields.
603
+ """
604
+ index = GribIndex(indexdb)
605
+ result = []
606
+
607
+ if flavour is not None:
608
+ flavour = RuleBasedFlavour(flavour)
609
+
610
+ for grib in index.retrieve(dates, **kwargs):
611
+ field = ekd.from_source("memory", grib)[0]
612
+ if flavour:
613
+ field = flavour.apply(field)
614
+ result.append(field)
615
+
616
+ return FieldArray(result)
@@ -9,20 +9,18 @@
9
9
 
10
10
  import logging
11
11
  from typing import Any
12
- from typing import Dict
13
- from typing import List
14
- from typing import Union
15
12
 
16
13
  from earthkit.data.core.fieldlist import MultiFieldList
17
14
 
18
15
  from anemoi.datasets.create.sources.mars import mars
19
16
 
20
- from .legacy import legacy_source
17
+ from . import source_registry
18
+ from .legacy import LegacySource
21
19
 
22
20
  LOGGER = logging.getLogger(__name__)
23
21
 
24
22
 
25
- def _to_list(x: Union[list, tuple, Any]) -> List[Any]:
23
+ def _to_list(x: list | tuple | Any) -> list[Any]:
26
24
  """Converts the input to a list if it is not already a list or tuple.
27
25
 
28
26
  Parameters
@@ -40,57 +38,57 @@ def _to_list(x: Union[list, tuple, Any]) -> List[Any]:
40
38
  return [x]
41
39
 
42
40
 
43
- @legacy_source(__file__)
44
- def hindcasts(context: Any, dates: List[Any], **request: Dict[str, Any]) -> MultiFieldList:
45
- """Generates hindcast requests based on the provided dates and request parameters.
46
-
47
- Parameters
48
- ----------
49
- context : Any
50
- The context containing the dates provider and trace method.
51
- dates : List[Any]
52
- A list of dates for which to generate hindcast requests.
53
- request : Dict[str, Any]
54
- Additional request parameters.
55
-
56
- Returns
57
- -------
58
- MultiFieldList
59
- A MultiFieldList containing the hindcast data.
60
- """
61
- from anemoi.datasets.dates import HindcastsDates
62
-
63
- provider = context.dates_provider
64
- assert isinstance(provider, HindcastsDates)
65
-
66
- context.trace("H️", f"hindcasts {len(dates)=}")
67
-
68
- request["param"] = _to_list(request["param"])
69
- request["step"] = _to_list(request.get("step", 0))
70
- request["step"] = [int(_) for _ in request["step"]]
71
-
72
- context.trace("H️", f"hindcast {request}")
73
-
74
- requests = []
75
- for d in dates:
76
- r = request.copy()
77
- hindcast = provider.mapping[d]
78
- r["hdate"] = hindcast.hdate.strftime("%Y-%m-%d")
79
- r["date"] = hindcast.refdate.strftime("%Y-%m-%d")
80
- r["time"] = hindcast.refdate.strftime("%H")
81
- r["step"] = hindcast.step
82
- requests.append(r)
83
-
84
- if len(requests) == 0:
85
- return MultiFieldList([])
86
-
87
- return mars(
88
- context,
89
- dates,
90
- *requests,
91
- date_key="hdate",
92
- request_already_using_valid_datetime=True,
93
- )
94
-
95
-
96
- execute = hindcasts
41
+ @source_registry.register("hindcasts")
42
+ class HindcastsSource(LegacySource):
43
+
44
+ @staticmethod
45
+ def _execute(context: Any, dates: list[Any], **request: dict[str, Any]) -> MultiFieldList:
46
+ """Generates hindcast requests based on the provided dates and request parameters.
47
+
48
+ Parameters
49
+ ----------
50
+ context : Any
51
+ The context containing the dates provider and trace method.
52
+ dates : List[Any]
53
+ A list of dates for which to generate hindcast requests.
54
+ request : Dict[str, Any]
55
+ Additional request parameters.
56
+
57
+ Returns
58
+ -------
59
+ MultiFieldList
60
+ A MultiFieldList containing the hindcast data.
61
+ """
62
+ from anemoi.datasets.dates import HindcastsDates
63
+
64
+ provider = context.dates_provider
65
+ assert isinstance(provider, HindcastsDates)
66
+
67
+ context.trace("H️", f"hindcasts {len(dates)=}")
68
+
69
+ request["param"] = _to_list(request["param"])
70
+ request["step"] = _to_list(request.get("step", 0))
71
+ request["step"] = [int(_) for _ in request["step"]]
72
+
73
+ context.trace("H️", f"hindcast {request}")
74
+
75
+ requests = []
76
+ for d in dates:
77
+ r = request.copy()
78
+ hindcast = provider.mapping[d]
79
+ r["hdate"] = hindcast.hdate.strftime("%Y-%m-%d")
80
+ r["date"] = hindcast.refdate.strftime("%Y-%m-%d")
81
+ r["time"] = hindcast.refdate.strftime("%H")
82
+ r["step"] = hindcast.step
83
+ requests.append(r)
84
+
85
+ if len(requests) == 0:
86
+ return MultiFieldList([])
87
+
88
+ return mars(
89
+ context,
90
+ dates,
91
+ *requests,
92
+ date_key="hdate",
93
+ request_already_using_valid_datetime=True,
94
+ )
@@ -8,16 +8,13 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
 
11
- import inspect
12
11
  import logging
13
- import os
12
+ from abc import abstractmethod
14
13
  from typing import Any
15
- from typing import Callable
16
14
 
17
- from anemoi.datasets.create.input.template import resolve
15
+ from anemoi.datasets.create.input.context import Context
18
16
 
19
17
  from ..source import Source
20
- from . import source_registry
21
18
 
22
19
  LOG = logging.getLogger(__name__)
23
20
 
@@ -27,7 +24,7 @@ class LegacySource(Source):
27
24
 
28
25
  Parameters
29
26
  ----------
30
- context : Any
27
+ context : Context
31
28
  The context in which the source is created.
32
29
  *args : tuple
33
30
  Positional arguments.
@@ -35,64 +32,15 @@ class LegacySource(Source):
35
32
  Keyword arguments.
36
33
  """
37
34
 
38
- def __init__(self, context: Any, *args: Any, **kwargs: Any) -> None:
35
+ def __init__(self, context: Context, *args: Any, **kwargs: Any) -> None:
39
36
  super().__init__(context, *args, **kwargs)
40
37
  self.args = args
41
38
  self.kwargs = kwargs
42
39
 
40
+ @staticmethod
41
+ @abstractmethod
42
+ def _execute(context, *args, **kwargs):
43
+ pass
43
44
 
44
- class legacy_source:
45
- """A decorator class for legacy sources.
46
-
47
- Parameters
48
- ----------
49
- name : str
50
- The name of the legacy source.
51
- """
52
-
53
- def __init__(self, name: str) -> None:
54
- name, _ = os.path.splitext(os.path.basename(name))
55
- self.name = name
56
-
57
- def __call__(self, execute: Callable) -> Callable:
58
- """Call method to wrap the execute function.
59
-
60
- Parameters
61
- ----------
62
- execute : function
63
- The execute function to be wrapped.
64
-
65
- Returns
66
- -------
67
- function
68
- The wrapped execute function.
69
- """
70
- this = self
71
- name = f"Legacy{self.name.title()}Source"
72
- source = ".".join([execute.__module__, execute.__name__])
73
-
74
- def execute_wrapper(self, dates) -> Any:
75
- """Wrapper method to call the execute function."""
76
-
77
- args, kwargs = resolve(self.context, (self.args, self.kwargs))
78
-
79
- try:
80
- return execute(self.context, dates, *args, **kwargs)
81
- except TypeError:
82
- LOG.error(f"Error executing source {this.name} from {source}")
83
- LOG.error(f"Function signature is: {inspect.signature(execute)}")
84
- LOG.error(f"Arguments are: {args=}, {kwargs=}")
85
- raise
86
-
87
- klass = type(
88
- name,
89
- (LegacySource,),
90
- {
91
- "execute": execute_wrapper,
92
- "_source": source,
93
- },
94
- )
95
-
96
- source_registry.register(self.name)(klass)
97
-
98
- return execute
45
+ def execute(self, dates: Any) -> Any:
46
+ return self._execute(self.context, dates, *self.args, **self.kwargs)