anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +42 -1
  6. anemoi/datasets/create/config.py +2 -0
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +9 -89
  16. anemoi/datasets/create/sources/accumulations.py +74 -94
  17. anemoi/datasets/create/sources/accumulations2.py +16 -45
  18. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  19. anemoi/datasets/create/sources/constants.py +39 -38
  20. anemoi/datasets/create/sources/empty.py +26 -22
  21. anemoi/datasets/create/sources/forcings.py +29 -28
  22. anemoi/datasets/create/sources/grib.py +92 -72
  23. anemoi/datasets/create/sources/grib_index.py +46 -42
  24. anemoi/datasets/create/sources/hindcasts.py +56 -55
  25. anemoi/datasets/create/sources/legacy.py +10 -62
  26. anemoi/datasets/create/sources/mars.py +107 -131
  27. anemoi/datasets/create/sources/netcdf.py +28 -24
  28. anemoi/datasets/create/sources/opendap.py +28 -24
  29. anemoi/datasets/create/sources/recentre.py +42 -41
  30. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  31. anemoi/datasets/create/sources/source.py +26 -48
  32. anemoi/datasets/create/sources/tendencies.py +67 -94
  33. anemoi/datasets/create/sources/xarray_support/__init__.py +29 -24
  34. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  35. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  36. anemoi/datasets/create/sources/zenodo.py +43 -39
  37. anemoi/datasets/create/utils.py +0 -42
  38. anemoi/datasets/data/dataset.py +6 -0
  39. anemoi/datasets/data/grids.py +0 -152
  40. anemoi/datasets/data/rolling_average.py +141 -0
  41. anemoi/datasets/data/stores.py +7 -9
  42. anemoi/datasets/dates/__init__.py +2 -0
  43. anemoi/datasets/dumper.py +76 -0
  44. anemoi/datasets/grids.py +1 -178
  45. anemoi/datasets/schemas/recipe.json +131 -0
  46. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +5 -2
  47. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/RECORD +51 -51
  48. anemoi/datasets/create/filter.py +0 -47
  49. anemoi/datasets/create/input/concat.py +0 -161
  50. anemoi/datasets/create/input/context.py +0 -86
  51. anemoi/datasets/create/input/empty.py +0 -53
  52. anemoi/datasets/create/input/filter.py +0 -117
  53. anemoi/datasets/create/input/function.py +0 -232
  54. anemoi/datasets/create/input/join.py +0 -129
  55. anemoi/datasets/create/input/pipe.py +0 -66
  56. anemoi/datasets/create/input/step.py +0 -173
  57. anemoi/datasets/create/input/template.py +0 -161
  58. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  59. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  60. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  61. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -9,65 +9,69 @@
9
9
 
10
10
  import numpy as np
11
11
 
12
- from .legacy import legacy_source
12
+ from . import source_registry
13
+ from .legacy import LegacySource
13
14
 
14
15
 
15
- @legacy_source(__file__)
16
- def execute(context, dates, params=None, **kwargs):
17
- import earthkit.data as ekd
16
+ @source_registry.register("anemoi_dataset")
17
+ class AnemoiDatasetSource(LegacySource):
18
18
 
19
- from anemoi.datasets import open_dataset
19
+ @staticmethod
20
+ def _execute(context, dates, params=None, **kwargs):
21
+ import earthkit.data as ekd
20
22
 
21
- ds = open_dataset(**kwargs)
22
- # dates_to_index = {date: i for i, date in enumerate(ds.dates)}
23
+ from anemoi.datasets import open_dataset
23
24
 
24
- indices = []
25
- for date in dates:
26
- idx = np.where(ds.dates == date)[0]
27
- if len(idx) == 0:
28
- continue
29
- indices.append((int(idx[0]), date))
25
+ ds = open_dataset(**kwargs)
26
+ # dates_to_index = {date: i for i, date in enumerate(ds.dates)}
30
27
 
31
- vars = ds.variables
32
- if params is None:
33
- params = vars
28
+ indices = []
29
+ for date in dates:
30
+ idx = np.where(ds.dates == date)[0]
31
+ if len(idx) == 0:
32
+ continue
33
+ indices.append((int(idx[0]), date))
34
34
 
35
- if not isinstance(params, (list, tuple, set)):
36
- params = [params]
35
+ vars = ds.variables
36
+ if params is None:
37
+ params = vars
37
38
 
38
- params = set(params)
39
- results = []
39
+ if not isinstance(params, (list, tuple, set)):
40
+ params = [params]
40
41
 
41
- ensemble = ds.shape[2] > 1
42
- latitudes = ds.latitudes
43
- longitudes = ds.longitudes
42
+ params = set(params)
43
+ results = []
44
44
 
45
- for idx, date in indices:
45
+ ensemble = ds.shape[2] > 1
46
+ latitudes = ds.latitudes
47
+ longitudes = ds.longitudes
46
48
 
47
- metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes)
49
+ for idx, date in indices:
48
50
 
49
- for j, y in enumerate(ds[idx]):
51
+ metadata = dict(valid_datetime=date, latitudes=latitudes, longitudes=longitudes)
50
52
 
51
- param = vars[j]
52
- if param not in params:
53
- continue
53
+ for j, y in enumerate(ds[idx]):
54
+
55
+ param = vars[j]
56
+ if param not in params:
57
+ continue
54
58
 
55
- # metadata['name'] = param
56
- # metadata['param_level'] = param
57
- metadata["param"] = param
59
+ # metadata['name'] = param
60
+ # metadata['param_level'] = param
61
+ metadata["param"] = param
58
62
 
59
- for k, e in enumerate(y):
60
- if ensemble:
61
- metadata["number"] = k + 1
63
+ for k, e in enumerate(y):
64
+ if ensemble:
65
+ metadata["number"] = k + 1
62
66
 
63
- metadata["values"] = e
67
+ metadata["values"] = e
64
68
 
65
- results.append(metadata.copy())
69
+ results.append(metadata.copy())
66
70
 
67
- print(results[0].keys())
71
+ print(results[0].keys())
68
72
 
69
- # "list-of-dicts" does support resolution
70
- results = ekd.from_source("list-of-dicts", results)
73
+ # "list-of-dicts" does support resolution
74
+ results = ekd.from_source("list-of-dicts", results)
71
75
 
72
- # return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results])
73
- return results
76
+ # return new_fieldlist_from_list([new_field_from_latitudes_longitudes(x, latitudes, longitudes) for x in results])
77
+ return results
@@ -11,41 +11,42 @@ from typing import Any
11
11
 
12
12
  from earthkit.data import from_source
13
13
 
14
- from .legacy import legacy_source
15
-
16
-
17
- @legacy_source(__file__)
18
- def constants(context: Any, dates: list[str], template: dict[str, Any], param: str) -> Any:
19
- """Deprecated function to retrieve constants data.
20
-
21
- Parameters
22
- ----------
23
- context : Any
24
- The context object for tracing.
25
- dates : list of str
26
- List of dates for which data is required.
27
- template : dict of str to Any
28
- Template dictionary for the data source.
29
- param : str
30
- Parameter to retrieve.
31
-
32
- Returns
33
- -------
34
- Any
35
- Data retrieved from the source.
36
- """
37
- from warnings import warn
38
-
39
- warn(
40
- "The source `constants` is deprecated, use `forcings` instead.",
41
- DeprecationWarning,
42
- stacklevel=2,
43
- )
44
- context.trace("✅", f"from_source(constants, {template}, {param}")
45
- if len(template) == 0:
46
- raise ValueError("Forcings template is empty.")
47
-
48
- return from_source("forcings", source_or_dataset=template, date=dates, param=param)
49
-
50
-
51
- execute: Any = constants
14
+ from . import source_registry
15
+ from .legacy import LegacySource
16
+
17
+
18
+ @source_registry.register("constants")
19
+ class ConstantsSource(LegacySource):
20
+
21
+ @staticmethod
22
+ def _execute(context: Any, dates: list[str], template: dict[str, Any], param: str) -> Any:
23
+ """Deprecated function to retrieve constants data.
24
+
25
+ Parameters
26
+ ----------
27
+ context : Any
28
+ The context object for tracing.
29
+ dates : list of str
30
+ List of dates for which data is required.
31
+ template : dict of str to Any
32
+ Template dictionary for the data source.
33
+ param : str
34
+ Parameter to retrieve.
35
+
36
+ Returns
37
+ -------
38
+ Any
39
+ Data retrieved from the source.
40
+ """
41
+ from warnings import warn
42
+
43
+ warn(
44
+ "The source `constants` is deprecated, use `forcings` instead.",
45
+ DeprecationWarning,
46
+ stacklevel=2,
47
+ )
48
+ context.trace("", f"from_source(constants, {template}, {param}")
49
+ if len(template) == 0:
50
+ raise ValueError("Forcings template is empty.")
51
+
52
+ return from_source("forcings", source_or_dataset=template, date=list(dates), param=param)
@@ -12,25 +12,29 @@ from typing import Any
12
12
 
13
13
  import earthkit.data as ekd
14
14
 
15
- from .legacy import legacy_source
16
-
17
-
18
- @legacy_source(__file__)
19
- def execute(context: Any, dates: list[str], **kwargs: Any) -> ekd.FieldList:
20
- """Executes the loading of an empty data source.
21
-
22
- Parameters
23
- ----------
24
- context : object
25
- The context in which the function is executed.
26
- dates : list
27
- List of dates for which data is to be loaded.
28
- **kwargs : dict
29
- Additional keyword arguments.
30
-
31
- Returns
32
- -------
33
- ekd.FieldList
34
- Loaded empty data source.
35
- """
36
- return ekd.from_source("empty")
15
+ from . import source_registry
16
+ from .legacy import LegacySource
17
+
18
+
19
+ @source_registry.register("empty")
20
+ class EmptySource(LegacySource):
21
+
22
+ @staticmethod
23
+ def _execute(context: Any, dates: list[str], **kwargs: Any) -> ekd.FieldList:
24
+ """Executes the loading of an empty data source.
25
+
26
+ Parameters
27
+ ----------
28
+ context : object
29
+ The context in which the function is executed.
30
+ dates : list
31
+ List of dates for which data is to be loaded.
32
+ **kwargs : dict
33
+ Additional keyword arguments.
34
+
35
+ Returns
36
+ -------
37
+ ekd.FieldList
38
+ Loaded empty data source.
39
+ """
40
+ return ekd.from_source("empty")
@@ -11,31 +11,32 @@ from typing import Any
11
11
 
12
12
  from earthkit.data import from_source
13
13
 
14
- from .legacy import legacy_source
15
-
16
-
17
- @legacy_source(__file__)
18
- def forcings(context: Any, dates: list[str], template: str, param: str) -> Any:
19
- """Loads forcing data from a specified source.
20
-
21
- Parameters
22
- ----------
23
- context : object
24
- The context in which the function is executed.
25
- dates : list
26
- List of dates for which data is to be loaded.
27
- template : FieldList
28
- Template for the data source.
29
- param : str
30
- Parameter for the data source.
31
-
32
- Returns
33
- -------
34
- object
35
- Loaded forcing data.
36
- """
37
- context.trace("✅", f"from_source(forcings, {template}, {param}")
38
- return from_source("forcings", source_or_dataset=template, date=dates, param=param)
39
-
40
-
41
- execute = forcings
14
+ from . import source_registry
15
+ from .legacy import LegacySource
16
+
17
+
18
+ @source_registry.register("forcings")
19
+ class ForcingsSource(LegacySource):
20
+
21
+ @staticmethod
22
+ def _execute(context: Any, dates: list[str], template: str, param: str) -> Any:
23
+ """Loads forcing data from a specified source.
24
+
25
+ Parameters
26
+ ----------
27
+ context : object
28
+ The context in which the function is executed.
29
+ dates : list
30
+ List of dates for which data is to be loaded.
31
+ template : FieldList
32
+ Template for the data source.
33
+ param : str
34
+ Parameter for the data source.
35
+
36
+ Returns
37
+ -------
38
+ object
39
+ Loaded forcing data.
40
+ """
41
+ context.trace("✅", f"from_source(forcings, {template}, {param}")
42
+ return from_source("forcings", source_or_dataset=template, date=list(dates), param=param)
@@ -20,7 +20,8 @@ from anemoi.transform.grids import grid_registry
20
20
  from earthkit.data import from_source
21
21
  from earthkit.data.utils.patterns import Pattern
22
22
 
23
- from .legacy import legacy_source
23
+ from . import source_registry
24
+ from .legacy import LegacySource
24
25
 
25
26
  LOG = logging.getLogger(__name__)
26
27
 
@@ -47,6 +48,14 @@ def check(ds: Any, paths: list[str], **kwargs: Any) -> None:
47
48
  if isinstance(v, (tuple, list)):
48
49
  count *= len(v)
49
50
 
51
+ # in the case of static data (e.g repeated dates) dates might be empty
52
+ if len(ds) != count and kwargs.get("dates", []) == []:
53
+ LOG.warning(
54
+ f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, paths={paths})"
55
+ f" Received empty dates - assuming this is static data."
56
+ )
57
+ return
58
+
50
59
  if len(ds) != count:
51
60
  raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, paths={paths})")
52
61
 
@@ -73,74 +82,85 @@ def _expand(paths: list[str]) -> Any:
73
82
  yield path
74
83
 
75
84
 
76
- @legacy_source(__file__)
77
- def execute(
78
- context: Any,
79
- dates: list[Any],
80
- path: str | list[str],
81
- flavour: str | dict[str, Any] | None = None,
82
- grid_definition: dict[str, Any] | None = None,
83
- *args: Any,
84
- **kwargs: Any,
85
- ) -> ekd.FieldList:
86
- """Executes the function to load data from GRIB files.
87
-
88
- Parameters
89
- ----------
90
- context : Any
91
- The context in which the function is executed.
92
- dates : list of Any
93
- List of dates.
94
- path : str or list of str
95
- Path or list of paths to the GRIB files.
96
- flavour : str or dict of str to Any, optional
97
- Flavour information, by default None.
98
- grid_definition : dict of str to Any, optional
99
- Grid definition configuration to create a Grid object, by default None.
100
- *args : Any
101
- Additional positional arguments.
102
- **kwargs : Any
103
- Additional keyword arguments.
104
-
105
- Returns
106
- -------
107
- Any
108
- The loaded dataset.
109
- """
110
- given_paths = path if isinstance(path, list) else [path]
111
- if flavour is not None:
112
- flavour = RuleBasedFlavour(flavour)
113
-
114
- if grid_definition is not None:
115
- grid = grid_registry.from_config(grid_definition)
116
- else:
117
- grid = None
118
-
119
- ds = from_source("empty")
120
- dates = [d.isoformat() for d in dates]
121
-
122
- for path in given_paths:
123
- paths = Pattern(path).substitute(*args, date=dates, allow_extra=True, **kwargs)
124
-
125
- for name in ("grid", "area", "rotation", "frame", "resol", "bitmap"):
126
- if name in kwargs:
127
- raise ValueError(f"MARS interpolation parameter '{name}' not supported")
128
-
129
- for path in _expand(paths):
130
- context.trace("📁", "PATH", path)
131
- s = from_source("file", path)
132
- if flavour is not None:
133
- s = flavour.map(s)
134
- s = s.sel(valid_datetime=dates, **kwargs)
135
- ds = ds + s
136
-
137
- if kwargs and not context.partial_ok:
138
- check(ds, given_paths, valid_datetime=dates, **kwargs)
139
-
140
- if grid is not None:
141
- ds = new_fieldlist_from_list([new_field_from_grid(f, grid) for f in ds])
142
-
143
- if len(ds) == 0:
144
- LOG.warning(f"No fields found for {dates} in {given_paths} (kwargs={kwargs})")
145
-
146
- return ds
85
+ @source_registry.register("grib")
86
+ class GribSource(LegacySource):
87
+
88
+ @staticmethod
89
+ def _execute(
90
+ context: Any,
91
+ dates: list[Any],
92
+ path: str | list[str],
93
+ flavour: str | dict[str, Any] | None = None,
94
+ grid_definition: dict[str, Any] | None = None,
95
+ *args: Any,
96
+ **kwargs: Any,
97
+ ) -> ekd.FieldList:
98
+ """Executes the function to load data from GRIB files.
99
+
100
+ Parameters
101
+ ----------
102
+ context : Any
103
+ The context in which the function is executed.
104
+ dates : list of Any
105
+ List of dates.
106
+ path : str or list of str
107
+ Path or list of paths to the GRIB files.
108
+ flavour : str or dict of str to Any, optional
109
+ Flavour information, by default None.
110
+ grid_definition : dict of str to Any, optional
111
+ Grid definition configuration to create a Grid object, by default None.
112
+ *args : Any
113
+ Additional positional arguments.
114
+ **kwargs : Any
115
+ Additional keyword arguments.
116
+
117
+ Returns
118
+ -------
119
+ Any
120
+ The loaded dataset.
121
+ """
122
+ given_paths = path if isinstance(path, list) else [path]
123
+ if flavour is not None:
124
+ flavour = RuleBasedFlavour(flavour)
125
+
126
+ if grid_definition is not None:
127
+ grid = grid_registry.from_config(grid_definition)
128
+ else:
129
+ grid = None
130
+
131
+ ds = from_source("empty")
132
+ dates = [d.isoformat() for d in dates]
133
+
134
+ for path in given_paths:
135
+
136
+ # do not substitute if not needed
137
+ if "{" not in path:
138
+ paths = [path]
139
+ else:
140
+ paths = Pattern(path).substitute(*args, date=dates, allow_extra=True, **kwargs)
141
+
142
+ for name in ("grid", "area", "rotation", "frame", "resol", "bitmap"):
143
+ if name in kwargs:
144
+ raise ValueError(f"MARS interpolation parameter '{name}' not supported")
145
+
146
+ for path in _expand(paths):
147
+ context.trace("📁", "PATH", path)
148
+ s = from_source("file", path)
149
+ if flavour is not None:
150
+ s = flavour.map(s)
151
+ sel_kwargs = kwargs.copy()
152
+ if dates != []:
153
+ sel_kwargs["valid_datetime"] = dates
154
+ s = s.sel(**sel_kwargs)
155
+ ds = ds + s
156
+
157
+ if kwargs and not context.partial_ok:
158
+ check(ds, given_paths, valid_datetime=dates, **kwargs)
159
+
160
+ if grid is not None:
161
+ ds = new_fieldlist_from_list([new_field_from_grid(f, grid) for f in ds])
162
+
163
+ if len(ds) == 0:
164
+ LOG.warning(f"No fields found for {dates} in {given_paths} (kwargs={kwargs})")
165
+
166
+ return ds
@@ -19,7 +19,8 @@ from anemoi.transform.flavour import RuleBasedFlavour
19
19
  from cachetools import LRUCache
20
20
  from earthkit.data.indexing.fieldlist import FieldArray
21
21
 
22
- from .legacy import legacy_source
22
+ from . import source_registry
23
+ from .legacy import LegacySource
23
24
 
24
25
  LOG = logging.getLogger(__name__)
25
26
 
@@ -569,44 +570,47 @@ class GribIndex:
569
570
  yield data
570
571
 
571
572
 
572
- @legacy_source(__file__)
573
- def execute(
574
- context: Any,
575
- dates: list[Any],
576
- indexdb: str,
577
- flavour: str | None = None,
578
- **kwargs: Any,
579
- ) -> FieldArray:
580
- """Execute the GRIB data retrieval process.
581
-
582
- Parameters
583
- ----------
584
- context : Any
585
- The execution context.
586
- dates : List[Any]
587
- List of dates to retrieve data for.
588
- indexdb : str
589
- Path to the GRIB index database.
590
- flavour : Optional[str], optional
591
- Flavour configuration for mapping fields, by default None.
592
- **kwargs : Any
593
- Additional filtering criteria.
594
-
595
- Returns
596
- -------
597
- FieldArray
598
- An array of retrieved GRIB fields.
599
- """
600
- index = GribIndex(indexdb)
601
- result = []
602
-
603
- if flavour is not None:
604
- flavour = RuleBasedFlavour(flavour)
605
-
606
- for grib in index.retrieve(dates, **kwargs):
607
- field = ekd.from_source("memory", grib)[0]
608
- if flavour:
609
- field = flavour.apply(field)
610
- result.append(field)
611
-
612
- return FieldArray(result)
573
+ @source_registry.register("grib_index")
574
+ class GribIndexSource(LegacySource):
575
+
576
+ @staticmethod
577
+ def _execute(
578
+ context: Any,
579
+ dates: list[Any],
580
+ indexdb: str,
581
+ flavour: str | None = None,
582
+ **kwargs: Any,
583
+ ) -> FieldArray:
584
+ """Execute the GRIB data retrieval process.
585
+
586
+ Parameters
587
+ ----------
588
+ context : Any
589
+ The execution context.
590
+ dates : List[Any]
591
+ List of dates to retrieve data for.
592
+ indexdb : str
593
+ Path to the GRIB index database.
594
+ flavour : Optional[str], optional
595
+ Flavour configuration for mapping fields, by default None.
596
+ **kwargs : Any
597
+ Additional filtering criteria.
598
+
599
+ Returns
600
+ -------
601
+ FieldArray
602
+ An array of retrieved GRIB fields.
603
+ """
604
+ index = GribIndex(indexdb)
605
+ result = []
606
+
607
+ if flavour is not None:
608
+ flavour = RuleBasedFlavour(flavour)
609
+
610
+ for grib in index.retrieve(dates, **kwargs):
611
+ field = ekd.from_source("memory", grib)[0]
612
+ if flavour:
613
+ field = flavour.apply(field)
614
+ result.append(field)
615
+
616
+ return FieldArray(result)