anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/grib-index.py +1 -1
  7. anemoi/datasets/commands/inspect.py +27 -35
  8. anemoi/datasets/commands/validate.py +59 -0
  9. anemoi/datasets/compute/recentre.py +3 -6
  10. anemoi/datasets/create/__init__.py +22 -25
  11. anemoi/datasets/create/check.py +10 -12
  12. anemoi/datasets/create/chunks.py +1 -2
  13. anemoi/datasets/create/config.py +3 -6
  14. anemoi/datasets/create/filter.py +21 -24
  15. anemoi/datasets/create/input/__init__.py +1 -2
  16. anemoi/datasets/create/input/action.py +3 -5
  17. anemoi/datasets/create/input/concat.py +5 -8
  18. anemoi/datasets/create/input/context.py +3 -6
  19. anemoi/datasets/create/input/data_sources.py +5 -8
  20. anemoi/datasets/create/input/empty.py +1 -2
  21. anemoi/datasets/create/input/filter.py +2 -3
  22. anemoi/datasets/create/input/function.py +1 -2
  23. anemoi/datasets/create/input/join.py +4 -5
  24. anemoi/datasets/create/input/misc.py +4 -6
  25. anemoi/datasets/create/input/repeated_dates.py +13 -18
  26. anemoi/datasets/create/input/result.py +29 -33
  27. anemoi/datasets/create/input/step.py +6 -24
  28. anemoi/datasets/create/input/template.py +3 -4
  29. anemoi/datasets/create/input/trace.py +1 -1
  30. anemoi/datasets/create/patch.py +1 -2
  31. anemoi/datasets/create/persistent.py +3 -5
  32. anemoi/datasets/create/size.py +1 -3
  33. anemoi/datasets/create/sources/accumulations.py +47 -52
  34. anemoi/datasets/create/sources/accumulations2.py +4 -8
  35. anemoi/datasets/create/sources/constants.py +1 -3
  36. anemoi/datasets/create/sources/empty.py +1 -2
  37. anemoi/datasets/create/sources/fdb.py +133 -0
  38. anemoi/datasets/create/sources/forcings.py +1 -2
  39. anemoi/datasets/create/sources/grib.py +6 -10
  40. anemoi/datasets/create/sources/grib_index.py +13 -15
  41. anemoi/datasets/create/sources/hindcasts.py +2 -5
  42. anemoi/datasets/create/sources/legacy.py +1 -1
  43. anemoi/datasets/create/sources/mars.py +17 -21
  44. anemoi/datasets/create/sources/netcdf.py +1 -2
  45. anemoi/datasets/create/sources/opendap.py +1 -3
  46. anemoi/datasets/create/sources/patterns.py +4 -6
  47. anemoi/datasets/create/sources/planetary_computer.py +44 -0
  48. anemoi/datasets/create/sources/recentre.py +8 -11
  49. anemoi/datasets/create/sources/source.py +3 -6
  50. anemoi/datasets/create/sources/tendencies.py +2 -5
  51. anemoi/datasets/create/sources/xarray.py +4 -6
  52. anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
  53. anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
  54. anemoi/datasets/create/sources/xarray_support/field.py +17 -16
  55. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  56. anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
  57. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  58. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  59. anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
  60. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  61. anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
  62. anemoi/datasets/create/sources/xarray_zarr.py +1 -2
  63. anemoi/datasets/create/sources/zenodo.py +3 -5
  64. anemoi/datasets/create/statistics/__init__.py +3 -6
  65. anemoi/datasets/create/testing.py +2 -74
  66. anemoi/datasets/create/typing.py +1 -2
  67. anemoi/datasets/create/utils.py +1 -2
  68. anemoi/datasets/create/zarr.py +7 -2
  69. anemoi/datasets/data/__init__.py +15 -6
  70. anemoi/datasets/data/complement.py +52 -23
  71. anemoi/datasets/data/concat.py +5 -8
  72. anemoi/datasets/data/dataset.py +42 -47
  73. anemoi/datasets/data/debug.py +7 -9
  74. anemoi/datasets/data/ensemble.py +4 -6
  75. anemoi/datasets/data/fill_missing.py +7 -10
  76. anemoi/datasets/data/forwards.py +30 -28
  77. anemoi/datasets/data/grids.py +12 -16
  78. anemoi/datasets/data/indexing.py +9 -12
  79. anemoi/datasets/data/interpolate.py +7 -15
  80. anemoi/datasets/data/join.py +8 -12
  81. anemoi/datasets/data/masked.py +6 -11
  82. anemoi/datasets/data/merge.py +5 -9
  83. anemoi/datasets/data/misc.py +41 -45
  84. anemoi/datasets/data/missing.py +11 -16
  85. anemoi/datasets/data/observations/__init__.py +8 -14
  86. anemoi/datasets/data/padded.py +3 -5
  87. anemoi/datasets/data/records/backends/__init__.py +2 -2
  88. anemoi/datasets/data/rescale.py +5 -12
  89. anemoi/datasets/data/select.py +13 -16
  90. anemoi/datasets/data/statistics.py +4 -7
  91. anemoi/datasets/data/stores.py +23 -77
  92. anemoi/datasets/data/subset.py +8 -11
  93. anemoi/datasets/data/unchecked.py +7 -11
  94. anemoi/datasets/data/xy.py +25 -21
  95. anemoi/datasets/dates/__init__.py +13 -18
  96. anemoi/datasets/dates/groups.py +7 -10
  97. anemoi/datasets/grids.py +11 -12
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
  101. anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filters/__init__.py +0 -33
  103. anemoi/datasets/create/filters/empty.py +0 -37
  104. anemoi/datasets/create/filters/legacy.py +0 -93
  105. anemoi/datasets/create/filters/noop.py +0 -37
  106. anemoi/datasets/create/filters/orog_to_z.py +0 -58
  107. anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
  108. anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
  109. anemoi/datasets/create/filters/rename.py +0 -205
  110. anemoi/datasets/create/filters/rotate_winds.py +0 -105
  111. anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
  112. anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
  113. anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
  114. anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
  115. anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
  116. anemoi/datasets/create/filters/sum.py +0 -68
  117. anemoi/datasets/create/filters/transform.py +0 -51
  118. anemoi/datasets/create/filters/unrotate_winds.py +0 -105
  119. anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
  120. anemoi/datasets/create/filters/wz_to_w.py +0 -98
  121. anemoi/datasets/utils/__init__.py +0 -8
  122. anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
  123. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
  124. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
  125. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
  126. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,6 @@
9
9
 
10
10
 
11
11
  from typing import Any
12
- from typing import List
13
12
 
14
13
  import earthkit.data as ekd
15
14
 
@@ -17,7 +16,7 @@ from .legacy import legacy_source
17
16
 
18
17
 
19
18
  @legacy_source(__file__)
20
- def execute(context: Any, dates: List[str], **kwargs: Any) -> ekd.FieldList:
19
+ def execute(context: Any, dates: list[str], **kwargs: Any) -> ekd.FieldList:
21
20
  """Executes the loading of an empty data source.
22
21
 
23
22
  Parameters
@@ -0,0 +1,133 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ from datetime import datetime
11
+ from typing import Any
12
+
13
+ import earthkit.data as ekd
14
+ from anemoi.transform.fields import new_field_from_grid
15
+ from anemoi.transform.fields import new_fieldlist_from_list
16
+ from anemoi.transform.flavour import RuleBasedFlavour
17
+ from anemoi.transform.grids import grid_registry
18
+
19
+ from anemoi.datasets.create.typing import DateList
20
+
21
+ from ..source import Source
22
+ from . import source_registry
23
+
24
+
25
@source_registry.register("fdb")
class FdbSource(Source):
    """FDB data source.

    Builds one FDB request per requested date from a base request, retrieves
    the fields through ``earthkit.data`` and optionally attaches a grid
    definition and applies a rule-based flavour to the result.
    """

    emoji = "💽"

    def __init__(
        self,
        context,
        fdb_request: dict[str, Any],
        fdb_config: dict | None = None,
        fdb_userconfig: dict | None = None,
        flavour: dict[str, Any] | None = None,
        grid_definition: str | None = None,
        **kwargs: Any,
    ):
        """Initialise the FDB input.

        Parameters
        ----------
        context : Any
            The context, forwarded to the parent ``Source``.
        fdb_request : dict
            The FDB request parameters. Must contain a ``param`` entry; a
            ``step`` of 0 is added when no ``step`` is given.
        fdb_config : dict, optional
            The FDB config to use.
        fdb_userconfig : dict, optional
            The FDB userconfig to use.
        flavour : dict, optional
            The flavour configuration, see `anemoi.transform.flavour.RuleBasedFlavour`.
        grid_definition : str, optional
            The grid definition to use, see `anemoi.transform.grids.grid_registry`.
        **kwargs : Any
            Additional keyword arguments. ``param_id_map`` (mapping of short
            name to parameter ID) and ``offset_from_date`` are consumed here;
            any other key is ignored.

        Raises
        ------
        KeyError
            If ``fdb_request`` has no ``param`` entry.
        """
        super().__init__(context)

        # Shallow copy: the keys set below must not leak into the caller's dict.
        self.request = fdb_request.copy()
        self.configs = {"config": fdb_config, "userconfig": fdb_userconfig}

        self.flavour = RuleBasedFlavour(flavour) if flavour else None
        self.grid = grid_registry.from_config(grid_definition) if grid_definition is not None else None

        # Default to step 0 when the request does not specify one.
        self.request.setdefault("step", 0)

        self.request["param"] = _shortname_to_paramid(fdb_request["param"], kwargs.pop("param_id_map", None))

        # temporary workarounds for FDB use at MeteoSwiss (adoption is ongoing)
        # thus not documented
        self.offset_from_date = kwargs.pop("offset_from_date", None)

    def execute(self, dates: DateList) -> ekd.FieldList:
        """Execute the FDB source.

        Parameters
        ----------
        dates : DateList
            The input dates.

        Returns
        -------
        ekd.FieldList
            The output data.
        """
        # One request per date: the time-related keys override the base request.
        requests = [self.request | _time_request_keys(date, self.offset_from_date) for date in dates]

        # in some cases (e.g. repeated_dates 'constant' mode), we might have a fully
        # defined request already and an empty dates list
        requests = requests or [self.request]

        fl = ekd.from_source("empty")
        for request in requests:
            fl += ekd.from_source("fdb", request, **self.configs, read_all=True)

        if self.grid is not None:
            fl = new_fieldlist_from_list([new_field_from_grid(f, self.grid) for f in fl])

        if self.flavour:
            fl = self.flavour.map(fl)

        return fl
113
+
114
+
115
+ def _time_request_keys(dt: datetime, offset_from_date: bool | None = None) -> str:
116
+ """Defines the time-related keys for the FDB request."""
117
+ out = {}
118
+ out["date"] = dt.strftime("%Y%m%d")
119
+ if offset_from_date:
120
+ out["time"] = "0000"
121
+ out["step"] = int((dt - dt.replace(hour=0, minute=0)).total_seconds() // 3600)
122
+ else:
123
+ out["time"] = dt.strftime("%H%M")
124
+ return out
125
+
126
+
127
+ def _shortname_to_paramid(shortname: list[str], param_id_map: dict[str, int] | None = None) -> list[int]:
128
+ from anemoi.datasets.create.sources.mars import use_grib_paramid
129
+
130
+ """Convert a shortname to a parameter ID."""
131
+ if param_id_map is None:
132
+ return use_grib_paramid(shortname)
133
+ return [param_id_map[s] for s in shortname]
@@ -8,7 +8,6 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
  from typing import Any
11
- from typing import List
12
11
 
13
12
  from earthkit.data import from_source
14
13
 
@@ -16,7 +15,7 @@ from .legacy import legacy_source
16
15
 
17
16
 
18
17
  @legacy_source(__file__)
19
- def forcings(context: Any, dates: List[str], template: str, param: str) -> Any:
18
+ def forcings(context: Any, dates: list[str], template: str, param: str) -> Any:
20
19
  """Loads forcing data from a specified source.
21
20
 
22
21
  Parameters
@@ -11,10 +11,6 @@
11
11
  import glob
12
12
  import logging
13
13
  from typing import Any
14
- from typing import Dict
15
- from typing import List
16
- from typing import Optional
17
- from typing import Union
18
14
 
19
15
  import earthkit.data as ekd
20
16
  from anemoi.transform.fields import new_field_from_grid
@@ -29,7 +25,7 @@ from .legacy import legacy_source
29
25
  LOG = logging.getLogger(__name__)
30
26
 
31
27
 
32
- def check(ds: Any, paths: List[str], **kwargs: Any) -> None:
28
+ def check(ds: Any, paths: list[str], **kwargs: Any) -> None:
33
29
  """Check if the dataset matches the expected number of fields.
34
30
 
35
31
  Parameters
@@ -55,7 +51,7 @@ def check(ds: Any, paths: List[str], **kwargs: Any) -> None:
55
51
  raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, paths={paths})")
56
52
 
57
53
 
58
- def _expand(paths: List[str]) -> Any:
54
+ def _expand(paths: list[str]) -> Any:
59
55
  """Expand the given paths using glob.
60
56
 
61
57
  Parameters
@@ -80,10 +76,10 @@ def _expand(paths: List[str]) -> Any:
80
76
  @legacy_source(__file__)
81
77
  def execute(
82
78
  context: Any,
83
- dates: List[Any],
84
- path: Union[str, List[str]],
85
- flavour: Optional[Union[str, Dict[str, Any]]] = None,
86
- grid_definition: Optional[Dict[str, Any]] = None,
79
+ dates: list[Any],
80
+ path: str | list[str],
81
+ flavour: str | dict[str, Any] | None = None,
82
+ grid_definition: dict[str, Any] | None = None,
87
83
  *args: Any,
88
84
  **kwargs: Any,
89
85
  ) -> ekd.FieldList:
@@ -10,10 +10,8 @@
10
10
  import logging
11
11
  import os
12
12
  import sqlite3
13
+ from collections.abc import Iterator
13
14
  from typing import Any
14
- from typing import Iterator
15
- from typing import List
16
- from typing import Optional
17
15
 
18
16
  import earthkit.data as ekd
19
17
  import tqdm
@@ -36,8 +34,8 @@ class GribIndex:
36
34
  self,
37
35
  database: str,
38
36
  *,
39
- keys: Optional[List[str] | str] = None,
40
- flavour: Optional[str] = None,
37
+ keys: list[str] | str | None = None,
38
+ flavour: str | None = None,
41
39
  update: bool = False,
42
40
  overwrite: bool = False,
43
41
  ) -> None:
@@ -157,7 +155,7 @@ class GribIndex:
157
155
  """Commit the current transaction to the database."""
158
156
  self.conn.commit()
159
157
 
160
- def _get_metadata_keys(self) -> List[str]:
158
+ def _get_metadata_keys(self) -> list[str]:
161
159
  """Retrieve the metadata keys from the database.
162
160
 
163
161
  Returns
@@ -225,7 +223,7 @@ class GribIndex:
225
223
  LOG.info(f"Path: {self._get_path(existing_record[1])}")
226
224
  raise
227
225
 
228
- def _all_columns(self) -> List[str]:
226
+ def _all_columns(self) -> list[str]:
229
227
  """Retrieve all column names from the grib_index table.
230
228
 
231
229
  Returns
@@ -241,7 +239,7 @@ class GribIndex:
241
239
  self._columns = [col for col in columns if not col.startswith("_")]
242
240
  return self._columns
243
241
 
244
- def _ensure_columns(self, columns: List[str]) -> None:
242
+ def _ensure_columns(self, columns: list[str]) -> None:
245
243
  """Add missing columns to the grib_index table.
246
244
 
247
245
  Parameters
@@ -324,7 +322,7 @@ class GribIndex:
324
322
 
325
323
  self._commit()
326
324
 
327
- def _paramdb(self, category: int, discipline: int) -> Optional[dict]:
325
+ def _paramdb(self, category: int, discipline: int) -> dict | None:
328
326
  """Fetch parameter information from the parameter database.
329
327
 
330
328
  Parameters
@@ -355,7 +353,7 @@ class GribIndex:
355
353
  except Exception as e:
356
354
  LOG.warning(f"Failed to fetch information from parameter database: {e}")
357
355
 
358
- def _param_grib2_info(self, paramId: int) -> List[dict]:
356
+ def _param_grib2_info(self, paramId: int) -> list[dict]:
359
357
  """Fetch GRIB2 parameter information for a given parameter ID.
360
358
 
361
359
  Parameters
@@ -383,7 +381,7 @@ class GribIndex:
383
381
  LOG.warning(f"Failed to fetch information from parameter database: {e}")
384
382
  return []
385
383
 
386
- def _param_id_info(self, paramId: int) -> Optional[dict]:
384
+ def _param_id_info(self, paramId: int) -> dict | None:
387
385
  """Fetch detailed information for a given parameter ID.
388
386
 
389
387
  Parameters
@@ -412,7 +410,7 @@ class GribIndex:
412
410
 
413
411
  return None
414
412
 
415
- def _param_id_unit(self, unitId: int) -> Optional[dict]:
413
+ def _param_id_unit(self, unitId: int) -> dict | None:
416
414
  """Fetch unit information for a given unit ID.
417
415
 
418
416
  Parameters
@@ -520,7 +518,7 @@ class GribIndex:
520
518
  raise ValueError(f"No path found for path_id {path_id}")
521
519
  return row[0]
522
520
 
523
- def retrieve(self, dates: List[Any], **kwargs: Any) -> Iterator[Any]:
521
+ def retrieve(self, dates: list[Any], **kwargs: Any) -> Iterator[Any]:
524
522
  """Retrieve GRIB data from the database.
525
523
 
526
524
  Parameters
@@ -574,9 +572,9 @@ class GribIndex:
574
572
  @legacy_source(__file__)
575
573
  def execute(
576
574
  context: Any,
577
- dates: List[Any],
575
+ dates: list[Any],
578
576
  indexdb: str,
579
- flavour: Optional[str] = None,
577
+ flavour: str | None = None,
580
578
  **kwargs: Any,
581
579
  ) -> FieldArray:
582
580
  """Execute the GRIB data retrieval process.
@@ -9,9 +9,6 @@
9
9
 
10
10
  import logging
11
11
  from typing import Any
12
- from typing import Dict
13
- from typing import List
14
- from typing import Union
15
12
 
16
13
  from earthkit.data.core.fieldlist import MultiFieldList
17
14
 
@@ -22,7 +19,7 @@ from .legacy import legacy_source
22
19
  LOGGER = logging.getLogger(__name__)
23
20
 
24
21
 
25
- def _to_list(x: Union[list, tuple, Any]) -> List[Any]:
22
+ def _to_list(x: list | tuple | Any) -> list[Any]:
26
23
  """Converts the input to a list if it is not already a list or tuple.
27
24
 
28
25
  Parameters
@@ -41,7 +38,7 @@ def _to_list(x: Union[list, tuple, Any]) -> List[Any]:
41
38
 
42
39
 
43
40
  @legacy_source(__file__)
44
- def hindcasts(context: Any, dates: List[Any], **request: Dict[str, Any]) -> MultiFieldList:
41
+ def hindcasts(context: Any, dates: list[Any], **request: dict[str, Any]) -> MultiFieldList:
45
42
  """Generates hindcast requests based on the provided dates and request parameters.
46
43
 
47
44
  Parameters
@@ -11,8 +11,8 @@
11
11
  import inspect
12
12
  import logging
13
13
  import os
14
+ from collections.abc import Callable
14
15
  from typing import Any
15
- from typing import Callable
16
16
 
17
17
  from anemoi.datasets.create.input.template import resolve
18
18
 
@@ -9,12 +9,8 @@
9
9
 
10
10
  import datetime
11
11
  import re
12
+ from collections.abc import Generator
12
13
  from typing import Any
13
- from typing import Dict
14
- from typing import Generator
15
- from typing import List
16
- from typing import Optional
17
- from typing import Union
18
14
 
19
15
  from anemoi.utils.humanize import did_you_mean
20
16
  from earthkit.data import from_source
@@ -27,7 +23,7 @@ from .legacy import legacy_source
27
23
  DEBUG = False
28
24
 
29
25
 
30
- def to_list(x: Union[list, tuple, Any]) -> list:
26
+ def to_list(x: list | tuple | Any) -> list:
31
27
  """Converts the input to a list if it is not already a list or tuple.
32
28
 
33
29
  Parameters
@@ -46,8 +42,8 @@ def to_list(x: Union[list, tuple, Any]) -> list:
46
42
 
47
43
 
48
44
  def _date_to_datetime(
49
- d: Union[datetime.datetime, list, tuple, str],
50
- ) -> Union[datetime.datetime, List[datetime.datetime]]:
45
+ d: datetime.datetime | list | tuple | str,
46
+ ) -> datetime.datetime | list[datetime.datetime]:
51
47
  """Converts the input date(s) to datetime objects.
52
48
 
53
49
  Parameters
@@ -67,7 +63,7 @@ def _date_to_datetime(
67
63
  return datetime.datetime.fromisoformat(d)
68
64
 
69
65
 
70
- def expand_to_by(x: Union[str, int, list]) -> Union[str, int, list]:
66
+ def expand_to_by(x: str | int | list) -> str | int | list:
71
67
  """Expands a range expression to a list of values.
72
68
 
73
69
  Parameters
@@ -97,7 +93,7 @@ def expand_to_by(x: Union[str, int, list]) -> Union[str, int, list]:
97
93
  return x
98
94
 
99
95
 
100
- def normalise_time_delta(t: Union[datetime.timedelta, str]) -> datetime.timedelta:
96
+ def normalise_time_delta(t: datetime.timedelta | str) -> datetime.timedelta:
101
97
  """Normalizes a time delta string to a datetime.timedelta object.
102
98
 
103
99
  Parameters
@@ -120,7 +116,7 @@ def normalise_time_delta(t: Union[datetime.timedelta, str]) -> datetime.timedelt
120
116
  return t
121
117
 
122
118
 
123
- def _normalise_time(t: Union[int, str]) -> str:
119
+ def _normalise_time(t: int | str) -> str:
124
120
  """Normalizes a time value to a string in HHMM format.
125
121
 
126
122
  Parameters
@@ -136,15 +132,15 @@ def _normalise_time(t: Union[int, str]) -> str:
136
132
  t = int(t)
137
133
  if t < 100:
138
134
  t * 100
139
- return "{:04d}".format(t)
135
+ return f"{t:04d}"
140
136
 
141
137
 
142
138
  def _expand_mars_request(
143
- request: Dict[str, Any],
139
+ request: dict[str, Any],
144
140
  date: datetime.datetime,
145
141
  request_already_using_valid_datetime: bool = False,
146
142
  date_key: str = "date",
147
- ) -> List[Dict[str, Any]]:
143
+ ) -> list[dict[str, Any]]:
148
144
  """Expands a MARS request with the given date and other parameters.
149
145
 
150
146
  Parameters
@@ -222,11 +218,11 @@ def _expand_mars_request(
222
218
 
223
219
 
224
220
  def factorise_requests(
225
- dates: List[datetime.datetime],
226
- *requests: Dict[str, Any],
221
+ dates: list[datetime.datetime],
222
+ *requests: dict[str, Any],
227
223
  request_already_using_valid_datetime: bool = False,
228
224
  date_key: str = "date",
229
- ) -> Generator[Dict[str, Any], None, None]:
225
+ ) -> Generator[dict[str, Any], None, None]:
230
226
  """Factorizes the requests based on the given dates.
231
227
 
232
228
  Parameters
@@ -268,7 +264,7 @@ def factorise_requests(
268
264
  yield r
269
265
 
270
266
 
271
- def use_grib_paramid(r: Dict[str, Any]) -> Dict[str, Any]:
267
+ def use_grib_paramid(r: dict[str, Any]) -> dict[str, Any]:
272
268
  """Converts the parameter short names to GRIB parameter IDs.
273
269
 
274
270
  Parameters
@@ -365,11 +361,11 @@ MARS_KEYS = [
365
361
  @legacy_source(__file__)
366
362
  def mars(
367
363
  context: Any,
368
- dates: List[datetime.datetime],
369
- *requests: Dict[str, Any],
364
+ dates: list[datetime.datetime],
365
+ *requests: dict[str, Any],
370
366
  request_already_using_valid_datetime: bool = False,
371
367
  date_key: str = "date",
372
- use_cdsapi_dataset: Optional[str] = None,
368
+ use_cdsapi_dataset: str | None = None,
373
369
  **kwargs: Any,
374
370
  ) -> Any:
375
371
  """Executes MARS requests based on the given context, dates, and other parameters.
@@ -9,7 +9,6 @@
9
9
 
10
10
 
11
11
  from typing import Any
12
- from typing import List
13
12
 
14
13
  import earthkit.data as ekd
15
14
 
@@ -18,7 +17,7 @@ from .xarray import load_many
18
17
 
19
18
 
20
19
  @legacy_source(__file__)
21
- def execute(context: Any, dates: List[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
20
+ def execute(context: Any, dates: list[str], path: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
22
21
  """Execute the loading of multiple NetCDF files.
23
22
 
24
23
  Parameters
@@ -9,8 +9,6 @@
9
9
 
10
10
 
11
11
  from typing import Any
12
- from typing import Dict
13
- from typing import List
14
12
 
15
13
  import earthkit.data as ekd
16
14
 
@@ -19,7 +17,7 @@ from .xarray import load_many
19
17
 
20
18
 
21
19
  @legacy_source(__file__)
22
- def execute(context: Dict[str, Any], dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
20
+ def execute(context: dict[str, Any], dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
23
21
  """Execute the data loading process from an OpenDAP source.
24
22
 
25
23
  Parameters
@@ -9,15 +9,13 @@
9
9
 
10
10
  import datetime
11
11
  import glob
12
+ from collections.abc import Generator
12
13
  from typing import Any
13
- from typing import Generator
14
- from typing import List
15
- from typing import Tuple
16
14
 
17
15
  from earthkit.data.utils.patterns import Pattern
18
16
 
19
17
 
20
- def _expand(paths: List[str]) -> Generator[str, None, None]:
18
+ def _expand(paths: list[str]) -> Generator[str, None, None]:
21
19
  """Expand the given paths to include all matching file paths.
22
20
 
23
21
  Parameters
@@ -54,8 +52,8 @@ def _expand(paths: List[str]) -> Generator[str, None, None]:
54
52
 
55
53
 
56
54
  def iterate_patterns(
57
- path: str, dates: List[datetime.datetime], **kwargs: Any
58
- ) -> Generator[Tuple[str, List[str]], None, None]:
55
+ path: str, dates: list[datetime.datetime], **kwargs: Any
56
+ ) -> Generator[tuple[str, list[str]], None, None]:
59
57
  """Iterate over patterns and expand them with given dates and additional keyword arguments.
60
58
 
61
59
  Parameters
@@ -0,0 +1,44 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ from . import source_registry
12
+ from .xarray import XarraySourceBase
13
+
14
+
15
@source_registry.register("planetary_computer")
class PlanetaryComputerSource(XarraySourceBase):
    """An Xarray data source for the planetary_computer."""

    emoji = "🪐"

    def __init__(self, context, data_catalog_id, version="v1", *args, **kwargs: dict):
        """Resolve a Planetary Computer collection to its Zarr asset and open it.

        Parameters
        ----------
        context : Any
            The context, forwarded to the parent class.
        data_catalog_id : str
            The identifier of the STAC collection to look up.
        version : str, optional
            The version segment of the STAC API URL. Defaults to "v1".
        *args : Any
            Additional positional arguments forwarded to the parent class.
        **kwargs : Any
            Additional keyword arguments. ``flavour``, ``patch`` and ``options``
            are consumed here; the rest is forwarded to the parent class.

        Raises
        ------
        KeyError
            If the collection has no ``zarr-abfs`` asset.
        """
        # Imported lazily: these packages are only needed when this source is used.
        import planetary_computer
        import pystac_client

        self.data_catalog_id = data_catalog_id
        self.flavour = kwargs.pop("flavour", None)
        self.patch = kwargs.pop("patch", None)
        # Copy so that the updates below do not modify the caller's dictionary.
        self.options = dict(kwargs.pop("options", None) or {})

        catalog = pystac_client.Client.open(
            f"https://planetarycomputer.microsoft.com/api/stac/{version}/",
            modifier=planetary_computer.sign_inplace,
        )
        collection = catalog.get_collection(self.data_catalog_id)

        asset = collection.assets["zarr-abfs"]
        extra_fields = asset.extra_fields

        if "xarray:storage_options" in extra_fields:
            self.options["storage_options"] = extra_fields["xarray:storage_options"]

        # Values published with the asset take precedence over user options;
        # an asset without open kwargs is tolerated.
        self.options.update(extra_fields.get("xarray:open_kwargs", {}))

        super().__init__(context, url=asset.href, *args, **kwargs)
@@ -9,9 +9,6 @@
9
9
 
10
10
  from copy import deepcopy
11
11
  from typing import Any
12
- from typing import Dict
13
- from typing import List
14
- from typing import Union
15
12
 
16
13
  from anemoi.datasets.compute.recentre import recentre as _recentre
17
14
 
@@ -19,7 +16,7 @@ from .legacy import legacy_source
19
16
  from .mars import mars
20
17
 
21
18
 
22
- def to_list(x: Union[list, tuple, str]) -> List:
19
+ def to_list(x: list | tuple | str) -> list:
23
20
  """Converts the input to a list. If the input is a string, it splits it by '/'.
24
21
 
25
22
  Parameters
@@ -39,7 +36,7 @@ def to_list(x: Union[list, tuple, str]) -> List:
39
36
  return [x]
40
37
 
41
38
 
42
- def normalise_number(number: Union[list, tuple, str]) -> List[int]:
39
+ def normalise_number(number: list | tuple | str) -> list[int]:
43
40
  """Normalises the input number to a list of integers.
44
41
 
45
42
  Parameters
@@ -63,7 +60,7 @@ def normalise_number(number: Union[list, tuple, str]) -> List[int]:
63
60
  return number
64
61
 
65
62
 
66
- def normalise_request(request: Dict) -> Dict:
63
+ def normalise_request(request: dict) -> dict:
67
64
  """Normalises the request dictionary by converting certain fields to lists.
68
65
 
69
66
  Parameters
@@ -85,7 +82,7 @@ def normalise_request(request: Dict) -> Dict:
85
82
  return request
86
83
 
87
84
 
88
- def load_if_needed(context: Any, dates: Any, dict_or_dataset: Union[Dict, Any]) -> Any:
85
+ def load_if_needed(context: Any, dates: Any, dict_or_dataset: dict | Any) -> Any:
89
86
  """Loads the dataset if the input is a dictionary, otherwise returns the input.
90
87
 
91
88
  Parameters
@@ -112,11 +109,11 @@ def load_if_needed(context: Any, dates: Any, dict_or_dataset: Union[Dict, Any])
112
109
  def recentre(
113
110
  context: Any,
114
111
  dates: Any,
115
- members: Union[Dict, Any],
116
- centre: Union[Dict, Any],
112
+ members: dict | Any,
113
+ centre: dict | Any,
117
114
  alpha: float = 1.0,
118
- remapping: Dict = {},
119
- patches: Dict = {},
115
+ remapping: dict = {},
116
+ patches: dict = {},
120
117
  ) -> Any:
121
118
  """Recentres the members dataset using the centre dataset.
122
119
 
@@ -9,9 +9,6 @@
9
9
 
10
10
  from datetime import datetime
11
11
  from typing import Any
12
- from typing import Dict
13
- from typing import List
14
- from typing import Optional
15
12
 
16
13
  from earthkit.data import from_source
17
14
 
@@ -21,7 +18,7 @@ from .legacy import legacy_source
21
18
 
22
19
 
23
20
  @legacy_source(__file__)
24
- def source(context: Optional[Any], dates: List[datetime], **kwargs: Any) -> Any:
21
+ def source(context: Any | None, dates: list[datetime], **kwargs: Any) -> Any:
25
22
  """Generates a source based on the provided context, dates, and additional keyword arguments.
26
23
 
27
24
  Parameters
@@ -52,7 +49,7 @@ execute = source
52
49
  if __name__ == "__main__":
53
50
  import yaml
54
51
 
55
- config: Dict[str, Any] = yaml.safe_load(
52
+ config: dict[str, Any] = yaml.safe_load(
56
53
  """
57
54
  name: mars
58
55
  class: ea
@@ -65,7 +62,7 @@ if __name__ == "__main__":
65
62
  time: $from_dates
66
63
  """
67
64
  )
68
- dates: List[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
65
+ dates: list[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
69
66
  dates = to_datetime_list(dates)
70
67
 
71
68
  for f in source(None, dates, **config):