anemoi-datasets 0.5.19__py3-none-any.whl → 0.5.21__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/compare-lam.py +401 -0
  3. anemoi/datasets/commands/grib-index.py +111 -0
  4. anemoi/datasets/commands/inspect.py +2 -2
  5. anemoi/datasets/create/__init__.py +1 -1
  6. anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -1
  7. anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -1
  8. anemoi/datasets/create/filters/wz_to_w.py +3 -2
  9. anemoi/datasets/create/input/action.py +2 -0
  10. anemoi/datasets/create/input/result.py +1 -1
  11. anemoi/datasets/create/sources/anemoi_dataset.py +73 -0
  12. anemoi/datasets/create/sources/forcings.py +1 -1
  13. anemoi/datasets/create/sources/grib.py +18 -173
  14. anemoi/datasets/create/sources/grib_index.py +614 -0
  15. anemoi/datasets/create/sources/xarray_support/__init__.py +1 -1
  16. anemoi/datasets/create/sources/xarray_support/fieldlist.py +2 -2
  17. anemoi/datasets/create/sources/xarray_support/flavour.py +6 -0
  18. anemoi/datasets/create/writer.py +1 -1
  19. anemoi/datasets/data/__init__.py +16 -0
  20. anemoi/datasets/data/complement.py +4 -1
  21. anemoi/datasets/data/dataset.py +14 -0
  22. anemoi/datasets/data/interpolate.py +76 -0
  23. anemoi/datasets/data/masked.py +77 -0
  24. anemoi/datasets/data/misc.py +159 -0
  25. anemoi/datasets/grids.py +8 -2
  26. {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.21.dist-info}/METADATA +11 -4
  27. {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.21.dist-info}/RECORD +31 -27
  28. {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.21.dist-info}/WHEEL +0 -0
  29. {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.21.dist-info}/entry_points.txt +0 -0
  30. {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.21.dist-info}/licenses/LICENSE +0 -0
  31. {anemoi_datasets-0.5.19.dist-info → anemoi_datasets-0.5.21.dist-info}/top_level.txt +0 -0
@@ -25,7 +25,7 @@ def forcings(context: Any, dates: List[str], template: str, param: str) -> Any:
25
25
  The context in which the function is executed.
26
26
  dates : list
27
27
  List of dates for which data is to be loaded.
28
- template : str
28
+ template : FieldList
29
29
  Template for the data source.
30
30
  param : str
31
31
  Parameter for the data source.
@@ -17,8 +17,11 @@ from typing import Optional
17
17
  from typing import Union
18
18
 
19
19
  import earthkit.data as ekd
20
+ from anemoi.transform.fields import new_field_from_grid
21
+ from anemoi.transform.fields import new_fieldlist_from_list
22
+ from anemoi.transform.flavour import RuleBasedFlavour
23
+ from anemoi.transform.grids import grid_registry
20
24
  from earthkit.data import from_source
21
- from earthkit.data.indexing.fieldlist import FieldArray
22
25
  from earthkit.data.utils.patterns import Pattern
23
26
 
24
27
  from .legacy import legacy_source
@@ -26,169 +29,6 @@ from .legacy import legacy_source
26
29
  LOG = logging.getLogger(__name__)
27
30
 
28
31
 
29
- def _load(context: Any, name: str, record: Dict[str, Any]) -> tuple:
30
- """Load data from a given source.
31
-
32
- Parameters
33
- ----------
34
- context : Any
35
- The context in which the function is executed.
36
- name : str
37
- The name of the data source.
38
- record : dict of str to Any
39
- The record containing source information.
40
-
41
- Returns
42
- -------
43
- tuple
44
- A tuple containing the data as a numpy array and the UUID of the HGrid.
45
- """
46
- ds = None
47
-
48
- param = record["param"]
49
-
50
- if "path" in record:
51
- context.info(f"Using {name} from {record['path']} (param={param})")
52
- ds = from_source("file", record["path"])
53
-
54
- if "url" in record:
55
- context.info(f"Using {name} from {record['url']} (param={param})")
56
- ds = from_source("url", record["url"])
57
-
58
- ds = ds.sel(param=param)
59
-
60
- assert len(ds) == 1, f"{name} {param}, expected one field, got {len(ds)}"
61
- ds = ds[0]
62
-
63
- return ds.to_numpy(flatten=True), ds.metadata("uuidOfHGrid")
64
-
65
-
66
- class Geography:
67
- """This class retrieves the latitudes and longitudes of unstructured grids,
68
- and checks if the fields are compatible with the grid.
69
-
70
- Parameters
71
- ----------
72
- context : Any
73
- The context in which the function is executed.
74
- latitudes : dict of str to Any
75
- Latitude information.
76
- longitudes : dict of str to Any
77
- Longitude information.
78
- """
79
-
80
- def __init__(self, context: Any, latitudes: Dict[str, Any], longitudes: Dict[str, Any]) -> None:
81
- """Initialize the Geography class.
82
-
83
- Parameters
84
- ----------
85
- context : Any
86
- The context in which the function is executed.
87
- latitudes : dict of str to Any
88
- Latitude information.
89
- longitudes : dict of str to Any
90
- Longitude information.
91
- """
92
- latitudes, uuidOfHGrid_lat = _load(context, "latitudes", latitudes)
93
- longitudes, uuidOfHGrid_lon = _load(context, "longitudes", longitudes)
94
-
95
- assert (
96
- uuidOfHGrid_lat == uuidOfHGrid_lon
97
- ), f"uuidOfHGrid mismatch: lat={uuidOfHGrid_lat} != lon={uuidOfHGrid_lon}"
98
-
99
- context.info(f"Latitudes: {len(latitudes)}, Longitudes: {len(longitudes)}")
100
- assert len(latitudes) == len(longitudes)
101
-
102
- self.uuidOfHGrid = uuidOfHGrid_lat
103
- self.latitudes = latitudes
104
- self.longitudes = longitudes
105
- self.first = True
106
-
107
- def check(self, field: Any) -> None:
108
- """Check if the field is compatible with the grid.
109
-
110
- Parameters
111
- ----------
112
- field : Any
113
- The field to check.
114
- """
115
- if self.first:
116
- # We only check the first field, for performance reasons
117
- assert (
118
- field.metadata("uuidOfHGrid") == self.uuidOfHGrid
119
- ), f"uuidOfHGrid mismatch: {field.metadata('uuidOfHGrid')} != {self.uuidOfHGrid}"
120
- self.first = False
121
-
122
-
123
- class AddGrid:
124
- """An earth-kit.data.Field wrapper that adds grid information.
125
-
126
- Parameters
127
- ----------
128
- field : Any
129
- The field to wrap.
130
- geography : Geography
131
- The geography information.
132
- """
133
-
134
- def __init__(self, field: Any, geography: Geography) -> None:
135
- """Initialize the AddGrid class.
136
-
137
- Parameters
138
- ----------
139
- field : Any
140
- The field to wrap.
141
- geography : Geography
142
- The geography information.
143
- """
144
- self._field = field
145
-
146
- geography.check(field)
147
-
148
- self._latitudes = geography.latitudes
149
- self._longitudes = geography.longitudes
150
-
151
- def __getattr__(self, name: str) -> Any:
152
- """Get an attribute from the wrapped field.
153
-
154
- Parameters
155
- ----------
156
- name : str
157
- The name of the attribute.
158
-
159
- Returns
160
- -------
161
- Any
162
- The attribute value.
163
- """
164
- return getattr(self._field, name)
165
-
166
- def __repr__(self) -> str:
167
- """Get the string representation of the wrapped field.
168
-
169
- Returns
170
- -------
171
- str
172
- The string representation.
173
- """
174
- return repr(self._field)
175
-
176
- def grid_points(self) -> tuple:
177
- """Get the grid points (latitudes and longitudes).
178
-
179
- Returns
180
- -------
181
- tuple
182
- The latitudes and longitudes.
183
- """
184
- return self._latitudes, self._longitudes
185
-
186
- @property
187
- def resolution(self) -> str:
188
- """Get the resolution of the grid."""
189
- return "unknown"
190
-
191
-
192
32
  def check(ds: Any, paths: List[str], **kwargs: Any) -> None:
193
33
  """Check if the dataset matches the expected number of fields.
194
34
 
@@ -242,8 +82,8 @@ def execute(
242
82
  context: Any,
243
83
  dates: List[Any],
244
84
  path: Union[str, List[str]],
245
- latitudes: Optional[Dict[str, Any]] = None,
246
- longitudes: Optional[Dict[str, Any]] = None,
85
+ flavour: Optional[Union[str, Dict[str, Any]]] = None,
86
+ grid_definition: Optional[Dict[str, Any]] = None,
247
87
  *args: Any,
248
88
  **kwargs: Any,
249
89
  ) -> ekd.FieldList:
@@ -253,8 +93,8 @@ def execute(
253
93
  context (Any): The context in which the function is executed.
254
94
  dates (List[Any]): List of dates.
255
95
  path (Union[str, List[str]]): Path or list of paths to the GRIB files.
256
- latitudes (Optional[Dict[str, Any]], optional): Latitude information. Defaults to None.
257
- longitudes (Optional[Dict[str, Any]], optional): Longitude information. Defaults to None.
96
+ flavour (Optional[Union[str, Dict[str, Any]]], optional): Flavour information. Defaults to None.
97
+ grid_definition (Optional[Dict[str, Any]]): Grid definition config to create a Grid object.
258
98
  *args (Any): Additional arguments.
259
99
  **kwargs (Any): Additional keyword arguments.
260
100
 
@@ -264,10 +104,13 @@ def execute(
264
104
  The loaded dataset.
265
105
  """
266
106
  given_paths = path if isinstance(path, list) else [path]
107
+ if flavour is not None:
108
+ flavour = RuleBasedFlavour(flavour)
267
109
 
268
- geography = None
269
- if latitudes is not None and longitudes is not None:
270
- geography = Geography(context, latitudes, longitudes)
110
+ if grid_definition is not None:
111
+ grid = grid_registry.from_config(grid_definition)
112
+ else:
113
+ grid = None
271
114
 
272
115
  ds = from_source("empty")
273
116
  dates = [d.isoformat() for d in dates]
@@ -282,14 +125,16 @@ def execute(
282
125
  for path in _expand(paths):
283
126
  context.trace("📁", "PATH", path)
284
127
  s = from_source("file", path)
128
+ if flavour is not None:
129
+ s = flavour.map(s)
285
130
  s = s.sel(valid_datetime=dates, **kwargs)
286
131
  ds = ds + s
287
132
 
288
133
  if kwargs and not context.partial_ok:
289
134
  check(ds, given_paths, valid_datetime=dates, **kwargs)
290
135
 
291
- if geography is not None:
292
- ds = FieldArray([AddGrid(_, geography) for _ in ds])
136
+ if grid is not None:
137
+ ds = new_fieldlist_from_list([new_field_from_grid(f, grid) for f in ds])
293
138
 
294
139
  if len(ds) == 0:
295
140
  LOG.warning(f"No fields found for {dates} in {given_paths} (kwargs={kwargs})")