anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +46 -13
  6. anemoi/datasets/create/config.py +52 -53
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +10 -92
  16. anemoi/datasets/create/sources/accumulate.py +517 -0
  17. anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
  18. anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
  19. anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +149 -0
  20. anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
  21. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  22. anemoi/datasets/create/sources/constants.py +39 -38
  23. anemoi/datasets/create/sources/empty.py +26 -22
  24. anemoi/datasets/create/sources/forcings.py +29 -28
  25. anemoi/datasets/create/sources/grib.py +92 -72
  26. anemoi/datasets/create/sources/grib_index.py +102 -54
  27. anemoi/datasets/create/sources/hindcasts.py +56 -55
  28. anemoi/datasets/create/sources/legacy.py +10 -62
  29. anemoi/datasets/create/sources/mars.py +159 -154
  30. anemoi/datasets/create/sources/netcdf.py +28 -24
  31. anemoi/datasets/create/sources/opendap.py +28 -24
  32. anemoi/datasets/create/sources/recentre.py +42 -41
  33. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  34. anemoi/datasets/create/sources/source.py +26 -48
  35. anemoi/datasets/create/sources/xarray_support/__init__.py +30 -24
  36. anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
  37. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  38. anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
  39. anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
  40. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  41. anemoi/datasets/create/sources/zenodo.py +43 -39
  42. anemoi/datasets/create/utils.py +0 -42
  43. anemoi/datasets/data/complement.py +26 -17
  44. anemoi/datasets/data/dataset.py +12 -0
  45. anemoi/datasets/data/grids.py +0 -152
  46. anemoi/datasets/data/masked.py +74 -13
  47. anemoi/datasets/data/missing.py +5 -0
  48. anemoi/datasets/data/rolling_average.py +141 -0
  49. anemoi/datasets/data/stores.py +7 -9
  50. anemoi/datasets/dates/__init__.py +2 -0
  51. anemoi/datasets/dumper.py +76 -0
  52. anemoi/datasets/grids.py +1 -178
  53. anemoi/datasets/schemas/recipe.json +131 -0
  54. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/METADATA +9 -6
  55. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/RECORD +59 -57
  56. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/WHEEL +1 -1
  57. anemoi/datasets/create/filter.py +0 -47
  58. anemoi/datasets/create/input/concat.py +0 -161
  59. anemoi/datasets/create/input/context.py +0 -86
  60. anemoi/datasets/create/input/empty.py +0 -53
  61. anemoi/datasets/create/input/filter.py +0 -117
  62. anemoi/datasets/create/input/function.py +0 -232
  63. anemoi/datasets/create/input/join.py +0 -129
  64. anemoi/datasets/create/input/pipe.py +0 -66
  65. anemoi/datasets/create/input/step.py +0 -173
  66. anemoi/datasets/create/input/template.py +0 -161
  67. anemoi/datasets/create/sources/accumulations.py +0 -1062
  68. anemoi/datasets/create/sources/accumulations2.py +0 -647
  69. anemoi/datasets/create/sources/tendencies.py +0 -198
  70. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/entry_points.txt +0 -0
  71. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/licenses/LICENSE +0 -0
  72. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/top_level.txt +0 -0
@@ -17,7 +17,7 @@ from ...dates.groups import GroupOfDates
17
17
  from .action import Action
18
18
  from .action import action_factory
19
19
  from .misc import _tidy
20
- from .result import Result
20
+ from .result.field import Result
21
21
 
22
22
  LOG = logging.getLogger(__name__)
23
23
 
@@ -55,6 +55,7 @@ class DataSourcesAction(Action):
55
55
 
56
56
  self.sources = [action_factory(config, context, ["data_sources"] + [a_path]) for a_path, config in configs]
57
57
  self.input = action_factory(input, context, ["input"])
58
+ self.names = [a_path for a_path, config in configs]
58
59
 
59
60
  def select(self, group_of_dates: GroupOfDates) -> "DataSourcesResult":
60
61
  """Selects the data sources result for the given group of dates.
@@ -8,9 +8,6 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
  import logging
11
- from collections.abc import Callable
12
- from functools import wraps
13
- from typing import Any
14
11
 
15
12
  from earthkit.data import FieldList
16
13
  from earthkit.data.core.fieldlist import MultiFieldList
@@ -18,74 +15,6 @@ from earthkit.data.core.fieldlist import MultiFieldList
18
15
  LOG = logging.getLogger(__name__)
19
16
 
20
17
 
21
- def parse_function_name(name: str) -> tuple[str, int | None]:
22
- """Parses a function name to extract the base name and an optional time delta.
23
-
24
- Parameters
25
- ----------
26
- name : str
27
- The function name to parse.
28
-
29
- Returns
30
- -------
31
- tuple of (str, int or None)
32
- The base name and an optional time delta.
33
- """
34
- if name.endswith("h") and name[:-1].isdigit():
35
-
36
- if "-" in name:
37
- name, delta = name.split("-")
38
- sign = -1
39
-
40
- elif "+" in name:
41
- name, delta = name.split("+")
42
- sign = 1
43
-
44
- else:
45
- return name, None
46
-
47
- assert delta[-1] == "h", (name, delta)
48
- delta = sign * int(delta[:-1])
49
- return name, delta
50
-
51
- return name, None
52
-
53
-
54
- def assert_fieldlist(method: Callable[..., Any]) -> Callable[..., Any]:
55
- """Decorator to assert that the result of a method is an instance of FieldList.
56
-
57
- Parameters
58
- ----------
59
- method : Callable[..., Any]
60
- The method to decorate.
61
-
62
- Returns
63
- -------
64
- Callable[..., Any]
65
- The decorated method.
66
- """
67
-
68
- @wraps(method)
69
- def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
70
-
71
- result = method(self, *args, **kwargs)
72
- assert isinstance(result, FieldList), type(result)
73
- return result
74
-
75
- return wrapper
76
-
77
-
78
- def assert_is_fieldlist(obj: object) -> None:
79
- """Asserts that the given object is an instance of FieldList.
80
-
81
- Parameters
82
- ----------
83
- obj : object
84
- The object to check.
85
- """
86
- assert isinstance(obj, FieldList), type(obj)
87
-
88
-
89
18
  def _flatten(ds: MultiFieldList | FieldList) -> list:
90
19
  """Flattens a MultiFieldList or FieldList into a list of FieldList objects.
91
20
 
@@ -14,17 +14,9 @@ from collections.abc import Generator
14
14
  from typing import Any
15
15
 
16
16
  import numpy as np
17
- from anemoi.transform.fields import new_field_with_valid_datetime
18
- from anemoi.transform.fields import new_fieldlist_from_list
19
17
  from anemoi.utils.dates import as_datetime
20
18
  from anemoi.utils.dates import frequency_to_timedelta
21
19
 
22
- from .action import Action
23
- from .action import action_factory
24
- from .join import JoinResult
25
- from .result import Result
26
- from .trace import trace_select
27
-
28
20
  LOG = logging.getLogger(__name__)
29
21
 
30
22
 
@@ -276,109 +268,3 @@ class DateMapperConstant(DateMapper):
276
268
  group_of_dates,
277
269
  )
278
270
  ]
279
-
280
-
281
- class DateMapperResult(Result):
282
- """A Result implementation that updates the valid datetime of the datasource."""
283
-
284
- def __init__(
285
- self,
286
- context: Any,
287
- action_path: list[str],
288
- group_of_dates: Any,
289
- source_result: Any,
290
- mapper: DateMapper,
291
- original_group_of_dates: Any,
292
- ) -> None:
293
- """Initialize DateMapperResult.
294
-
295
- Parameters
296
- ----------
297
- context : Any
298
- The context.
299
- action_path : list of str
300
- The action path.
301
- group_of_dates : Any
302
- The group of dates.
303
- source_result : Any
304
- The source result.
305
- mapper : DateMapper
306
- The date mapper.
307
- original_group_of_dates : Any
308
- The original group of dates.
309
- """
310
- super().__init__(context, action_path, group_of_dates)
311
-
312
- self.source_results: Any = source_result
313
- self.mapper: DateMapper = mapper
314
- self.original_group_of_dates: Any = original_group_of_dates
315
-
316
- @property
317
- def datasource(self) -> Any:
318
- """Get the datasource with updated valid datetime."""
319
- result: list = []
320
-
321
- for field in self.source_results.datasource:
322
- for date in self.original_group_of_dates:
323
- result.append(new_field_with_valid_datetime(field, date))
324
-
325
- if not result:
326
- raise ValueError("repeated_dates: no input data found")
327
-
328
- return new_fieldlist_from_list(result)
329
-
330
-
331
- class RepeatedDatesAction(Action):
332
- """An Action implementation that selects and transforms a group of dates."""
333
-
334
- def __init__(self, context: Any, action_path: list[str], source: Any, mode: str, **kwargs: Any) -> None:
335
- """Initialize RepeatedDatesAction.
336
-
337
- Args:
338
- context (Any): The context.
339
- action_path (List[str]): The action path.
340
- source (Any): The data source.
341
- mode (str): The mode for date mapping.
342
- **kwargs (Any): Additional arguments.
343
- """
344
- super().__init__(context, action_path, source, mode, **kwargs)
345
-
346
- self.source: Any = action_factory(source, context, action_path + ["source"])
347
- self.mapper: DateMapper = DateMapper.from_mode(mode, self.source, kwargs)
348
-
349
- @trace_select
350
- def select(self, group_of_dates: Any) -> JoinResult:
351
- """Select and transform the group of dates.
352
-
353
- Args:
354
- group_of_dates (Any): The group of dates to select.
355
-
356
- Returns
357
- -------
358
- JoinResult
359
- The result of the join operation.
360
- """
361
- results: list = []
362
- for one_date_group, many_dates_group in self.mapper.transform(group_of_dates):
363
- results.append(
364
- DateMapperResult(
365
- self.context,
366
- self.action_path,
367
- one_date_group,
368
- self.source.select(one_date_group),
369
- self.mapper,
370
- many_dates_group,
371
- )
372
- )
373
-
374
- return JoinResult(self.context, self.action_path, group_of_dates, results)
375
-
376
- def __repr__(self) -> str:
377
- """Get the string representation of the action.
378
-
379
- Returns
380
- -------
381
- str
382
- The string representation.
383
- """
384
- return f"MultiDateMatchAction({self.source}, {self.mapper})"
@@ -0,0 +1,17 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ import logging
11
+ from abc import ABC
12
+
13
+ LOG = logging.getLogger(__name__)
14
+
15
+
16
+ class Result(ABC):
17
+ pass
@@ -22,9 +22,7 @@ from anemoi.utils.humanize import seconds_to_human
22
22
  from anemoi.utils.humanize import shorten_list
23
23
  from earthkit.data.core.order import build_remapping
24
24
 
25
- from .action import ActionContext
26
- from .trace import trace
27
- from .trace import trace_datasource
25
+ from . import Result
28
26
 
29
27
  LOG = logging.getLogger(__name__)
30
28
 
@@ -278,40 +276,22 @@ def _data_request(data: Any) -> dict[str, Any]:
278
276
  return dict(param_level=params_levels, param_step=params_steps, area=area, grid=grid)
279
277
 
280
278
 
281
- class Result:
279
+ class FieldResult(Result):
282
280
  """Class to represent the result of an action in the dataset creation process."""
283
281
 
284
282
  empty: bool = False
285
283
  _coords_already_built: bool = False
286
284
 
287
- def __init__(self, context: ActionContext, action_path: list[str], dates: Any) -> None:
288
- """Initialize a Result instance.
285
+ def __init__(self, context: Any, datasource: Any) -> None:
289
286
 
290
- Parameters
291
- ----------
292
- context : ActionContext
293
- The context in which the result exists.
294
- action_path : list of str
295
- The action path.
296
- dates : Any
297
- The dates associated with the result.
298
- """
299
287
  from anemoi.datasets.dates.groups import GroupOfDates
300
288
 
301
- assert isinstance(dates, GroupOfDates), dates
302
-
303
- assert isinstance(context, ActionContext), type(context)
304
- assert isinstance(action_path, list), action_path
305
-
306
289
  self.context: Any = context
307
- self.group_of_dates: Any = dates
308
- self.action_path: list[str] = action_path
309
-
310
- @property
311
- @trace_datasource
312
- def datasource(self) -> Any:
313
- """Retrieve the data source for the result."""
314
- self._raise_not_implemented()
290
+ self.datasource = datasource
291
+ self.group_of_dates = context.argument
292
+ assert isinstance(
293
+ self.group_of_dates, GroupOfDates
294
+ ), f"Expected group_of_dates to be a GroupOfDates, got {type(self.group_of_dates)}: {self.group_of_dates}"
315
295
 
316
296
  @property
317
297
  def data_request(self) -> dict[str, Any]:
@@ -326,7 +306,7 @@ class Result:
326
306
  Any
327
307
  The data cube.
328
308
  """
329
- trace("🧊", f"getting cube from {self.__class__.__name__}")
309
+
330
310
  ds: Any = self.datasource
331
311
 
332
312
  remapping: Any = self.context.remapping
@@ -349,8 +329,7 @@ class Result:
349
329
  LOG.debug(f"Sorting done in {seconds_to_human(time.time()-start)}.")
350
330
  except ValueError:
351
331
  self.explain(ds, order_by, remapping=remapping, patches=patches)
352
- # raise ValueError(f"Error in {self}")
353
- exit(1)
332
+ raise ValueError(f"Error in {self}")
354
333
 
355
334
  if LOG.isEnabledFor(logging.DEBUG):
356
335
  LOG.debug("Cube shape: %s", cube)
@@ -517,67 +496,6 @@ class Result:
517
496
  print()
518
497
  print("❌" * 40)
519
498
  print()
520
- exit(1)
521
-
522
- def _repr(self, *args: Any, _indent_: str = "\n", **kwargs: Any) -> str:
523
- """Return the string representation of the Result instance.
524
-
525
- Parameters
526
- ----------
527
- args : Any
528
- Additional positional arguments.
529
- _indent_ : str
530
- Indentation string.
531
- kwargs : Any
532
- Additional keyword arguments.
533
-
534
- Returns
535
- -------
536
- str
537
- The string representation.
538
- """
539
- more: str = ",".join([str(a)[:5000] for a in args])
540
- more += ",".join([f"{k}={v}"[:5000] for k, v in kwargs.items()])
541
-
542
- dates: str = " no-dates"
543
- if self.group_of_dates is not None:
544
- dates = f" {len(self.group_of_dates)} dates"
545
- dates += " ("
546
- dates += "/".join(d.strftime("%Y-%m-%dT%H:%M") for d in self.group_of_dates)
547
- if len(dates) > 100:
548
- dates = dates[:100] + "..."
549
- dates += ")"
550
-
551
- more = more[:5000]
552
- txt: str = f"{self.__class__.__name__}:{dates}{_indent_}{more}"
553
- if _indent_:
554
- txt = txt.replace("\n", "\n ")
555
- return txt
556
-
557
- def __repr__(self) -> str:
558
- """Return the string representation of the Result instance."""
559
- return self._repr()
560
-
561
- def _raise_not_implemented(self) -> None:
562
- """Raise a NotImplementedError indicating the method is not implemented."""
563
- raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")
564
-
565
- def _trace_datasource(self, *args: Any, **kwargs: Any) -> str:
566
- """Trace the data source for the result.
567
-
568
- Parameters
569
- ----------
570
- args : Any
571
- Additional positional arguments.
572
- kwargs : Any
573
- Additional keyword arguments.
574
-
575
- Returns
576
- -------
577
- str
578
- The trace string.
579
- """
580
- return f"{self.__class__.__name__}({self.group_of_dates})"
581
499
 
582
500
  def build_coords(self) -> None:
583
501
  """Build the coordinates for the result."""