anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +42 -1
  6. anemoi/datasets/create/config.py +2 -0
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +9 -89
  16. anemoi/datasets/create/sources/accumulations.py +74 -94
  17. anemoi/datasets/create/sources/accumulations2.py +16 -45
  18. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  19. anemoi/datasets/create/sources/constants.py +39 -38
  20. anemoi/datasets/create/sources/empty.py +26 -22
  21. anemoi/datasets/create/sources/forcings.py +29 -28
  22. anemoi/datasets/create/sources/grib.py +92 -72
  23. anemoi/datasets/create/sources/grib_index.py +46 -42
  24. anemoi/datasets/create/sources/hindcasts.py +56 -55
  25. anemoi/datasets/create/sources/legacy.py +10 -62
  26. anemoi/datasets/create/sources/mars.py +107 -131
  27. anemoi/datasets/create/sources/netcdf.py +28 -24
  28. anemoi/datasets/create/sources/opendap.py +28 -24
  29. anemoi/datasets/create/sources/recentre.py +42 -41
  30. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  31. anemoi/datasets/create/sources/source.py +26 -48
  32. anemoi/datasets/create/sources/tendencies.py +67 -94
  33. anemoi/datasets/create/sources/xarray_support/__init__.py +29 -24
  34. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  35. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  36. anemoi/datasets/create/sources/zenodo.py +43 -39
  37. anemoi/datasets/create/utils.py +0 -42
  38. anemoi/datasets/data/dataset.py +6 -0
  39. anemoi/datasets/data/grids.py +0 -152
  40. anemoi/datasets/data/rolling_average.py +141 -0
  41. anemoi/datasets/data/stores.py +7 -9
  42. anemoi/datasets/dates/__init__.py +2 -0
  43. anemoi/datasets/dumper.py +76 -0
  44. anemoi/datasets/grids.py +1 -178
  45. anemoi/datasets/schemas/recipe.json +131 -0
  46. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +5 -2
  47. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/RECORD +51 -51
  48. anemoi/datasets/create/filter.py +0 -47
  49. anemoi/datasets/create/input/concat.py +0 -161
  50. anemoi/datasets/create/input/context.py +0 -86
  51. anemoi/datasets/create/input/empty.py +0 -53
  52. anemoi/datasets/create/input/filter.py +0 -117
  53. anemoi/datasets/create/input/function.py +0 -232
  54. anemoi/datasets/create/input/join.py +0 -129
  55. anemoi/datasets/create/input/pipe.py +0 -66
  56. anemoi/datasets/create/input/step.py +0 -173
  57. anemoi/datasets/create/input/template.py +0 -161
  58. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  59. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  60. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  61. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -865,7 +865,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
865
865
  # assert isinstance(group[0], datetime.datetime), type(group[0])
866
866
  LOG.debug(f"Building data for group {igroup}/{self.n_groups}")
867
867
 
868
- result = self.input.select(group_of_dates=group)
868
+ result = self.input.select(argument=group)
869
869
  assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)
870
870
 
871
871
  # There are several groups.
@@ -1616,3 +1616,44 @@ def creator_factory(name: str, trace: str | None = None, **kwargs: Any) -> Any:
1616
1616
  )[name]
1617
1617
  LOG.debug(f"Creating {cls.__name__} with {kwargs}")
1618
1618
  return cls(**kwargs)
1619
+
1620
+
1621
+ def validate_config(config: Any) -> None:
1622
+
1623
+ import json
1624
+
1625
+ import jsonschema
1626
+
1627
+ def _tidy(d):
1628
+ if isinstance(d, dict):
1629
+ return {k: _tidy(v) for k, v in d.items()}
1630
+
1631
+ if isinstance(d, list):
1632
+ return [_tidy(v) for v in d if v is not None]
1633
+
1634
+ # jsonschema does not support datetime.date
1635
+ if isinstance(d, datetime.datetime):
1636
+ return d.isoformat()
1637
+
1638
+ if isinstance(d, datetime.date):
1639
+ return d.isoformat()
1640
+
1641
+ return d
1642
+
1643
+ # https://json-schema.org
1644
+
1645
+ with open(
1646
+ os.path.join(
1647
+ os.path.dirname(os.path.dirname(__file__)),
1648
+ "schemas",
1649
+ "recipe.json",
1650
+ )
1651
+ ) as f:
1652
+ schema = json.load(f)
1653
+
1654
+ try:
1655
+ jsonschema.validate(instance=_tidy(config), schema=schema)
1656
+ except jsonschema.exceptions.ValidationError as e:
1657
+ LOG.error("❌ Config validation failed (jsonschema):")
1658
+ LOG.error(e.message)
1659
+ raise
@@ -279,6 +279,8 @@ class LoadersConfig(Config):
279
279
 
280
280
  self.output.order_by = normalize_order_by(self.output.order_by)
281
281
 
282
+ self.setdefault("dates", Config())
283
+
282
284
  self.dates["group_by"] = self.build.group_by
283
285
 
284
286
  ###########
@@ -1,4 +1,4 @@
1
- # (C) Copyright 2024 Anemoi contributors.
1
+ # (C) Copyright 2024-2025 Anemoi contributors.
2
2
  #
3
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
4
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
@@ -7,21 +7,15 @@
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
- import logging
11
10
  from copy import deepcopy
11
+ from functools import cached_property
12
+ from typing import TYPE_CHECKING
12
13
  from typing import Any
13
14
 
14
- from anemoi.datasets.dates.groups import GroupOfDates
15
+ from anemoi.datasets.create.input.context.field import FieldContext
15
16
 
16
- from .trace import trace_select
17
-
18
- LOG = logging.getLogger(__name__)
19
-
20
-
21
- class Context:
22
- """Context for building input data."""
23
-
24
- pass
17
+ if TYPE_CHECKING:
18
+ from anemoi.datasets.create.input.action import Recipe
25
19
 
26
20
 
27
21
  class InputBuilder:
@@ -34,72 +28,58 @@ class InputBuilder:
34
28
  ----------
35
29
  config : dict
36
30
  Configuration dictionary.
37
- data_sources : Union[dict, list]
31
+ data_sources : dict
38
32
  Data sources.
39
33
  **kwargs : Any
40
34
  Additional keyword arguments.
41
35
  """
42
36
  self.kwargs = kwargs
37
+ self.config = deepcopy(config)
38
+ self.data_sources = deepcopy(dict(data_sources=data_sources))
43
39
 
44
- config = deepcopy(config)
45
- if data_sources:
46
- config = dict(
47
- data_sources=dict(
48
- sources=data_sources,
49
- input=config,
50
- )
51
- )
52
- self.config = config
53
- self.action_path = ["input"]
54
-
55
- @trace_select
56
- def select(self, group_of_dates: GroupOfDates) -> Any:
57
- """Select data based on the group of dates.
58
-
59
- Parameters
60
- ----------
61
- group_of_dates : GroupOfDates
62
- Group of dates to select data for.
63
-
64
- Returns
65
- -------
66
- Any
67
- Selected data.
68
- """
69
- from .action import ActionContext
40
+ @cached_property
41
+ def action(self) -> "Recipe":
42
+ """Returns the action object based on the configuration."""
43
+ from .action import Recipe
70
44
  from .action import action_factory
71
45
 
72
- """This changes the context."""
73
- context = ActionContext(**self.kwargs)
74
- action = action_factory(self.config, context, self.action_path)
75
- return action.select(group_of_dates)
76
-
77
- def __repr__(self) -> str:
78
- """Return a string representation of the InputBuilder.
79
-
80
- Returns
81
- -------
82
- str
83
- String representation.
84
- """
85
- from .action import ActionContext
86
- from .action import action_factory
46
+ sources = action_factory(self.data_sources, "data_sources")
47
+ input = action_factory(self.config, "input")
87
48
 
88
- context = ActionContext(**self.kwargs)
89
- a = action_factory(self.config, context, self.action_path)
90
- return repr(a)
49
+ return Recipe(input, sources)
91
50
 
92
- def _trace_select(self, group_of_dates: GroupOfDates) -> str:
93
- """Trace the select operation.
51
+ def select(self, argument) -> Any:
52
+ """Select data based on the group of dates.
94
53
 
95
54
  Parameters
96
55
  ----------
97
- group_of_dates : GroupOfDates
56
+ argument : GroupOfDates
98
57
  Group of dates to select data for.
99
58
 
100
59
  Returns
101
60
  -------
102
- str
103
- Trace string.
61
+ Any
62
+ Selected data.
104
63
  """
105
- return f"InputBuilder({group_of_dates})"
64
+ context = FieldContext(argument, **self.kwargs)
65
+ return context.create_result(self.action(context, argument))
66
+
67
+
68
+ def build_input(config: dict, data_sources: dict | list, **kwargs: Any) -> InputBuilder:
69
+ """Build an InputBuilder instance.
70
+
71
+ Parameters
72
+ ----------
73
+ config : dict
74
+ Configuration dictionary.
75
+ data_sources : Union[dict, list]
76
+ Data sources.
77
+ **kwargs : Any
78
+ Additional keyword arguments.
79
+
80
+ Returns
81
+ -------
82
+ InputBuilder
83
+ An instance of InputBuilder.
84
+ """
85
+ return InputBuilder(config, data_sources, **kwargs)