anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/create.py +3 -2
  3. anemoi/datasets/commands/inspect.py +1 -1
  4. anemoi/datasets/commands/publish.py +30 -0
  5. anemoi/datasets/create/__init__.py +72 -35
  6. anemoi/datasets/create/check.py +6 -0
  7. anemoi/datasets/create/config.py +4 -3
  8. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  9. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  10. anemoi/datasets/create/functions/filters/rename.py +2 -3
  11. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  12. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  13. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  14. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  15. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  16. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  17. anemoi/datasets/create/functions/sources/__init__.py +7 -1
  18. anemoi/datasets/create/functions/sources/accumulations.py +2 -0
  19. anemoi/datasets/create/functions/sources/grib.py +87 -2
  20. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  21. anemoi/datasets/create/functions/sources/mars.py +9 -3
  22. anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
  23. anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
  24. anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
  25. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
  26. anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
  27. anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
  28. anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
  29. anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
  30. anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
  31. anemoi/datasets/create/input/__init__.py +69 -0
  32. anemoi/datasets/create/input/action.py +123 -0
  33. anemoi/datasets/create/input/concat.py +92 -0
  34. anemoi/datasets/create/input/context.py +59 -0
  35. anemoi/datasets/create/input/data_sources.py +71 -0
  36. anemoi/datasets/create/input/empty.py +42 -0
  37. anemoi/datasets/create/input/filter.py +76 -0
  38. anemoi/datasets/create/input/function.py +122 -0
  39. anemoi/datasets/create/input/join.py +57 -0
  40. anemoi/datasets/create/input/misc.py +85 -0
  41. anemoi/datasets/create/input/pipe.py +33 -0
  42. anemoi/datasets/create/input/repeated_dates.py +217 -0
  43. anemoi/datasets/create/input/result.py +413 -0
  44. anemoi/datasets/create/input/step.py +99 -0
  45. anemoi/datasets/create/{template.py → input/template.py} +0 -42
  46. anemoi/datasets/create/persistent.py +1 -1
  47. anemoi/datasets/create/statistics/__init__.py +1 -1
  48. anemoi/datasets/create/utils.py +3 -0
  49. anemoi/datasets/create/zarr.py +4 -2
  50. anemoi/datasets/data/dataset.py +11 -1
  51. anemoi/datasets/data/debug.py +5 -1
  52. anemoi/datasets/data/masked.py +2 -2
  53. anemoi/datasets/data/rescale.py +147 -0
  54. anemoi/datasets/data/stores.py +20 -7
  55. anemoi/datasets/dates/__init__.py +113 -30
  56. anemoi/datasets/dates/groups.py +92 -19
  57. anemoi/datasets/fields.py +66 -0
  58. anemoi/datasets/utils/fields.py +47 -0
  59. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
  60. anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
  61. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
  62. anemoi/datasets/create/input.py +0 -1065
  63. anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
  64. /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
  65. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
  66. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
  67. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,71 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from functools import cached_property
11
+
12
+ from anemoi.utils.dates import as_datetime as as_datetime
13
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
14
+
15
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
16
+ from anemoi.datasets.fields import FieldArray as FieldArray
17
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
18
+
19
+ from .action import Action
20
+ from .action import action_factory
21
+ from .misc import _tidy
22
+ from .result import Result
23
+
24
+ LOG = logging.getLogger(__name__)
25
+
26
+
27
class DataSourcesAction(Action):
    """Action combining a set of auxiliary data sources with a main input.

    ``sources`` is either a dict (key -> config) or a list of configs; one
    sub-action is built per entry with ``action_factory``. ``input`` is the
    configuration of the main input action.
    """

    def __init__(self, context, action_path, sources, input):
        """Build the sub-actions for every source and for the main input.

        Raises:
            ValueError: if ``sources`` is neither a list nor a dict.
        """
        # Validate *before* unpacking ``sources`` in the super() call below,
        # so an unsupported value is reported with the descriptive ValueError
        # rather than whatever ``*sources`` happens to raise for it.
        if isinstance(sources, dict):
            configs = [(str(k), c) for k, c in sources.items()]
        elif isinstance(sources, list):
            configs = [(str(i), c) for i, c in enumerate(sources)]
        else:
            raise ValueError(f"Invalid data_sources, expecting list or dict, got {type(sources)}: {sources}")

        # NOTE(review): the ``action_path`` argument is not used here, the
        # path is always ["data_sources"]; for a dict, ``*sources`` forwards
        # only the keys. Kept as in the original - confirm this is intended.
        super().__init__(context, ["data_sources"], *sources)

        self.sources = [action_factory(config, context, ["data_sources"] + [a_path]) for a_path, config in configs]
        self.input = action_factory(input, context, ["input"])

    def select(self, group_of_dates):
        """Select ``group_of_dates`` on every source and on the main input."""
        sources_results = [a.select(group_of_dates) for a in self.sources]
        return DataSourcesResult(
            self.context,
            self.action_path,
            group_of_dates,
            self.input.select(group_of_dates),
            sources_results,
        )

    def __repr__(self):
        content = "\n".join([str(i) for i in self.sources])
        return super().__repr__(content)
53
+
54
+
55
class DataSourcesResult(Result):
    """Result pairing the main input result with the results of the sources."""

    def __init__(self, context, action_path, dates, input_result, sources_results):
        super().__init__(context, action_path, dates)
        # Result of the main input.
        self.input_result = input_result
        # Results of the individual data sources.
        self.sources_results = sources_results

    @cached_property
    def datasource(self):
        """Return the tidied datasource of the main input.

        Every source result is evaluated first and handed to
        ``context.notify_result`` so that the main input can refer to it.
        """
        for source_result in self.sources_results:
            path = source_result.action_path[:-1]
            self.context.notify_result(path, source_result.datasource)

        main_datasource = self.input_result.datasource
        return _tidy(main_datasource)
@@ -0,0 +1,42 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from functools import cached_property
11
+
12
+ from anemoi.utils.dates import as_datetime as as_datetime
13
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
14
+
15
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
16
+ from anemoi.datasets.fields import FieldArray as FieldArray
17
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
18
+
19
+ from .misc import assert_fieldlist
20
+ from .result import Result
21
+ from .trace import trace_datasource
22
+
23
+ LOG = logging.getLogger(__name__)
24
+
25
+
26
class EmptyResult(Result):
    """Result that carries no fields."""

    # Flag read by callers (e.g. to filter out empty results).
    empty = True

    def __init__(self, context, action_path, dates):
        empty_path = action_path + ["empty"]
        super().__init__(context, empty_path, dates)

    @cached_property
    @assert_fieldlist
    @trace_datasource
    def datasource(self):
        """Return the earthkit-data "empty" source."""
        # Imported lazily, as in the rest of this module.
        from earthkit.data import from_source

        return from_source("empty")

    @property
    def variables(self):
        """An empty result has no variables."""
        return []
@@ -0,0 +1,76 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from functools import cached_property
11
+
12
+ from anemoi.utils.dates import as_datetime as as_datetime
13
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
14
+
15
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
16
+ from anemoi.datasets.fields import FieldArray as FieldArray
17
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
18
+
19
+ from ..functions import import_function
20
+ from .function import FunctionContext
21
+ from .misc import _tidy
22
+ from .misc import assert_fieldlist
23
+ from .step import StepAction
24
+ from .step import StepResult
25
+ from .template import notify_result
26
+ from .trace import trace_datasource
27
+
28
+ LOG = logging.getLogger(__name__)
29
+
30
+
31
class FilterStepResult(StepResult):
    """Step result that applies ``sel`` to the upstream datasource."""

    @property
    @notify_result
    @assert_fieldlist
    @trace_datasource
    def datasource(self):
        """Return the upstream datasource restricted by the action's kwargs."""
        selected = self.upstream_result.datasource.sel(**self.action.kwargs)
        return _tidy(selected)
40
+
41
+
42
class FilterStepAction(StepAction):
    """Step action whose results are ``FilterStepResult`` instances."""

    result_class = FilterStepResult
44
+
45
+
46
class StepFunctionResult(StepResult):
    """Step result produced by calling the action's function on the upstream data."""

    @cached_property
    @assert_fieldlist
    @notify_result
    @trace_datasource
    def datasource(self):
        """Call the function with the upstream datasource and tidy its output.

        The first positional argument of the action is skipped when calling
        the function; the remaining ones and the kwargs are forwarded.
        Any exception is logged with the action name and re-raised.
        """
        try:
            produced = self.action.function(
                FunctionContext(self),
                self.upstream_result.datasource,
                *self.action.args[1:],
                **self.action.kwargs,
            )
            return _tidy(produced)
        except Exception:
            LOG.error(f"Error in {self.action.name}", exc_info=True)
            raise

    def _trace_datasource(self, *args, **kwargs):
        """Label used when tracing ``datasource``."""
        label = f"{self.action.name}({self.group_of_dates})"
        return label
68
+
69
+
70
class FunctionStepAction(StepAction):
    """Step action backed by a function looked up in the "filters" kind."""

    result_class = StepFunctionResult

    def __init__(self, context, action_path, previous_step, *args, **kwargs):
        super().__init__(context, action_path, previous_step, *args, **kwargs)
        # The first positional argument is the name of the function to import.
        function_name = args[0]
        self.name = function_name
        self.function = import_function(function_name, "filters")
@@ -0,0 +1,122 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from functools import cached_property
11
+
12
+ from anemoi.utils.dates import as_datetime as as_datetime
13
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
14
+
15
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
16
+ from anemoi.datasets.fields import FieldArray as FieldArray
17
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
18
+
19
+ from ..functions import import_function
20
+ from .action import Action
21
+ from .misc import _tidy
22
+ from .misc import assert_fieldlist
23
+ from .result import Result
24
+ from .template import notify_result
25
+ from .template import resolve
26
+ from .template import substitute
27
+ from .trace import trace
28
+ from .trace import trace_datasource
29
+ from .trace import trace_select
30
+
31
+ LOG = logging.getLogger(__name__)
32
+
33
+
34
class FunctionContext:
    """Object handed to every function as its first argument.

    It gives the function access to its owner (the result calling it) and to
    a few values derived from that owner.
    """

    def __init__(self, owner):
        # ``owner`` is the object that calls the function.
        self.owner = owner
        # Copied once at construction time from the owner's context.
        self.use_grib_paramid = owner.context.use_grib_paramid

    def trace(self, emoji, *args):
        """Forward to the module-level ``trace`` helper."""
        trace(emoji, *args)

    def info(self, *args, **kwargs):
        """Forward to ``LOG.info``."""
        LOG.info(*args, **kwargs)

    @property
    def dates_provider(self):
        """The ``provider`` of the owner's group of dates."""
        group = self.owner.group_of_dates
        return group.provider

    @property
    def partial_ok(self):
        """The ``partial_ok`` flag of the owner's group of dates."""
        group = self.owner.group_of_dates
        return group.partial_ok
56
+
57
+
58
class FunctionAction(Action):
    """Action backed by a function looked up in the "sources" kind."""

    def __init__(self, context, action_path, _name, **kwargs):
        super().__init__(context, action_path, **kwargs)
        self.name = _name

    @trace_select
    def select(self, group_of_dates):
        """Return a ``FunctionResult`` bound to this action and the given dates."""
        return FunctionResult(
            self.context,
            self.action_path,
            group_of_dates,
            action=self,
        )

    @property
    def function(self):
        """Import and return the function; the name is used as-is."""
        return import_function(self.name, "sources")

    def __repr__(self):
        positional = ",".join(self._short_str(a) for a in self.args)
        named = " ".join(self._short_str(f"{k}={v}") for k, v in self.kwargs.items())
        # Positional and keyword parts are concatenated without a separator.
        content = self._short_str(positional + named)
        return super().__repr__(_inline_=content, _indent_=" ")

    def _trace_select(self, group_of_dates):
        """Label used when tracing ``select``."""
        label = f"{self.name}({group_of_dates})"
        return label
81
+
82
+
83
class FunctionResult(Result):
    """Result obtained by calling the function of an action for a group of dates."""

    def __init__(self, context, action_path, group_of_dates, action):
        super().__init__(context, action_path, group_of_dates)
        assert isinstance(action, Action), type(action)
        self.action = action

        # The action's arguments go through ``substitute`` once, at creation.
        self.args, self.kwargs = substitute(context, (self.action.args, self.action.kwargs))

    def _trace_datasource(self, *args, **kwargs):
        """Label used when tracing ``datasource``."""
        return f"{self.action.name}({self.group_of_dates})"

    @cached_property
    @assert_fieldlist
    @notify_result
    @trace_datasource
    def datasource(self):
        """Call the action's function and return its tidied output.

        The function receives a ``FunctionContext``, the group of dates as a
        list, and the arguments after they went through ``resolve``.
        Any exception is logged with the action name and re-raised.
        """
        args, kwargs = resolve(self.context, (self.args, self.kwargs))

        try:
            return _tidy(
                self.action.function(
                    FunctionContext(self),
                    list(self.group_of_dates),  # Will provide a list of datetime objects
                    *args,
                    **kwargs,
                )
            )
        except Exception:
            # Log the action *name*: evaluating ``self.action.function`` again
            # here would re-run the lookup inside the handler and could raise
            # itself. This also matches what StepFunctionResult logs.
            LOG.error(f"Error in {self.action.name}", exc_info=True)
            raise

    def __repr__(self):
        try:
            return f"{self.action.name}({self.group_of_dates})"
        except Exception:
            # Attributes may be missing if __init__ did not complete.
            return f"{self.__class__.__name__}(unitialised)"

    @property
    def function(self):
        raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")
@@ -0,0 +1,57 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from functools import cached_property
11
+
12
+ from .action import Action
13
+ from .action import action_factory
14
+ from .empty import EmptyResult
15
+ from .misc import _tidy
16
+ from .misc import assert_fieldlist
17
+ from .result import Result
18
+ from .template import notify_result
19
+ from .trace import trace_datasource
20
+ from .trace import trace_select
21
+
22
+ LOG = logging.getLogger(__name__)
23
+
24
+
25
class JoinResult(Result):
    """Result that concatenates the datasources of several results."""

    def __init__(self, context, action_path, group_of_dates, results, **kwargs):
        super().__init__(context, action_path, group_of_dates)
        # Results flagged as empty are dropped up front.
        kept = []
        for candidate in results:
            if not candidate.empty:
                kept.append(candidate)
        self.results = kept

    @cached_property
    @assert_fieldlist
    @notify_result
    @trace_datasource
    def datasource(self):
        """Accumulate every kept result's datasource onto an empty one."""
        seed = EmptyResult(self.context, self.action_path, self.group_of_dates)
        combined = seed.datasource
        for part in self.results:
            combined += part.datasource
        return _tidy(combined)

    def __repr__(self):
        lines = [str(part) for part in self.results]
        return super().__repr__("\n".join(lines))
43
+
44
+
45
class JoinAction(Action):
    """Action made of several sub-actions whose results are joined."""

    def __init__(self, context, action_path, *configs):
        super().__init__(context, action_path, *configs)
        # One sub-action per config; its path is extended with its position.
        sub_actions = []
        for position, config in enumerate(configs):
            sub_path = action_path + [str(position)]
            sub_actions.append(action_factory(config, context, sub_path))
        self.actions = sub_actions

    def __repr__(self):
        lines = [str(sub_action) for sub_action in self.actions]
        return super().__repr__("\n".join(lines))

    @trace_select
    def select(self, group_of_dates):
        """Select the dates on every sub-action and wrap them in a ``JoinResult``."""
        selected = [sub_action.select(group_of_dates) for sub_action in self.actions]
        return JoinResult(self.context, self.action_path, group_of_dates, selected)
@@ -0,0 +1,85 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from functools import wraps
11
+
12
+ from anemoi.utils.dates import as_datetime as as_datetime
13
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
14
+ from earthkit.data.core.fieldlist import MultiFieldList
15
+ from earthkit.data.indexing.fieldlist import FieldList
16
+
17
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
18
+ from anemoi.datasets.fields import FieldArray as FieldArray
19
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
20
+
21
+ from ..functions import import_function
22
+
23
+ LOG = logging.getLogger(__name__)
24
+
25
+
26
def parse_function_name(name):
    """Split an optional hour offset off a function name.

    A name of the form ``<base>-<N>h`` or ``<base>+<N>h`` (``N`` made of
    digits only) is returned as ``(base, -N)`` or ``(base, N)``.
    Any other name is returned unchanged as ``(name, None)``.

    The previous guard required ``name[:-1]`` to be all digits, which can
    never hold for a name containing "-" or "+", so the offset was never
    parsed.
    """
    if name.endswith("h"):
        for separator, sign in (("-", -1), ("+", 1)):
            # Split on the last separator so that a base name which itself
            # contains the separator is left intact.
            base, found, hours = name.rpartition(separator)
            if found and base and hours[:-1].isdigit():
                return base, sign * int(hours[:-1])

    return name, None
46
+
47
+
48
def is_function(name, kind):
    """Return True if ``name`` can be imported as a function of ``kind``.

    The name first goes through ``parse_function_name``; only the name part
    of its result is passed to ``import_function``. An ``ImportError`` means
    "not a function" and yields False.
    """
    name, _ = parse_function_name(name)
    try:
        import_function(name, kind)
        return True
    except ImportError as e:
        # A failed import is an expected outcome of this probe: report it
        # through the module logger rather than printing to stdout.
        LOG.debug("%s", e)
        return False
56
+
57
+
58
def assert_fieldlist(method):
    """Decorate a method so that its return value is asserted to be a FieldList."""

    @wraps(method)
    def checked(self, *args, **kwargs):
        value = method(self, *args, **kwargs)
        # On failure, the assertion message is the offending type.
        assert isinstance(value, FieldList), type(value)
        return value

    return checked
66
+
67
+
68
def assert_is_fieldlist(obj):
    """Assert that ``obj`` is a FieldList; the message is the object's type."""
    is_fieldlist = isinstance(obj, FieldList)
    assert is_fieldlist, type(obj)
70
+
71
+
72
def _flatten(ds):
    """Return the leaves of ``ds`` as a flat list.

    A MultiFieldList is expanded recursively through its ``_indexes``;
    anything else is returned as a single-element list.
    """
    if not isinstance(ds, MultiFieldList):
        return [ds]

    leaves = []
    for child in ds._indexes:
        for leaf in _flatten(child):
            leaves.append(_tidy(leaf))
    return leaves
76
+
77
+
78
def _tidy(ds, indent=0):
    """Simplify a MultiFieldList by flattening it and dropping empty parts.

    Objects that are not a MultiFieldList are returned unchanged. When a
    single non-empty part remains, that part is returned directly; otherwise
    a new MultiFieldList is built from the remaining parts.
    ``indent`` is accepted but not used.
    """
    if not isinstance(ds, MultiFieldList):
        return ds

    non_empty = [part for part in _flatten(ds) if len(part) > 0]
    if len(non_empty) == 1:
        return non_empty[0]
    return MultiFieldList(non_empty)
@@ -0,0 +1,33 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+
11
+ from .action import Action
12
+ from .action import action_factory
13
+ from .step import step_factory
14
+ from .trace import trace_select
15
+
16
+ LOG = logging.getLogger(__name__)
17
+
18
+
19
class PipeAction(Action):
    """Action chaining a first action with one or more subsequent steps."""

    def __init__(self, context, action_path, *configs):
        super().__init__(context, action_path, *configs)
        assert len(configs) > 1, configs

        # The first config is a plain action; each following config becomes a
        # step whose ``previous_step`` is whatever was built just before it.
        head, *tail = configs
        step = action_factory(head, context, action_path + ["0"])
        for position, config in enumerate(tail, start=1):
            step = step_factory(config, context, action_path + [str(position)], previous_step=step)
        self.last_step = step

    @trace_select
    def select(self, group_of_dates):
        """Delegate the selection to the last step of the chain."""
        return self.last_step.select(group_of_dates)

    def __repr__(self):
        return super().__repr__(self.last_step)