anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +46 -13
  6. anemoi/datasets/create/config.py +52 -53
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +10 -92
  16. anemoi/datasets/create/sources/accumulate.py +517 -0
  17. anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
  18. anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
  19. anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +149 -0
  20. anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
  21. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  22. anemoi/datasets/create/sources/constants.py +39 -38
  23. anemoi/datasets/create/sources/empty.py +26 -22
  24. anemoi/datasets/create/sources/forcings.py +29 -28
  25. anemoi/datasets/create/sources/grib.py +92 -72
  26. anemoi/datasets/create/sources/grib_index.py +102 -54
  27. anemoi/datasets/create/sources/hindcasts.py +56 -55
  28. anemoi/datasets/create/sources/legacy.py +10 -62
  29. anemoi/datasets/create/sources/mars.py +159 -154
  30. anemoi/datasets/create/sources/netcdf.py +28 -24
  31. anemoi/datasets/create/sources/opendap.py +28 -24
  32. anemoi/datasets/create/sources/recentre.py +42 -41
  33. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  34. anemoi/datasets/create/sources/source.py +26 -48
  35. anemoi/datasets/create/sources/xarray_support/__init__.py +30 -24
  36. anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
  37. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  38. anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
  39. anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
  40. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  41. anemoi/datasets/create/sources/zenodo.py +43 -39
  42. anemoi/datasets/create/utils.py +0 -42
  43. anemoi/datasets/data/complement.py +26 -17
  44. anemoi/datasets/data/dataset.py +12 -0
  45. anemoi/datasets/data/grids.py +0 -152
  46. anemoi/datasets/data/masked.py +74 -13
  47. anemoi/datasets/data/missing.py +5 -0
  48. anemoi/datasets/data/rolling_average.py +141 -0
  49. anemoi/datasets/data/stores.py +7 -9
  50. anemoi/datasets/dates/__init__.py +2 -0
  51. anemoi/datasets/dumper.py +76 -0
  52. anemoi/datasets/grids.py +1 -178
  53. anemoi/datasets/schemas/recipe.json +131 -0
  54. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/METADATA +9 -6
  55. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/RECORD +59 -57
  56. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/WHEEL +1 -1
  57. anemoi/datasets/create/filter.py +0 -47
  58. anemoi/datasets/create/input/concat.py +0 -161
  59. anemoi/datasets/create/input/context.py +0 -86
  60. anemoi/datasets/create/input/empty.py +0 -53
  61. anemoi/datasets/create/input/filter.py +0 -117
  62. anemoi/datasets/create/input/function.py +0 -232
  63. anemoi/datasets/create/input/join.py +0 -129
  64. anemoi/datasets/create/input/pipe.py +0 -66
  65. anemoi/datasets/create/input/step.py +0 -173
  66. anemoi/datasets/create/input/template.py +0 -161
  67. anemoi/datasets/create/sources/accumulations.py +0 -1062
  68. anemoi/datasets/create/sources/accumulations2.py +0 -647
  69. anemoi/datasets/create/sources/tendencies.py +0 -198
  70. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/entry_points.txt +0 -0
  71. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/licenses/LICENSE +0 -0
  72. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/top_level.txt +0 -0
@@ -1,198 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
-
10
- import datetime
11
- from collections import defaultdict
12
- from typing import Any
13
-
14
- from earthkit.data.core.temporary import temp_file
15
- from earthkit.data.readers.grib.output import new_grib_output
16
-
17
- from anemoi.datasets.create.utils import to_datetime_list
18
-
19
- from .legacy import legacy_source
20
-
21
-
22
- def _date_to_datetime(d: Any) -> Any:
23
- """Converts a date string or a list/tuple of date strings to datetime objects.
24
-
25
- Parameters
26
- ----------
27
- d : Any
28
- A date string or a list/tuple of date strings.
29
-
30
- Returns
31
- -------
32
- Any
33
- A datetime object or a list/tuple of datetime objects.
34
- """
35
- if isinstance(d, (list, tuple)):
36
- return [_date_to_datetime(x) for x in d]
37
- return datetime.datetime.fromisoformat(d)
38
-
39
-
40
- def normalise_time_delta(t: Any) -> datetime.timedelta:
41
- """Normalizes a time delta string to a datetime.timedelta object.
42
-
43
- Parameters
44
- ----------
45
- t : Any
46
- A time delta string ending with 'h' or a datetime.timedelta object.
47
-
48
- Returns
49
- -------
50
- datetime.timedelta
51
- A normalized datetime.timedelta object.
52
- """
53
- if isinstance(t, datetime.timedelta):
54
- assert t == datetime.timedelta(hours=t.hours), t
55
-
56
- assert t.endswith("h"), t
57
-
58
- t = int(t[:-1])
59
- t = datetime.timedelta(hours=t)
60
- return t
61
-
62
-
63
- def group_by_field(ds: Any) -> dict[tuple, list[Any]]:
64
- """Groups fields by their metadata excluding 'date', 'time', and 'step'.
65
-
66
- Parameters
67
- ----------
68
- ds : Any
69
- A dataset object.
70
-
71
- Returns
72
- -------
73
- Dict[Tuple, List[Any]]
74
- A dictionary where keys are tuples of metadata items and values are lists of fields.
75
- """
76
- d = defaultdict(list)
77
- for field in ds.order_by("valid_datetime"):
78
- m = field.metadata(namespace="mars")
79
- for k in ("date", "time", "step"):
80
- m.pop(k, None)
81
- keys = tuple(m.items())
82
- d[keys].append(field)
83
- return d
84
-
85
-
86
- @legacy_source(__file__)
87
- def tendencies(dates: list[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
88
- """Computes tendencies for the given dates and time increment.
89
-
90
- Parameters
91
- ----------
92
- dates : List[datetime.datetime]
93
- A list of datetime objects.
94
- time_increment : Any
95
- A time increment string ending with 'h' or a datetime.timedelta object.
96
- **kwargs : Any
97
- Additional keyword arguments.
98
-
99
- Returns
100
- -------
101
- Any
102
- A dataset object with computed tendencies.
103
- """
104
- print("✅", kwargs)
105
- time_increment = normalise_time_delta(time_increment)
106
-
107
- shifted_dates = [d - time_increment for d in dates]
108
- all_dates = sorted(list(set(dates + shifted_dates)))
109
-
110
- # from .mars import execute as mars
111
- from anemoi.datasets.create.mars import execute as mars
112
-
113
- ds = mars(dates=all_dates, **kwargs)
114
-
115
- dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
116
- for d in all_dates:
117
- assert d.isoformat() in dates_in_data, d
118
-
119
- ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
120
- ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
121
-
122
- assert len(ds1) == len(ds2), (len(ds1), len(ds2))
123
-
124
- group1 = group_by_field(ds1)
125
- group2 = group_by_field(ds2)
126
-
127
- assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
128
-
129
- # prepare output tmp file so we can read it back
130
- tmp = temp_file()
131
- path = tmp.path
132
- out = new_grib_output(path)
133
-
134
- for k in group1:
135
- assert len(group1[k]) == len(group2[k]), k
136
- print()
137
- print("❌", k)
138
-
139
- for field, b_field in zip(group1[k], group2[k]):
140
- for k in ["param", "level", "number", "grid", "shape"]:
141
- assert field.metadata(k) == b_field.metadata(k), (
142
- k,
143
- field.metadata(k),
144
- b_field.metadata(k),
145
- )
146
-
147
- c = field.to_numpy()
148
- b = b_field.to_numpy()
149
- assert c.shape == b.shape, (c.shape, b.shape)
150
-
151
- ################
152
- # Actual computation happens here
153
- x = c - b
154
- ################
155
-
156
- assert x.shape == c.shape, c.shape
157
- print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
158
- out.write(x, template=field)
159
-
160
- out.close()
161
-
162
- from earthkit.data import from_source
163
-
164
- ds = from_source("file", path)
165
- # save a reference to the tmp file so it is deleted
166
- # only when the dataset is not used anymore
167
- ds._tmp = tmp
168
-
169
- return ds
170
-
171
-
172
- execute = tendencies
173
-
174
- if __name__ == "__main__":
175
- import yaml
176
-
177
- config = yaml.safe_load(
178
- """
179
-
180
- config:
181
- time_increment: 12h
182
- database: marser
183
- class: ea
184
- # date: computed automatically
185
- # time: computed automatically
186
- expver: "0001"
187
- grid: 20.0/20.0
188
- levtype: sfc
189
- param: [2t]
190
- """
191
- )["config"]
192
-
193
- dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
194
- dates = to_datetime_list(dates)
195
-
196
- DEBUG = True
197
- for f in tendencies(dates, **config):
198
- print(f, f.to_numpy().mean())