anemoi-datasets 0.5.28__py3-none-any.whl → 0.5.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/create/__init__.py +4 -12
  3. anemoi/datasets/create/config.py +50 -53
  4. anemoi/datasets/create/input/result/field.py +1 -3
  5. anemoi/datasets/create/sources/accumulate.py +517 -0
  6. anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
  7. anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
  8. anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +153 -0
  9. anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
  10. anemoi/datasets/create/sources/grib_index.py +79 -51
  11. anemoi/datasets/create/sources/mars.py +56 -27
  12. anemoi/datasets/create/sources/xarray_support/__init__.py +1 -0
  13. anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
  14. anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
  15. anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
  16. anemoi/datasets/data/complement.py +26 -17
  17. anemoi/datasets/data/dataset.py +6 -0
  18. anemoi/datasets/data/masked.py +74 -13
  19. anemoi/datasets/data/missing.py +5 -0
  20. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/METADATA +8 -7
  21. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/RECORD +25 -23
  22. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/WHEEL +1 -1
  23. anemoi/datasets/create/sources/accumulations.py +0 -1042
  24. anemoi/datasets/create/sources/accumulations2.py +0 -618
  25. anemoi/datasets/create/sources/tendencies.py +0 -171
  26. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/entry_points.txt +0 -0
  27. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/licenses/LICENSE +0 -0
  28. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/top_level.txt +0 -0
@@ -1,171 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
-
10
- import datetime
11
- from collections import defaultdict
12
- from typing import Any
13
-
14
- from earthkit.data.core.temporary import temp_file
15
- from earthkit.data.readers.grib.output import new_grib_output
16
-
17
- from anemoi.datasets.create.sources import source_registry
18
-
19
- from .legacy import LegacySource
20
-
21
-
22
- def _date_to_datetime(d: Any) -> Any:
23
- """Converts a date string or a list/tuple of date strings to datetime objects.
24
-
25
- Parameters
26
- ----------
27
- d : Any
28
- A date string or a list/tuple of date strings.
29
-
30
- Returns
31
- -------
32
- Any
33
- A datetime object or a list/tuple of datetime objects.
34
- """
35
- if isinstance(d, (list, tuple)):
36
- return [_date_to_datetime(x) for x in d]
37
- return datetime.datetime.fromisoformat(d)
38
-
39
-
40
- def normalise_time_delta(t: Any) -> datetime.timedelta:
41
- """Normalizes a time delta string to a datetime.timedelta object.
42
-
43
- Parameters
44
- ----------
45
- t : Any
46
- A time delta string ending with 'h' or a datetime.timedelta object.
47
-
48
- Returns
49
- -------
50
- datetime.timedelta
51
- A normalized datetime.timedelta object.
52
- """
53
- if isinstance(t, datetime.timedelta):
54
- assert t == datetime.timedelta(hours=t.hours), t
55
-
56
- assert t.endswith("h"), t
57
-
58
- t = int(t[:-1])
59
- t = datetime.timedelta(hours=t)
60
- return t
61
-
62
-
63
- def group_by_field(ds: Any) -> dict[tuple, list[Any]]:
64
- """Groups fields by their metadata excluding 'date', 'time', and 'step'.
65
-
66
- Parameters
67
- ----------
68
- ds : Any
69
- A dataset object.
70
-
71
- Returns
72
- -------
73
- Dict[Tuple, List[Any]]
74
- A dictionary where keys are tuples of metadata items and values are lists of fields.
75
- """
76
- d = defaultdict(list)
77
- for field in ds.order_by("valid_datetime"):
78
- m = field.metadata(namespace="mars")
79
- for k in ("date", "time", "step"):
80
- m.pop(k, None)
81
- keys = tuple(m.items())
82
- d[keys].append(field)
83
- return d
84
-
85
-
86
- @source_registry.register("tendencies")
87
- class TendenciesSource(LegacySource):
88
-
89
- @staticmethod
90
- def _execute(dates: list[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
91
- """Computes tendencies for the given dates and time increment.
92
-
93
- Parameters
94
- ----------
95
- dates : List[datetime.datetime]
96
- A list of datetime objects.
97
- time_increment : Any
98
- A time increment string ending with 'h' or a datetime.timedelta object.
99
- **kwargs : Any
100
- Additional keyword arguments.
101
-
102
- Returns
103
- -------
104
- Any
105
- A dataset object with computed tendencies.
106
- """
107
- print("✅", kwargs)
108
- time_increment = normalise_time_delta(time_increment)
109
-
110
- shifted_dates = [d - time_increment for d in dates]
111
- all_dates = sorted(list(set(dates + shifted_dates)))
112
-
113
- from .mars import mars
114
-
115
- ds = mars(dates=all_dates, **kwargs)
116
-
117
- dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
118
- for d in all_dates:
119
- assert d.isoformat() in dates_in_data, d
120
-
121
- ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
122
- ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
123
-
124
- assert len(ds1) == len(ds2), (len(ds1), len(ds2))
125
-
126
- group1 = group_by_field(ds1)
127
- group2 = group_by_field(ds2)
128
-
129
- assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
130
-
131
- # prepare output tmp file so we can read it back
132
- tmp = temp_file()
133
- path = tmp.path
134
- out = new_grib_output(path)
135
-
136
- for k in group1:
137
- assert len(group1[k]) == len(group2[k]), k
138
- print()
139
- print("❌", k)
140
-
141
- for field, b_field in zip(group1[k], group2[k]):
142
- for k in ["param", "level", "number", "grid", "shape"]:
143
- assert field.metadata(k) == b_field.metadata(k), (
144
- k,
145
- field.metadata(k),
146
- b_field.metadata(k),
147
- )
148
-
149
- c = field.to_numpy()
150
- b = b_field.to_numpy()
151
- assert c.shape == b.shape, (c.shape, b.shape)
152
-
153
- ################
154
- # Actual computation happens here
155
- x = c - b
156
- ################
157
-
158
- assert x.shape == c.shape, c.shape
159
- print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
160
- out.write(x, template=field)
161
-
162
- out.close()
163
-
164
- from earthkit.data import from_source
165
-
166
- ds = from_source("file", path)
167
- # save a reference to the tmp file so it is deleted
168
- # only when the dataset is not used anymore
169
- ds._tmp = tmp
170
-
171
- return ds