anemoi-datasets 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/compare.py +59 -0
  3. anemoi/datasets/commands/create.py +84 -3
  4. anemoi/datasets/commands/inspect.py +3 -3
  5. anemoi/datasets/create/__init__.py +44 -17
  6. anemoi/datasets/create/check.py +6 -5
  7. anemoi/datasets/create/chunks.py +1 -1
  8. anemoi/datasets/create/config.py +5 -26
  9. anemoi/datasets/create/functions/filters/rename.py +9 -1
  10. anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
  11. anemoi/datasets/create/functions/sources/__init__.py +39 -0
  12. anemoi/datasets/create/functions/sources/accumulations.py +11 -41
  13. anemoi/datasets/create/functions/sources/constants.py +3 -0
  14. anemoi/datasets/create/functions/sources/grib.py +4 -0
  15. anemoi/datasets/create/functions/sources/hindcasts.py +32 -377
  16. anemoi/datasets/create/functions/sources/mars.py +53 -22
  17. anemoi/datasets/create/functions/sources/netcdf.py +2 -60
  18. anemoi/datasets/create/functions/sources/opendap.py +3 -2
  19. anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
  20. anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
  21. anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
  22. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
  23. anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
  24. anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
  25. anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
  26. anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
  27. anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
  28. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
  29. anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
  30. anemoi/datasets/create/functions/sources/zenodo.py +40 -0
  31. anemoi/datasets/create/input.py +290 -172
  32. anemoi/datasets/create/loaders.py +120 -71
  33. anemoi/datasets/create/patch.py +17 -14
  34. anemoi/datasets/create/persistent.py +1 -1
  35. anemoi/datasets/create/size.py +4 -5
  36. anemoi/datasets/create/statistics/__init__.py +49 -16
  37. anemoi/datasets/create/template.py +11 -61
  38. anemoi/datasets/create/trace.py +91 -0
  39. anemoi/datasets/create/utils.py +0 -48
  40. anemoi/datasets/create/zarr.py +24 -10
  41. anemoi/datasets/data/misc.py +9 -37
  42. anemoi/datasets/data/stores.py +29 -14
  43. anemoi/datasets/dates/__init__.py +7 -1
  44. anemoi/datasets/dates/groups.py +3 -0
  45. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +18 -3
  46. anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
  47. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
  48. anemoi_datasets-0.4.0.dist-info/RECORD +0 -73
  49. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
  50. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
  51. {anemoi_datasets-0.4.0.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,98 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+
11
+ import datetime
12
+
13
+
14
class Time:
    """Base class of the helpers that fill the ``date``/``time``/``step`` metadata.

    Instances are normally obtained through :meth:`from_coordinates`, which
    selects the subclass matching the time-like coordinates that are present.
    """

    @classmethod
    def from_coordinates(cls, coordinates):
        """Select the ``Time`` subclass matching the given coordinates.

        Parameters
        ----------
        coordinates : list
            Coordinate objects exposing the boolean attributes ``is_time``,
            ``is_step`` and ``is_date``. The collection is scanned three
            times, once per attribute.

        Returns
        -------
        Time
            An instance of the subclass handling the combination found.

        Raises
        ------
        NotImplementedError
            If the number of date, time and step coordinates does not match
            any supported combination.
        """

        def having(flag):
            return [c for c in coordinates if getattr(c, flag)]

        # These names appear verbatim in the error message below.
        time_coordinate = having("is_time")
        step_coordinate = having("is_step")
        date_coordinate = having("is_date")

        # How many (date, time, step) coordinates were found.
        found = (len(date_coordinate), len(time_coordinate), len(step_coordinate))

        if found == (0, 1, 1):
            return ForecasstFromValidTimeAndStep(step_coordinate[0])

        if found == (0, 1, 0):
            return Analysis()

        if found == (0, 0, 0):
            return Constant()

        if found == (1, 1, 0):
            return ForecastFromValidTimeAndBaseTime(date_coordinate[0])

        if found == (1, 0, 1):
            return ForecastFromBaseTimeAndDate(date_coordinate[0], step_coordinate[0])

        raise NotImplementedError(f"{date_coordinate=} {time_coordinate=} {step_coordinate=}")
37
+
38
+
39
class Constant(Time):
    """Time handler returned by ``Time.from_coordinates`` when no time, step or date coordinate exists."""

    def fill_time_metadata(self, time, metadata):
        """Write ``date``, ``time`` and a zero ``step`` into ``metadata``, in place.

        Parameters
        ----------
        time : object with a ``strftime`` method
            Used to format the ``date`` (``%Y%m%d``) and ``time`` (``%H%M``) entries.
        metadata : dict
            Mapping updated in place.
        """
        for key, fmt in (("date", "%Y%m%d"), ("time", "%H%M")):
            metadata[key] = time.strftime(fmt)
        metadata["step"] = 0
45
+
46
+
47
class Analysis(Time):
    """Time handler returned by ``Time.from_coordinates`` when only a time coordinate exists."""

    def fill_time_metadata(self, time, metadata):
        """Write ``date``, ``time`` and a zero ``step`` into ``metadata``, in place.

        Parameters
        ----------
        time : object with a ``strftime`` method
            Used to format the ``date`` (``%Y%m%d``) and ``time`` (``%H%M``) entries.
        metadata : dict
            Mapping updated in place.
        """
        date_string = time.strftime("%Y%m%d")
        metadata["date"] = date_string

        time_string = time.strftime("%H%M")
        metadata["time"] = time_string

        # No step coordinate: the step is always zero.
        metadata["step"] = 0
53
+
54
+
55
class ForecasstFromValidTimeAndStep(Time):
    """Time handler used when a time coordinate and a step coordinate are present.

    The base time written to the metadata is computed as ``time - step``.
    """

    def __init__(self, step_coordinate):
        # Only the name is kept: the step value itself is read from the metadata.
        self.step_name = step_coordinate.variable.name

    def fill_time_metadata(self, time, metadata):
        """Replace the step entry of ``metadata`` and add the base ``date``/``time``.

        Parameters
        ----------
        time : datetime-like
            Value from which the step is subtracted to obtain the base time.
        metadata : dict
            Updated in place. The entry stored under the step coordinate name
            is removed; it must be a ``datetime.timedelta`` that is a whole
            number of hours.
        """
        # The raw step is taken out of the metadata; it comes back below as hours.
        step = metadata.pop(self.step_name)
        assert isinstance(step, datetime.timedelta)

        base = time - step
        hours = step.total_seconds() / 3600
        assert int(hours) == hours

        for key, fmt in (("date", "%Y%m%d"), ("time", "%H%M")):
            metadata[key] = base.strftime(fmt)
        metadata["step"] = int(hours)
70
+
71
+
72
class ForecastFromValidTimeAndBaseTime(Time):
    """Time handler used when a date coordinate and a time coordinate are present.

    The base time is the ``single_value`` of the date coordinate; the step is
    the difference between the time passed in and that base time.
    """

    def __init__(self, date_coordinate):
        self.date_coordinate = date_coordinate

    def fill_time_metadata(self, time, metadata):
        """Write the base ``date``/``time`` and the ``step`` in hours into ``metadata``.

        Parameters
        ----------
        time : datetime-like
            Time from which the base time is subtracted to obtain the step.
        metadata : dict
            Updated in place with the keys ``date``, ``time`` and ``step``.
        """
        # Subtract the base time itself, not the coordinate object that holds it:
        # the date and time entries below are formatted from the same value.
        base = self.date_coordinate.single_value
        step = time - base

        hours = step.total_seconds() / 3600
        # Same whole-hours check as the other Time subclasses.
        assert int(hours) == hours

        metadata["date"] = base.strftime("%Y%m%d")
        metadata["time"] = base.strftime("%H%M")
        metadata["step"] = int(hours)
86
+
87
+
88
class ForecastFromBaseTimeAndDate(Time):
    """Time handler used when a date coordinate and a step coordinate are present, without a time coordinate."""

    def __init__(self, date_coordinate, step_coordinate):
        # The date coordinate is stored but not read by fill_time_metadata.
        self.date_coordinate = date_coordinate
        self.step_coordinate = step_coordinate

    def fill_time_metadata(self, time, metadata):
        """Write ``date``, ``time`` and the ``step`` in hours into ``metadata``.

        Parameters
        ----------
        time : object with a ``strftime`` method
            Used to format the ``date`` and ``time`` entries.
        metadata : dict
            Updated in place. Must already hold, under the step coordinate
            name, an object with ``total_seconds()`` that is a whole number of hours.
        """
        for key, fmt in (("date", "%Y%m%d"), ("time", "%H%M")):
            metadata[key] = time.strftime(fmt)

        step = metadata[self.step_coordinate.name]
        hours = step.total_seconds() / 3600
        assert int(hours) == hours
        metadata["step"] = int(hours)
@@ -0,0 +1,198 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+ import logging
11
+ import math
12
+ from functools import cached_property
13
+
14
+ import numpy as np
15
+ from earthkit.data.utils.array import ensure_backend
16
+
17
+ from anemoi.datasets.create.functions.sources.xarray.metadata import MDMapping
18
+
19
+ from .field import XArrayField
20
+
21
+ LOG = logging.getLogger(__name__)
22
+
23
+
24
+ class Variable:
25
+ def __init__(self, *, ds, var, coordinates, grid, time, metadata, mapping=None, array_backend=None):
26
+ self.ds = ds
27
+ self.var = var
28
+
29
+ self.grid = grid
30
+ self.coordinates = coordinates
31
+
32
+ # print("Variable", var.name)
33
+ # for c in coordinates:
34
+ # print(" ", c)
35
+
36
+ self._metadata = metadata.copy()
37
+ # self._metadata.update(var.attrs)
38
+ self._metadata.update({"variable": var.name})
39
+
40
+ # self._metadata.setdefault("level", None)
41
+ # self._metadata.setdefault("number", 0)
42
+ # self._metadata.setdefault("levtype", "sfc")
43
+ self._mapping = mapping
44
+
45
+ self.time = time
46
+
47
+ self.shape = tuple(len(c.variable) for c in coordinates if c.is_dim and not c.scalar and not c.is_grid)
48
+ self.names = {c.variable.name: c for c in coordinates if c.is_dim and not c.scalar and not c.is_grid}
49
+ self.by_name = {c.variable.name: c for c in coordinates}
50
+
51
+ self.length = math.prod(self.shape)
52
+ self.array_backend = ensure_backend(array_backend)
53
+
54
+ def update_metadata_mapping(self, kwargs):
55
+
56
+ result = {}
57
+
58
+ for k, v in kwargs.items():
59
+ if k == "param":
60
+ result[k] = "variable"
61
+ continue
62
+
63
+ for c in self.coordinates:
64
+ if k in c.mars_names:
65
+ for v in c.mars_names:
66
+ result[v] = c.variable.name
67
+ break
68
+
69
+ self._mapping = MDMapping(result)
70
+
71
+ @property
72
+ def name(self):
73
+ return self.var.name
74
+
75
+ def __len__(self):
76
+ return self.length
77
+
78
+ @property
79
+ def grid_mapping(self):
80
+ grid_mapping = self.var.attrs.get("grid_mapping", None)
81
+ if grid_mapping is None:
82
+ return None
83
+ return self.ds[grid_mapping].attrs
84
+
85
+ def grid_points(self):
86
+ return self.grid.grid_points()
87
+
88
+ @property
89
+ def latitudes(self):
90
+ return self.grid.latitudes
91
+
92
+ @property
93
+ def longitudes(self):
94
+ return self.grid.longitudes
95
+
96
+ def __repr__(self):
97
+ return "Variable[name=%s,coordinates=%s,metadata=%s]" % (
98
+ self.var.name,
99
+ self.coordinates,
100
+ self._metadata,
101
+ )
102
+
103
+ def __getitem__(self, i):
104
+ """
105
+ Get a 2D field from the variable
106
+ """
107
+ if i >= self.length:
108
+ raise IndexError(i)
109
+
110
+ coords = np.unravel_index(i, self.shape)
111
+ kwargs = {k: v for k, v in zip(self.names, coords)}
112
+ return XArrayField(self, self.var.isel(kwargs))
113
+
114
+ @property
115
+ def mapping(self):
116
+ return self._mapping
117
+
118
+ def sel(self, missing, **kwargs):
119
+
120
+ if not kwargs:
121
+ return self
122
+
123
+ kwargs = self._mapping.from_user(kwargs)
124
+
125
+ k, v = kwargs.popitem()
126
+
127
+ c = self.by_name.get(k)
128
+
129
+ if c is None:
130
+ missing[k] = v
131
+ return self.sel(missing, **kwargs)
132
+
133
+ i = c.index(v)
134
+ if i is None:
135
+ LOG.warning(f"Could not find {k}={v} in {c}")
136
+ return None
137
+
138
+ coordinates = [x.reduced(i) if c is x else x for x in self.coordinates]
139
+
140
+ metadata = self._metadata.copy()
141
+ metadata.update({k: v})
142
+
143
+ variable = Variable(
144
+ ds=self.ds,
145
+ var=self.var.isel({k: i}),
146
+ coordinates=coordinates,
147
+ grid=self.grid,
148
+ time=self.time,
149
+ metadata=metadata,
150
+ mapping=self.mapping,
151
+ )
152
+
153
+ return variable.sel(missing, **kwargs)
154
+
155
+ def match(self, **kwargs):
156
+ kwargs = self._mapping.from_user(kwargs)
157
+
158
+ if "variable" in kwargs:
159
+ name = kwargs.pop("variable")
160
+ if not isinstance(name, (list, tuple)):
161
+ name = [name]
162
+ if self.var.name not in name:
163
+ return False, None
164
+ return True, kwargs
165
+ return True, kwargs
166
+
167
+
168
+ class FilteredVariable:
169
+ def __init__(self, variable, **kwargs):
170
+ self.variable = variable
171
+ self.kwargs = kwargs
172
+
173
+ @cached_property
174
+ def fields(self):
175
+ """Filter the fields of a variable based on metadata.
176
+
177
+ Returns
178
+ -------
179
+ list
180
+ A list of fields that match the metadata.
181
+ """
182
+ return [
183
+ field
184
+ for field in self.variable
185
+ if all(field.metadata(k, default=None) == v for k, v in self.kwargs.items())
186
+ ]
187
+
188
+ @property
189
+ def length(self):
190
+ return len(self.fields)
191
+
192
+ def __len__(self):
193
+ return self.length
194
+
195
+ def __getitem__(self, i):
196
+ if i >= self.length:
197
+ raise IndexError(i)
198
+ return self.fields[i]
@@ -0,0 +1,42 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+
11
+ from earthkit.data.core.fieldlist import MultiFieldList
12
+
13
+ from . import iterate_patterns
14
+ from .xarray import load_one
15
+
16
+
17
def load_many(emoji, context, dates, pattern, options, **kwargs):
    """Load every reference file matching ``pattern`` and combine the results.

    Each path yielded by ``iterate_patterns`` is passed as the ``fo`` storage
    option, and the data are opened from ``"reference://"`` with ``load_one``.

    Parameters
    ----------
    emoji : str
        Passed through to ``load_one``.
    context : object
        Passed through to ``load_one``.
    dates : list
        Dates to load, split per path by ``iterate_patterns``.
    pattern : str
        Path pattern of the reference files.
    options : dict or None
        Options passed to ``load_one``. The following defaults are applied
        when absent: ``engine="zarr"``, ``backend_kwargs.consolidated=False``,
        ``storage_options.remote_protocol="s3"`` and
        ``storage_options.remote_options={"anon": True}``.
        The caller's dictionary, including its nested dictionaries, is left
        untouched.
    **kwargs
        Passed to both ``iterate_patterns`` and ``load_one``.

    Returns
    -------
    MultiFieldList
        The results of all ``load_one`` calls.
    """
    options = options.copy() if options is not None else {}
    options.setdefault("engine", "zarr")

    # ``options.copy()`` is shallow: copy the nested dictionaries as well, so
    # that the defaults and the per-file "fo" entry never leak into the
    # caller's configuration.
    backend_kwargs = dict(options.get("backend_kwargs") or {})
    backend_kwargs.setdefault("consolidated", False)

    storage_options = dict(backend_kwargs.get("storage_options") or {})
    storage_options.setdefault("remote_protocol", "s3")
    storage_options.setdefault("remote_options", {"anon": True})

    result = []
    for path, group_dates in iterate_patterns(pattern, dates, **kwargs):
        # Every call gets its own dictionaries, so a later iteration cannot
        # change the "fo" seen by an earlier one.
        one_options = options.copy()
        one_options["backend_kwargs"] = {
            **backend_kwargs,
            "storage_options": {**storage_options, "fo": path},
        }

        result.append(load_one(emoji, context, group_dates, "reference://", options=one_options, **kwargs))

    return MultiFieldList(result)
39
+
40
+
41
def execute(context, dates, json, options=None, **kwargs):
    """Source entry point: load the reference file(s) designated by ``json``.

    ``json`` is used as the path pattern given to ``load_many``; ``options``
    and any extra keyword arguments are forwarded unchanged.
    """
    pattern = json
    return load_many("🧱", context, dates, pattern, options, **kwargs)
@@ -0,0 +1,15 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+
11
+ from .xarray import load_many
12
+
13
+
14
def execute(context, dates, url, *args, **kwargs):
    """Source entry point: load the data found at ``url`` with ``load_many``.

    All extra positional and keyword arguments are forwarded unchanged.
    """
    emoji = "🇿"
    return load_many(emoji, context, dates, url, *args, **kwargs)
@@ -0,0 +1,40 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+
11
+ from earthkit.data.core.fieldlist import MultiFieldList
12
+ from earthkit.data.sources.url import download_and_cache
13
+
14
+ from . import iterate_patterns
15
+ from .xarray import load_one
16
+
17
+
18
def execute(context, dates, record_id, file_key, *args, **kwargs):
    """Load files of a Zenodo record.

    The record description is fetched from the Zenodo API, the files whose
    key matches ``file_key`` are downloaded (and cached) and each one is
    opened with ``load_one``.

    Parameters
    ----------
    context : object
        Passed through to ``load_one``.
    dates : list
        Dates to load, split per file by ``iterate_patterns``.
    record_id : str or int
        Identifier of the Zenodo record.
    file_key : str
        Key (or pattern expanded by ``iterate_patterns``) of the files to load.
    *args
        Accepted but not used.
    **kwargs
        Passed to both ``iterate_patterns`` and ``load_one``.

    Returns
    -------
    MultiFieldList
        The results of all ``load_one`` calls; empty if no key matched.

    Raises
    ------
    requests.HTTPError
        If the record cannot be retrieved.
    """
    import requests

    record_url = f"https://zenodo.org/api/records/{record_id}"
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get(record_url, timeout=60)
    r.raise_for_status()
    record = r.json()

    # File key -> download URL, for every file of the record.
    urls = {file["key"]: file["links"]["self"] for file in record["files"]}

    result = []
    for key, group_dates in iterate_patterns(file_key, dates, **kwargs):
        if key not in urls:
            # Keys that are not part of the record are skipped.
            continue

        path = download_and_cache(urls[key])
        result.append(load_one("?", context, group_dates, path, options={}, flavour=None, **kwargs))

    return MultiFieldList(result)