anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (67)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/create.py +3 -2
  3. anemoi/datasets/commands/inspect.py +1 -1
  4. anemoi/datasets/commands/publish.py +30 -0
  5. anemoi/datasets/create/__init__.py +72 -35
  6. anemoi/datasets/create/check.py +6 -0
  7. anemoi/datasets/create/config.py +4 -3
  8. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  9. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  10. anemoi/datasets/create/functions/filters/rename.py +2 -3
  11. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  12. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  13. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  14. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  15. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  16. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  17. anemoi/datasets/create/functions/sources/__init__.py +7 -1
  18. anemoi/datasets/create/functions/sources/accumulations.py +2 -0
  19. anemoi/datasets/create/functions/sources/grib.py +87 -2
  20. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  21. anemoi/datasets/create/functions/sources/mars.py +9 -3
  22. anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
  23. anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
  24. anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
  25. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
  26. anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
  27. anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
  28. anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
  29. anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
  30. anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
  31. anemoi/datasets/create/input/__init__.py +69 -0
  32. anemoi/datasets/create/input/action.py +123 -0
  33. anemoi/datasets/create/input/concat.py +92 -0
  34. anemoi/datasets/create/input/context.py +59 -0
  35. anemoi/datasets/create/input/data_sources.py +71 -0
  36. anemoi/datasets/create/input/empty.py +42 -0
  37. anemoi/datasets/create/input/filter.py +76 -0
  38. anemoi/datasets/create/input/function.py +122 -0
  39. anemoi/datasets/create/input/join.py +57 -0
  40. anemoi/datasets/create/input/misc.py +85 -0
  41. anemoi/datasets/create/input/pipe.py +33 -0
  42. anemoi/datasets/create/input/repeated_dates.py +217 -0
  43. anemoi/datasets/create/input/result.py +413 -0
  44. anemoi/datasets/create/input/step.py +99 -0
  45. anemoi/datasets/create/{template.py → input/template.py} +0 -42
  46. anemoi/datasets/create/persistent.py +1 -1
  47. anemoi/datasets/create/statistics/__init__.py +1 -1
  48. anemoi/datasets/create/utils.py +3 -0
  49. anemoi/datasets/create/zarr.py +4 -2
  50. anemoi/datasets/data/dataset.py +11 -1
  51. anemoi/datasets/data/debug.py +5 -1
  52. anemoi/datasets/data/masked.py +2 -2
  53. anemoi/datasets/data/rescale.py +147 -0
  54. anemoi/datasets/data/stores.py +20 -7
  55. anemoi/datasets/dates/__init__.py +113 -30
  56. anemoi/datasets/dates/groups.py +92 -19
  57. anemoi/datasets/fields.py +66 -0
  58. anemoi/datasets/utils/fields.py +47 -0
  59. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
  60. anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
  61. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
  62. anemoi/datasets/create/input.py +0 -1065
  63. anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
  64. /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
  65. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
  66. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
  67. {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/input/repeated_dates.py (new file)
@@ -0,0 +1,217 @@
+ # (C) Copyright 2023 ECMWF.
+ #
+ # This software is licensed under the terms of the Apache Licence Version 2.0
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ # In applying this licence, ECMWF does not waive the privileges and immunities
+ # granted to it by virtue of its status as an intergovernmental organisation
+ # nor does it submit to any jurisdiction.
+ #
+
+ import logging
+ from collections import defaultdict
+
+ import numpy as np
+ from anemoi.utils.dates import as_datetime
+ from anemoi.utils.dates import frequency_to_timedelta
+
+ from anemoi.datasets.fields import FieldArray
+ from anemoi.datasets.fields import NewValidDateTimeField
+
+ from .action import Action
+ from .action import action_factory
+ from .join import JoinResult
+ from .result import Result
+ from .trace import trace_select
+
+ LOG = logging.getLogger(__name__)
+
+
+ class DateMapper:
+
+     @staticmethod
+     def from_mode(mode, source, config):
+
+         MODES = dict(
+             closest=DateMapperClosest,
+             climatology=DateMapperClimatology,
+             constant=DateMapperConstant,
+         )
+
+         if mode not in MODES:
+             raise ValueError(f"Invalid mode for DateMapper: {mode}")
+
+         return MODES[mode](source, **config)
+
+
+ class DateMapperClosest(DateMapper):
+     def __init__(self, source, frequency="1h", maximum="30d", skip_all_nans=False):
+         self.source = source
+         self.maximum = frequency_to_timedelta(maximum)
+         self.frequency = frequency_to_timedelta(frequency)
+         self.skip_all_nans = skip_all_nans
+         self.tried = set()
+         self.found = set()
+
+     def transform(self, group_of_dates):
+         from anemoi.datasets.dates.groups import GroupOfDates
+
+         asked_dates = list(group_of_dates)
+         if not asked_dates:
+             return []
+
+         to_try = set()
+         for date in asked_dates:
+             start = date
+             while start >= date - self.maximum:
+                 to_try.add(start)
+                 start -= self.frequency
+
+             end = date
+             while end <= date + self.maximum:
+                 to_try.add(end)
+                 end += self.frequency
+
+         to_try = sorted(to_try - self.tried)
+
+         if to_try:
+             result = self.source.select(
+                 GroupOfDates(
+                     sorted(to_try),
+                     group_of_dates.provider,
+                     partial_ok=True,
+                 )
+             )
+
+             for f in result.datasource:
+                 # We could keep the fields in a dictionary, but we don't want to keep the fields in memory
+                 date = as_datetime(f.metadata("valid_datetime"))
+
+                 if self.skip_all_nans:
+                     if np.isnan(f.to_numpy()).all():
+                         LOG.warning(f"Skipping {date} because all values are NaN")
+                         continue
+
+                 self.found.add(date)
+
+             self.tried.update(to_try)
+
+         new_dates = defaultdict(list)
+
+         for date in asked_dates:
+             best = None
+             for found_date in sorted(self.found):
+                 delta = abs(date - found_date)
+                 # With < we prefer the first date
+                 # With <= we prefer the last date
+                 if best is None or delta <= best[0]:
+                     best = delta, found_date
+             new_dates[best[1]].append(date)
+
+         for date, dates in new_dates.items():
+             yield (
+                 GroupOfDates([date], group_of_dates.provider),
+                 GroupOfDates(dates, group_of_dates.provider),
+             )
+
+
+ class DateMapperClimatology(DateMapper):
+     def __init__(self, source, year, day):
+         self.year = year
+         self.day = day
+
+     def transform(self, group_of_dates):
+         from anemoi.datasets.dates.groups import GroupOfDates
+
+         dates = list(group_of_dates)
+         if not dates:
+             return []
+
+         new_dates = defaultdict(list)
+         for date in dates:
+             new_date = date.replace(year=self.year, day=self.day)
+             new_dates[new_date].append(date)
+
+         for date, dates in new_dates.items():
+             yield (
+                 GroupOfDates([date], group_of_dates.provider),
+                 GroupOfDates(dates, group_of_dates.provider),
+             )
+
+
+ class DateMapperConstant(DateMapper):
+     def __init__(self, source, date=None):
+         self.source = source
+         self.date = date
+
+     def transform(self, group_of_dates):
+         from anemoi.datasets.dates.groups import GroupOfDates
+
+         if self.date is None:
+             return [
+                 (
+                     GroupOfDates([], group_of_dates.provider),
+                     group_of_dates,
+                 )
+             ]
+
+         return [
+             (
+                 GroupOfDates([self.date], group_of_dates.provider),
+                 group_of_dates,
+             )
+         ]
+
+
+ class DateMapperResult(Result):
+     def __init__(
+         self,
+         context,
+         action_path,
+         group_of_dates,
+         source_result,
+         mapper,
+         original_group_of_dates,
+     ):
+         super().__init__(context, action_path, group_of_dates)
+
+         self.source_results = source_result
+         self.mapper = mapper
+         self.original_group_of_dates = original_group_of_dates
+
+     @property
+     def datasource(self):
+         result = []
+
+         for field in self.source_results.datasource:
+             for date in self.original_group_of_dates:
+                 result.append(NewValidDateTimeField(field, date))
+
+         return FieldArray(result)
+
+
+ class RepeatedDatesAction(Action):
+     def __init__(self, context, action_path, source, mode, **kwargs):
+         super().__init__(context, action_path, source, mode, **kwargs)
+
+         self.source = action_factory(source, context, action_path + ["source"])
+         self.mapper = DateMapper.from_mode(mode, self.source, kwargs)
+
+     @trace_select
+     def select(self, group_of_dates):
+         results = []
+         for one_date_group, many_dates_group in self.mapper.transform(group_of_dates):
+             results.append(
+                 DateMapperResult(
+                     self.context,
+                     self.action_path,
+                     one_date_group,
+                     self.source.select(one_date_group),
+                     self.mapper,
+                     many_dates_group,
+                 )
+             )
+
+         return JoinResult(self.context, self.action_path, group_of_dates, results)
+
+     def __repr__(self):
+         return f"MultiDateMatchAction({self.source}, {self.mapper})"
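
Note on the closest mode above: DateMapperClosest.transform scans outwards from each requested date in steps of frequency, up to maximum, then assigns every requested date to the nearest date actually found in the source; the <= comparison means the later of two equidistant dates wins. The following is a minimal, self-contained sketch of that matching logic using plain datetime objects. The function name closest_dates and the available set are illustrative stand-ins for the source query, not part of the package:

from collections import defaultdict
from datetime import datetime, timedelta


def closest_dates(asked_dates, available, frequency=timedelta(hours=1), maximum=timedelta(days=30)):
    # Collect candidate dates around each request, as DateMapperClosest builds its to_try set.
    found = set()
    for date in asked_dates:
        step = timedelta(0)
        while step <= maximum:
            for candidate in (date - step, date + step):
                if candidate in available:  # stands in for querying the source
                    found.add(candidate)
            step += frequency

    # Map each found date to the requested dates it will stand in for.
    new_dates = defaultdict(list)
    for date in asked_dates:
        best = None
        for found_date in sorted(found):
            delta = abs(date - found_date)
            # <= means the later of two equidistant dates wins, as in the diff above.
            if best is None or delta <= best[0]:
                best = delta, found_date
        new_dates[best[1]].append(date)
    return dict(new_dates)


available = {datetime(2020, 1, 1, h) for h in (0, 6, 12, 18)}
asked = [datetime(2020, 1, 1, 2), datetime(2020, 1, 1, 15)]
print(closest_dates(asked, available))
# {datetime(2020, 1, 1, 0, 0): [datetime(2020, 1, 1, 2, 0)],
#  datetime(2020, 1, 1, 18, 0): [datetime(2020, 1, 1, 15, 0)]}

The 15:00 request is equidistant from 12:00 and 18:00 and lands on 18:00, matching the tie-breaking of the <= comparison. The real class also caches tried/found across calls and can skip all-NaN fields; the sketch omits both.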
anemoi/datasets/create/input/result.py (new file)
@@ -0,0 +1,413 @@
+ # (C) Copyright 2024 ECMWF.
+ #
+ # This software is licensed under the terms of the Apache Licence Version 2.0
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ # In applying this licence, ECMWF does not waive the privileges and immunities
+ # granted to it by virtue of its status as an intergovernmental organisation
+ # nor does it submit to any jurisdiction.
+ #
+ import itertools
+ import logging
+ import math
+ import time
+ from collections import defaultdict
+ from functools import cached_property
+
+ import numpy as np
+ from anemoi.utils.dates import as_datetime as as_datetime
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
+ from anemoi.utils.humanize import seconds_to_human
+ from anemoi.utils.humanize import shorten_list
+ from earthkit.data.core.order import build_remapping
+
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
+ from anemoi.datasets.fields import FieldArray as FieldArray
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
+
+ from .trace import trace
+ from .trace import trace_datasource
+
+ LOG = logging.getLogger(__name__)
+
+
+ def _data_request(data):
+     date = None
+     params_levels = defaultdict(set)
+     params_steps = defaultdict(set)
+
+     area = grid = None
+
+     for field in data:
+         try:
+             if date is None:
+                 date = field.metadata("valid_datetime")
+
+             if field.metadata("valid_datetime") != date:
+                 continue
+
+             as_mars = field.metadata(namespace="mars")
+             if not as_mars:
+                 continue
+             step = as_mars.get("step")
+             levtype = as_mars.get("levtype", "sfc")
+             param = as_mars["param"]
+             levelist = as_mars.get("levelist", None)
+             area = field.mars_area
+             grid = field.mars_grid
+
+             if levelist is None:
+                 params_levels[levtype].add(param)
+             else:
+                 params_levels[levtype].add((param, levelist))
+
+             if step:
+                 params_steps[levtype].add((param, step))
+         except Exception:
+             LOG.error(f"Error in retrieving metadata (cannot build data request info) for {field}", exc_info=True)
+
+     def sort(old_dic):
+         new_dic = {}
+         for k, v in old_dic.items():
+             new_dic[k] = sorted(list(v))
+         return new_dic
+
+     params_steps = sort(params_steps)
+     params_levels = sort(params_levels)
+
+     return dict(param_level=params_levels, param_step=params_steps, area=area, grid=grid)
+
+
+ class Result:
+     empty = False
+     _coords_already_built = False
+
+     def __init__(self, context, action_path, dates):
+         from anemoi.datasets.dates.groups import GroupOfDates
+
+         from .action import ActionContext
+
+         assert isinstance(dates, GroupOfDates), dates
+
+         assert isinstance(context, ActionContext), type(context)
+         assert isinstance(action_path, list), action_path
+
+         self.context = context
+         self.group_of_dates = dates
+         self.action_path = action_path
+
+     @property
+     @trace_datasource
+     def datasource(self):
+         self._raise_not_implemented()
+
+     @property
+     def data_request(self):
+         """Returns a dictionary with the parameters needed to retrieve the data."""
+         return _data_request(self.datasource)
+
+     def get_cube(self):
+         trace("🧊", f"getting cube from {self.__class__.__name__}")
+         ds = self.datasource
+
+         remapping = self.context.remapping
+         order_by = self.context.order_by
+         flatten_grid = self.context.flatten_grid
+         start = time.time()
+         LOG.debug("Sorting dataset %s %s", dict(order_by), remapping)
+         assert order_by, order_by
+
+         patches = {"number": {None: 0}}
+
+         try:
+             cube = ds.cube(
+                 order_by,
+                 remapping=remapping,
+                 flatten_values=flatten_grid,
+                 patches=patches,
+             )
+             cube = cube.squeeze()
+             LOG.debug(f"Sorting done in {seconds_to_human(time.time()-start)}.")
+         except ValueError:
+             self.explain(ds, order_by, remapping=remapping, patches=patches)
+             # raise ValueError(f"Error in {self}")
+             exit(1)
+
+         if LOG.isEnabledFor(logging.DEBUG):
+             LOG.debug("Cube shape: %s", cube)
+             for k, v in cube.user_coords.items():
+                 LOG.debug(" %s %s", k, shorten_list(v, max_length=10))
+
+         return cube
+
+     def explain(self, ds, *args, remapping, patches):
+
+         METADATA = (
+             "date",
+             "time",
+             "step",
+             "hdate",
+             "valid_datetime",
+             "levtype",
+             "levelist",
+             "number",
+             "level",
+             "shortName",
+             "paramId",
+             "variable",
+         )
+
+         # We redo the logic here
+         print()
+         print("❌" * 40)
+         print()
+         if len(args) == 1 and isinstance(args[0], (list, tuple)):
+             args = args[0]
+
+         # print("Executing", self.action_path)
+         # print("Dates:", compress_dates(self.dates))
+
+         names = []
+         for a in args:
+             if isinstance(a, str):
+                 names.append(a)
+             elif isinstance(a, dict):
+                 names += list(a.keys())
+
+         print(f"Building a {len(names)}D hypercube using", names)
+         ds = ds.order_by(*args, remapping=remapping, patches=patches)
+         user_coords = ds.unique_values(*names, remapping=remapping, patches=patches, progress_bar=False)
+
+         print()
+         print("Number of unique values found for each coordinate:")
+         for k, v in user_coords.items():
+             print(f" {k:20}:", len(v), shorten_list(v, max_length=10))
+         print()
+         user_shape = tuple(len(v) for k, v in user_coords.items())
+         print("Shape of the hypercube :", user_shape)
+         print(
+             "Number of expected fields :", math.prod(user_shape), "=", " x ".join([str(i) for i in user_shape])
+         )
+         print("Number of fields in the dataset :", len(ds))
+         print("Difference :", abs(len(ds) - math.prod(user_shape)))
+         print()
+
+         remapping = build_remapping(remapping, patches)
+         expected = set(itertools.product(*user_coords.values()))
+         extra = set()
+
+         if math.prod(user_shape) > len(ds):
+             print(f"This means that all the fields in the datasets do not exists for all combinations of {names}.")
+
+             for f in ds:
+                 metadata = remapping(f.metadata)
+                 key = tuple(metadata(n, default=None) for n in names)
+                 if key in expected:
+                     expected.remove(key)
+                 else:
+                     extra.add(key)
+
+             print("Missing fields:")
+             print()
+             for i, f in enumerate(sorted(expected)):
+                 print(" ", f)
+                 if i >= 9 and len(expected) > 10:
+                     print("...", len(expected) - i - 1, "more")
+                     break
+
+             print("Extra fields:")
+             print()
+             for i, f in enumerate(sorted(extra)):
+                 print(" ", f)
+                 if i >= 9 and len(extra) > 10:
+                     print("...", len(extra) - i - 1, "more")
+                     break
+
+             print()
+             print("Missing values:")
+             per_name = defaultdict(set)
+             for e in expected:
+                 for n, v in zip(names, e):
+                     per_name[n].add(v)
+
+             for n, v in per_name.items():
+                 print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
+             print()
+
+             print("Extra values:")
+             per_name = defaultdict(set)
+             for e in extra:
+                 for n, v in zip(names, e):
+                     per_name[n].add(v)
+
+             for n, v in per_name.items():
+                 print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
+             print()
+
+             print("To solve this issue, you can:")
+             print(
+                 " - Provide a better selection, like 'step: 0' or 'level: 1000' to "
+                 "reduce the number of selected fields."
+             )
+             print(
+                 " - Split the 'input' part in smaller sections using 'join', "
+                 "making sure that each section represent a full hypercube."
+             )
+
+         else:
+             print(f"More fields in dataset that expected for {names}. " "This means that some fields are duplicated.")
+             duplicated = defaultdict(list)
+             for f in ds:
+                 # print(f.metadata(namespace="default"))
+                 metadata = remapping(f.metadata)
+                 key = tuple(metadata(n, default=None) for n in names)
+                 duplicated[key].append(f)
+
+             print("Duplicated fields:")
+             print()
+             duplicated = {k: v for k, v in duplicated.items() if len(v) > 1}
+             for i, (k, v) in enumerate(sorted(duplicated.items())):
+                 print(" ", k)
+                 for f in v:
+                     x = {k: f.metadata(k, default=None) for k in METADATA if f.metadata(k, default=None) is not None}
+                     print(" ", f, x)
+                 if i >= 9 and len(duplicated) > 10:
+                     print("...", len(duplicated) - i - 1, "more")
+                     break
+
+             print()
+             print("To solve this issue, you can:")
+             print(" - Provide a better selection, like 'step: 0' or 'level: 1000'")
+             print(" - Change the way 'param' is computed using 'variable_naming' " "in the 'build' section.")
+
+         print()
+         print("❌" * 40)
+         print()
+         exit(1)
+
+     def __repr__(self, *args, _indent_="\n", **kwargs):
+         more = ",".join([str(a)[:5000] for a in args])
+         more += ",".join([f"{k}={v}"[:5000] for k, v in kwargs.items()])
+
+         dates = " no-dates"
+         if self.group_of_dates is not None:
+             dates = f" {len(self.group_of_dates)} dates"
+             dates += " ("
+             dates += "/".join(d.strftime("%Y-%m-%d:%H") for d in self.group_of_dates)
+             if len(dates) > 100:
+                 dates = dates[:100] + "..."
+             dates += ")"
+
+         more = more[:5000]
+         txt = f"{self.__class__.__name__}:{dates}{_indent_}{more}"
+         if _indent_:
+             txt = txt.replace("\n", "\n ")
+         return txt
+
+     def _raise_not_implemented(self):
+         raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")
+
+     def _trace_datasource(self, *args, **kwargs):
+         return f"{self.__class__.__name__}({self.group_of_dates})"
+
+     def build_coords(self):
+         if self._coords_already_built:
+             return
+         from_data = self.get_cube().user_coords
+         from_config = self.context.order_by
+
+         keys_from_config = list(from_config.keys())
+         keys_from_data = list(from_data.keys())
+         assert keys_from_data == keys_from_config, f"Critical error: {keys_from_data=} != {keys_from_config=}. {self=}"
+
+         variables_key = list(from_config.keys())[1]
+         ensembles_key = list(from_config.keys())[2]
+
+         if isinstance(from_config[variables_key], (list, tuple)):
+             assert all([v == w for v, w in zip(from_data[variables_key], from_config[variables_key])]), (
+                 from_data[variables_key],
+                 from_config[variables_key],
+             )
+
+         self._variables = from_data[variables_key]  # "param_level"
+         self._ensembles = from_data[ensembles_key]  # "number"
+
+         first_field = self.datasource[0]
+         grid_points = first_field.grid_points()
+
+         lats, lons = grid_points
+
+         assert len(lats) == len(lons), (len(lats), len(lons), first_field)
+         assert len(lats) == math.prod(first_field.shape), (len(lats), first_field.shape, first_field)
+
+         north = np.amax(lats)
+         south = np.amin(lats)
+         east = np.amax(lons)
+         west = np.amin(lons)
+
+         assert -90 <= south <= north <= 90, (south, north, first_field)
+         assert (-180 <= west <= east <= 180) or (0 <= west <= east <= 360), (
+             west,
+             east,
+             first_field,
+         )
+
+         grid_values = list(range(len(grid_points[0])))
+
+         self._grid_points = grid_points
+         self._resolution = first_field.resolution
+         self._grid_values = grid_values
+         self._field_shape = first_field.shape
+         self._proj_string = first_field.proj_string if hasattr(first_field, "proj_string") else None
+
+     @property
+     def variables(self):
+         self.build_coords()
+         return self._variables
+
+     @property
+     def ensembles(self):
+         self.build_coords()
+         return self._ensembles
+
+     @property
+     def resolution(self):
+         self.build_coords()
+         return self._resolution
+
+     @property
+     def grid_values(self):
+         self.build_coords()
+         return self._grid_values
+
+     @property
+     def grid_points(self):
+         self.build_coords()
+         return self._grid_points
+
+     @property
+     def field_shape(self):
+         self.build_coords()
+         return self._field_shape
+
+     @property
+     def proj_string(self):
+         self.build_coords()
+         return self._proj_string
+
+     @cached_property
+     def shape(self):
+         return [
+             len(self.group_of_dates),
+             len(self.variables),
+             len(self.ensembles),
+             len(self.grid_values),
+         ]
+
+     @cached_property
+     def coords(self):
+         return {
+             "dates": list(self.group_of_dates),
+             "variables": self.variables,
+             "ensembles": self.ensembles,
+             "values": self.grid_values,
+         }
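
Note on Result.explain above: when the cube build fails, it compares the size of the full hypercube (the product of the unique values per coordinate) with the number of fields, then lists either the missing coordinate combinations or the duplicated ones. The following is a self-contained sketch of that diagnostic on toy metadata dicts; check_hypercube and the dict-based fields are illustrative, not the earthkit-data API used in the diff:

import itertools
from collections import Counter


def check_hypercube(fields, names):
    # Unique values per coordinate, playing the role of ds.unique_values(*names).
    coords = {n: sorted({f[n] for f in fields}) for n in names}
    # Every combination the hypercube requires.
    expected = set(itertools.product(*coords.values()))
    # How often each combination actually occurs.
    seen = Counter(tuple(f[n] for n in names) for f in fields)

    missing = expected - set(seen)                          # combinations never provided
    duplicated = {k: c for k, c in seen.items() if c > 1}   # combinations provided twice or more
    return missing, duplicated


fields = [
    {"param": "t", "level": 850},
    {"param": "t", "level": 500},
    {"param": "q", "level": 850},
    {"param": "q", "level": 850},  # duplicate; ("q", 500) is never provided
]
missing, duplicated = check_hypercube(fields, ["param", "level"])
print(missing)     # {('q', 500)}
print(duplicated)  # {('q', 850): 2}

The real method only reports one side at a time, branching on whether the field count is below or above the expected product, and resolves keys through the remapping and patches; the sketch reports both sides on plain dict keys.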