anemoi-datasets 0.5.24__py3-none-any.whl → 0.5.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/finalise-additions.py +2 -1
  3. anemoi/datasets/commands/finalise.py +2 -1
  4. anemoi/datasets/commands/grib-index.py +1 -1
  5. anemoi/datasets/commands/init-additions.py +2 -1
  6. anemoi/datasets/commands/load-additions.py +2 -1
  7. anemoi/datasets/commands/load.py +2 -1
  8. anemoi/datasets/create/__init__.py +24 -33
  9. anemoi/datasets/create/filter.py +22 -24
  10. anemoi/datasets/create/input/__init__.py +0 -20
  11. anemoi/datasets/create/input/step.py +2 -16
  12. anemoi/datasets/create/sources/accumulations.py +7 -6
  13. anemoi/datasets/create/sources/planetary_computer.py +44 -0
  14. anemoi/datasets/create/sources/xarray_support/__init__.py +6 -22
  15. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -0
  16. anemoi/datasets/create/sources/xarray_support/field.py +1 -4
  17. anemoi/datasets/create/sources/xarray_support/flavour.py +44 -6
  18. anemoi/datasets/create/sources/xarray_support/patch.py +44 -1
  19. anemoi/datasets/create/sources/xarray_support/variable.py +6 -2
  20. anemoi/datasets/data/complement.py +44 -10
  21. anemoi/datasets/data/dataset.py +29 -0
  22. anemoi/datasets/data/forwards.py +8 -2
  23. anemoi/datasets/data/misc.py +74 -16
  24. anemoi/datasets/data/observations/__init__.py +316 -0
  25. anemoi/datasets/data/observations/legacy_obs_dataset.py +200 -0
  26. anemoi/datasets/data/observations/multi.py +64 -0
  27. anemoi/datasets/data/padded.py +227 -0
  28. anemoi/datasets/data/records/__init__.py +442 -0
  29. anemoi/datasets/data/records/backends/__init__.py +157 -0
  30. anemoi/datasets/data/stores.py +7 -56
  31. anemoi/datasets/data/subset.py +5 -0
  32. anemoi/datasets/grids.py +6 -3
  33. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/METADATA +3 -2
  34. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/RECORD +38 -51
  35. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/WHEEL +1 -1
  36. anemoi/datasets/create/filters/__init__.py +0 -33
  37. anemoi/datasets/create/filters/empty.py +0 -37
  38. anemoi/datasets/create/filters/legacy.py +0 -93
  39. anemoi/datasets/create/filters/noop.py +0 -37
  40. anemoi/datasets/create/filters/orog_to_z.py +0 -58
  41. anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
  42. anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
  43. anemoi/datasets/create/filters/rename.py +0 -205
  44. anemoi/datasets/create/filters/rotate_winds.py +0 -105
  45. anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
  46. anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
  47. anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
  48. anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
  49. anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
  50. anemoi/datasets/create/filters/sum.py +0 -68
  51. anemoi/datasets/create/filters/transform.py +0 -51
  52. anemoi/datasets/create/filters/unrotate_winds.py +0 -105
  53. anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
  54. anemoi/datasets/create/filters/wz_to_w.py +0 -98
  55. anemoi/datasets/create/testing.py +0 -76
  56. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/entry_points.txt +0 -0
  57. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/licenses/LICENSE +0 -0
  58. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,227 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import datetime
12
+ import logging
13
+ from functools import cached_property
14
+ from typing import Any
15
+ from typing import Dict
16
+ from typing import Set
17
+
18
+ import numpy as np
19
+ from anemoi.utils.dates import frequency_to_timedelta
20
+ from numpy.typing import NDArray
21
+
22
+ from anemoi.datasets.data.dataset import Dataset
23
+ from anemoi.datasets.data.dataset import FullIndex
24
+ from anemoi.datasets.data.dataset import Shape
25
+ from anemoi.datasets.data.dataset import TupleIndex
26
+ from anemoi.datasets.data.debug import Node
27
+ from anemoi.datasets.data.debug import debug_indexing
28
+ from anemoi.datasets.data.forwards import Forwards
29
+ from anemoi.datasets.data.indexing import expand_list_indexing
30
+ from anemoi.datasets.data.misc import as_first_date
31
+ from anemoi.datasets.data.misc import as_last_date
32
+
33
+ LOG = logging.getLogger(__name__)
34
+
35
+
36
class Padded(Forwards):
    """A dataset view extended with "empty" dates so it covers [start, end].

    Indices that fall before the wrapped dataset's first date or after its
    last date return ``empty_item()``; indices in between are forwarded to
    the wrapped dataset (possibly a ``_subset`` of it).
    """

    # Number of padding dates before the wrapped dataset's first date.
    _before: int = 0
    # Number of padding dates after the wrapped dataset's last date.
    _after: int = 0
    # Number of dates served by the wrapped dataset itself.
    _inside: int = 0

    def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: Dict[str, Any]) -> None:
        """Create a padded subset of a dataset.

        Attributes:
            dataset (Dataset): The dataset to subset.
            start (str): The start date of the subset.
            end (str): The end date of the subset.
            frequency (str): The frequency of the subset.
            reason (Dict[str, Any]): The reason for the padding.
        """

        # Keep only the subsetting arguments that were actually provided.
        self.reason = {k: v for k, v in reason.items() if v is not None}

        if frequency is None:
            frequency = dataset.frequency

        self._frequency = frequency_to_timedelta(frequency)

        if start is None:
            # default is to start at the first date
            start = dataset.dates[0]
        else:
            start = as_first_date(start, None, frequency=self._frequency)

        if end is None:
            # default is to end at the last date
            end = dataset.dates[-1]
        else:
            end = as_last_date(end, None, frequency=self._frequency)

        assert isinstance(dataset.dates[0], np.datetime64), (dataset.dates[0], type(dataset.dates[0]))

        # 'start' is the requested start date
        # 'end' is the requested end date
        # 'first' is the first date of the dataset
        # 'last' is the last date of the dataset
        first = dataset.dates[0]
        last = dataset.dates[-1]

        # NOTE(review): this converts the *raw* ``frequency`` argument, not
        # ``self._frequency`` — confirm np.array accepts every form callers
        # pass here (e.g. a string such as "6h" would raise).
        timedelta = np.array([frequency], dtype="timedelta64[s]")[0]

        parts = []
        # End (exclusive) of the "before" range; capped at the dataset's
        # first date, or at end+1 step when the whole range is before it.
        before_end = min(end + timedelta, first)
        before_part = np.arange(start, before_end, timedelta)
        if start < first:
            # if the start date is before the first date of the dataset, there is a "before" part
            assert len(before_part) > 0, (start, first, before_end)
            parts.append(before_part)
            self._before = len(before_part)
        if start >= first:
            # if the start date is the first date of the dataset, there is no "before" part
            assert len(before_part) == 0, (start, first, before_end)
            self._before = 0

        # if the start date is before the last date of the dataset
        # and the end date is after the first date of the dataset
        # there is an "inside" part
        if start < last and end > first:
            inside_start = max(start, first)
            inside_end = min(end, last)
            self.dataset = dataset._subset(start=inside_start, end=inside_end)
            inside_part = self.dataset.dates
            parts.append(inside_part)
            self._inside = len(inside_part)
        else:
            self.dataset = dataset  # still needed to get the empty_item
            self._inside = 0

        # Start of the "after" range; capped at the dataset's last date + 1
        # step, or at start when the whole range is after the dataset.
        after_start = max(start, last + timedelta)
        after_part = np.arange(after_start, end + timedelta, timedelta)
        if end > last:
            # if the end date is after the last date of the dataset, there is an "after" part
            assert len(after_part) > 0, (end, last, after_start)
            parts.append(after_part)
            self._after = len(after_part)
        if end <= last:
            assert len(after_part) == 0, (end, last, after_start)
            self._after = 0

        # Full list of dates: before + inside + after.
        self._dates = np.hstack(parts)

        assert len(self._dates) == self._before + self._inside + self._after, (
            len(self._dates),
            self._before,
            self._inside,
            self._after,
        )

        assert self._dates[0] == start, (self._dates[0], start)
        assert self._dates[-1] == end, (self._dates[-1], end)

        # Forward other properties to the super dataset
        super().__init__(dataset)

    @debug_indexing
    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
        # Dispatch on index type; padded positions yield the empty item,
        # others are shifted by the size of the "before" part.
        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self._get_slice(n)

        if self._i_out_of_range(n):
            return self.empty_item()

        return self.dataset[n - self._before]

    def _i_out_of_range(self, n: FullIndex) -> bool:
        """Check if the index is out of range."""
        # In the "before" padding.
        if 0 <= n < self._before:
            return True

        # In the "after" padding.
        if (self._before + self._inside) <= n < (self._before + self._inside + self._after):
            return True
        return False

    @debug_indexing
    def _get_slice(self, s: slice) -> NDArray[Any]:
        # NOTE(review): returns a plain list, not an NDArray as annotated,
        # and ``self._len`` is not defined in this class — presumably
        # provided by Forwards/Dataset; verify (else AttributeError).
        LOG.warning("Padded subset does not support slice indexing, returning a list")
        return [self[i] for i in range(*s.indices(self._len))]

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, n: TupleIndex) -> NDArray[Any]:
        # NOTE(review): returns a plain list, not an NDArray as annotated.
        LOG.warning("Padded subset does not support tuple indexing, returning a list")
        return [self[i] for i in n]

    def empty_item(self):
        # Delegated so padded positions look like the wrapped dataset's
        # notion of "no data".
        return self.dataset.empty_item()

    def get_aux(self, i: FullIndex) -> NDArray[np.timedelta64]:
        # Returns a 3-tuple of arrays; empty float32 arrays for padded
        # positions, otherwise forwarded with the index shifted.
        if self._i_out_of_range(i):
            arr = np.array([], dtype=np.float32)
            aux = arr, arr, arr
        else:
            aux = self.dataset.get_aux(i - self._before)

        assert len(aux) == 3, (aux, i)
        return aux

    def __len__(self) -> int:
        # Length of the padded view, not of the wrapped dataset.
        return len(self._dates)

    @property
    def frequency(self) -> datetime.timedelta:
        """Get the frequency of the subset."""
        return self._frequency

    @property
    def dates(self) -> NDArray[np.datetime64]:
        # All dates, including the padded ones.
        return self._dates

    @property
    def shape(self) -> Shape:
        # Same shape as the wrapped dataset, with the time axis extended.
        return (len(self.dates),) + self.dataset.shape[1:]

    @cached_property
    def missing(self) -> Set[int]:
        raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
        # return self.forward.missing

    def tree(self) -> Node:
        """Get the tree representation of the subset.

        Returns:
            Node: The tree representation of the subset.
        """
        return Node(self, [self.dataset.tree()], **self.reason)

    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
        """Get the metadata specific to the forwards subclass.

        Returns:
            Dict[str, Any]: The metadata specific to the forwards subclass.
        """
        return {
            # "indices": self.indices,
            "reason": self.reason,
        }

    def __repr__(self) -> str:
        """Get the string representation of the subset.

        Returns:
            str: The string representation of the subset.
        """
        return f"Padded({self.forward}, {self.dates[0]}...{self.dates[-1]}, frequency={self.frequency})"
@@ -0,0 +1,442 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ import datetime
11
+ import logging
12
+ import os
13
+ from collections import defaultdict
14
+ from functools import cached_property
15
+
16
+ import numpy as np
17
+ from anemoi.utils.dates import frequency_to_timedelta
18
+
19
+ from anemoi.datasets.data.records.backends import backend_factory
20
+
21
+ LOG = logging.getLogger(__name__)
22
+
23
if os.environ.get("ANEMOI_DATASET_COUNTER", "0") == "1":
    # Debug instrumentation, enabled with ANEMOI_DATASET_COUNTER=1.

    def counter(func):
        """Decorator reporting the cumulative number of calls to *func*.

        The original implementation contained a ``yield`` (making every
        decorated function return a generator instead of its result) and
        called *func* once per element of its first argument; it now simply
        counts calls and forwards the return value unchanged.
        """
        count = 0

        def wrapper(*args, **kwargs):
            nonlocal count
            count += 1
            print(f"Counter: {count} calls to {func.__name__}")
            return func(*args, **kwargs)

        return wrapper

else:

    def counter(func):
        """No-op decorator used when call counting is disabled."""
        return func
39
+
40
+
41
def open_records_dataset(dataset, **kwargs):
    """Open a records dataset stored as a ``.vz`` file.

    Raises ValueError for any other file extension.
    """
    if dataset.endswith(".vz"):
        return RecordsDataset(dataset, **kwargs)
    raise ValueError("dataset must be a .vz file")
45
+
46
+
47
class BaseRecordsDataset:
    """Base class for record-oriented (observation) datasets.

    Integer indexing yields a :class:`Record` (one date); string indexing
    yields a :class:`Tabular` view of one group.
    """

    def __getitem__(self, i):
        # Dispatch on index type: group name vs. record number.
        if isinstance(i, str):
            return self._getgroup(i)

        if isinstance(i, int):
            return self._getrecord(i)

        raise ValueError(f"Invalid index {i}, must be int or str")

    def _getgroup(self, i):
        # Lazy per-group view; no data is loaded here.
        return Tabular(self, i)

    def _getrecord(self, i):
        # Lazy per-date view; no data is loaded here.
        return Record(self, i)

    def _load_data(self, i):
        raise NotImplementedError("Must be implemented in subclass")

    @property
    def start_date(self):
        # NOTE(review): raises IndexError on an empty dataset, unlike
        # ``end_date`` which returns None — confirm the asymmetry is wanted.
        return self.dates[0]

    @property
    def end_date(self):
        if len(self.dates) == 0:
            return None
        if len(self.dates) == 1:
            return self.dates[0]
        return self.dates[-1]

    @property
    def groups(self):
        # Group names as a tuple (one per observation source).
        return tuple(self.keys())

    def _subset(self, **kwargs):
        # Build a lazy chain of views: date range first, then selection.
        start = kwargs.pop("start", None)
        end = kwargs.pop("end", None)
        frequency = kwargs.pop("frequency", self.frequency)

        if frequency != self.frequency:
            raise ValueError(f"Changing the frequency {frequency} (from {self.frequency}) is not implemented yet.")

        if start is not None or end is not None:

            def _dates_to_indices(start, end):
                # Imported here, presumably to avoid a circular import at
                # module load time — confirm.
                from anemoi.datasets.data.misc import as_first_date
                from anemoi.datasets.data.misc import as_last_date

                start = self.dates[0] if start is None else as_first_date(start, self.dates)
                end = self.dates[-1] if end is None else as_last_date(end, self.dates)

                # Indices of all dates within [start, end] (inclusive).
                return [i for i, date in enumerate(self.dates) if start <= date <= end]

            # Recurse so remaining kwargs (e.g. "select") are applied on top.
            return RecordsSubset(
                self, _dates_to_indices(start, end), {"start": start, "end": end, "frequency": frequency}
            )._subset(**kwargs)

        select = kwargs.pop("select", None)
        if select is not None:
            return Select(self, select)._subset(**kwargs)

        # NOTE(review): any remaining kwargs are silently ignored here.
        return self

    def mutate(self):
        # Hook for classes that replace themselves after construction.
        return self

    def _check(self):
        pass

    @property
    def name_to_index(self):
        raise NotImplementedError("Must be implemented in subclass")
121
+
122
+
123
class RecordsForward(BaseRecordsDataset):
    """A records dataset that delegates everything to a wrapped dataset.

    Subclasses override only the attributes they actually change.
    """

    def __init__(self, dataset):
        self.forward = dataset

    def __len__(self):
        return len(self.forward)

    def keys(self):
        return self.forward.keys()

    @property
    def dates(self):
        return self.forward.dates

    @property
    def frequency(self):
        return self.forward.frequency

    @property
    def variables(self):
        return self.forward.variables

    @property
    def name_to_index(self):
        return self.forward.name_to_index

    @property
    def statistics(self):
        return self.forward.statistics

    @property
    def shapes(self):
        return self.forward.shapes
156
+
157
+
158
def match_variable(lst, group, name):
    """Return True if the variable ``group.name`` matches a selection pattern.

    Parameters
    ----------
    lst : list of str
        Patterns of the form ``"group.name"``; a pattern without a dot is
        treated as a whole group (``"pattern.*"``).  ``*`` is a wildcard for
        either part, and a plain ``"*"`` matches everything.
    group : str
        The group the variable belongs to.
    name : str
        The variable name.

    Returns
    -------
    bool
    """

    if name in ("__latitudes", "__longitudes"):
        # This should disappear in the future, when we stop saving a duplicate
        # of lat/lon in the data
        return False

    patterns = [k if "." in k else f"{k}.*" for k in lst]

    # Bug fix: a bare "*" is normalised to "*.*" above, so the previous
    # `"*" in lst` test (run *after* normalisation) could never succeed and
    # the universal wildcard never matched.  Check both spellings.
    candidates = (f"{group}.{name}", f"{group}.*", f"*.{name}", "*", "*.*")
    return any(c in patterns for c in candidates)
178
+
179
+
180
class Select(RecordsForward):
    """View of a records dataset restricted to a selection of variables.

    ``select`` is either a list of patterns understood by
    :func:`match_variable`, or a dict mapping group names to variable names.
    """

    def __init__(self, dataset, select):
        super().__init__(dataset)

        self.dataset = dataset

        if isinstance(select, dict):
            # if a dict is provided, make it a list of strings with '.'
            sel = []
            for group, d in select.items():
                for name in d:
                    sel.append(f"{group}.{name}")
            select = sel

        self._select = select

        self.reason = {"select": select}
        self._build_indices_and_name_to_index()

    def _build_indices_and_name_to_index(self):
        # Precompute, per group, a boolean mask over the group's variables
        # and the renumbered (0..k-1) name-to-index mapping of the kept ones.
        # NOTE(review): groups with no matching variable are dropped from
        # ``_indices`` entirely, so ``keys()`` will not expose them.
        indices = {}
        name_to_index = {}
        variables = {}

        # this should be revisited to take into account the order requested by the user
        # see what is done in the fields datasets
        for group, names in self.dataset.variables.items():
            ind = np.zeros(len(names), dtype=bool)
            count = 0  # position of the variable in the *filtered* data
            for j, name in enumerate(names):
                if self.match_variable(group, name):
                    # names.index(name) == j implies no duplicate names.
                    assert j == names.index(name), f"Invalid index {j} for {name} in {group}"
                    ind[j] = True
                    indices[group] = ind
                    if group not in name_to_index:
                        name_to_index[group] = {}
                        assert group not in variables, (group, j, name, variables, name_to_index)
                        variables[group] = []
                    name_to_index[group][name] = count
                    variables[group].append(name)
                    count += 1
            assert np.sum(ind) == count, f"Mismatch in {group}: {names}, {ind}"
        self._indices = indices
        self._name_to_index = name_to_index
        self._variables = variables

    def match_variable(self, *args, **kwargs):
        # Bind the selection patterns to the module-level matcher.
        return match_variable(self._select, *args, **kwargs)

    def keys(self):
        # Only groups with at least one selected variable.
        return self._indices.keys()

    def _load_data(self, i):
        # Load record i, filtering the per-group "data" arrays; the
        # auxiliary arrays are per-observation, not per-variable, and are
        # copied through unchanged.
        forward = self.dataset._load_data(i)
        data = {}
        for k, v in self._indices.items():
            data[f"latitudes:{k}"] = forward[f"latitudes:{k}"]
            data[f"longitudes:{k}"] = forward[f"longitudes:{k}"]
            data[f"timedeltas:{k}"] = forward[f"timedeltas:{k}"]
            data[f"metadata:{k}"] = forward[f"metadata:{k}"]
        for k, v in self._indices.items():
            data[f"data:{k}"] = forward[f"data:{k}"][v]  # notice the [v] here
        return data

    @property
    def name_to_index(self):
        return self._name_to_index

    @property
    def variables(self):
        return self._variables

    @property
    def statistics(self):
        # Statistics restricted (via the boolean masks) to the selected
        # variables of each group.
        dic = {}
        for group, v in self._indices.items():
            stats = self.dataset.statistics[group]
            dic[group] = {key: stats[key][v] for key in stats.keys()}
            assert "mean" in dic[group], f"Missing mean in {dic[group]}"
        return dic
260
+
261
+
262
class RecordsSubset(RecordsForward):
    """A view over a subset of the dates of a records dataset."""

    def __init__(self, dataset, indices, reason):
        super().__init__(dataset)
        self.dataset = dataset
        self.reason = reason
        self._indices = indices

    def __len__(self):
        return len(self._indices)

    def _load_data(self, i):
        # Translate the subset position into the wrapped dataset's index.
        return self.dataset._load_data(self._indices[i])

    @cached_property
    def dates(self):
        # Computed once: the wrapped dates restricted to the kept indices.
        return self.dataset.dates[self._indices]
278
+
279
+
280
class RecordsDataset(BaseRecordsDataset):
    """A records dataset stored on disk, accessed through a storage backend."""

    def __init__(self, path, backend="npz1", **kwargs):
        # NOTE(review): the message says the extra kwargs are ignored, but
        # they are still forwarded to the backend below — confirm intent.
        if kwargs:
            print("Warning: ignoring additional kwargs", kwargs)
        self.path = path
        self.backend = backend_factory(backend, path, **kwargs)
        # Bind keys() to the metadata's "sources" mapping; this also
        # triggers the (cached) metadata read at construction time.
        self.keys = self.metadata["sources"].keys

    @property
    def frequency(self):
        # Stored as a string (e.g. "6h"); returned as a timedelta.
        frequency = self.metadata["frequency"]
        frequency = frequency_to_timedelta(frequency)
        return frequency

    @property
    def name_to_index(self):
        return self.metadata["name_to_index"]

    @property
    def variables(self):
        return self.metadata["variables"]

    @cached_property
    def metadata(self):
        # Read once per dataset instance.
        return self.backend.read_metadata()

    @property
    def shapes(self):
        return self.metadata["shapes"]

    def items(self, *args, **kwargs):
        # Builds a fresh {group: Tabular} mapping on every call.
        return {k: Tabular(self, k) for k in self.keys()}.items(*args, **kwargs)

    @cached_property
    def statistics(self):
        return self.backend.read_statistics()

    def __len__(self):
        # One record per date.
        return len(self.dates)

    @property
    def start_date(self):
        date = self.metadata["start_date"]
        return datetime.datetime.fromisoformat(date)

    @property
    def end_date(self):
        date = self.metadata["end_date"]
        return datetime.datetime.fromisoformat(date)

    @cached_property
    def dates(self):
        # All dates from start_date to end_date (inclusive), stepping by
        # the dataset frequency.
        result = []
        delta = self.frequency
        d = self.start_date
        while d <= self.end_date:
            result.append(d)
            d += delta
        return np.array(result)

    @counter
    def _load_data(self, i):
        # Raw payload of record i; keys are "<kind>:<group>".
        return self.backend.read(i)

    def check(self, i=None):
        # Sanity check: every group of record i must provide exactly the
        # expected kinds of arrays.  Does nothing when i is None.
        if i is not None:
            dict_of_sets = defaultdict(set)
            for key in self._load_data(i).keys():
                kind, group = key.split(":")
                dict_of_sets[group].add(kind)
            for group, s in dict_of_sets.items():
                assert s == {"latitudes", "longitudes", "timedeltas", "metadata", "data"}, f"Invalid keys {s}"
353
+
354
+
355
class Record(dict):
    """A single record (one date) of a records dataset.

    NOTE(review): subclasses ``dict`` but never stores items in itself —
    all access goes through the lazily-loaded ``_payload``; confirm the
    ``dict`` base class is actually needed.
    """

    def __init__(self, dataset, n):
        self.dataset = dataset
        # Position of the record in the dataset's dates.
        self.n = n

    def __repr__(self):
        # Deliberately does not trigger a data load just to print.
        d = {group: "<not-loaded>" for group in self.dataset.keys()}
        return str(d)

    def items(self):
        # Raw payload items, keyed "<kind>:<group>" (loads the data).
        return self._payload.items()

    @property
    def name_to_index(self):
        return self.dataset.name_to_index

    @cached_property
    def _payload(self):
        # Loaded once per record; every key must be "<kind>:<group>".
        payload = self.dataset._load_data(self.n)
        for k in payload.keys():
            assert len(k.split(":")) == 2, f"Invalid key {k}"
        return payload

    def keys(self):
        return self.dataset.keys()

    def __getitem__(self, group):
        # Data array of one group for this record.
        return self._payload["data:" + group]

    def _get_aux(self, name):
        # Per-group auxiliary arrays ("latitudes", "longitudes", ...).
        try:
            return {k: self._payload[name + ":" + k] for k in self.keys()}
        except KeyError as e:
            # NOTE(review): BaseException.add_note requires Python 3.11+.
            e.add_note(f"Available keys are {self._payload.keys()}")
            raise

    @property
    def latitudes(self):
        return self._get_aux("latitudes")

    @property
    def longitudes(self):
        return self._get_aux("longitudes")

    @property
    def timedeltas(self):
        return self._get_aux("timedeltas")

    @property
    def statistics(self):
        return self.dataset.statistics

    @property
    def groups(self):
        return tuple(self.keys())
410
+
411
+
412
class Tabular:
    """Group-level view of a records dataset.

    Exposes one group's variables and statistics; indexing with a record
    number returns that record's data array for this group.
    """

    def __init__(self, dataset, name):
        self.dataset = dataset
        self.name = name

    @property
    def group(self):
        return self.name

    def __getitem__(self, i):
        return self.__fetch(i, "data")

    def __fetch(self, i, kind):
        # Load the whole record, then pick this group's entry.
        payload = self.dataset._load_data(i)
        try:
            return payload[f"{kind}:{self.name}"]
        except KeyError:
            print(f"KeyError to retrieve {self.name} available groups are", payload.keys())
            raise

    @property
    def variables(self):
        return self.dataset.variables[self.name]

    @property
    def name_to_index(self):
        return self.dataset.name_to_index[self.name]

    @property
    def statistics(self):
        return self.dataset.statistics[self.name]