anemoi-datasets 0.5.0__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/publish.py +30 -0
- anemoi/datasets/create/__init__.py +42 -3
- anemoi/datasets/create/check.py +6 -0
- anemoi/datasets/create/functions/filters/rename.py +2 -3
- anemoi/datasets/create/functions/sources/__init__.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +2 -0
- anemoi/datasets/create/functions/sources/grib.py +1 -1
- anemoi/datasets/create/functions/sources/xarray/__init__.py +7 -2
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +12 -1
- anemoi/datasets/create/functions/sources/xarray/field.py +13 -4
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
- anemoi/datasets/create/functions/sources/xarray/flavour.py +130 -13
- anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
- anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -11
- anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
- anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
- anemoi/datasets/create/input/__init__.py +69 -0
- anemoi/datasets/create/input/action.py +123 -0
- anemoi/datasets/create/input/concat.py +92 -0
- anemoi/datasets/create/input/context.py +59 -0
- anemoi/datasets/create/input/data_sources.py +71 -0
- anemoi/datasets/create/input/empty.py +42 -0
- anemoi/datasets/create/input/filter.py +76 -0
- anemoi/datasets/create/input/function.py +122 -0
- anemoi/datasets/create/input/join.py +57 -0
- anemoi/datasets/create/input/misc.py +85 -0
- anemoi/datasets/create/input/pipe.py +33 -0
- anemoi/datasets/create/input/repeated_dates.py +217 -0
- anemoi/datasets/create/input/result.py +413 -0
- anemoi/datasets/create/input/step.py +99 -0
- anemoi/datasets/create/{template.py → input/template.py} +0 -42
- anemoi/datasets/create/statistics/__init__.py +1 -1
- anemoi/datasets/create/zarr.py +4 -2
- anemoi/datasets/dates/__init__.py +1 -0
- anemoi/datasets/dates/groups.py +12 -4
- anemoi/datasets/fields.py +66 -0
- anemoi/datasets/utils/fields.py +47 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/METADATA +1 -1
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/RECORD +46 -30
- anemoi/datasets/create/input.py +0 -1087
- /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
import itertools
|
|
10
|
+
import logging
|
|
11
|
+
import math
|
|
12
|
+
import time
|
|
13
|
+
from collections import defaultdict
|
|
14
|
+
from functools import cached_property
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
from anemoi.utils.dates import as_datetime as as_datetime
|
|
18
|
+
from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
|
|
19
|
+
from anemoi.utils.humanize import seconds_to_human
|
|
20
|
+
from anemoi.utils.humanize import shorten_list
|
|
21
|
+
from earthkit.data.core.order import build_remapping
|
|
22
|
+
|
|
23
|
+
from anemoi.datasets.dates import DatesProvider as DatesProvider
|
|
24
|
+
from anemoi.datasets.fields import FieldArray as FieldArray
|
|
25
|
+
from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
|
|
26
|
+
|
|
27
|
+
from .trace import trace
|
|
28
|
+
from .trace import trace_datasource
|
|
29
|
+
|
|
30
|
+
LOG = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _data_request(data):
|
|
34
|
+
date = None
|
|
35
|
+
params_levels = defaultdict(set)
|
|
36
|
+
params_steps = defaultdict(set)
|
|
37
|
+
|
|
38
|
+
area = grid = None
|
|
39
|
+
|
|
40
|
+
for field in data:
|
|
41
|
+
try:
|
|
42
|
+
if date is None:
|
|
43
|
+
date = field.metadata("valid_datetime")
|
|
44
|
+
|
|
45
|
+
if field.metadata("valid_datetime") != date:
|
|
46
|
+
continue
|
|
47
|
+
|
|
48
|
+
as_mars = field.metadata(namespace="mars")
|
|
49
|
+
if not as_mars:
|
|
50
|
+
continue
|
|
51
|
+
step = as_mars.get("step")
|
|
52
|
+
levtype = as_mars.get("levtype", "sfc")
|
|
53
|
+
param = as_mars["param"]
|
|
54
|
+
levelist = as_mars.get("levelist", None)
|
|
55
|
+
area = field.mars_area
|
|
56
|
+
grid = field.mars_grid
|
|
57
|
+
|
|
58
|
+
if levelist is None:
|
|
59
|
+
params_levels[levtype].add(param)
|
|
60
|
+
else:
|
|
61
|
+
params_levels[levtype].add((param, levelist))
|
|
62
|
+
|
|
63
|
+
if step:
|
|
64
|
+
params_steps[levtype].add((param, step))
|
|
65
|
+
except Exception:
|
|
66
|
+
LOG.error(f"Error in retrieving metadata (cannot build data request info) for {field}", exc_info=True)
|
|
67
|
+
|
|
68
|
+
def sort(old_dic):
|
|
69
|
+
new_dic = {}
|
|
70
|
+
for k, v in old_dic.items():
|
|
71
|
+
new_dic[k] = sorted(list(v))
|
|
72
|
+
return new_dic
|
|
73
|
+
|
|
74
|
+
params_steps = sort(params_steps)
|
|
75
|
+
params_levels = sort(params_levels)
|
|
76
|
+
|
|
77
|
+
return dict(param_level=params_levels, param_step=params_steps, area=area, grid=grid)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Result:
    """Base class for the result of applying an action to a group of dates.

    A result keeps the creation ``context``, the ``action_path`` that
    produced it and the ``group_of_dates`` it covers. Subclasses are expected
    to override :attr:`datasource`; the base implementation raises
    ``NotImplementedError``.

    The coordinate-related properties (``variables``, ``ensembles``,
    ``resolution``, ``grid_values``, ``grid_points``, ``field_shape`` and
    ``proj_string``) are computed together, once, by :meth:`build_coords`.
    """

    empty = False
    # Set to True by build_coords() once the coordinates have been computed.
    _coords_already_built = False

    def __init__(self, context, action_path, dates):
        from anemoi.datasets.dates.groups import GroupOfDates

        from .action import ActionContext

        assert isinstance(dates, GroupOfDates), dates

        assert isinstance(context, ActionContext), type(context)
        assert isinstance(action_path, list), action_path

        self.context = context
        self.group_of_dates = dates
        self.action_path = action_path

    @property
    @trace_datasource
    def datasource(self):
        """The fields of this result; must be provided by subclasses."""
        self._raise_not_implemented()

    @property
    def data_request(self):
        """Returns a dictionary with the parameters needed to retrieve the data."""
        return _data_request(self.datasource)

    def get_cube(self):
        """Return the hypercube built from :attr:`datasource`.

        The fields are ordered using the context's ``order_by`` and
        ``remapping``, and the resulting cube is squeezed. If the cube cannot
        be built (``ValueError``), a diagnostic is printed by :meth:`explain`
        and the process exits with status 1.
        """
        # `exit` is injected by the `site` module for interactive use;
        # `sys.exit` is the supported way to terminate from a program.
        import sys

        trace("🧊", f"getting cube from {self.__class__.__name__}")
        ds = self.datasource

        remapping = self.context.remapping
        order_by = self.context.order_by
        flatten_grid = self.context.flatten_grid
        start = time.time()
        LOG.debug("Sorting dataset %s %s", dict(order_by), remapping)
        assert order_by, order_by

        # Fields without a "number" metadata are treated as member 0.
        patches = {"number": {None: 0}}

        try:
            cube = ds.cube(
                order_by,
                remapping=remapping,
                flatten_values=flatten_grid,
                patches=patches,
            )
            cube = cube.squeeze()
            LOG.debug(f"Sorting done in {seconds_to_human(time.time()-start)}.")
        except ValueError:
            # explain() prints a report and terminates the process itself;
            # the explicit exit below is kept as a safeguard.
            self.explain(ds, order_by, remapping=remapping, patches=patches)
            sys.exit(1)

        if LOG.isEnabledFor(logging.DEBUG):
            LOG.debug("Cube shape: %s", cube)
            for k, v in cube.user_coords.items():
                LOG.debug(" %s %s", k, shorten_list(v, max_length=10))

        return cube

    def explain(self, ds, *args, remapping, patches):
        """Print why ``ds`` does not form a full hypercube, then exit with status 1.

        ``args`` are the ordering keys (names, or dictionaries whose keys are
        names); a single list or tuple is also accepted. The report lists
        either the missing/extra combinations (fewer fields than expected) or
        the duplicated fields (otherwise), followed by hints to fix the recipe.
        """
        import sys

        METADATA = (
            "date",
            "time",
            "step",
            "hdate",
            "valid_datetime",
            "levtype",
            "levelist",
            "number",
            "level",
            "shortName",
            "paramId",
            "variable",
        )

        # We redo the logic here
        print()
        print("❌" * 40)
        print()
        if len(args) == 1 and isinstance(args[0], (list, tuple)):
            args = args[0]

        names = []
        for a in args:
            if isinstance(a, str):
                names.append(a)
            elif isinstance(a, dict):
                names += list(a.keys())

        print(f"Building a {len(names)}D hypercube using", names)
        ds = ds.order_by(*args, remapping=remapping, patches=patches)
        user_coords = ds.unique_values(*names, remapping=remapping, patches=patches, progress_bar=False)

        print()
        print("Number of unique values found for each coordinate:")
        for k, v in user_coords.items():
            print(f" {k:20}:", len(v), shorten_list(v, max_length=10))
        print()
        user_shape = tuple(len(v) for v in user_coords.values())
        expected_count = math.prod(user_shape)
        print("Shape of the hypercube :", user_shape)
        print("Number of expected fields :", expected_count, "=", " x ".join(str(i) for i in user_shape))
        print("Number of fields in the dataset :", len(ds))
        print("Difference :", abs(len(ds) - expected_count))
        print()

        remapping = build_remapping(remapping, patches)
        expected = set(itertools.product(*user_coords.values()))
        extra = set()

        if expected_count > len(ds):
            print(f"This means that all the fields in the datasets do not exists for all combinations of {names}.")

            # Tick off every combination that is present; what remains in
            # `expected` is missing, what was never expected goes to `extra`.
            for f in ds:
                metadata = remapping(f.metadata)
                key = tuple(metadata(n, default=None) for n in names)
                if key in expected:
                    expected.remove(key)
                else:
                    extra.add(key)

            print("Missing fields:")
            print()
            for i, f in enumerate(sorted(expected)):
                print(" ", f)
                if i >= 9 and len(expected) > 10:
                    print("...", len(expected) - i - 1, "more")
                    break

            print("Extra fields:")
            print()
            for i, f in enumerate(sorted(extra)):
                print(" ", f)
                if i >= 9 and len(extra) > 10:
                    print("...", len(extra) - i - 1, "more")
                    break

            print()
            print("Missing values:")
            per_name = defaultdict(set)
            for e in expected:
                for n, v in zip(names, e):
                    per_name[n].add(v)

            for n, v in per_name.items():
                print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
            print()

            print("Extra values:")
            per_name = defaultdict(set)
            for e in extra:
                for n, v in zip(names, e):
                    per_name[n].add(v)

            for n, v in per_name.items():
                print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
            print()

            print("To solve this issue, you can:")
            print(
                " - Provide a better selection, like 'step: 0' or 'level: 1000' to "
                "reduce the number of selected fields."
            )
            print(
                " - Split the 'input' part in smaller sections using 'join', "
                "making sure that each section represent a full hypercube."
            )

        else:
            print(f"More fields in dataset that expected for {names}. " "This means that some fields are duplicated.")
            duplicated = defaultdict(list)
            for f in ds:
                metadata = remapping(f.metadata)
                key = tuple(metadata(n, default=None) for n in names)
                duplicated[key].append(f)

            print("Duplicated fields:")
            print()
            # Keep only the combinations that occur more than once.
            duplicated = {k: v for k, v in duplicated.items() if len(v) > 1}
            for i, (k, v) in enumerate(sorted(duplicated.items())):
                print(" ", k)
                for f in v:
                    x = {
                        name: f.metadata(name, default=None)
                        for name in METADATA
                        if f.metadata(name, default=None) is not None
                    }
                    print(" ", f, x)
                if i >= 9 and len(duplicated) > 10:
                    print("...", len(duplicated) - i - 1, "more")
                    break

            print()
            print("To solve this issue, you can:")
            print(" - Provide a better selection, like 'step: 0' or 'level: 1000'")
            print(" - Change the way 'param' is computed using 'variable_naming' " "in the 'build' section.")

        print()
        print("❌" * 40)
        print()
        sys.exit(1)

    def __repr__(self, *args, _indent_="\n", **kwargs):
        """Describe the result: class name, dates (truncated) and extra details.

        ``args`` and ``kwargs`` are additional items to display; each one is
        truncated to 5000 characters, as is the combined text.
        """
        more = ",".join([str(a)[:5000] for a in args])
        more += ",".join([f"{k}={v}"[:5000] for k, v in kwargs.items()])

        dates = " no-dates"
        if self.group_of_dates is not None:
            dates = f" {len(self.group_of_dates)} dates"
            dates += " ("
            dates += "/".join(d.strftime("%Y-%m-%d:%H") for d in self.group_of_dates)
            if len(dates) > 100:
                dates = dates[:100] + "..."
            dates += ")"

        more = more[:5000]
        txt = f"{self.__class__.__name__}:{dates}{_indent_}{more}"
        if _indent_:
            txt = txt.replace("\n", "\n ")
        return txt

    def _raise_not_implemented(self):
        """Raise ``NotImplementedError`` naming the concrete class."""
        raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")

    def _trace_datasource(self, *args, **kwargs):
        """Return the label ``ClassName(group_of_dates)`` for this result."""
        return f"{self.__class__.__name__}({self.group_of_dates})"

    def build_coords(self):
        """Compute and store the coordinates of the result (done only once).

        The variables and ensembles are taken from the cube's user
        coordinates (second and third ``order_by`` keys); the grid
        information is taken from the first field of the datasource.
        """
        if self._coords_already_built:
            return

        from_data = self.get_cube().user_coords
        from_config = self.context.order_by

        keys_from_config = list(from_config.keys())
        keys_from_data = list(from_data.keys())
        assert keys_from_data == keys_from_config, f"Critical error: {keys_from_data=} != {keys_from_config=}. {self=}"

        variables_key = keys_from_config[1]
        ensembles_key = keys_from_config[2]

        if isinstance(from_config[variables_key], (list, tuple)):
            assert all(v == w for v, w in zip(from_data[variables_key], from_config[variables_key])), (
                from_data[variables_key],
                from_config[variables_key],
            )

        self._variables = from_data[variables_key]  # "param_level"
        self._ensembles = from_data[ensembles_key]  # "number"

        first_field = self.datasource[0]
        grid_points = first_field.grid_points()

        lats, lons = grid_points

        assert len(lats) == len(lons), (len(lats), len(lons), first_field)
        assert len(lats) == math.prod(first_field.shape), (len(lats), first_field.shape, first_field)

        north = np.amax(lats)
        south = np.amin(lats)
        east = np.amax(lons)
        west = np.amin(lons)

        assert -90 <= south <= north <= 90, (south, north, first_field)
        assert (-180 <= west <= east <= 180) or (0 <= west <= east <= 360), (
            west,
            east,
            first_field,
        )

        grid_values = list(range(len(grid_points[0])))

        self._grid_points = grid_points
        self._resolution = first_field.resolution
        self._grid_values = grid_values
        self._field_shape = first_field.shape
        self._proj_string = first_field.proj_string if hasattr(first_field, "proj_string") else None

        # Mark the coordinates as built so that the properties below do not
        # rebuild the cube on every access.
        self._coords_already_built = True

    @property
    def variables(self):
        """The values of the second ``order_by`` coordinate found in the data."""
        self.build_coords()
        return self._variables

    @property
    def ensembles(self):
        """The values of the third ``order_by`` coordinate found in the data."""
        self.build_coords()
        return self._ensembles

    @property
    def resolution(self):
        """The ``resolution`` attribute of the first field."""
        self.build_coords()
        return self._resolution

    @property
    def grid_values(self):
        """The list of grid point indices, ``0 .. number of points - 1``."""
        self.build_coords()
        return self._grid_values

    @property
    def grid_points(self):
        """The ``(latitudes, longitudes)`` returned by the first field."""
        self.build_coords()
        return self._grid_points

    @property
    def field_shape(self):
        """The ``shape`` attribute of the first field."""
        self.build_coords()
        return self._field_shape

    @property
    def proj_string(self):
        """The ``proj_string`` of the first field, or ``None`` if it has none."""
        self.build_coords()
        return self._proj_string

    @cached_property
    def shape(self):
        """The sizes of the dates, variables, ensembles and grid values, in that order."""
        return [
            len(self.group_of_dates),
            len(self.variables),
            len(self.ensembles),
            len(self.grid_values),
        ]

    @cached_property
    def coords(self):
        """The coordinates of the result, keyed by dimension name."""
        return {
            "dates": list(self.group_of_dates),
            "variables": self.variables,
            "ensembles": self.ensembles,
            "values": self.grid_values,
        }
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
import logging
|
|
10
|
+
from copy import deepcopy
|
|
11
|
+
|
|
12
|
+
from anemoi.utils.dates import as_datetime as as_datetime
|
|
13
|
+
from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
|
|
14
|
+
|
|
15
|
+
from anemoi.datasets.dates import DatesProvider as DatesProvider
|
|
16
|
+
from anemoi.datasets.fields import FieldArray as FieldArray
|
|
17
|
+
from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
|
|
18
|
+
|
|
19
|
+
from .action import Action
|
|
20
|
+
from .context import Context
|
|
21
|
+
from .misc import is_function
|
|
22
|
+
from .result import Result
|
|
23
|
+
from .template import notify_result
|
|
24
|
+
from .trace import trace_datasource
|
|
25
|
+
from .trace import trace_select
|
|
26
|
+
|
|
27
|
+
LOG = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class StepResult(Result):
    """Result of a step applied on top of the result of a previous step.

    In addition to what :class:`Result` stores, it keeps the ``action`` that
    created it and the ``upstream_result`` it is applied to. Subclasses must
    override :attr:`datasource`.
    """

    def __init__(self, context, action_path, group_of_dates, action, upstream_result):
        super().__init__(context, action_path, group_of_dates)
        assert isinstance(upstream_result, Result), type(upstream_result)
        self.action = action
        self.upstream_result = upstream_result

    @property
    @notify_result
    @trace_datasource
    def datasource(self):
        """The fields produced by the step; not implemented in this base class."""
        class_name = type(self).__name__
        raise NotImplementedError(f"Not implemented in {class_name}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class StepAction(Action):
    """Action applied to the output of a previous step.

    Subclasses set ``result_class`` to the class instantiated by
    :meth:`select`.
    """

    # Class of the result created by select(); to be set by subclasses.
    result_class = None

    def __init__(self, context, action_path, previous_step, *args, **kwargs):
        super().__init__(context, action_path, *args, **kwargs)
        self.previous_step = previous_step

    @trace_select
    def select(self, group_of_dates):
        """Create the result of this step for ``group_of_dates``.

        The previous step is selected for the same dates and its result is
        handed to ``result_class`` as the upstream result.
        """
        make_result = self.result_class
        context = self.context
        path = self.action_path
        upstream = self.previous_step.select(group_of_dates)
        return make_result(context, path, group_of_dates, self, upstream)

    def __repr__(self):
        inline = str(self.kwargs)
        return super().__repr__(self.previous_step, _inline_=inline)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def step_factory(config, context, action_path, previous_step):
    """Create the step action described by ``config``.

    ``config`` must be a dictionary with a single entry ``{name: value}``.
    ``value`` provides the arguments of the step: a list gives positional
    arguments, a dictionary gives keyword arguments and a string gives a
    single positional argument.

    The name ``filter`` maps to ``FilterStepAction``. Any other name must be
    accepted by ``is_function(name, "filters")``; a ``FunctionStepAction`` is
    then created with the name as its first positional argument.

    Raises:
        ValueError: if ``config`` is not a dictionary, if the step name is
            unknown, or if the step value is not a list, dict or str.
    """

    from .filter import FilterStepAction
    from .filter import FunctionStepAction

    assert isinstance(context, Context), type(context)
    if not isinstance(config, dict):
        raise ValueError(f"Invalid input config {config}")

    # Work on a copy so that the caller's configuration is never modified.
    config = deepcopy(config)
    assert len(config) == 1, config

    key = list(config.keys())[0]
    value = config[key]

    cls = dict(
        filter=FilterStepAction,
        # rename=RenameAction,
        # remapping=RemappingAction,
    ).get(key)

    if isinstance(value, list):
        args, kwargs = value, {}
    elif isinstance(value, dict):
        args, kwargs = [], value
    elif isinstance(value, str):
        args, kwargs = [value], {}
    else:
        # Unsupported value type: reported below, after the step name check.
        args = kwargs = None

    is_filter_function = cls is None
    if is_filter_function:
        if not is_function(key, "filters"):
            raise ValueError(f"Unknown step {key}")
        cls = FunctionStepAction

    if args is None:
        raise ValueError(f"Invalid input config {config}")

    if is_filter_function:
        # The function name is passed as the first positional argument.
        args = [key] + args

    return cls(context, action_path, previous_step, *args, **kwargs)
|
|
@@ -9,14 +9,8 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import re
|
|
12
|
-
import textwrap
|
|
13
12
|
from functools import wraps
|
|
14
13
|
|
|
15
|
-
from anemoi.utils.humanize import plural
|
|
16
|
-
|
|
17
|
-
from .trace import step
|
|
18
|
-
from .trace import trace
|
|
19
|
-
|
|
20
14
|
LOG = logging.getLogger(__name__)
|
|
21
15
|
|
|
22
16
|
|
|
@@ -30,42 +24,6 @@ def notify_result(method):
|
|
|
30
24
|
return wrapper
|
|
31
25
|
|
|
32
26
|
|
|
33
|
-
class Context:
|
|
34
|
-
def __init__(self):
|
|
35
|
-
# used_references is a set of reference paths that will be needed
|
|
36
|
-
self.used_references = set()
|
|
37
|
-
# results is a dictionary of reference path -> obj
|
|
38
|
-
self.results = {}
|
|
39
|
-
|
|
40
|
-
def will_need_reference(self, key):
|
|
41
|
-
assert isinstance(key, (list, tuple)), key
|
|
42
|
-
key = tuple(key)
|
|
43
|
-
self.used_references.add(key)
|
|
44
|
-
|
|
45
|
-
def notify_result(self, key, result):
|
|
46
|
-
trace(
|
|
47
|
-
"🎯",
|
|
48
|
-
step(key),
|
|
49
|
-
"notify result",
|
|
50
|
-
textwrap.shorten(repr(result).replace(",", ", "), width=40),
|
|
51
|
-
plural(len(result), "field"),
|
|
52
|
-
)
|
|
53
|
-
assert isinstance(key, (list, tuple)), key
|
|
54
|
-
key = tuple(key)
|
|
55
|
-
if key in self.used_references:
|
|
56
|
-
if key in self.results:
|
|
57
|
-
raise ValueError(f"Duplicate result {key}")
|
|
58
|
-
self.results[key] = result
|
|
59
|
-
|
|
60
|
-
def get_result(self, key):
|
|
61
|
-
assert isinstance(key, (list, tuple)), key
|
|
62
|
-
key = tuple(key)
|
|
63
|
-
if key in self.results:
|
|
64
|
-
return self.results[key]
|
|
65
|
-
all_keys = sorted(list(self.results.keys()))
|
|
66
|
-
raise ValueError(f"Cannot find result {key} in {all_keys}")
|
|
67
|
-
|
|
68
|
-
|
|
69
27
|
class Substitution:
|
|
70
28
|
pass
|
|
71
29
|
|
|
@@ -155,7 +155,7 @@ def compute_statistics(array, check_variables_names=None, allow_nans=False):
|
|
|
155
155
|
check_data_values(values[j, :], name=name, allow_nans=allow_nans)
|
|
156
156
|
if np.isnan(values[j, :]).all():
|
|
157
157
|
# LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
|
|
158
|
-
|
|
158
|
+
LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
|
|
159
159
|
|
|
160
160
|
# Ignore NaN values
|
|
161
161
|
minimum[i] = np.nanmin(values, axis=1)
|
anemoi/datasets/create/zarr.py
CHANGED
|
@@ -128,7 +128,7 @@ class ZarrBuiltRegistry:
|
|
|
128
128
|
def add_to_history(self, action, **kwargs):
|
|
129
129
|
new = dict(
|
|
130
130
|
action=action,
|
|
131
|
-
timestamp=datetime.datetime.
|
|
131
|
+
timestamp=datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
|
|
132
132
|
)
|
|
133
133
|
new.update(kwargs)
|
|
134
134
|
|
|
@@ -151,7 +151,9 @@ class ZarrBuiltRegistry:
|
|
|
151
151
|
|
|
152
152
|
def set_flag(self, i, value=True):
|
|
153
153
|
z = self._open_write()
|
|
154
|
-
z.attrs["latest_write_timestamp"] =
|
|
154
|
+
z.attrs["latest_write_timestamp"] = (
|
|
155
|
+
datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
|
|
156
|
+
)
|
|
155
157
|
z["_build"][self.name_flags][i] = value
|
|
156
158
|
|
|
157
159
|
def ready(self):
|
|
@@ -12,6 +12,7 @@ import warnings
|
|
|
12
12
|
# from anemoi.utils.dates import as_datetime
|
|
13
13
|
from anemoi.utils.dates import DateTimes
|
|
14
14
|
from anemoi.utils.dates import as_datetime
|
|
15
|
+
from anemoi.utils.dates import frequency_to_string
|
|
15
16
|
from anemoi.utils.dates import frequency_to_timedelta
|
|
16
17
|
from anemoi.utils.hindcasts import HindcastDatesTimes
|
|
17
18
|
from anemoi.utils.humanize import print_dates
|
anemoi/datasets/dates/groups.py
CHANGED
|
@@ -9,18 +9,26 @@
|
|
|
9
9
|
import itertools
|
|
10
10
|
from functools import cached_property
|
|
11
11
|
|
|
12
|
-
from anemoi.datasets.create.input import shorten
|
|
13
12
|
from anemoi.datasets.dates import DatesProvider
|
|
14
13
|
from anemoi.datasets.dates import as_datetime
|
|
15
14
|
|
|
16
15
|
|
|
16
|
+
def _shorten(dates):
|
|
17
|
+
if isinstance(dates, (list, tuple)):
|
|
18
|
+
dates = [d.isoformat() for d in dates]
|
|
19
|
+
if len(dates) > 5:
|
|
20
|
+
return f"{dates[0]}...{dates[-1]}"
|
|
21
|
+
return dates
|
|
22
|
+
|
|
23
|
+
|
|
17
24
|
class GroupOfDates:
|
|
18
|
-
def __init__(self, dates, provider):
|
|
25
|
+
def __init__(self, dates, provider, partial_ok=False):
|
|
19
26
|
assert isinstance(provider, DatesProvider), type(provider)
|
|
20
27
|
assert isinstance(dates, list)
|
|
21
28
|
|
|
22
29
|
self.dates = dates
|
|
23
30
|
self.provider = provider
|
|
31
|
+
self.partial_ok = partial_ok
|
|
24
32
|
|
|
25
33
|
def __len__(self):
|
|
26
34
|
return len(self.dates)
|
|
@@ -29,7 +37,7 @@ class GroupOfDates:
|
|
|
29
37
|
return iter(self.dates)
|
|
30
38
|
|
|
31
39
|
def __repr__(self) -> str:
|
|
32
|
-
return f"GroupOfDates(dates={
|
|
40
|
+
return f"GroupOfDates(dates={_shorten(self.dates)})"
|
|
33
41
|
|
|
34
42
|
def __eq__(self, other: object) -> bool:
|
|
35
43
|
return isinstance(other, GroupOfDates) and self.dates == other.dates
|
|
@@ -93,7 +101,7 @@ class Groups:
|
|
|
93
101
|
return n
|
|
94
102
|
|
|
95
103
|
def __repr__(self):
|
|
96
|
-
return f"{self.__class__.__name__}(dates={len(self)},{
|
|
104
|
+
return f"{self.__class__.__name__}(dates={len(self)},{_shorten(self._dates)})"
|
|
97
105
|
|
|
98
106
|
def describe(self):
|
|
99
107
|
return self.dates.summary
|