anemoi-datasets 0.5.0__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/inspect.py +1 -1
  3. anemoi/datasets/commands/publish.py +30 -0
  4. anemoi/datasets/create/__init__.py +42 -3
  5. anemoi/datasets/create/check.py +6 -0
  6. anemoi/datasets/create/functions/filters/rename.py +2 -3
  7. anemoi/datasets/create/functions/sources/__init__.py +7 -1
  8. anemoi/datasets/create/functions/sources/accumulations.py +2 -0
  9. anemoi/datasets/create/functions/sources/grib.py +1 -1
  10. anemoi/datasets/create/functions/sources/xarray/__init__.py +7 -2
  11. anemoi/datasets/create/functions/sources/xarray/coordinates.py +12 -1
  12. anemoi/datasets/create/functions/sources/xarray/field.py +13 -4
  13. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
  14. anemoi/datasets/create/functions/sources/xarray/flavour.py +130 -13
  15. anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
  16. anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -11
  17. anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
  18. anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
  19. anemoi/datasets/create/input/__init__.py +69 -0
  20. anemoi/datasets/create/input/action.py +123 -0
  21. anemoi/datasets/create/input/concat.py +92 -0
  22. anemoi/datasets/create/input/context.py +59 -0
  23. anemoi/datasets/create/input/data_sources.py +71 -0
  24. anemoi/datasets/create/input/empty.py +42 -0
  25. anemoi/datasets/create/input/filter.py +76 -0
  26. anemoi/datasets/create/input/function.py +122 -0
  27. anemoi/datasets/create/input/join.py +57 -0
  28. anemoi/datasets/create/input/misc.py +85 -0
  29. anemoi/datasets/create/input/pipe.py +33 -0
  30. anemoi/datasets/create/input/repeated_dates.py +217 -0
  31. anemoi/datasets/create/input/result.py +413 -0
  32. anemoi/datasets/create/input/step.py +99 -0
  33. anemoi/datasets/create/{template.py → input/template.py} +0 -42
  34. anemoi/datasets/create/statistics/__init__.py +1 -1
  35. anemoi/datasets/create/zarr.py +4 -2
  36. anemoi/datasets/dates/__init__.py +1 -0
  37. anemoi/datasets/dates/groups.py +12 -4
  38. anemoi/datasets/fields.py +66 -0
  39. anemoi/datasets/utils/fields.py +47 -0
  40. {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/METADATA +1 -1
  41. {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/RECORD +46 -30
  42. anemoi/datasets/create/input.py +0 -1087
  43. /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
  44. {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/LICENSE +0 -0
  45. {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/WHEEL +0 -0
  46. {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/entry_points.txt +0 -0
  47. {anemoi_datasets-0.5.0.dist-info → anemoi_datasets-0.5.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,413 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import itertools
10
+ import logging
11
+ import math
12
+ import time
13
+ from collections import defaultdict
14
+ from functools import cached_property
15
+
16
+ import numpy as np
17
+ from anemoi.utils.dates import as_datetime as as_datetime
18
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
19
+ from anemoi.utils.humanize import seconds_to_human
20
+ from anemoi.utils.humanize import shorten_list
21
+ from earthkit.data.core.order import build_remapping
22
+
23
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
24
+ from anemoi.datasets.fields import FieldArray as FieldArray
25
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
26
+
27
+ from .trace import trace
28
+ from .trace import trace_datasource
29
+
30
+ LOG = logging.getLogger(__name__)
31
+
32
+
33
def _data_request(data):
    """Summarise, per level type, which parameters/levels/steps *data* contains.

    Only fields sharing the ``valid_datetime`` of the first field are
    considered, and fields with an empty "mars" metadata namespace are
    ignored.  Any error raised while inspecting a field is logged and the
    field is skipped, so this function is best-effort by design.

    Returns a dict with the keys ``param_level``, ``param_step``, ``area``
    and ``grid``.  ``area`` and ``grid`` come from the last field that was
    successfully inspected (``None`` if there was none).
    """
    reference_date = None
    levels_by_levtype = defaultdict(set)
    steps_by_levtype = defaultdict(set)
    area = None
    grid = None

    for field in data:
        try:
            valid = field.metadata("valid_datetime")
            if reference_date is None:
                # The first field seen fixes the date used for the summary.
                reference_date = valid
            if valid != reference_date:
                continue

            as_mars = field.metadata(namespace="mars")
            if not as_mars:
                continue

            step = as_mars.get("step")
            levtype = as_mars.get("levtype", "sfc")
            param = as_mars["param"]
            levelist = as_mars.get("levelist", None)
            area = field.mars_area
            grid = field.mars_grid

            # Fields without a level are recorded by parameter name only.
            entry = param if levelist is None else (param, levelist)
            levels_by_levtype[levtype].add(entry)

            if step:
                steps_by_levtype[levtype].add((param, step))
        except Exception:
            LOG.error(f"Error in retrieving metadata (cannot build data request info) for {field}", exc_info=True)

    # Turn the sets into sorted lists so the summary is deterministic.
    sorted_steps = {levtype: sorted(values) for levtype, values in steps_by_levtype.items()}
    sorted_levels = {levtype: sorted(values) for levtype, values in levels_by_levtype.items()}

    return {
        "param_level": sorted_levels,
        "param_step": sorted_steps,
        "area": area,
        "grid": grid,
    }
78
+
79
+
80
class Result:
    """Base class for what an action produces for one group of dates.

    Subclasses are expected to override ``datasource``; the base
    implementation raises ``NotImplementedError``.  Everything else (cube
    construction, coordinate extraction, shape) is derived from
    ``datasource`` and from the ``context`` passed to the constructor.
    """

    empty = False
    # Becomes True (per instance) once build_coords() has completed, so the
    # cube is only built once for all coordinate-derived properties.
    _coords_already_built = False

    def __init__(self, context, action_path, dates):
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid circular imports -- TODO confirm).
        from anemoi.datasets.dates.groups import GroupOfDates

        from .action import ActionContext

        assert isinstance(dates, GroupOfDates), dates

        assert isinstance(context, ActionContext), type(context)
        assert isinstance(action_path, list), action_path

        self.context = context
        self.group_of_dates = dates
        self.action_path = action_path

    @property
    @trace_datasource
    def datasource(self):
        """The fields of this result; must be provided by subclasses."""
        self._raise_not_implemented()

    @property
    def data_request(self):
        """Returns a dictionary with the parameters needed to retrieve the data."""
        return _data_request(self.datasource)

    def get_cube(self):
        """Order the datasource into a hypercube following ``context.order_by``.

        If the fields cannot be arranged into a full hypercube (``ds.cube``
        raises ``ValueError``), a diagnostic is printed by ``explain`` and the
        process exits with status 1.
        """
        trace("🧊", f"getting cube from {self.__class__.__name__}")
        ds = self.datasource

        remapping = self.context.remapping
        order_by = self.context.order_by
        flatten_grid = self.context.flatten_grid
        start = time.time()
        LOG.debug("Sorting dataset %s %s", dict(order_by), remapping)
        assert order_by, order_by

        # Fields without a "number" are treated as member 0.
        patches = {"number": {None: 0}}

        try:
            cube = ds.cube(
                order_by,
                remapping=remapping,
                flatten_values=flatten_grid,
                patches=patches,
            )
            cube = cube.squeeze()
            LOG.debug("Sorting done in %s.", seconds_to_human(time.time() - start))
        except ValueError:
            self.explain(ds, order_by, remapping=remapping, patches=patches)
            # explain() already terminates the process; this is a safety net.
            # SystemExit is raised directly because the `exit` builtin is only
            # installed by the `site` module and is meant for interactive use.
            raise SystemExit(1)

        if LOG.isEnabledFor(logging.DEBUG):
            LOG.debug("Cube shape: %s", cube)
            for k, v in cube.user_coords.items():
                LOG.debug(" %s %s", k, shorten_list(v, max_length=10))

        return cube

    def explain(self, ds, *args, remapping, patches):
        """Print why *ds* does not form a full hypercube, then exit with status 1.

        ``args`` are the ordering keys (strings and/or dicts), or a single
        list/tuple of them.  The report lists either the missing/extra
        coordinate combinations or the duplicated fields.
        """
        METADATA = (
            "date",
            "time",
            "step",
            "hdate",
            "valid_datetime",
            "levtype",
            "levelist",
            "number",
            "level",
            "shortName",
            "paramId",
            "variable",
        )

        # We redo the logic here
        print()
        print("❌" * 40)
        print()
        if len(args) == 1 and isinstance(args[0], (list, tuple)):
            args = args[0]

        names = []
        for a in args:
            if isinstance(a, str):
                names.append(a)
            elif isinstance(a, dict):
                names += list(a.keys())

        print(f"Building a {len(names)}D hypercube using", names)
        ds = ds.order_by(*args, remapping=remapping, patches=patches)
        user_coords = ds.unique_values(*names, remapping=remapping, patches=patches, progress_bar=False)

        print()
        print("Number of unique values found for each coordinate:")
        for k, v in user_coords.items():
            print(f" {k:20}:", len(v), shorten_list(v, max_length=10))
        print()
        user_shape = tuple(len(v) for v in user_coords.values())
        expected_count = math.prod(user_shape)
        print("Shape of the hypercube :", user_shape)
        print("Number of expected fields :", expected_count, "=", " x ".join([str(i) for i in user_shape]))
        print("Number of fields in the dataset :", len(ds))
        print("Difference :", abs(len(ds) - expected_count))
        print()

        remapping = build_remapping(remapping, patches)
        expected = set(itertools.product(*user_coords.values()))
        extra = set()

        if expected_count > len(ds):
            print(f"This means that all the fields in the datasets do not exists for all combinations of {names}.")

            # Tick off every combination that is present; what remains in
            # `expected` is missing, what was never expected goes to `extra`.
            for f in ds:
                metadata = remapping(f.metadata)
                key = tuple(metadata(n, default=None) for n in names)
                if key in expected:
                    expected.remove(key)
                else:
                    extra.add(key)

            print("Missing fields:")
            print()
            for i, f in enumerate(sorted(expected)):
                print(" ", f)
                # Show at most ten entries, then summarise the rest.
                if i >= 9 and len(expected) > 10:
                    print("...", len(expected) - i - 1, "more")
                    break

            print("Extra fields:")
            print()
            for i, f in enumerate(sorted(extra)):
                print(" ", f)
                if i >= 9 and len(extra) > 10:
                    print("...", len(extra) - i - 1, "more")
                    break

            print()
            print("Missing values:")
            per_name = defaultdict(set)
            for e in expected:
                for n, v in zip(names, e):
                    per_name[n].add(v)

            for n, v in per_name.items():
                print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
            print()

            print("Extra values:")
            per_name = defaultdict(set)
            for e in extra:
                for n, v in zip(names, e):
                    per_name[n].add(v)

            for n, v in per_name.items():
                print(" ", n, len(v), shorten_list(sorted(v), max_length=10))
            print()

            print("To solve this issue, you can:")
            print(
                " - Provide a better selection, like 'step: 0' or 'level: 1000' to "
                "reduce the number of selected fields."
            )
            print(
                " - Split the 'input' part in smaller sections using 'join', "
                "making sure that each section represent a full hypercube."
            )

        else:
            print(f"More fields in dataset that expected for {names}. " "This means that some fields are duplicated.")
            duplicated = defaultdict(list)
            for f in ds:
                metadata = remapping(f.metadata)
                key = tuple(metadata(n, default=None) for n in names)
                duplicated[key].append(f)

            print("Duplicated fields:")
            print()
            duplicated = {k: v for k, v in duplicated.items() if len(v) > 1}
            for i, (k, v) in enumerate(sorted(duplicated.items())):
                print(" ", k)
                for f in v:
                    # Only show the metadata keys that are actually set.
                    x = {}
                    for name in METADATA:
                        value = f.metadata(name, default=None)
                        if value is not None:
                            x[name] = value
                    print(" ", f, x)
                if i >= 9 and len(duplicated) > 10:
                    print("...", len(duplicated) - i - 1, "more")
                    break

            print()
            print("To solve this issue, you can:")
            print(" - Provide a better selection, like 'step: 0' or 'level: 1000'")
            print(" - Change the way 'param' is computed using 'variable_naming' " "in the 'build' section.")

        print()
        print("❌" * 40)
        print()
        # See get_cube(): SystemExit instead of the site-provided `exit`.
        raise SystemExit(1)

    def __repr__(self, *args, _indent_="\n", **kwargs):
        """Describe the result; subclasses may pass extra details via *args*/*kwargs*."""
        more = ",".join([str(a)[:5000] for a in args])
        more += ",".join([f"{k}={v}"[:5000] for k, v in kwargs.items()])

        dates = " no-dates"
        if self.group_of_dates is not None:
            dates = f" {len(self.group_of_dates)} dates"
            dates += " ("
            dates += "/".join(d.strftime("%Y-%m-%d:%H") for d in self.group_of_dates)
            if len(dates) > 100:
                dates = dates[:100] + "..."
            dates += ")"

        more = more[:5000]
        txt = f"{self.__class__.__name__}:{dates}{_indent_}{more}"
        if _indent_:
            txt = txt.replace("\n", "\n ")
        return txt

    def _raise_not_implemented(self):
        raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")

    def _trace_datasource(self, *args, **kwargs):
        return f"{self.__class__.__name__}({self.group_of_dates})"

    def build_coords(self):
        """Derive variables, ensembles and grid information from the cube.

        The work is done once per instance; later calls return immediately.
        """
        if self._coords_already_built:
            return

        from_data = self.get_cube().user_coords
        from_config = self.context.order_by

        keys_from_config = list(from_config.keys())
        keys_from_data = list(from_data.keys())
        assert keys_from_data == keys_from_config, f"Critical error: {keys_from_data=} != {keys_from_config=}. {self=}"

        # The second and third ordering keys are used as the variable and
        # ensemble axes respectively.
        variables_key = keys_from_config[1]
        ensembles_key = keys_from_config[2]

        if isinstance(from_config[variables_key], (list, tuple)):
            assert all(v == w for v, w in zip(from_data[variables_key], from_config[variables_key])), (
                from_data[variables_key],
                from_config[variables_key],
            )

        self._variables = from_data[variables_key]  # "param_level"
        self._ensembles = from_data[ensembles_key]  # "number"

        first_field = self.datasource[0]
        grid_points = first_field.grid_points()

        lats, lons = grid_points

        assert len(lats) == len(lons), (len(lats), len(lons), first_field)
        assert len(lats) == math.prod(first_field.shape), (len(lats), first_field.shape, first_field)

        north = np.amax(lats)
        south = np.amin(lats)
        east = np.amax(lons)
        west = np.amin(lons)

        assert -90 <= south <= north <= 90, (south, north, first_field)
        assert (-180 <= west <= east <= 180) or (0 <= west <= east <= 360), (
            west,
            east,
            first_field,
        )

        grid_values = list(range(len(grid_points[0])))

        self._grid_points = grid_points
        self._resolution = first_field.resolution
        self._grid_values = grid_values
        self._field_shape = first_field.shape
        self._proj_string = first_field.proj_string if hasattr(first_field, "proj_string") else None

        # Only mark as built after everything succeeded, so a failed attempt
        # is retried rather than leaving half-initialised attributes behind.
        self._coords_already_built = True

    @property
    def variables(self):
        self.build_coords()
        return self._variables

    @property
    def ensembles(self):
        self.build_coords()
        return self._ensembles

    @property
    def resolution(self):
        self.build_coords()
        return self._resolution

    @property
    def grid_values(self):
        self.build_coords()
        return self._grid_values

    @property
    def grid_points(self):
        self.build_coords()
        return self._grid_points

    @property
    def field_shape(self):
        self.build_coords()
        return self._field_shape

    @property
    def proj_string(self):
        self.build_coords()
        return self._proj_string

    @cached_property
    def shape(self):
        """``[n_dates, n_variables, n_ensembles, n_grid_values]``."""
        return [
            len(self.group_of_dates),
            len(self.variables),
            len(self.ensembles),
            len(self.grid_values),
        ]

    @cached_property
    def coords(self):
        """The coordinate values along each of the four axes of ``shape``."""
        return {
            "dates": list(self.group_of_dates),
            "variables": self.variables,
            "ensembles": self.ensembles,
            "values": self.grid_values,
        }
@@ -0,0 +1,99 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import logging
10
+ from copy import deepcopy
11
+
12
+ from anemoi.utils.dates import as_datetime as as_datetime
13
+ from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
14
+
15
+ from anemoi.datasets.dates import DatesProvider as DatesProvider
16
+ from anemoi.datasets.fields import FieldArray as FieldArray
17
+ from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
18
+
19
+ from .action import Action
20
+ from .context import Context
21
+ from .misc import is_function
22
+ from .result import Result
23
+ from .template import notify_result
24
+ from .trace import trace_datasource
25
+ from .trace import trace_select
26
+
27
+ LOG = logging.getLogger(__name__)
28
+
29
+
30
class StepResult(Result):
    """Result of a step applied to the result of an upstream action.

    Keeps a reference to the upstream result and to the action that created
    this result.  ``datasource`` must be provided by subclasses.
    """

    def __init__(self, context, action_path, group_of_dates, action, upstream_result):
        super().__init__(context, action_path, group_of_dates)
        assert isinstance(upstream_result, Result), type(upstream_result)
        self.action = action
        self.upstream_result = upstream_result

    @property
    @notify_result
    @trace_datasource
    def datasource(self):
        """Fields produced by this step; not implemented in the base class."""
        message = f"Not implemented in {type(self).__name__}"
        raise NotImplementedError(message)
42
+
43
+
44
class StepAction(Action):
    """Action that is chained after a previous step.

    ``result_class`` is ``None`` here and is expected to be set by
    subclasses; ``select`` instantiates it.
    """

    result_class = None

    def __init__(self, context, action_path, previous_step, *args, **kwargs):
        super().__init__(context, action_path, *args, **kwargs)
        self.previous_step = previous_step

    @trace_select
    def select(self, group_of_dates):
        """Build this step's result on top of the previous step's result."""
        upstream = self.previous_step.select(group_of_dates)
        return self.result_class(self.context, self.action_path, group_of_dates, self, upstream)

    def __repr__(self):
        inline = str(self.kwargs)
        return super().__repr__(self.previous_step, _inline_=inline)
63
+
64
+
65
def step_factory(config, context, action_path, previous_step):
    """Create the step action described by a single-entry *config* dict.

    The only key of *config* names the step.  ``"filter"`` maps to
    ``FilterStepAction``; any other key must be a known function in
    ``"filters"`` (as reported by ``is_function``) and is wrapped in a
    ``FunctionStepAction`` with the key prepended to its positional arguments.
    The value gives the step's arguments: a list (positional), a dict
    (keyword) or a string (single positional argument).

    Raises:
        ValueError: if *config* is not a dict, if the step is unknown, or if
            the value is not a list, dict or str.
    """
    from .filter import FilterStepAction
    from .filter import FunctionStepAction

    assert isinstance(context, Context), type(context)
    if not isinstance(config, dict):
        raise ValueError(f"Invalid input config {config}")

    # Work on a copy so the arguments handed to the action never alias the
    # caller's configuration.
    config = deepcopy(config)
    assert len(config) == 1, config

    key, value = next(iter(config.items()))

    cls = dict(
        filter=FilterStepAction,
        # rename=RenameAction,
        # remapping=RemappingAction,
    ).get(key)

    is_named_function = cls is None
    if is_named_function:
        if not is_function(key, "filters"):
            raise ValueError(f"Unknown step {key}")
        cls = FunctionStepAction

    if isinstance(value, list):
        args, kwargs = value, {}
    elif isinstance(value, dict):
        args, kwargs = [], value
    elif isinstance(value, str):
        args, kwargs = [value], {}
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `args`; report the real problem instead.
        raise ValueError(f"Invalid configuration for step {key}: expected a list, dict or str, got {value!r}")

    if is_named_function:
        args = [key] + args

    return cls(context, action_path, previous_step, *args, **kwargs)
@@ -9,14 +9,8 @@
9
9
 
10
10
  import logging
11
11
  import re
12
- import textwrap
13
12
  from functools import wraps
14
13
 
15
- from anemoi.utils.humanize import plural
16
-
17
- from .trace import step
18
- from .trace import trace
19
-
20
14
  LOG = logging.getLogger(__name__)
21
15
 
22
16
 
@@ -30,42 +24,6 @@ def notify_result(method):
30
24
  return wrapper
31
25
 
32
26
 
33
- class Context:
34
- def __init__(self):
35
- # used_references is a set of reference paths that will be needed
36
- self.used_references = set()
37
- # results is a dictionary of reference path -> obj
38
- self.results = {}
39
-
40
- def will_need_reference(self, key):
41
- assert isinstance(key, (list, tuple)), key
42
- key = tuple(key)
43
- self.used_references.add(key)
44
-
45
- def notify_result(self, key, result):
46
- trace(
47
- "🎯",
48
- step(key),
49
- "notify result",
50
- textwrap.shorten(repr(result).replace(",", ", "), width=40),
51
- plural(len(result), "field"),
52
- )
53
- assert isinstance(key, (list, tuple)), key
54
- key = tuple(key)
55
- if key in self.used_references:
56
- if key in self.results:
57
- raise ValueError(f"Duplicate result {key}")
58
- self.results[key] = result
59
-
60
- def get_result(self, key):
61
- assert isinstance(key, (list, tuple)), key
62
- key = tuple(key)
63
- if key in self.results:
64
- return self.results[key]
65
- all_keys = sorted(list(self.results.keys()))
66
- raise ValueError(f"Cannot find result {key} in {all_keys}")
67
-
68
-
69
27
  class Substitution:
70
28
  pass
71
29
 
@@ -155,7 +155,7 @@ def compute_statistics(array, check_variables_names=None, allow_nans=False):
155
155
  check_data_values(values[j, :], name=name, allow_nans=allow_nans)
156
156
  if np.isnan(values[j, :]).all():
157
157
  # LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
158
- raise ValueError(f"All NaN values for {name} ({j}) for date {i}")
158
+ LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
159
159
 
160
160
  # Ignore NaN values
161
161
  minimum[i] = np.nanmin(values, axis=1)
@@ -128,7 +128,7 @@ class ZarrBuiltRegistry:
128
128
  def add_to_history(self, action, **kwargs):
129
129
  new = dict(
130
130
  action=action,
131
- timestamp=datetime.datetime.utcnow().isoformat(),
131
+ timestamp=datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
132
132
  )
133
133
  new.update(kwargs)
134
134
 
@@ -151,7 +151,9 @@ class ZarrBuiltRegistry:
151
151
 
152
152
  def set_flag(self, i, value=True):
153
153
  z = self._open_write()
154
- z.attrs["latest_write_timestamp"] = datetime.datetime.utcnow().isoformat()
154
+ z.attrs["latest_write_timestamp"] = (
155
+ datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
156
+ )
155
157
  z["_build"][self.name_flags][i] = value
156
158
 
157
159
  def ready(self):
@@ -12,6 +12,7 @@ import warnings
12
12
  # from anemoi.utils.dates import as_datetime
13
13
  from anemoi.utils.dates import DateTimes
14
14
  from anemoi.utils.dates import as_datetime
15
+ from anemoi.utils.dates import frequency_to_string
15
16
  from anemoi.utils.dates import frequency_to_timedelta
16
17
  from anemoi.utils.hindcasts import HindcastDatesTimes
17
18
  from anemoi.utils.humanize import print_dates
@@ -9,18 +9,26 @@
9
9
  import itertools
10
10
  from functools import cached_property
11
11
 
12
- from anemoi.datasets.create.input import shorten
13
12
  from anemoi.datasets.dates import DatesProvider
14
13
  from anemoi.datasets.dates import as_datetime
15
14
 
16
15
 
16
def _shorten(dates):
    """Return a compact representation of *dates* for use in ``repr`` output.

    Lists and tuples are first converted to lists of ISO-formatted strings.
    Anything with more than five items is collapsed to ``"first...last"``;
    otherwise the (possibly converted) value is returned unchanged.
    """
    is_sequence = isinstance(dates, (list, tuple))
    items = [d.isoformat() for d in dates] if is_sequence else dates
    if len(items) <= 5:
        return items
    return "{}...{}".format(items[0], items[-1])
22
+
23
+
17
24
  class GroupOfDates:
18
- def __init__(self, dates, provider):
25
+ def __init__(self, dates, provider, partial_ok=False):
19
26
  assert isinstance(provider, DatesProvider), type(provider)
20
27
  assert isinstance(dates, list)
21
28
 
22
29
  self.dates = dates
23
30
  self.provider = provider
31
+ self.partial_ok = partial_ok
24
32
 
25
33
  def __len__(self):
26
34
  return len(self.dates)
@@ -29,7 +37,7 @@ class GroupOfDates:
29
37
  return iter(self.dates)
30
38
 
31
39
  def __repr__(self) -> str:
32
- return f"GroupOfDates(dates={shorten(self.dates)})"
40
+ return f"GroupOfDates(dates={_shorten(self.dates)})"
33
41
 
34
42
  def __eq__(self, other: object) -> bool:
35
43
  return isinstance(other, GroupOfDates) and self.dates == other.dates
@@ -93,7 +101,7 @@ class Groups:
93
101
  return n
94
102
 
95
103
  def __repr__(self):
96
- return f"{self.__class__.__name__}(dates={len(self)},{shorten(self._dates)})"
104
+ return f"{self.__class__.__name__}(dates={len(self)},{_shorten(self._dates)})"
97
105
 
98
106
  def describe(self):
99
107
  return self.dates.summary