anemoi-datasets 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
Files changed (52)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/cleanup.py +44 -0
  3. anemoi/datasets/commands/create.py +50 -20
  4. anemoi/datasets/commands/finalise-additions.py +45 -0
  5. anemoi/datasets/commands/finalise.py +39 -0
  6. anemoi/datasets/commands/init-additions.py +45 -0
  7. anemoi/datasets/commands/init.py +67 -0
  8. anemoi/datasets/commands/inspect.py +1 -1
  9. anemoi/datasets/commands/load-additions.py +47 -0
  10. anemoi/datasets/commands/load.py +47 -0
  11. anemoi/datasets/commands/patch.py +39 -0
  12. anemoi/datasets/compute/recentre.py +1 -1
  13. anemoi/datasets/create/__init__.py +961 -146
  14. anemoi/datasets/create/check.py +5 -3
  15. anemoi/datasets/create/config.py +53 -2
  16. anemoi/datasets/create/functions/sources/accumulations.py +6 -22
  17. anemoi/datasets/create/functions/sources/hindcasts.py +27 -12
  18. anemoi/datasets/create/functions/sources/tendencies.py +1 -1
  19. anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
  20. anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
  21. anemoi/datasets/create/functions/sources/xarray/field.py +1 -1
  22. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
  23. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
  24. anemoi/datasets/create/functions/sources/xarray/metadata.py +27 -29
  25. anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
  26. anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
  27. anemoi/datasets/create/input.py +62 -25
  28. anemoi/datasets/create/statistics/__init__.py +39 -23
  29. anemoi/datasets/create/utils.py +3 -2
  30. anemoi/datasets/data/__init__.py +1 -0
  31. anemoi/datasets/data/concat.py +46 -2
  32. anemoi/datasets/data/dataset.py +109 -34
  33. anemoi/datasets/data/forwards.py +17 -8
  34. anemoi/datasets/data/grids.py +17 -3
  35. anemoi/datasets/data/interpolate.py +133 -0
  36. anemoi/datasets/data/misc.py +56 -66
  37. anemoi/datasets/data/missing.py +240 -0
  38. anemoi/datasets/data/select.py +7 -1
  39. anemoi/datasets/data/stores.py +3 -3
  40. anemoi/datasets/data/subset.py +47 -5
  41. anemoi/datasets/data/unchecked.py +20 -22
  42. anemoi/datasets/data/xy.py +125 -0
  43. anemoi/datasets/dates/__init__.py +33 -20
  44. anemoi/datasets/dates/groups.py +2 -2
  45. anemoi/datasets/grids.py +66 -48
  46. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/METADATA +5 -5
  47. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/RECORD +51 -41
  48. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/WHEEL +1 -1
  49. anemoi/datasets/create/loaders.py +0 -924
  50. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/LICENSE +0 -0
  51. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/entry_points.txt +0 -0
  52. {anemoi_datasets-0.4.3.dist-info → anemoi_datasets-0.4.5.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/missing.py (new file)
@@ -0,0 +1,240 @@
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+ # This software is licensed under the terms of the Apache Licence Version 2.0
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ # In applying this licence, ECMWF does not waive the privileges and immunities
+ # granted to it by virtue of its status as an intergovernmental organisation
+ # nor does it submit to any jurisdiction.
+
+ import logging
+ from functools import cached_property
+
+ import numpy as np
+
+ from anemoi.datasets.create.utils import to_datetime
+ from anemoi.datasets.data import MissingDateError
+
+ from .debug import Node
+ from .debug import debug_indexing
+ from .forwards import Forwards
+ from .indexing import expand_list_indexing
+ from .indexing import update_tuple
+
+ LOG = logging.getLogger(__name__)
+
+
+ class MissingDates(Forwards):
+     # TODO: Use that class instead of ZarrMissing
+
+     def __init__(self, dataset, missing_dates):
+         super().__init__(dataset)
+         self.missing_dates = []
+
+         self._missing = set()
+
+         other = []
+         for date in missing_dates:
+             if isinstance(date, int):
+                 self._missing.add(date)
+                 self.missing_dates.append(dataset.dates[date])
+             else:
+                 date = to_datetime(date)
+                 other.append(date)
+
+         if other:
+             for i, date in enumerate(dataset.dates):
+                 if date in other:
+                     self._missing.add(i)
+                     self.missing_dates.append(date)
+
+         n = self.forward._len
+         self._missing = set(i for i in self._missing if 0 <= i < n)
+         self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)
+
+         assert len(self._missing), "No dates to force missing"
+
+     @cached_property
+     def missing(self):
+         return self._missing.union(self.forward.missing)
+
+     @debug_indexing
+     @expand_list_indexing
+     def __getitem__(self, n):
+         if isinstance(n, int):
+             if n in self.missing:
+                 self._report_missing(n)
+             return self.forward[n]
+
+         if isinstance(n, slice):
+             common = set(range(*n.indices(len(self)))) & self.missing
+             if common:
+                 self._report_missing(list(common)[0])
+             return self.forward[n]
+
+         if isinstance(n, tuple):
+             first = n[0]
+             if isinstance(first, int):
+                 if first in self.missing:
+                     self._report_missing(first)
+                 return self.forward[n]
+
+             if isinstance(first, slice):
+                 common = set(range(*first.indices(len(self)))) & self.missing
+                 if common:
+                     self._report_missing(list(common)[0])
+                 return self.forward[n]
+
+             if isinstance(first, (list, tuple)):
+                 common = set(first) & self.missing
+                 if common:
+                     self._report_missing(list(common)[0])
+                 return self.forward[n]
+
+         raise TypeError(f"Unsupported index {n} {type(n)}")
+
+     def _report_missing(self, n):
+         raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")
+
+     @property
+     def reason(self):
+         return {"missing_dates": self.missing_dates}
+
+     def tree(self):
+         return Node(self, [self.forward.tree()], **self.reason)
+
+     def subclass_metadata_specific(self):
+         return {"missing_dates": self.missing_dates}
+
+
+ class SkipMissingDates(Forwards):
+
+     def __init__(self, dataset, expected_access):
+         super().__init__(dataset)
+
+         # if isinstance(expected_access, (tuple, list)):
+         #     expected_access = slice(*expected_access)
+
+         if isinstance(expected_access, int):
+             expected_access = slice(0, expected_access)
+
+         assert isinstance(expected_access, slice), f"Expected access must be a slice, got {expected_access}"
+
+         expected_access = slice(*expected_access.indices(dataset._len))
+         missing = dataset.missing.copy()
+
+         size = (expected_access.stop - expected_access.start) // expected_access.step
+         indices = []
+
+         for i in range(dataset._len):
+             s = slice(expected_access.start + i, expected_access.stop + i, expected_access.step)
+             p = set(range(*s.indices(dataset._len)))
+             if p.intersection(missing):
+                 continue
+
+             if len(p) != size:
+                 continue
+
+             indices.append(tuple(sorted(p)))
+
+         self.expected_access = expected_access
+         self.indices = indices
+
+     def __len__(self):
+         return len(self.indices)
+
+     @property
+     def start_date(self):
+         return self.forward.start_date
+
+     @property
+     def end_date(self):
+         return self.forward.end_date
+
+     @property
+     def dates(self):
+         raise NotImplementedError("SkipMissingDates.dates")
+
+     @debug_indexing
+     @expand_list_indexing
+     def _get_tuple(self, index):
+
+         def _get_one(n):
+             result = []
+             for i in self.indices[n]:
+                 s, _ = update_tuple(index, 0, i)
+                 result.append(self.forward[s])
+
+             return tuple(result)
+
+         first = index[0]
+         if isinstance(first, int):
+             return _get_one(first)
+
+         assert isinstance(first, slice), f"SkipMissingDates._get_tuple {index}"
+
+         values = [_get_one(i) for i in range(*first.indices(self._len))]
+
+         result = [_ for _ in zip(*values)]
+         return tuple(np.stack(_) for _ in result)
+
+     @debug_indexing
+     def _get_slice(self, s):
+         values = [self[i] for i in range(*s.indices(self._len))]
+         result = [_ for _ in zip(*values)]
+         return tuple(np.stack(_) for _ in result)
+
+     @debug_indexing
+     def __getitem__(self, n):
+         if isinstance(n, tuple):
+             return self._get_tuple(n)
+
+         if isinstance(n, slice):
+             return self._get_slice(n)
+
+         return tuple(self.forward[i] for i in self.indices[n])
+
+     @property
+     def frequency(self):
+         return self.forward.frequency
+
+     def tree(self):
+         return Node(self, [self.forward.tree()], expected_access=self.expected_access)
+
+     def subclass_metadata_specific(self):
+         return {"expected_access": self.expected_access}
+
+
+ class MissingDataset(Forwards):
+
+     def __init__(self, dataset, start, end):
+         super().__init__(dataset)
+         self.start = start
+         self.end = end
+
+         dates = []
+         date = start
+         while date <= end:
+             dates.append(date)
+             date += dataset.frequency
+
+         self._dates = np.array(dates, dtype="datetime64")
+         self._missing = set(range(len(dates)))
+
+     def __len__(self):
+         return len(self._dates)
+
+     @property
+     def dates(self):
+         return self._dates
+
+     @property
+     def missing(self):
+         return self._missing
+
+     def __getitem__(self, n):
+         raise MissingDateError(f"Date {self.dates[n]} is missing (index={n})")
+
+     def tree(self):
+         return Node(self, [self.forward.tree()], start=self.start, end=self.end)
+
+     def subclass_metadata_specific(self):
+         return {"start": self.start, "end": self.end}
anemoi/datasets/data/select.py
@@ -40,6 +40,12 @@ class Select(Forwards):
          # Forward other properties to the main dataset
          super().__init__(dataset)
 
+     def clone(self, dataset):
+         return self.__class__(dataset, self.indices, self.reason).mutate()
+
+     def mutate(self):
+         return self.forward.swap_with_parent(parent=self)
+
      @debug_indexing
      @expand_list_indexing
      def _get_tuple(self, index):
@@ -101,7 +107,7 @@ class Rename(Forwards):
      def __init__(self, dataset, rename):
          super().__init__(dataset)
          for n in rename:
-             assert n in dataset.variables
+             assert n in dataset.variables, n
          self._variables = [rename.get(v, v) for v in dataset.variables]
          self.rename = rename
 
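Select (and Subset, further below) gain clone and mutate so that a selection wrapped around a multi-dataset container can be pushed down onto each branch via swap_with_parent (see ZipBase in the new xy.py), and the Rename assertion now reports which variable name is unknown. A hedged usage sketch, assuming the usual open_dataset entry point; the path and variable names are hypothetical:

    from anemoi.datasets import open_dataset

    # Keep two variables and rename one of them. If a rename key is not a
    # variable of the dataset, the assertion now names the offending key.
    ds = open_dataset(
        "path/to/dataset.zarr",   # hypothetical path
        select=["2t", "msl"],     # hypothetical variable names
        rename={"2t": "t2m"},
    )
    print(ds.variables)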
anemoi/datasets/data/stores.py
@@ -13,6 +13,7 @@ from urllib.parse import urlparse
 
  import numpy as np
  import zarr
+ from anemoi.utils.dates import frequency_to_timedelta
 
  from . import MissingDateError
  from .dataset import Dataset
@@ -268,12 +269,11 @@ class Zarr(Dataset):
      @property
      def frequency(self):
          try:
-             return self.z.attrs["frequency"]
+             return frequency_to_timedelta(self.z.attrs["frequency"])
          except KeyError:
              LOG.warning("No 'frequency' in %r, computing from 'dates'", self)
              dates = self.dates
-             delta = dates[1].astype(object) - dates[0].astype(object)
-             return int(delta.total_seconds() / 3600)
+             return dates[1].astype(object) - dates[0].astype(object)
 
      @property
      def name_to_index(self):
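
The frequency property now returns a datetime.timedelta built with anemoi.utils.dates.frequency_to_timedelta, both when the Zarr attribute is present and when it is derived from the first two dates; callers that treated it as an integer number of hours need adjusting. A small sketch of the conversion, assuming the attribute holds a string such as "6h":

    import datetime

    from anemoi.utils.dates import frequency_to_timedelta

    frequency = frequency_to_timedelta("6h")
    assert frequency == datetime.timedelta(hours=6)

    # Recover the previous integer-hours convention where old code expects it.
    hours = int(frequency.total_seconds() // 3600)
    print(hours)  # 6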
anemoi/datasets/data/subset.py
@@ -9,6 +9,7 @@ import logging
  from functools import cached_property
 
  import numpy as np
+ from anemoi.utils.dates import frequency_to_timedelta
 
  from .debug import Node
  from .debug import Source
@@ -23,13 +24,51 @@ from .indexing import update_tuple
  LOG = logging.getLogger(__name__)
 
 
+ def _default(a, b, dates):
+     return [a, b]
+
+
+ def _start(a, b, dates):
+     from .misc import as_first_date
+
+     c = as_first_date(a, dates)
+     d = as_first_date(b, dates)
+     if c < d:
+         return b
+     else:
+         return a
+
+
+ def _end(a, b, dates):
+     from .misc import as_last_date
+
+     c = as_last_date(a, dates)
+     d = as_last_date(b, dates)
+     if c < d:
+         return a
+     else:
+         return b
+
+
+ def _combine_reasons(reason1, reason2, dates):
+
+     reason = reason1.copy()
+     for k, v in reason2.items():
+         if k not in reason:
+             reason[k] = v
+         else:
+             func = globals().get(f"_{k}", _default)
+             reason[k] = func(reason[k], v, dates)
+     return reason
+
+
  class Subset(Forwards):
      """Select a subset of the dates."""
 
      def __init__(self, dataset, indices, reason):
          while isinstance(dataset, Subset):
              indices = [dataset.indices[i] for i in indices]
-             reason = {**reason, **dataset.reason}
+             reason = _combine_reasons(reason, dataset.reason, dataset.dates)
              dataset = dataset.dataset
 
          self.dataset = dataset
@@ -39,6 +78,12 @@ class Subset(Forwards):
          # Forward other properties to the super dataset
          super().__init__(dataset)
 
+     def clone(self, dataset):
+         return self.__class__(dataset, self.indices, self.reason).mutate()
+
+     def mutate(self):
+         return self.forward.swap_with_parent(parent=self)
+
      @debug_indexing
      def __getitem__(self, n):
          if isinstance(n, tuple):
@@ -66,10 +111,8 @@ class Subset(Forwards):
      @expand_list_indexing
      def _get_tuple(self, n):
          index, changes = index_to_slices(n, self.shape)
-         # print('INDEX', index, changes)
          indices = [self.indices[i] for i in range(*index[0].indices(self._len))]
          indices = make_slice_or_index_from_list_or_tuple(indices)
-         # print('INDICES', indices)
          index, _ = update_tuple(index, 0, indices)
          result = self.dataset[index]
          result = apply_index_to_slices_changes(result, changes)
@@ -89,8 +132,7 @@ class Subset(Forwards):
      @cached_property
      def frequency(self):
          dates = self.dates
-         delta = dates[1].astype(object) - dates[0].astype(object)
-         return int(delta.total_seconds() / 3600)
+         return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
 
      def source(self, index):
          return Source(self, index, self.forward.source(index))
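
When nested Subsets collapse, their reason dictionaries are no longer merged with a plain dict update: _combine_reasons resolves each clashing key, and the _start and _end helpers keep the later of two start values and the earlier of two end values, so stacked date selections intersect instead of overwriting one another. A dependency-free sketch of that rule (plain datetimes stand in for the as_first_date / as_last_date resolution used in the package):

    from datetime import datetime

    def _start(a, b):
        return max(a, b)  # the later start wins

    def _end(a, b):
        return min(a, b)  # the earlier end wins

    def combine(reason1, reason2):
        combined = dict(reason1)
        for key, value in reason2.items():
            if key not in combined:
                combined[key] = value
            else:
                func = {"start": _start, "end": _end}.get(key, lambda a, b: [a, b])
                combined[key] = func(combined[key], value)
        return combined

    outer = {"start": datetime(2021, 1, 1)}
    inner = {"start": datetime(2020, 1, 1), "end": datetime(2021, 12, 31)}
    print(combine(outer, inner))
    # {'start': datetime(2021, 1, 1, 0, 0), 'end': datetime(2021, 12, 31, 0, 0)}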
anemoi/datasets/data/unchecked.py
@@ -104,22 +104,29 @@ class Unchecked(Combined):
      def shape(self):
          raise NotImplementedError()
 
-     @property
-     def dtype(self):
-         raise NotImplementedError()
+     # @property
+     # def field_shape(self):
+     #     return tuple(d.shape for d in self.datasets)
 
-     @property
-     def grids(self):
-         raise NotImplementedError()
+     # @property
+     # def latitudes(self):
+     #     return tuple(d.latitudes for d in self.datasets)
 
+     # @property
+     # def longitudes(self):
+     #     return tuple(d.longitudes for d in self.datasets)
 
- class Zip(Unchecked):
+     # @property
+     # def statistics(self):
+     #     return tuple(d.statistics for d in self.datasets)
 
-     def __len__(self):
-         return min(len(d) for d in self.datasets)
+     # @property
+     # def resolution(self):
+     #     return tuple(d.resolution for d in self.datasets)
 
-     def __getitem__(self, n):
-         return tuple(d[n] for d in self.datasets)
+     # @property
+     # def name_to_index(self):
+     #     return tuple(d.name_to_index for d in self.datasets)
 
      @cached_property
      def missing(self):
@@ -142,17 +149,8 @@ class Chain(ConcatMixin, Unchecked):
      def dates(self):
          raise NotImplementedError()
 
-
- def zip_factory(args, kwargs):
-
-     zip = kwargs.pop("zip")
-     assert len(args) == 0
-     assert isinstance(zip, (list, tuple))
-
-     datasets = [_open(e) for e in zip]
-     datasets, kwargs = _auto_adjust(datasets, kwargs)
-
-     return Zip(datasets)._subset(**kwargs)
+     def dataset_metadata(self):
+         return {"multiple": [d.dataset_metadata() for d in self.datasets]}
 
 
  def chain_factory(args, kwargs):
anemoi/datasets/data/xy.py (new file)
@@ -0,0 +1,125 @@
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+ # This software is licensed under the terms of the Apache Licence Version 2.0
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ # In applying this licence, ECMWF does not waive the privileges and immunities
+ # granted to it by virtue of its status as an intergovernmental organisation
+ # nor does it submit to any jurisdiction.
+
+ import logging
+ from functools import cached_property
+
+ from .debug import Node
+ from .forwards import Combined
+ from .misc import _auto_adjust
+ from .misc import _open
+
+ LOG = logging.getLogger(__name__)
+
+
+ class ZipBase(Combined):
+
+     def swap_with_parent(self, parent):
+         new_parents = [parent.clone(ds) for ds in self.datasets]
+         return self.clone(new_parents)
+
+     def clone(self, datasets):
+         return self.__class__(datasets)
+
+     def tree(self):
+         return Node(self, [d.tree() for d in self.datasets])
+
+     def __len__(self):
+         return min(len(d) for d in self.datasets)
+
+     def __getitem__(self, n):
+         return tuple(d[n] for d in self.datasets)
+
+     def check_same_resolution(self, d1, d2):
+         pass
+
+     def check_same_grid(self, d1, d2):
+         pass
+
+     def check_same_variables(self, d1, d2):
+         pass
+
+     @cached_property
+     def missing(self):
+         result = set()
+         for d in self.datasets:
+             result = result | d.missing
+         return result
+
+     @property
+     def shape(self):
+         return tuple(d.shape for d in self.datasets)
+
+     @property
+     def field_shape(self):
+         return tuple(d.shape for d in self.datasets)
+
+     @property
+     def latitudes(self):
+         return tuple(d.latitudes for d in self.datasets)
+
+     @property
+     def longitudes(self):
+         return tuple(d.longitudes for d in self.datasets)
+
+     @property
+     def dtype(self):
+         return tuple(d.dtype for d in self.datasets)
+
+     @property
+     def grids(self):
+         return tuple(d.grids for d in self.datasets)
+
+     @property
+     def statistics(self):
+         return tuple(d.statistics for d in self.datasets)
+
+     @property
+     def resolution(self):
+         return tuple(d.resolution for d in self.datasets)
+
+     @property
+     def name_to_index(self):
+         return tuple(d.name_to_index for d in self.datasets)
+
+
+ class Zip(ZipBase):
+     pass
+
+
+ class XY(ZipBase):
+     pass
+
+
+ def xy_factory(args, kwargs):
+
+     if "xy" in kwargs:
+         xy = kwargs.pop("xy")
+     else:
+         xy = [kwargs.pop("x"), kwargs.pop("y")]
+
+     assert len(args) == 0
+     assert isinstance(xy, (list, tuple))
+
+     datasets = [_open(e) for e in xy]
+     datasets, kwargs = _auto_adjust(datasets, kwargs)
+
+     assert len(datasets) == 2
+
+     return XY(datasets)._subset(**kwargs)
+
+
+ def zip_factory(args, kwargs):
+
+     zip = kwargs.pop("zip")
+     assert len(args) == 0
+     assert isinstance(zip, (list, tuple))
+
+     datasets = [_open(e) for e in zip]
+     datasets, kwargs = _auto_adjust(datasets, kwargs)
+
+     return Zip(datasets)._subset(**kwargs)
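
Zip moves here from unchecked.py and gains an XY twin; both are ZipBase containers that return per-dataset tuples for most properties and push wrappers such as Select or Subset down onto each branch through swap_with_parent. A hedged usage sketch, assuming the open_dataset entry point dispatches the x/y (or xy, zip) keywords to the factories above; the paths are hypothetical:

    from anemoi.datasets import open_dataset

    # Pair an input dataset with a target dataset; each item is an (x, y) tuple.
    ds = open_dataset(
        x="path/to/inputs.zarr",   # hypothetical path
        y="path/to/targets.zarr",  # hypothetical path
    )

    x_sample, y_sample = ds[0]
    print(len(ds))   # the shorter of the two lengths
    print(ds.shape)  # a tuple holding each dataset's shape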
anemoi/datasets/dates/__init__.py
@@ -9,25 +9,31 @@
  import datetime
  import warnings
 
+ # from anemoi.utils.dates import as_datetime
+ from anemoi.utils.dates import as_datetime
+ from anemoi.utils.dates import frequency_to_timedelta
+ from anemoi.utils.humanize import print_dates
 
- def no_time_zone(date):
-     return date.replace(tzinfo=None)
 
+ def extend(x):
 
- def frequency_to_hours(frequency):
-     if isinstance(frequency, int):
-         return frequency
-     assert isinstance(frequency, str), (type(frequency), frequency)
+     if isinstance(x, (list, tuple)):
+         for y in x:
+             yield from extend(y)
+         return
 
-     unit = frequency[-1].lower()
-     v = int(frequency[:-1])
-     return {"h": v, "d": v * 24}[unit]
-
-
- def normalize_date(x):
      if isinstance(x, str):
-         return no_time_zone(datetime.datetime.fromisoformat(x))
-     return x
+         if "/" in x:
+             start, end, step = x.split("/")
+             start = as_datetime(start)
+             end = as_datetime(end)
+             step = frequency_to_timedelta(step)
+             while start <= end:
+                 yield start
+                 start += datetime.timedelta(hours=step)
+             return
+
+     yield as_datetime(x)
 
 
  class Dates:
@@ -59,7 +65,7 @@ class Dates:
      def __init__(self, missing=None):
          if not missing:
              missing = []
-         self.missing = [normalize_date(x) for x in missing]
+         self.missing = list(extend(missing))
          if set(self.missing) - set(self.values):
              warnings.warn(f"Missing dates {self.missing} not in list.")
 
@@ -85,7 +91,7 @@
 
  class ValuesDates(Dates):
      def __init__(self, values, **kwargs):
-         self.values = sorted([no_time_zone(_) for _ in values])
+         self.values = sorted([as_datetime(_) for _ in values])
          super().__init__(**kwargs)
 
      def __repr__(self):
@@ -97,7 +103,8 @@
 
  class StartEndDates(Dates):
      def __init__(self, start, end, frequency=1, months=None, **kwargs):
-         frequency = frequency_to_hours(frequency)
+         frequency = frequency_to_timedelta(frequency)
+         assert isinstance(frequency, datetime.timedelta), frequency
 
          def _(x):
              if isinstance(x, str):
@@ -113,13 +120,13 @@
          if isinstance(end, datetime.date) and not isinstance(end, datetime.datetime):
              end = datetime.datetime(end.year, end.month, end.day)
 
-         start = no_time_zone(start)
-         end = no_time_zone(end)
+         start = as_datetime(start)
+         end = as_datetime(end)
 
          # if end <= start:
         #     raise ValueError(f"End date {end} must be after start date {start}")
 
-         increment = datetime.timedelta(hours=frequency)
+         increment = frequency
 
          self.start = start
          self.end = end
@@ -145,3 +152,9 @@
              "end": self.end.isoformat(),
              "frequency": f"{self.frequency}h",
          }
+
+
+ if __name__ == "__main__":
+     print_dates([datetime.datetime(2023, 1, 1, 0, 0)])
+     s = StartEndDates(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=1)
+     print_dates(list(s))
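
normalize_date and frequency_to_hours are replaced by helpers from anemoi.utils.dates, and the new extend generator accepts single dates, nested lists, and start/end/step range strings in the missing-dates configuration. A minimal re-implementation of the range expansion using only the standard library (in the package itself as_datetime and frequency_to_timedelta do the parsing):

    import datetime

    def expand(spec):
        # Expand a "start/end/step" string into datetimes (illustrative only).
        start_s, end_s, step_s = spec.split("/")
        start = datetime.datetime.fromisoformat(start_s)
        end = datetime.datetime.fromisoformat(end_s)
        step = datetime.timedelta(hours=int(step_s.rstrip("hH")))
        while start <= end:
            yield start
            start += step

    print(list(expand("2023-01-01T00:00/2023-01-01T18:00/6h")))
    # 00:00, 06:00, 12:00 and 18:00 on 2023-01-01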
anemoi/datasets/dates/groups.py
@@ -9,7 +9,7 @@
  import itertools
 
  from anemoi.datasets.dates import Dates
- from anemoi.datasets.dates import no_time_zone
+ from anemoi.datasets.dates import as_datetime
 
 
  class Groups:
@@ -67,7 +67,7 @@ class Groups:
 
  class Filter:
      def __init__(self, missing):
-         self.missing = [no_time_zone(m) for m in missing]
+         self.missing = [as_datetime(m) for m in missing]
 
      def __call__(self, dates):
          return [d for d in dates if d not in self.missing]