anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/cleanup.py +44 -0
- anemoi/datasets/commands/create.py +52 -21
- anemoi/datasets/commands/finalise-additions.py +45 -0
- anemoi/datasets/commands/finalise.py +39 -0
- anemoi/datasets/commands/init-additions.py +45 -0
- anemoi/datasets/commands/init.py +67 -0
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/load-additions.py +47 -0
- anemoi/datasets/commands/load.py +47 -0
- anemoi/datasets/commands/patch.py +39 -0
- anemoi/datasets/create/__init__.py +959 -146
- anemoi/datasets/create/check.py +5 -3
- anemoi/datasets/create/config.py +54 -2
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/grib.py +86 -1
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +8 -2
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
- anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
- anemoi/datasets/create/functions/sources/xarray/metadata.py +40 -40
- anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
- anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
- anemoi/datasets/create/input.py +62 -39
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/statistics/__init__.py +39 -23
- anemoi/datasets/create/utils.py +6 -2
- anemoi/datasets/data/__init__.py +1 -0
- anemoi/datasets/data/concat.py +46 -2
- anemoi/datasets/data/dataset.py +119 -34
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/forwards.py +17 -8
- anemoi/datasets/data/grids.py +17 -3
- anemoi/datasets/data/interpolate.py +133 -0
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/misc.py +56 -66
- anemoi/datasets/data/missing.py +240 -0
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/select.py +7 -1
- anemoi/datasets/data/stores.py +23 -10
- anemoi/datasets/data/subset.py +47 -5
- anemoi/datasets/data/unchecked.py +20 -22
- anemoi/datasets/data/xy.py +125 -0
- anemoi/datasets/dates/__init__.py +124 -95
- anemoi/datasets/dates/groups.py +85 -20
- anemoi/datasets/grids.py +66 -48
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +8 -17
- anemoi_datasets-0.5.0.dist-info/RECORD +105 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/loaders.py +0 -936
- anemoi_datasets-0.4.4.dist-info/RECORD +0 -86
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/stores.py
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
6
|
# nor does it submit to any jurisdiction.
|
|
7
7
|
|
|
8
|
+
|
|
8
9
|
import logging
|
|
9
10
|
import os
|
|
10
11
|
import warnings
|
|
@@ -13,6 +14,7 @@ from urllib.parse import urlparse
|
|
|
13
14
|
|
|
14
15
|
import numpy as np
|
|
15
16
|
import zarr
|
|
17
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
16
18
|
|
|
17
19
|
from . import MissingDateError
|
|
18
20
|
from .dataset import Dataset
|
|
@@ -82,6 +84,8 @@ class S3Store(ReadOnlyStore):
|
|
|
82
84
|
|
|
83
85
|
|
|
84
86
|
class DebugStore(ReadOnlyStore):
|
|
87
|
+
"""A store to debug the zarr loading."""
|
|
88
|
+
|
|
85
89
|
def __init__(self, store):
|
|
86
90
|
assert not isinstance(store, DebugStore)
|
|
87
91
|
self.store = store
|
|
@@ -147,6 +151,8 @@ def open_zarr(path, dont_fail=False, cache=None):
|
|
|
147
151
|
|
|
148
152
|
|
|
149
153
|
class Zarr(Dataset):
|
|
154
|
+
"""A zarr dataset."""
|
|
155
|
+
|
|
150
156
|
def __init__(self, path):
|
|
151
157
|
if isinstance(path, zarr.hierarchy.Group):
|
|
152
158
|
self.was_zarr = True
|
|
@@ -243,14 +249,20 @@ class Zarr(Dataset):
|
|
|
243
249
|
delta = self.frequency
|
|
244
250
|
if isinstance(delta, int):
|
|
245
251
|
delta = f"{delta}h"
|
|
246
|
-
from anemoi.
|
|
252
|
+
from anemoi.utils.dates import frequency_to_string
|
|
253
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
254
|
+
|
|
255
|
+
delta = frequency_to_timedelta(delta)
|
|
256
|
+
delta = frequency_to_string(delta)
|
|
257
|
+
|
|
258
|
+
def func(k):
|
|
259
|
+
return f"statistics_tendencies_{delta}_{k}"
|
|
247
260
|
|
|
248
|
-
func = TendenciesStatisticsAddition.final_storage_name_from_delta
|
|
249
261
|
return dict(
|
|
250
|
-
mean=self.z[func("mean"
|
|
251
|
-
stdev=self.z[func("stdev"
|
|
252
|
-
maximum=self.z[func("maximum"
|
|
253
|
-
minimum=self.z[func("minimum"
|
|
262
|
+
mean=self.z[func("mean")][:],
|
|
263
|
+
stdev=self.z[func("stdev")][:],
|
|
264
|
+
maximum=self.z[func("maximum")][:],
|
|
265
|
+
minimum=self.z[func("minimum")][:],
|
|
254
266
|
)
|
|
255
267
|
|
|
256
268
|
@property
|
|
@@ -268,12 +280,11 @@ class Zarr(Dataset):
|
|
|
268
280
|
@property
|
|
269
281
|
def frequency(self):
|
|
270
282
|
try:
|
|
271
|
-
return self.z.attrs["frequency"]
|
|
283
|
+
return frequency_to_timedelta(self.z.attrs["frequency"])
|
|
272
284
|
except KeyError:
|
|
273
285
|
LOG.warning("No 'frequency' in %r, computing from 'dates'", self)
|
|
274
286
|
dates = self.dates
|
|
275
|
-
|
|
276
|
-
return int(delta.total_seconds() / 3600)
|
|
287
|
+
return dates[1].astype(object) - dates[0].astype(object)
|
|
277
288
|
|
|
278
289
|
@property
|
|
279
290
|
def name_to_index(self):
|
|
@@ -322,11 +333,13 @@ class Zarr(Dataset):
|
|
|
322
333
|
|
|
323
334
|
|
|
324
335
|
class ZarrWithMissingDates(Zarr):
|
|
336
|
+
"""A zarr dataset with missing dates."""
|
|
337
|
+
|
|
325
338
|
def __init__(self, path):
|
|
326
339
|
super().__init__(path)
|
|
327
340
|
|
|
328
341
|
missing_dates = self.z.attrs.get("missing_dates", [])
|
|
329
|
-
missing_dates = [np.datetime64(x) for x in missing_dates]
|
|
342
|
+
missing_dates = set([np.datetime64(x) for x in missing_dates])
|
|
330
343
|
self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
|
|
331
344
|
self.missing = set(self.missing_to_dates)
|
|
332
345
|
|
anemoi/datasets/data/subset.py
CHANGED
|
@@ -9,6 +9,7 @@ import logging
|
|
|
9
9
|
from functools import cached_property
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
12
13
|
|
|
13
14
|
from .debug import Node
|
|
14
15
|
from .debug import Source
|
|
@@ -23,13 +24,51 @@ from .indexing import update_tuple
|
|
|
23
24
|
LOG = logging.getLogger(__name__)
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
def _default(a, b, dates):
|
|
28
|
+
return [a, b]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _start(a, b, dates):
|
|
32
|
+
from .misc import as_first_date
|
|
33
|
+
|
|
34
|
+
c = as_first_date(a, dates)
|
|
35
|
+
d = as_first_date(b, dates)
|
|
36
|
+
if c < d:
|
|
37
|
+
return b
|
|
38
|
+
else:
|
|
39
|
+
return a
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _end(a, b, dates):
|
|
43
|
+
from .misc import as_last_date
|
|
44
|
+
|
|
45
|
+
c = as_last_date(a, dates)
|
|
46
|
+
d = as_last_date(b, dates)
|
|
47
|
+
if c < d:
|
|
48
|
+
return a
|
|
49
|
+
else:
|
|
50
|
+
return b
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _combine_reasons(reason1, reason2, dates):
|
|
54
|
+
|
|
55
|
+
reason = reason1.copy()
|
|
56
|
+
for k, v in reason2.items():
|
|
57
|
+
if k not in reason:
|
|
58
|
+
reason[k] = v
|
|
59
|
+
else:
|
|
60
|
+
func = globals().get(f"_{k}", _default)
|
|
61
|
+
reason[k] = func(reason[k], v, dates)
|
|
62
|
+
return reason
|
|
63
|
+
|
|
64
|
+
|
|
26
65
|
class Subset(Forwards):
|
|
27
66
|
"""Select a subset of the dates."""
|
|
28
67
|
|
|
29
68
|
def __init__(self, dataset, indices, reason):
|
|
30
69
|
while isinstance(dataset, Subset):
|
|
31
70
|
indices = [dataset.indices[i] for i in indices]
|
|
32
|
-
reason =
|
|
71
|
+
reason = _combine_reasons(reason, dataset.reason, dataset.dates)
|
|
33
72
|
dataset = dataset.dataset
|
|
34
73
|
|
|
35
74
|
self.dataset = dataset
|
|
@@ -39,6 +78,12 @@ class Subset(Forwards):
|
|
|
39
78
|
# Forward other properties to the super dataset
|
|
40
79
|
super().__init__(dataset)
|
|
41
80
|
|
|
81
|
+
def clone(self, dataset):
|
|
82
|
+
return self.__class__(dataset, self.indices, self.reason).mutate()
|
|
83
|
+
|
|
84
|
+
def mutate(self):
|
|
85
|
+
return self.forward.swap_with_parent(parent=self)
|
|
86
|
+
|
|
42
87
|
@debug_indexing
|
|
43
88
|
def __getitem__(self, n):
|
|
44
89
|
if isinstance(n, tuple):
|
|
@@ -66,10 +111,8 @@ class Subset(Forwards):
|
|
|
66
111
|
@expand_list_indexing
|
|
67
112
|
def _get_tuple(self, n):
|
|
68
113
|
index, changes = index_to_slices(n, self.shape)
|
|
69
|
-
# print('INDEX', index, changes)
|
|
70
114
|
indices = [self.indices[i] for i in range(*index[0].indices(self._len))]
|
|
71
115
|
indices = make_slice_or_index_from_list_or_tuple(indices)
|
|
72
|
-
# print('INDICES', indices)
|
|
73
116
|
index, _ = update_tuple(index, 0, indices)
|
|
74
117
|
result = self.dataset[index]
|
|
75
118
|
result = apply_index_to_slices_changes(result, changes)
|
|
@@ -89,8 +132,7 @@ class Subset(Forwards):
|
|
|
89
132
|
@cached_property
|
|
90
133
|
def frequency(self):
|
|
91
134
|
dates = self.dates
|
|
92
|
-
|
|
93
|
-
return int(delta.total_seconds() / 3600)
|
|
135
|
+
return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
|
|
94
136
|
|
|
95
137
|
def source(self, index):
|
|
96
138
|
return Source(self, index, self.forward.source(index))
|
|
@@ -104,22 +104,29 @@ class Unchecked(Combined):
|
|
|
104
104
|
def shape(self):
|
|
105
105
|
raise NotImplementedError()
|
|
106
106
|
|
|
107
|
-
@property
|
|
108
|
-
def
|
|
109
|
-
|
|
107
|
+
# @property
|
|
108
|
+
# def field_shape(self):
|
|
109
|
+
# return tuple(d.shape for d in self.datasets)
|
|
110
110
|
|
|
111
|
-
@property
|
|
112
|
-
def
|
|
113
|
-
|
|
111
|
+
# @property
|
|
112
|
+
# def latitudes(self):
|
|
113
|
+
# return tuple(d.latitudes for d in self.datasets)
|
|
114
114
|
|
|
115
|
+
# @property
|
|
116
|
+
# def longitudes(self):
|
|
117
|
+
# return tuple(d.longitudes for d in self.datasets)
|
|
115
118
|
|
|
116
|
-
|
|
119
|
+
# @property
|
|
120
|
+
# def statistics(self):
|
|
121
|
+
# return tuple(d.statistics for d in self.datasets)
|
|
117
122
|
|
|
118
|
-
|
|
119
|
-
|
|
123
|
+
# @property
|
|
124
|
+
# def resolution(self):
|
|
125
|
+
# return tuple(d.resolution for d in self.datasets)
|
|
120
126
|
|
|
121
|
-
|
|
122
|
-
|
|
127
|
+
# @property
|
|
128
|
+
# def name_to_index(self):
|
|
129
|
+
# return tuple(d.name_to_index for d in self.datasets)
|
|
123
130
|
|
|
124
131
|
@cached_property
|
|
125
132
|
def missing(self):
|
|
@@ -142,17 +149,8 @@ class Chain(ConcatMixin, Unchecked):
|
|
|
142
149
|
def dates(self):
|
|
143
150
|
raise NotImplementedError()
|
|
144
151
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
zip = kwargs.pop("zip")
|
|
149
|
-
assert len(args) == 0
|
|
150
|
-
assert isinstance(zip, (list, tuple))
|
|
151
|
-
|
|
152
|
-
datasets = [_open(e) for e in zip]
|
|
153
|
-
datasets, kwargs = _auto_adjust(datasets, kwargs)
|
|
154
|
-
|
|
155
|
-
return Zip(datasets)._subset(**kwargs)
|
|
152
|
+
def dataset_metadata(self):
|
|
153
|
+
return {"multiple": [d.dataset_metadata() for d in self.datasets]}
|
|
156
154
|
|
|
157
155
|
|
|
158
156
|
def chain_factory(args, kwargs):
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
|
|
2
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
4
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
|
+
# nor does it submit to any jurisdiction.
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from functools import cached_property
|
|
10
|
+
|
|
11
|
+
from .debug import Node
|
|
12
|
+
from .forwards import Combined
|
|
13
|
+
from .misc import _auto_adjust
|
|
14
|
+
from .misc import _open
|
|
15
|
+
|
|
16
|
+
LOG = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ZipBase(Combined):
|
|
20
|
+
|
|
21
|
+
def swap_with_parent(self, parent):
|
|
22
|
+
new_parents = [parent.clone(ds) for ds in self.datasets]
|
|
23
|
+
return self.clone(new_parents)
|
|
24
|
+
|
|
25
|
+
def clone(self, datasets):
|
|
26
|
+
return self.__class__(datasets)
|
|
27
|
+
|
|
28
|
+
def tree(self):
|
|
29
|
+
return Node(self, [d.tree() for d in self.datasets])
|
|
30
|
+
|
|
31
|
+
def __len__(self):
|
|
32
|
+
return min(len(d) for d in self.datasets)
|
|
33
|
+
|
|
34
|
+
def __getitem__(self, n):
|
|
35
|
+
return tuple(d[n] for d in self.datasets)
|
|
36
|
+
|
|
37
|
+
def check_same_resolution(self, d1, d2):
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
def check_same_grid(self, d1, d2):
|
|
41
|
+
pass
|
|
42
|
+
|
|
43
|
+
def check_same_variables(self, d1, d2):
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
@cached_property
|
|
47
|
+
def missing(self):
|
|
48
|
+
result = set()
|
|
49
|
+
for d in self.datasets:
|
|
50
|
+
result = result | d.missing
|
|
51
|
+
return result
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def shape(self):
|
|
55
|
+
return tuple(d.shape for d in self.datasets)
|
|
56
|
+
|
|
57
|
+
@property
|
|
58
|
+
def field_shape(self):
|
|
59
|
+
return tuple(d.shape for d in self.datasets)
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def latitudes(self):
|
|
63
|
+
return tuple(d.latitudes for d in self.datasets)
|
|
64
|
+
|
|
65
|
+
@property
|
|
66
|
+
def longitudes(self):
|
|
67
|
+
return tuple(d.longitudes for d in self.datasets)
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def dtype(self):
|
|
71
|
+
return tuple(d.dtype for d in self.datasets)
|
|
72
|
+
|
|
73
|
+
@property
|
|
74
|
+
def grids(self):
|
|
75
|
+
return tuple(d.grids for d in self.datasets)
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
def statistics(self):
|
|
79
|
+
return tuple(d.statistics for d in self.datasets)
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def resolution(self):
|
|
83
|
+
return tuple(d.resolution for d in self.datasets)
|
|
84
|
+
|
|
85
|
+
@property
|
|
86
|
+
def name_to_index(self):
|
|
87
|
+
return tuple(d.name_to_index for d in self.datasets)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Zip(ZipBase):
|
|
91
|
+
pass
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class XY(ZipBase):
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def xy_factory(args, kwargs):
|
|
99
|
+
|
|
100
|
+
if "xy" in kwargs:
|
|
101
|
+
xy = kwargs.pop("xy")
|
|
102
|
+
else:
|
|
103
|
+
xy = [kwargs.pop("x"), kwargs.pop("y")]
|
|
104
|
+
|
|
105
|
+
assert len(args) == 0
|
|
106
|
+
assert isinstance(xy, (list, tuple))
|
|
107
|
+
|
|
108
|
+
datasets = [_open(e) for e in xy]
|
|
109
|
+
datasets, kwargs = _auto_adjust(datasets, kwargs)
|
|
110
|
+
|
|
111
|
+
assert len(datasets) == 2
|
|
112
|
+
|
|
113
|
+
return XY(datasets)._subset(**kwargs)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def zip_factory(args, kwargs):
|
|
117
|
+
|
|
118
|
+
zip = kwargs.pop("zip")
|
|
119
|
+
assert len(args) == 0
|
|
120
|
+
assert isinstance(zip, (list, tuple))
|
|
121
|
+
|
|
122
|
+
datasets = [_open(e) for e in zip]
|
|
123
|
+
datasets, kwargs = _auto_adjust(datasets, kwargs)
|
|
124
|
+
|
|
125
|
+
return Zip(datasets)._subset(**kwargs)
|
|
@@ -9,64 +9,12 @@
|
|
|
9
9
|
import datetime
|
|
10
10
|
import warnings
|
|
11
11
|
|
|
12
|
+
# from anemoi.utils.dates import as_datetime
|
|
13
|
+
from anemoi.utils.dates import DateTimes
|
|
12
14
|
from anemoi.utils.dates import as_datetime
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
dates = sorted(dates)
|
|
17
|
-
if len(dates) < 3:
|
|
18
|
-
yield dates
|
|
19
|
-
return
|
|
20
|
-
|
|
21
|
-
prev = first = dates.pop(0)
|
|
22
|
-
curr = dates.pop(0)
|
|
23
|
-
delta = curr - prev
|
|
24
|
-
while curr - prev == delta:
|
|
25
|
-
prev = curr
|
|
26
|
-
if not dates:
|
|
27
|
-
break
|
|
28
|
-
curr = dates.pop(0)
|
|
29
|
-
|
|
30
|
-
yield (first, prev, delta)
|
|
31
|
-
if dates:
|
|
32
|
-
yield from _compress_dates([curr] + dates)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def compress_dates(dates):
|
|
36
|
-
dates = [as_datetime(_) for _ in dates]
|
|
37
|
-
result = []
|
|
38
|
-
|
|
39
|
-
for n in _compress_dates(dates):
|
|
40
|
-
if isinstance(n, list):
|
|
41
|
-
result.extend([str(_) for _ in n])
|
|
42
|
-
else:
|
|
43
|
-
result.append(" ".join([str(n[0]), "to", str(n[1]), "by", str(n[2])]))
|
|
44
|
-
|
|
45
|
-
return result
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def print_dates(dates):
|
|
49
|
-
print(compress_dates(dates))
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def no_time_zone(date):
|
|
53
|
-
return date.replace(tzinfo=None)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def frequency_to_hours(frequency):
|
|
57
|
-
if isinstance(frequency, int):
|
|
58
|
-
return frequency
|
|
59
|
-
assert isinstance(frequency, str), (type(frequency), frequency)
|
|
60
|
-
|
|
61
|
-
unit = frequency[-1].lower()
|
|
62
|
-
v = int(frequency[:-1])
|
|
63
|
-
return {"h": v, "d": v * 24}[unit]
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def normalize_date(x):
|
|
67
|
-
if isinstance(x, str):
|
|
68
|
-
return no_time_zone(datetime.datetime.fromisoformat(x))
|
|
69
|
-
return x
|
|
15
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
16
|
+
from anemoi.utils.hindcasts import HindcastDatesTimes
|
|
17
|
+
from anemoi.utils.humanize import print_dates
|
|
70
18
|
|
|
71
19
|
|
|
72
20
|
def extend(x):
|
|
@@ -79,37 +27,37 @@ def extend(x):
|
|
|
79
27
|
if isinstance(x, str):
|
|
80
28
|
if "/" in x:
|
|
81
29
|
start, end, step = x.split("/")
|
|
82
|
-
start =
|
|
83
|
-
end =
|
|
84
|
-
step =
|
|
30
|
+
start = as_datetime(start)
|
|
31
|
+
end = as_datetime(end)
|
|
32
|
+
step = frequency_to_timedelta(step)
|
|
85
33
|
while start <= end:
|
|
86
34
|
yield start
|
|
87
|
-
start +=
|
|
35
|
+
start += step
|
|
88
36
|
return
|
|
89
37
|
|
|
90
|
-
yield
|
|
38
|
+
yield as_datetime(x)
|
|
91
39
|
|
|
92
40
|
|
|
93
|
-
class
|
|
41
|
+
class DatesProvider:
|
|
94
42
|
"""Base class for date generation.
|
|
95
43
|
|
|
96
|
-
>>>
|
|
44
|
+
>>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
|
|
97
45
|
[datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 2, 0, 0)]
|
|
98
46
|
|
|
99
|
-
>>>
|
|
47
|
+
>>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-03 00:00", "frequency": "18h"}).values
|
|
100
48
|
[datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 18, 0), datetime.datetime(2023, 1, 2, 12, 0)]
|
|
101
49
|
|
|
102
|
-
>>>
|
|
50
|
+
>>> DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=6).as_dict()
|
|
103
51
|
{'start': '2023-01-01T00:00:00', 'end': '2023-01-02T00:00:00', 'frequency': '6h'}
|
|
104
52
|
|
|
105
|
-
>>> len(
|
|
53
|
+
>>> len(DatesProvider.from_config(start="2023-01-01 00:00", end="2023-01-02 00:00", frequency=12))
|
|
106
54
|
3
|
|
107
|
-
>>> len(
|
|
55
|
+
>>> len(DatesProvider.from_config(start="2023-01-01 00:00",
|
|
108
56
|
... end="2023-01-02 00:00",
|
|
109
57
|
... frequency=12,
|
|
110
58
|
... missing=["2023-01-01 12:00"]))
|
|
111
59
|
3
|
|
112
|
-
>>> len(
|
|
60
|
+
>>> len(DatesProvider.from_config(start="2023-01-01 00:00",
|
|
113
61
|
... end="2023-01-02 00:00",
|
|
114
62
|
... frequency=12,
|
|
115
63
|
... missing=["2099-01-01 12:00"]))
|
|
@@ -121,12 +69,18 @@ class Dates:
|
|
|
121
69
|
missing = []
|
|
122
70
|
self.missing = list(extend(missing))
|
|
123
71
|
if set(self.missing) - set(self.values):
|
|
124
|
-
|
|
72
|
+
diff = set(self.missing) - set(self.values)
|
|
73
|
+
warnings.warn(f"Missing dates {len(diff)=} not in list.")
|
|
125
74
|
|
|
126
75
|
@classmethod
|
|
127
76
|
def from_config(cls, **kwargs):
|
|
77
|
+
|
|
78
|
+
if kwargs.pop("hindcasts", False):
|
|
79
|
+
return HindcastsDates(**kwargs)
|
|
80
|
+
|
|
128
81
|
if "values" in kwargs:
|
|
129
82
|
return ValuesDates(**kwargs)
|
|
83
|
+
|
|
130
84
|
return StartEndDates(**kwargs)
|
|
131
85
|
|
|
132
86
|
def __iter__(self):
|
|
@@ -143,9 +97,9 @@ class Dates:
|
|
|
143
97
|
return f"📅 {self.values[0]} ... {self.values[-1]}"
|
|
144
98
|
|
|
145
99
|
|
|
146
|
-
class ValuesDates(
|
|
100
|
+
class ValuesDates(DatesProvider):
|
|
147
101
|
def __init__(self, values, **kwargs):
|
|
148
|
-
self.values = sorted([
|
|
102
|
+
self.values = sorted([as_datetime(_) for _ in values])
|
|
149
103
|
super().__init__(**kwargs)
|
|
150
104
|
|
|
151
105
|
def __repr__(self):
|
|
@@ -155,9 +109,11 @@ class ValuesDates(Dates):
|
|
|
155
109
|
return {"values": self.values[0]}
|
|
156
110
|
|
|
157
111
|
|
|
158
|
-
class StartEndDates(
|
|
159
|
-
def __init__(self, start, end, frequency=1,
|
|
160
|
-
|
|
112
|
+
class StartEndDates(DatesProvider):
|
|
113
|
+
def __init__(self, start, end, frequency=1, **kwargs):
|
|
114
|
+
|
|
115
|
+
frequency = frequency_to_timedelta(frequency)
|
|
116
|
+
assert isinstance(frequency, datetime.timedelta), frequency
|
|
161
117
|
|
|
162
118
|
def _(x):
|
|
163
119
|
if isinstance(x, str):
|
|
@@ -173,38 +129,111 @@ class StartEndDates(Dates):
|
|
|
173
129
|
if isinstance(end, datetime.date) and not isinstance(end, datetime.datetime):
|
|
174
130
|
end = datetime.datetime(end.year, end.month, end.day)
|
|
175
131
|
|
|
176
|
-
start =
|
|
177
|
-
end =
|
|
178
|
-
|
|
179
|
-
# if end <= start:
|
|
180
|
-
# raise ValueError(f"End date {end} must be after start date {start}")
|
|
181
|
-
|
|
182
|
-
increment = datetime.timedelta(hours=frequency)
|
|
132
|
+
start = as_datetime(start)
|
|
133
|
+
end = as_datetime(end)
|
|
183
134
|
|
|
184
135
|
self.start = start
|
|
185
136
|
self.end = end
|
|
186
137
|
self.frequency = frequency
|
|
187
138
|
|
|
188
|
-
|
|
189
|
-
self.values = []
|
|
190
|
-
while date <= end:
|
|
139
|
+
missing = kwargs.pop("missing", [])
|
|
191
140
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
date += increment
|
|
195
|
-
continue
|
|
141
|
+
self.values = list(DateTimes(start, end, increment=frequency, **kwargs))
|
|
142
|
+
self.kwargs = kwargs
|
|
196
143
|
|
|
197
|
-
|
|
198
|
-
date += increment
|
|
199
|
-
|
|
200
|
-
super().__init__(**kwargs)
|
|
144
|
+
super().__init__(missing=missing)
|
|
201
145
|
|
|
202
146
|
def as_dict(self):
|
|
203
147
|
return {
|
|
204
148
|
"start": self.start.isoformat(),
|
|
205
149
|
"end": self.end.isoformat(),
|
|
206
|
-
"frequency":
|
|
207
|
-
}
|
|
150
|
+
"frequency": frequency_to_string(self.frequency),
|
|
151
|
+
}.update(self.kwargs)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Hindcast:
|
|
155
|
+
|
|
156
|
+
def __init__(self, date, refdate, hdate, step):
|
|
157
|
+
self.date = date
|
|
158
|
+
self.refdate = refdate
|
|
159
|
+
self.hdate = hdate
|
|
160
|
+
self.step = step
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class HindcastsDates(DatesProvider):
|
|
164
|
+
def __init__(self, start, end, steps=[0], years=20, **kwargs):
|
|
165
|
+
|
|
166
|
+
if not isinstance(start, list):
|
|
167
|
+
start = [start]
|
|
168
|
+
end = [end]
|
|
169
|
+
|
|
170
|
+
reference_dates = []
|
|
171
|
+
for s, e in zip(start, end):
|
|
172
|
+
reference_dates.extend(list(DateTimes(s, e, increment=24, **kwargs)))
|
|
173
|
+
# reference_dates = list(DateTimes(start, end, increment=24, **kwargs))
|
|
174
|
+
dates = []
|
|
175
|
+
|
|
176
|
+
seen = {}
|
|
177
|
+
|
|
178
|
+
for hdate, refdate in HindcastDatesTimes(reference_dates=reference_dates, years=years):
|
|
179
|
+
assert refdate - hdate >= datetime.timedelta(days=365), (refdate - hdate, refdate, hdate)
|
|
180
|
+
for step in steps:
|
|
181
|
+
|
|
182
|
+
date = hdate + datetime.timedelta(hours=step)
|
|
183
|
+
|
|
184
|
+
if date in seen:
|
|
185
|
+
raise ValueError(f"Duplicate date {date}={hdate}+{step} for {refdate} and {seen[date]}")
|
|
186
|
+
|
|
187
|
+
seen[date] = Hindcast(date, refdate, hdate, step)
|
|
188
|
+
|
|
189
|
+
assert refdate - date > datetime.timedelta(days=360), (refdate - date, refdate, date, hdate, step)
|
|
190
|
+
|
|
191
|
+
dates.append(date)
|
|
192
|
+
|
|
193
|
+
dates = sorted(dates)
|
|
194
|
+
|
|
195
|
+
mindelta = None
|
|
196
|
+
for a, b in zip(dates, dates[1:]):
|
|
197
|
+
delta = b - a
|
|
198
|
+
assert isinstance(delta, datetime.timedelta), delta
|
|
199
|
+
if mindelta is None:
|
|
200
|
+
mindelta = delta
|
|
201
|
+
else:
|
|
202
|
+
mindelta = min(mindelta, delta)
|
|
203
|
+
|
|
204
|
+
self.frequency = mindelta
|
|
205
|
+
assert mindelta.total_seconds() > 0, mindelta
|
|
206
|
+
|
|
207
|
+
print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
|
|
208
|
+
|
|
209
|
+
# Use all values between start and end by frequency, and set the ones that are missing
|
|
210
|
+
self.values = []
|
|
211
|
+
missing = []
|
|
212
|
+
date = dates[0]
|
|
213
|
+
last = date
|
|
214
|
+
print("------", date, dates[-1])
|
|
215
|
+
dateset = set(dates)
|
|
216
|
+
while date <= dates[-1]:
|
|
217
|
+
self.values.append(date)
|
|
218
|
+
if date not in dateset:
|
|
219
|
+
missing.append(date)
|
|
220
|
+
seen[date] = seen[last]
|
|
221
|
+
else:
|
|
222
|
+
last = date
|
|
223
|
+
date = date + mindelta
|
|
224
|
+
|
|
225
|
+
self.mapping = seen
|
|
226
|
+
|
|
227
|
+
print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", self.values[0], self.values[-1], mindelta)
|
|
228
|
+
print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", f"{len(self.values)=} - {len(missing)=}")
|
|
229
|
+
|
|
230
|
+
super().__init__(missing=missing)
|
|
231
|
+
|
|
232
|
+
def __repr__(self):
|
|
233
|
+
return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"
|
|
234
|
+
|
|
235
|
+
def as_dict(self):
|
|
236
|
+
return {"hindcasts": self.hindcasts}
|
|
208
237
|
|
|
209
238
|
|
|
210
239
|
if __name__ == "__main__":
|