anemoi-datasets 0.5.7__py3-none-any.whl → 0.5.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +11 -3
- anemoi/datasets/__main__.py +2 -3
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/__init__.py +2 -3
- anemoi/datasets/commands/cleanup.py +9 -0
- anemoi/datasets/commands/compare.py +3 -3
- anemoi/datasets/commands/copy.py +38 -68
- anemoi/datasets/commands/create.py +20 -5
- anemoi/datasets/commands/finalise-additions.py +9 -0
- anemoi/datasets/commands/finalise.py +9 -0
- anemoi/datasets/commands/init-additions.py +9 -0
- anemoi/datasets/commands/init.py +9 -0
- anemoi/datasets/commands/inspect.py +3 -1
- anemoi/datasets/commands/load-additions.py +9 -0
- anemoi/datasets/commands/load.py +9 -0
- anemoi/datasets/commands/patch.py +9 -0
- anemoi/datasets/commands/publish.py +9 -0
- anemoi/datasets/commands/scan.py +9 -0
- anemoi/datasets/compute/__init__.py +8 -0
- anemoi/datasets/compute/recentre.py +3 -2
- anemoi/datasets/create/__init__.py +62 -12
- anemoi/datasets/create/check.py +4 -3
- anemoi/datasets/create/chunks.py +3 -2
- anemoi/datasets/create/config.py +5 -5
- anemoi/datasets/create/functions/__init__.py +22 -7
- anemoi/datasets/create/functions/filters/__init__.py +2 -1
- anemoi/datasets/create/functions/filters/empty.py +3 -2
- anemoi/datasets/create/functions/filters/noop.py +2 -2
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/rename.py +16 -11
- anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
- anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
- anemoi/datasets/create/functions/sources/__init__.py +2 -2
- anemoi/datasets/create/functions/sources/accumulations.py +10 -4
- anemoi/datasets/create/functions/sources/constants.py +3 -2
- anemoi/datasets/create/functions/sources/empty.py +3 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -2
- anemoi/datasets/create/functions/sources/grib.py +8 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
- anemoi/datasets/create/functions/sources/mars.py +97 -17
- anemoi/datasets/create/functions/sources/netcdf.py +3 -2
- anemoi/datasets/create/functions/sources/opendap.py +2 -2
- anemoi/datasets/create/functions/sources/recentre.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -2
- anemoi/datasets/create/functions/sources/tendencies.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -3
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/field.py +6 -5
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -4
- anemoi/datasets/create/functions/sources/xarray/flavour.py +2 -2
- anemoi/datasets/create/functions/sources/xarray/grid.py +2 -2
- anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
- anemoi/datasets/create/functions/sources/xarray/time.py +2 -2
- anemoi/datasets/create/functions/sources/xarray/variable.py +6 -9
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
- anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
- anemoi/datasets/create/functions/sources/zenodo.py +2 -2
- anemoi/datasets/create/input/__init__.py +3 -17
- anemoi/datasets/create/input/action.py +3 -8
- anemoi/datasets/create/input/concat.py +3 -2
- anemoi/datasets/create/input/context.py +3 -8
- anemoi/datasets/create/input/data_sources.py +3 -9
- anemoi/datasets/create/input/empty.py +3 -9
- anemoi/datasets/create/input/filter.py +3 -9
- anemoi/datasets/create/input/function.py +3 -9
- anemoi/datasets/create/input/join.py +3 -2
- anemoi/datasets/create/input/misc.py +3 -8
- anemoi/datasets/create/input/pipe.py +9 -3
- anemoi/datasets/create/input/repeated_dates.py +14 -8
- anemoi/datasets/create/input/result.py +154 -12
- anemoi/datasets/create/input/step.py +4 -9
- anemoi/datasets/create/input/template.py +3 -2
- anemoi/datasets/create/input/trace.py +3 -2
- anemoi/datasets/create/patch.py +9 -1
- anemoi/datasets/create/persistent.py +3 -2
- anemoi/datasets/create/size.py +3 -2
- anemoi/datasets/create/statistics/__init__.py +3 -2
- anemoi/datasets/create/statistics/summary.py +3 -2
- anemoi/datasets/create/utils.py +15 -2
- anemoi/datasets/create/writer.py +3 -2
- anemoi/datasets/create/zarr.py +3 -2
- anemoi/datasets/data/__init__.py +27 -1
- anemoi/datasets/data/concat.py +5 -1
- anemoi/datasets/data/dataset.py +216 -37
- anemoi/datasets/data/debug.py +4 -1
- anemoi/datasets/data/ensemble.py +4 -1
- anemoi/datasets/data/fill_missing.py +165 -0
- anemoi/datasets/data/forwards.py +23 -1
- anemoi/datasets/data/grids.py +236 -58
- anemoi/datasets/data/indexing.py +4 -1
- anemoi/datasets/data/interpolate.py +4 -1
- anemoi/datasets/data/join.py +12 -9
- anemoi/datasets/data/masked.py +36 -10
- anemoi/datasets/data/merge.py +180 -0
- anemoi/datasets/data/misc.py +18 -3
- anemoi/datasets/data/missing.py +4 -1
- anemoi/datasets/data/rescale.py +4 -1
- anemoi/datasets/data/select.py +4 -1
- anemoi/datasets/data/statistics.py +4 -1
- anemoi/datasets/data/stores.py +66 -3
- anemoi/datasets/data/subset.py +6 -1
- anemoi/datasets/data/unchecked.py +4 -1
- anemoi/datasets/data/xy.py +20 -5
- anemoi/datasets/dates/__init__.py +9 -7
- anemoi/datasets/dates/groups.py +4 -2
- anemoi/datasets/grids.py +86 -2
- anemoi/datasets/testing.py +3 -2
- anemoi/datasets/utils/__init__.py +8 -0
- anemoi/datasets/utils/fields.py +2 -2
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/METADATA +11 -29
- anemoi_datasets-0.5.11.dist-info/RECORD +123 -0
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/WHEEL +1 -1
- anemoi/datasets/fields.py +0 -66
- anemoi_datasets-0.5.7.dist-info/RECORD +0 -122
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from functools import cached_property
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
from . import MissingDateError
|
|
17
|
+
from .debug import Node
|
|
18
|
+
from .debug import debug_indexing
|
|
19
|
+
from .forwards import Combined
|
|
20
|
+
from .indexing import apply_index_to_slices_changes
|
|
21
|
+
from .indexing import expand_list_indexing
|
|
22
|
+
from .indexing import index_to_slices
|
|
23
|
+
from .indexing import update_tuple
|
|
24
|
+
from .misc import _auto_adjust
|
|
25
|
+
from .misc import _open
|
|
26
|
+
|
|
27
|
+
LOG = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Merge(Combined):
    """Combine datasets whose dates interleave into one chronological dataset.

    Example:
        dataset A provides d0 d2 d4 d6 ...
        dataset B provides d1 d3 d5 d7 ...
        the merged dataset yields d0 d1 d2 d3 ...

    When ``allow_gaps_in_dates`` is true, dates covered by no dataset are kept
    as missing entries instead of raising an error.
    """

    def __init__(self, datasets, allow_gaps_in_dates=False):
        super().__init__(datasets)

        self.allow_gaps_in_dates = allow_gaps_in_dates

        # Map each date to the (dataset_index, row_index) that provides it.
        dates = {}

        for i, d in enumerate(datasets):
            for j, date in enumerate(d.dates):
                date = date.astype(object)

                if date not in dates:
                    dates[date] = (i, j)
                    continue

                d1 = datasets[dates[date][0]]  # Currently selected provider
                d2 = datasets[i]  # Candidate provider

                # The candidate's entry is missing: no real conflict, keep the
                # previously selected provider.
                if j in d2.missing:
                    continue

                # The previously selected entry is missing: prefer the candidate.
                k = dates[date][1]
                if k in d1.missing:
                    dates[date] = (i, j)
                    continue

                raise ValueError(f"Duplicate date {date} found in datasets {d1} and {d2}")

        all_dates = sorted(dates)
        start = all_dates[0]
        end = all_dates[-1]

        # The merged frequency is the smallest gap between consecutive dates.
        frequency = min(d2 - d1 for d1, d2 in zip(all_dates[:-1], all_dates[1:]))

        # Sentinel dataset index marking dates covered by no dataset.
        self._missing_index = len(datasets)

        indices = []
        _dates = []

        date = start
        while date <= end:
            if date not in dates:
                if not self.allow_gaps_in_dates:
                    raise ValueError(
                        f"merge: date {date} not covered by dataset. Start={start}, end={end}, frequency={frequency}"
                    )
                dates[date] = (self._missing_index, -1)

            indices.append(dates[date])
            _dates.append(date)
            date += frequency

        self._dates = np.array(_dates, dtype="datetime64[s]")
        self._indices = np.array(indices)
        self._frequency = frequency  # .astype(object)

    def __len__(self):
        return len(self._dates)

    @property
    def dates(self):
        # Dates of the merged dataset, one per ``frequency`` step.
        return self._dates

    @property
    def frequency(self):
        # Smallest gap between two consecutive merged dates.
        return self._frequency

    @cached_property
    def missing(self):
        """Indices that are missing: gaps, plus dates missing in their provider."""
        # TODO: optimize
        result = set()

        for position, (ds_index, row) in enumerate(self._indices):
            # Short-circuit: the row lookup is skipped for the gap sentinel.
            if ds_index == self._missing_index or row in self.datasets[ds_index].missing:
                result.add(position)

        return result

    def check_same_lengths(self, d1, d2):
        # Turned off because we are concatenating along the first axis
        pass

    def check_same_dates(self, d1, d2):
        # Turned off because we are concatenating along the dates axis
        pass

    def check_compatibility(self, d1, d2):
        super().check_compatibility(d1, d2)
        self.check_same_sub_shapes(d1, d2, drop_axis=0)

    def tree(self):
        return Node(self, [d.tree() for d in self.datasets], allow_gaps_in_dates=self.allow_gaps_in_dates)

    @debug_indexing
    def __getitem__(self, n):
        """Fetch item ``n``; tuple and slice indexing are dispatched to helpers."""
        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self._get_slice(n)

        dataset, row = self._indices[n]

        if dataset == self._missing_index:
            raise MissingDateError(f"Date {self.dates[n]} is missing (index={n})")

        return self.datasets[dataset][int(row)]

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, index):
        # Normalise the multi-dimensional index, fetch along the first axis,
        # then re-apply the remaining axes.
        index, changes = index_to_slices(index, self.shape)
        index, previous = update_tuple(index, 0, slice(None))
        result = self._get_slice(previous)
        return apply_index_to_slices_changes(result[index], changes)

    def _get_slice(self, s):
        # NOTE(review): ``self._len`` is presumably provided by a parent class
        # (not defined here) — confirm against the Dataset/Combined base.
        return np.stack([self[i] for i in range(*s.indices(self._len))])
|
|
163
|
+
|
|
164
|
+
def merge_factory(args, kwargs):
    """Build a :class:`Merge` dataset from the ``merge`` entry of *kwargs*.

    The ``merge`` value must be a list/tuple of dataset specifications; each is
    opened, the datasets are auto-adjusted to a common geometry, and the
    remaining *kwargs* are applied as a subset of the merged result.
    """

    specs = kwargs.pop("merge")

    assert isinstance(specs, (list, tuple))
    assert len(args) == 0

    datasets = [_open(spec) for spec in specs]

    # A single dataset needs no merging — just apply the subset options.
    if len(datasets) == 1:
        return datasets[0]._subset(**kwargs)

    datasets, kwargs = _auto_adjust(datasets, kwargs)

    allow_gaps_in_dates = kwargs.pop("allow_gaps_in_dates", False)

    return Merge(datasets, allow_gaps_in_dates=allow_gaps_in_dates)._subset(**kwargs)
|
anemoi/datasets/data/misc.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import calendar
|
|
9
12
|
import datetime
|
|
10
13
|
import logging
|
|
@@ -235,16 +238,21 @@ def _auto_adjust(datasets, kwargs):
|
|
|
235
238
|
if set(d.variables) != variables:
|
|
236
239
|
subset_kwargs[i]["select"] = sorted(variables)
|
|
237
240
|
|
|
241
|
+
if "start" or "end" in adjust_set:
|
|
242
|
+
common = datasets[0].dates
|
|
243
|
+
for d in datasets[0:]:
|
|
244
|
+
common = np.intersect1d(common, d.dates)
|
|
245
|
+
|
|
238
246
|
if "start" in adjust_set:
|
|
239
247
|
assert "start" not in kwargs, "Cannot use 'start' in adjust and kwargs"
|
|
240
|
-
start =
|
|
248
|
+
start = min(common).astype(object)
|
|
241
249
|
for i, d in enumerate(datasets):
|
|
242
250
|
if start != d.dates[0]:
|
|
243
251
|
subset_kwargs[i]["start"] = start
|
|
244
252
|
|
|
245
253
|
if "end" in adjust_set:
|
|
246
254
|
assert "end" not in kwargs, "Cannot use 'end' in adjust and kwargs"
|
|
247
|
-
end =
|
|
255
|
+
end = max(common).astype(object)
|
|
248
256
|
for i, d in enumerate(datasets):
|
|
249
257
|
if end != d.dates[-1]:
|
|
250
258
|
subset_kwargs[i]["end"] = end
|
|
@@ -262,6 +270,7 @@ def _auto_adjust(datasets, kwargs):
|
|
|
262
270
|
|
|
263
271
|
|
|
264
272
|
def _open_dataset(*args, **kwargs):
|
|
273
|
+
|
|
265
274
|
sets = []
|
|
266
275
|
for a in args:
|
|
267
276
|
sets.append(_open(a))
|
|
@@ -302,6 +311,12 @@ def _open_dataset(*args, **kwargs):
|
|
|
302
311
|
assert not sets, sets
|
|
303
312
|
return concat_factory(args, kwargs).mutate()
|
|
304
313
|
|
|
314
|
+
if "merge" in kwargs:
|
|
315
|
+
from .merge import merge_factory
|
|
316
|
+
|
|
317
|
+
assert not sets, sets
|
|
318
|
+
return merge_factory(args, kwargs).mutate()
|
|
319
|
+
|
|
305
320
|
if "ensemble" in kwargs:
|
|
306
321
|
from .ensemble import ensemble_factory
|
|
307
322
|
|
anemoi/datasets/data/missing.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
anemoi/datasets/data/rescale.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
anemoi/datasets/data/select.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
anemoi/datasets/data/stores.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -69,7 +71,7 @@ class S3Store(ReadOnlyStore):
|
|
|
69
71
|
"""
|
|
70
72
|
|
|
71
73
|
def __init__(self, url, region=None):
|
|
72
|
-
from anemoi.utils.s3 import s3_client
|
|
74
|
+
from anemoi.utils.remote.s3 import s3_client
|
|
73
75
|
|
|
74
76
|
_, _, self.bucket, self.key = url.split("/", 3)
|
|
75
77
|
self.s3 = s3_client(self.bucket, region=region)
|
|
@@ -83,6 +85,41 @@ class S3Store(ReadOnlyStore):
|
|
|
83
85
|
return response["Body"].read()
|
|
84
86
|
|
|
85
87
|
|
|
88
|
+
class PlanetaryComputerStore(ReadOnlyStore):
    """Access catalogs on Microsoft Planetary Computer.

    A dedicated store is needed because ``xr.open_zarr`` requires some extra
    arguments for these catalogs.

    NOTE(review): ``__getitem__`` takes no key and returns a dict of
    ``open_zarr`` arguments — an unusual signature, but the caller in
    ``name_to_zarr_store`` invokes it exactly this way, so it is kept as-is.
    """

    def __init__(self, data_catalog_id):
        # Identifier of the collection in the Planetary Computer STAC catalog.
        self.data_catalog_id = data_catalog_id

    def __getitem__(self):
        import planetary_computer
        import pystac_client

        catalog = pystac_client.Client.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1/",
            modifier=planetary_computer.sign_inplace,
        )
        collection = catalog.get_collection(self.data_catalog_id)

        asset = collection.assets["zarr-abfs"]
        extra = asset.extra_fields

        # Assets backed by fsspec carry storage options; plain assets are
        # opened directly from their href.
        if "xarray:storage_options" in extra:
            store = {
                "store": asset.href,
                "storage_options": extra["xarray:storage_options"],
                **extra["xarray:open_kwargs"],
            }
        else:
            store = {
                "filename_or_obj": asset.href,
                **extra["xarray:open_kwargs"],
            }

        return store
|
|
121
|
+
|
|
122
|
+
|
|
86
123
|
class DebugStore(ReadOnlyStore):
|
|
87
124
|
"""A store to debug the zarr loading."""
|
|
88
125
|
|
|
@@ -119,6 +156,9 @@ def name_to_zarr_store(path_or_url):
|
|
|
119
156
|
if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
|
|
120
157
|
s3_url = f"s3://{bits[0]}{parsed.path}"
|
|
121
158
|
store = S3Store(s3_url, region=bits[2])
|
|
159
|
+
elif store.startswith("https://planetarycomputer.microsoft.com/"):
|
|
160
|
+
data_catalog_id = store.rsplit("/", 1)[-1]
|
|
161
|
+
store = PlanetaryComputerStore(data_catalog_id).__getitem__()
|
|
122
162
|
else:
|
|
123
163
|
store = HTTPStore(store)
|
|
124
164
|
|
|
@@ -302,6 +342,13 @@ class Zarr(Dataset):
|
|
|
302
342
|
)
|
|
303
343
|
]
|
|
304
344
|
|
|
345
|
+
@cached_property
def constant_fields(self):
    """Return the names of constant fields, preferring the stored metadata.

    Reads the ``constant_fields`` zarr attribute when present; otherwise
    warns and falls back to computing them via ``computed_constant_fields``.
    """
    result = self.z.attrs.get("constant_fields")
    if result is None:
        LOG.warning("No 'constant_fields' attribute in %r, computing them", self)
        return self.computed_constant_fields()
    # Fix: the original fell off the end here, so the cached_property cached
    # None whenever the attribute actually existed. Return the stored value.
    return result
|
|
351
|
+
|
|
305
352
|
@property
|
|
306
353
|
def variables_metadata(self):
|
|
307
354
|
return self.z.attrs.get("variables_metadata", {})
|
|
@@ -317,6 +364,7 @@ class Zarr(Dataset):
|
|
|
317
364
|
attrs=dict(self.z.attrs),
|
|
318
365
|
chunks=self.chunks,
|
|
319
366
|
dtype=str(self.dtype),
|
|
367
|
+
path=self.path,
|
|
320
368
|
)
|
|
321
369
|
|
|
322
370
|
def source(self, index):
|
|
@@ -335,6 +383,12 @@ class Zarr(Dataset):
|
|
|
335
383
|
name, _ = os.path.splitext(os.path.basename(self.path))
|
|
336
384
|
names.add(name)
|
|
337
385
|
|
|
386
|
+
def collect_supporting_arrays(self, collected, *path):
|
|
387
|
+
pass
|
|
388
|
+
|
|
389
|
+
def collect_input_sources(self, collected):
|
|
390
|
+
pass
|
|
391
|
+
|
|
338
392
|
|
|
339
393
|
class ZarrWithMissingDates(Zarr):
|
|
340
394
|
"""A zarr dataset with missing dates."""
|
|
@@ -343,7 +397,7 @@ class ZarrWithMissingDates(Zarr):
|
|
|
343
397
|
super().__init__(path)
|
|
344
398
|
|
|
345
399
|
missing_dates = self.z.attrs.get("missing_dates", [])
|
|
346
|
-
missing_dates = set([np.datetime64(x) for x in missing_dates])
|
|
400
|
+
missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
|
|
347
401
|
self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
|
|
348
402
|
self.missing = set(self.missing_to_dates)
|
|
349
403
|
|
|
@@ -396,6 +450,9 @@ class ZarrWithMissingDates(Zarr):
|
|
|
396
450
|
return "zarr*"
|
|
397
451
|
|
|
398
452
|
|
|
453
|
+
QUIET = set()
|
|
454
|
+
|
|
455
|
+
|
|
399
456
|
def zarr_lookup(name, fail=True):
|
|
400
457
|
|
|
401
458
|
if name.endswith(".zarr") or name.endswith(".zip"):
|
|
@@ -404,6 +461,9 @@ def zarr_lookup(name, fail=True):
|
|
|
404
461
|
config = load_config()["datasets"]
|
|
405
462
|
|
|
406
463
|
if name in config["named"]:
|
|
464
|
+
if name not in QUIET:
|
|
465
|
+
LOG.info("Opening `%s` as `%s`", name, config["named"][name])
|
|
466
|
+
QUIET.add(name)
|
|
407
467
|
return config["named"][name]
|
|
408
468
|
|
|
409
469
|
tried = []
|
|
@@ -417,6 +477,9 @@ def zarr_lookup(name, fail=True):
|
|
|
417
477
|
if z is not None:
|
|
418
478
|
# Cache for next time
|
|
419
479
|
config["named"][name] = full
|
|
480
|
+
if name not in QUIET:
|
|
481
|
+
LOG.info("Opening `%s` as `%s`", name, full)
|
|
482
|
+
QUIET.add(name)
|
|
420
483
|
return full
|
|
421
484
|
except zarr.errors.PathNotFoundError:
|
|
422
485
|
pass
|
anemoi/datasets/data/subset.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
|
@@ -132,6 +135,8 @@ class Subset(Forwards):
|
|
|
132
135
|
@cached_property
|
|
133
136
|
def frequency(self):
|
|
134
137
|
dates = self.dates
|
|
138
|
+
if len(dates) < 2:
|
|
139
|
+
raise ValueError(f"Cannot determine frequency of a subset with less than two dates ({self.dates}).")
|
|
135
140
|
return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
|
|
136
141
|
|
|
137
142
|
def source(self, index):
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
from functools import wraps
|
anemoi/datasets/data/xy.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
7
9
|
|
|
10
|
+
|
|
8
11
|
import logging
|
|
9
12
|
from functools import cached_property
|
|
10
13
|
|
|
@@ -18,15 +21,19 @@ LOG = logging.getLogger(__name__)
|
|
|
18
21
|
|
|
19
22
|
class ZipBase(Combined):
|
|
20
23
|
|
|
24
|
+
def __init__(self, datasets, check_compatibility=True):
|
|
25
|
+
self._check_compatibility = check_compatibility
|
|
26
|
+
super().__init__(datasets)
|
|
27
|
+
|
|
21
28
|
def swap_with_parent(self, parent):
|
|
22
29
|
new_parents = [parent.clone(ds) for ds in self.datasets]
|
|
23
30
|
return self.clone(new_parents)
|
|
24
31
|
|
|
25
32
|
def clone(self, datasets):
|
|
26
|
-
return self.__class__(datasets)
|
|
33
|
+
return self.__class__(datasets, check_compatibility=self._check_compatibility)
|
|
27
34
|
|
|
28
35
|
def tree(self):
|
|
29
|
-
return Node(self, [d.tree() for d in self.datasets])
|
|
36
|
+
return Node(self, [d.tree() for d in self.datasets], check_compatibility=self._check_compatibility)
|
|
30
37
|
|
|
31
38
|
def __len__(self):
|
|
32
39
|
return min(len(d) for d in self.datasets)
|
|
@@ -86,6 +93,10 @@ class ZipBase(Combined):
|
|
|
86
93
|
def name_to_index(self):
|
|
87
94
|
return tuple(d.name_to_index for d in self.datasets)
|
|
88
95
|
|
|
96
|
+
def check_compatibility(self, d1, d2):
|
|
97
|
+
if self._check_compatibility:
|
|
98
|
+
super().check_compatibility(d1, d2)
|
|
99
|
+
|
|
89
100
|
|
|
90
101
|
class Zip(ZipBase):
|
|
91
102
|
pass
|
|
@@ -110,7 +121,9 @@ def xy_factory(args, kwargs):
|
|
|
110
121
|
|
|
111
122
|
assert len(datasets) == 2
|
|
112
123
|
|
|
113
|
-
|
|
124
|
+
check_compatibility = kwargs.pop("check_compatibility", True)
|
|
125
|
+
|
|
126
|
+
return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
|
|
114
127
|
|
|
115
128
|
|
|
116
129
|
def zip_factory(args, kwargs):
|
|
@@ -122,4 +135,6 @@ def zip_factory(args, kwargs):
|
|
|
122
135
|
datasets = [_open(e) for e in zip]
|
|
123
136
|
datasets, kwargs = _auto_adjust(datasets, kwargs)
|
|
124
137
|
|
|
125
|
-
|
|
138
|
+
check_compatibility = kwargs.pop("check_compatibility", True)
|
|
139
|
+
|
|
140
|
+
return Zip(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -8,6 +10,8 @@
|
|
|
8
10
|
|
|
9
11
|
import datetime
|
|
10
12
|
import warnings
|
|
13
|
+
from functools import reduce
|
|
14
|
+
from math import gcd
|
|
11
15
|
|
|
12
16
|
# from anemoi.utils.dates import as_datetime
|
|
13
17
|
from anemoi.utils.dates import DateTimes
|
|
@@ -193,18 +197,16 @@ class HindcastsDates(DatesProvider):
|
|
|
193
197
|
|
|
194
198
|
dates = sorted(dates)
|
|
195
199
|
|
|
196
|
-
|
|
200
|
+
deltas = set()
|
|
197
201
|
for a, b in zip(dates, dates[1:]):
|
|
198
202
|
delta = b - a
|
|
199
203
|
assert isinstance(delta, datetime.timedelta), delta
|
|
200
|
-
|
|
201
|
-
mindelta = delta
|
|
202
|
-
else:
|
|
203
|
-
mindelta = min(mindelta, delta)
|
|
204
|
+
deltas.add(delta)
|
|
204
205
|
|
|
206
|
+
mindelta_seconds = reduce(gcd, [int(delta.total_seconds()) for delta in deltas])
|
|
207
|
+
mindelta = datetime.timedelta(seconds=mindelta_seconds)
|
|
205
208
|
self.frequency = mindelta
|
|
206
209
|
assert mindelta.total_seconds() > 0, mindelta
|
|
207
|
-
|
|
208
210
|
print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
|
|
209
211
|
|
|
210
212
|
# Use all values between start and end by frequency, and set the ones that are missing
|
anemoi/datasets/dates/groups.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -26,7 +28,7 @@ class GroupOfDates:
|
|
|
26
28
|
assert isinstance(provider, DatesProvider), type(provider)
|
|
27
29
|
assert isinstance(dates, list)
|
|
28
30
|
|
|
29
|
-
self.dates = dates
|
|
31
|
+
self.dates = [as_datetime(_) for _ in dates]
|
|
30
32
|
self.provider = provider
|
|
31
33
|
self.partial_ok = partial_ok
|
|
32
34
|
|