anemoi-datasets 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +3 -2
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/copy.py +35 -23
- anemoi/datasets/create/__init__.py +12 -9
- anemoi/datasets/create/functions/sources/accumulations.py +9 -1
- anemoi/datasets/create/functions/sources/mars.py +74 -0
- anemoi/datasets/create/loaders.py +96 -86
- anemoi/datasets/create/statistics/__init__.py +6 -139
- anemoi/datasets/data/misc.py +3 -1
- anemoi/datasets/data/select.py +8 -4
- {anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/METADATA +1 -1
- {anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/RECORD +16 -16
- {anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/WHEEL +1 -1
- {anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/top_level.txt +0 -0
anemoi/datasets/__init__.py
CHANGED

@@ -13,9 +13,10 @@ from .data import list_dataset_names
 from .data import open_dataset
 
 __all__ = [
-    "open_dataset",
-    "MissingDateError",
+    "__version__",
     "add_dataset_path",
    "add_named_dataset",
     "list_dataset_names",
+    "MissingDateError",
+    "open_dataset",
 ]
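
With this change, `__version__` becomes part of the package root's public API. A quick check, assuming an installed wheel:

import anemoi.datasets

print(anemoi.datasets.__version__)  # e.g. "0.3.9"
print(anemoi.datasets.__all__)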
anemoi/datasets/_version.py
CHANGED
anemoi/datasets/commands/copy.py
CHANGED

@@ -7,6 +7,7 @@
 
 import logging
 import os
+import shutil
 import sys
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures import as_completed
@@ -26,54 +27,61 @@ except AttributeError:
 
 
 class S3Downloader:
-    def __init__(self, source, target, transfers, overwrite, resume, progress, **kwargs):
+    def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
         self.source = source
         self.target = target
         self.transfers = transfers
         self.overwrite = overwrite
         self.resume = resume
-        self.progress = progress
+        self.verbosity = verbosity
 
     def run(self):
+        if self.target == ".":
+            self.target = os.path.basename(self.source)
+
+        if self.overwrite and os.path.exists(self.target):
+            LOG.info(f"Deleting {self.target}")
+            shutil.rmtree(self.target)
+
         download(
             self.source + "/" if not self.source.endswith("/") else self.source,
             self.target,
             overwrite=self.overwrite,
-            resume=self.resume,
+            resume=self.resume,
+            verbosity=self.verbosity,
             threads=self.transfers,
-            show_progress=self.progress,
         )
 
 
 class S3Uploader:
-    def __init__(self, source, target, transfers, overwrite, resume, progress, **kwargs):
+    def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
         self.source = source
         self.target = target
         self.transfers = transfers
         self.overwrite = overwrite
         self.resume = resume
-        self.progress = progress
+        self.verbosity = verbosity
 
     def run(self):
         upload(
             self.source,
             self.target,
             overwrite=self.overwrite,
-            resume=self.resume,
+            resume=self.resume,
+            verbosity=self.verbosity,
             threads=self.transfers,
-            show_progress=self.progress,
         )
 
 
 class DefaultCopier:
-    def __init__(self, source, target, transfers, block_size, overwrite, resume, progress, nested, rechunk, **kwargs):
+    def __init__(self, source, target, transfers, block_size, overwrite, resume, verbosity, nested, rechunk, **kwargs):
         self.source = source
         self.target = target
         self.transfers = transfers
         self.block_size = block_size
         self.overwrite = overwrite
         self.resume = resume
-        self.progress = progress
+        self.verbosity = verbosity
         self.nested = nested
         self.rechunk = rechunk
 
@@ -86,7 +94,7 @@ class DefaultCopier:
             return zarr.storage.NestedDirectoryStore(path)
         return path
 
-    def copy_chunk(self, n, m, source, target, _copy, progress):
+    def copy_chunk(self, n, m, source, target, _copy, verbosity):
         if _copy[n:m].all():
             LOG.info(f"Skipping {n} to {m}")
             return None
@@ -106,7 +114,7 @@
             range(n, m),
             desc=f"Copying {n} to {m}",
             leave=False,
-            disable=not isatty and not progress,
+            disable=not isatty and not verbosity,
         ):
             target[i] = source[i]
 
@@ -131,7 +139,7 @@
         # raise NotImplementedError("Rechunking with multiple transfers is not implemented")
         return chunks
 
-    def copy_data(self, source, target, _copy, progress):
+    def copy_data(self, source, target, _copy, verbosity):
         LOG.info("Copying data")
         source_data = source["data"]
 
@@ -145,6 +153,7 @@
                 shape=source_data.shape,
                 chunks=self.data_chunks,
                 dtype=source_data.dtype,
+                fill_value=source_data.fill_value,
             )
         )
 
@@ -160,7 +169,7 @@
                     source_data,
                     target_data,
                     _copy,
-                    progress,
+                    verbosity,
                 )
             )
             n += self.block_size
@@ -175,7 +184,7 @@
 
         LOG.info("Copied data")
 
-    def copy_array(self, name, source, target, _copy, progress):
+    def copy_array(self, name, source, target, _copy, verbosity):
         for k, v in source.attrs.items():
             target.attrs[k] = v
 
@@ -183,14 +192,14 @@
             return
 
         if name == "data":
-            self.copy_data(source, target, _copy, progress)
+            self.copy_data(source, target, _copy, verbosity)
             return
 
         LOG.info(f"Copying {name}")
         target[name] = source[name]
         LOG.info(f"Copied {name}")
 
-    def copy_group(self, source, target, _copy, progress):
+    def copy_group(self, source, target, _copy, verbosity):
         import zarr
 
         for k, v in source.attrs.items():
@@ -203,7 +212,7 @@
                     source[name],
                     group,
                     _copy,
-                    progress,
+                    verbosity,
                 )
             else:
                 self.copy_array(
@@ -211,10 +220,10 @@
                     source,
                     target,
                     _copy,
-                    progress,
+                    verbosity,
                 )
 
-    def copy(self, source, target, progress):
+    def copy(self, source, target, verbosity):
         import zarr
 
         if "_copy" not in target:
@@ -225,7 +234,7 @@
         _copy = target["_copy"]
         _copy_np = _copy[:]
 
-        self.copy_group(source, target, _copy_np, progress)
+        self.copy_group(source, target, _copy_np, verbosity)
         del target["_copy"]
 
     def run(self):
@@ -284,7 +293,7 @@
         assert target is not None, target
 
         source = zarr.open(self._store(self.source), mode="r")
-        self.copy(source, target, self.progress)
+        self.copy(source, target, self.verbosity)
 
 
 class CopyMixin:
@@ -303,7 +312,10 @@ class CopyMixin:
         )
         command_parser.add_argument("--transfers", type=int, default=8, help="Number of parallel transfers.")
         command_parser.add_argument(
-            "--progress", action="store_true", help="Show progress bar."
+            "--verbosity",
+            type=int,
+            help="Verbosity level. 0 is silent, 1 is normal, 2 is verbose.",
+            default=1,
         )
         command_parser.add_argument("--nested", action="store_true", help="Use ZARR's nested directpry backend.")
         command_parser.add_argument(
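
The boolean progress flag is replaced throughout by an integer `verbosity` that is threaded through every copier class. A sketch of driving the copier directly, following the new `DefaultCopier` signature above; the paths, `block_size` and `rechunk` values are hypothetical, not taken from this diff:

from anemoi.datasets.commands.copy import DefaultCopier

copier = DefaultCopier(
    source="input.zarr",  # hypothetical paths
    target="output.zarr",
    transfers=8,
    block_size=100,  # assumed value; the CLI default is not shown in this diff
    overwrite=False,
    resume=True,
    verbosity=2,  # 0 is silent, 1 is normal, 2 is verbose (per the new --verbosity help)
    nested=False,
    rechunk=None,  # assumed "keep source chunking" value
)
copier.run()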
anemoi/datasets/create/__init__.py
CHANGED

@@ -97,13 +97,14 @@ class Creator:
 
         apply_patch(self.path, **kwargs)
 
-    def init_additions(self, delta=[1, 3, 6, 12, 24]):
+    def init_additions(self, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition
         from .loaders import TendenciesStatisticsAddition
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
-        a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
-        a.initialise()
+        if statistics:
+            a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
+            a.initialise()
 
         for d in delta:
             try:
@@ -112,13 +113,14 @@
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
                 self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
-    def run_additions(self, parts=None, delta=[1, 3, 6, 12, 24]):
+    def run_additions(self, parts=None, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition
         from .loaders import TendenciesStatisticsAddition
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
-        a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
-        a.run(parts)
+        if statistics:
+            a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
+            a.run(parts)
 
         for d in delta:
             try:
@@ -127,13 +129,14 @@
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
                 self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
-    def finalise_additions(self, delta=[1, 3, 6, 12, 24]):
+    def finalise_additions(self, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition
         from .loaders import TendenciesStatisticsAddition
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
-        a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
-        a.finalise()
+        if statistics:
+            a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
+            a.finalise()
 
         for d in delta:
             try:
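
The new `statistics` keyword makes it possible to (re)run only the tendency (delta) additions without recomputing the plain statistics. A hedged sketch, assuming `Creator` takes the dataset path as a `path` keyword (its constructor is not shown in this diff):

from anemoi.datasets.create import Creator

c = Creator(path="dataset.zarr")  # hypothetical
c.init_additions(delta=[6, 12], statistics=False)  # tendencies only
c.run_additions(delta=[6, 12], statistics=False)
c.finalise_additions(delta=[6, 12], statistics=False)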
anemoi/datasets/create/functions/sources/accumulations.py
CHANGED

@@ -19,6 +19,8 @@ from climetlab.utils.availability import Availability
 
 from anemoi.datasets.create.utils import to_datetime_list
 
+from .mars import use_grib_paramid
+
 LOG = logging.getLogger(__name__)
 
 
@@ -85,6 +87,7 @@ class Accumulation:
             stepType="accum",
             startStep=self.startStep,
             endStep=self.endStep,
+            check_nans=True,
         )
         self.values = None
         self.done = True
@@ -230,6 +233,7 @@ def identity(x):
 
 
 def compute_accumulations(
+    context,
     dates,
     request,
     user_accumulation_period=6,
@@ -306,7 +310,10 @@ def compute_accumulations(
     ds = cml.load_source("empty")
     for r in compressed.iterate():
         request.update(r)
+        if context.use_grib_paramid and "param" in request:
+            request = use_grib_paramid(request)
         print("🌧️", request)
+
         ds = ds + cml.load_source("mars", **request)
 
     accumulations = {}
@@ -395,7 +402,7 @@ def accumulations(context, dates, **request):
     class_ = request.get("class", "od")
     stream = request.get("stream", "oper")
 
-    user_accumulation_period = request.get("accumulation_period", 6)
+    user_accumulation_period = request.pop("accumulation_period", 6)
 
     KWARGS = {
         ("od", "oper"): dict(patch=scda),
@@ -409,6 +416,7 @@
     context.trace("🌧️", f"accumulations {request} {user_accumulation_period} {kwargs}")
 
     return compute_accumulations(
+        context,
         dates,
         request,
         user_accumulation_period=user_accumulation_period,
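
When the context requests it, `use_grib_paramid` (imported from the sibling `mars` module) rewrites the `param` entry of the request from variable names to numeric GRIB paramIds before the MARS call. Illustration only; the output shape is an assumption based on the call site above:

request = {"class": "od", "levtype": "sfc", "param": ["2t", "10u"], "date": "20240101"}

# After `request = use_grib_paramid(request)`, the request would look like:
# {"class": "od", "levtype": "sfc", "param": [167, 165], "date": "20240101"}
# (167 and 165 are the GRIB paramIds of 2t and 10u)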
anemoi/datasets/create/functions/sources/mars.py
CHANGED

@@ -9,6 +9,7 @@
 import datetime
 from copy import deepcopy
 
+from anemoi.utils.humanize import did_you_mean
 from climetlab import load_source
 from climetlab.utils.availability import Availability
 
@@ -102,6 +103,74 @@ def use_grib_paramid(r):
     return r
 
 
+MARS_KEYS = [
+    "accuracy",
+    "activity",
+    "anoffset",
+    "area",
+    "bitmap",
+    "channel",
+    "class",
+    "database",
+    "dataset",
+    "date",
+    "diagnostic",
+    "direction",
+    "domain",
+    "expect",
+    "experiment",
+    "expver",
+    "fcmonth",
+    "fcperiod",
+    "fieldset",
+    "filter",
+    "format",
+    "frame",
+    "frequency",
+    "gaussian",
+    "generation",
+    "grid",
+    "hdate",
+    "ident",
+    "instrument",
+    "interpolation",
+    "intgrid",
+    "iteration",
+    "level",
+    "levelist",
+    "levtype",
+    "method",
+    "model",
+    "month",
+    "number",
+    "obsgroup",
+    "obstype",
+    "offsetdate",
+    "offsettime",
+    "optimise",
+    "origin",
+    "packing",
+    "padding",
+    "param",
+    "quantile",
+    "realization",
+    "reference",
+    "reportype",
+    "repres",
+    "resol",
+    "resolution",
+    "rotation",
+    "step",
+    "stream",
+    "system",
+    "target",
+    "time",
+    "truncation",
+    "type",
+    "year",
+]
+
+
 def mars(context, dates, *requests, date_key="date", **kwargs):
     if not requests:
         requests = [kwargs]
@@ -117,6 +186,11 @@ def mars(context, dates, *requests, date_key="date", **kwargs):
         if DEBUG:
             context.trace("✅", f"load_source(mars, {r}")
 
+        for k, v in r.items():
+            if k not in MARS_KEYS:
+                raise ValueError(
+                    f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
+                )
         ds = ds + load_source("mars", **r)
     return ds
 
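
The new guard fails fast on a misspelt request key and suggests the closest valid one. A self-contained sketch of the same idea, using difflib as a stand-in for anemoi.utils.humanize.did_you_mean:

import difflib

MARS_KEYS_ABRIDGED = ["class", "date", "expver", "levtype", "param", "step", "stream", "time", "type"]

def check_request(r, keys=MARS_KEYS_ABRIDGED):
    for k, v in r.items():
        if k not in keys:
            guess = difflib.get_close_matches(k, keys, n=1)
            hint = f" Did you mean '{guess[0]}' ?" if guess else ""
            raise ValueError(f"Unknown key {k}={v} in MARS request.{hint}")

check_request({"class": "od", "levtype": "sfc"})  # passes silently
# check_request({"clas": "od"})  # raises: Unknown key clas=od ... Did you mean 'class' ?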
anemoi/datasets/create/loaders.py
CHANGED

@@ -46,8 +46,44 @@ LOG = logging.getLogger(__name__)
 VERSION = "0.20"
 
 
+def set_to_test_mode(cfg):
+    NUMBER_OF_DATES = 4
+
+    dates = cfg.dates
+    LOG.warn(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
+    groups = Groups(**cfg.dates)
+    dates = groups.dates
+    cfg.dates = dict(
+        start=dates[0],
+        end=dates[NUMBER_OF_DATES - 1],
+        frequency=dates.frequency,
+        group_by=NUMBER_OF_DATES,
+    )
+
+    def set_element_to_test(obj):
+        if isinstance(obj, (list, tuple)):
+            for v in obj:
+                set_element_to_test(v)
+            return
+        if isinstance(obj, (dict, DictObj)):
+            if "grid" in obj:
+                previous = obj["grid"]
+                obj["grid"] = "20./20."
+                LOG.warn(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
+            if "number" in obj:
+                if isinstance(obj["number"], (list, tuple)):
+                    previous = obj["number"]
+                    obj["number"] = previous[0:3]
+                    LOG.warn(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
+            for k, v in obj.items():
+                set_element_to_test(v)
+
+    set_element_to_test(cfg)
+
+
 class GenericDatasetHandler:
     def __init__(self, *, path, print=print, **kwargs):
+
         # Catch all floating point errors, including overflow, sqrt(<0), etc
         np.seterr(all="raise", under="warn")
 
@@ -61,12 +97,15 @@ class GenericDatasetHandler:
 
     @classmethod
     def from_config(cls, *, config, path, print=print, **kwargs):
-
+        """Config is the path to the config file or a dict with the config"""
+
         assert isinstance(config, dict) or isinstance(config, str), config
         return cls(config=config, path=path, print=print, **kwargs)
 
     @classmethod
     def from_dataset_config(cls, *, path, print=print, **kwargs):
+        """Read the config saved inside the zarr dataset and instantiate the class for this config."""
+
         assert os.path.exists(path), f"Path {path} does not exist."
         z = zarr.open(path, mode="r")
         config = z.attrs["_create_yaml_config"]
@@ -75,6 +114,8 @@
 
     @classmethod
     def from_dataset(cls, *, path, **kwargs):
+        """Instanciate the class from the path to the zarr dataset, without config."""
+
         assert os.path.exists(path), f"Path {path} does not exist."
         return cls(path=path, **kwargs)
 
@@ -156,68 +197,50 @@ class Loader(DatasetHandlerWithStatistics):
 class InitialiserLoader(Loader):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
-        self.main_config = loader_config(config)
-
-        self.tmp_statistics.delete()
 
+        self.main_config = loader_config(config)
         if self.test:
-
-            def test_dates(cfg, n=4):
-                LOG.warn("Running in test mode. Changing the list of dates to use only 4.")
-                groups = Groups(**cfg)
-                dates = groups.dates
-                return dict(start=dates[0], end=dates[n - 1], frequency=dates.frequency, group_by=n)
-
-            self.main_config.dates = test_dates(self.main_config.dates)
-
-            def set_to_test_mode(obj):
-                if isinstance(obj, (list, tuple)):
-                    for v in obj:
-                        set_to_test_mode(v)
-                    return
-                if isinstance(obj, (dict, DictObj)):
-                    if "grid" in obj:
-                        previous = obj["grid"]
-                        obj["grid"] = "20./20."
-                        LOG.warn(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
-                    if "number" in obj:
-                        if isinstance(obj["number"], (list, tuple)):
-                            previous = obj["number"]
-                            obj["number"] = previous[0:3]
-                            LOG.warn(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
-                    for k, v in obj.items():
-                        set_to_test_mode(v)
-
             set_to_test_mode(self.main_config)
 
         LOG.info(self.main_config.dates)
 
+        self.tmp_statistics.delete()
+
         self.groups = Groups(**self.main_config.dates)
+        LOG.info(self.groups)
 
         self.output = build_output(self.main_config.output, parent=self)
         self.input = self.build_input()
-
         LOG.info(self.input)
-        all_dates = self.groups.dates
-        self.minimal_input = self.input.select([all_dates[0]])
 
-
-
+        first_date = self.groups.dates[0]
+        self.minimal_input = self.input.select([first_date])
+        LOG.info("Minimal input (using only the first date) :")
         LOG.info(self.minimal_input)
 
     def build_statistics_dates(self, start, end):
+        """Compute the start and end dates for the statistics, based on :
+        - The start and end dates in the config
+        - The default statistics dates convention
+
+        Then adapt according to the actual dates in the dataset.
+        """
+
         ds = open_dataset(self.path)
         dates = ds.dates
 
+        # if not specified, use the default statistics dates
         default_start, default_end = default_statistics_dates(dates)
         if start is None:
             start = default_start
         if end is None:
             end = default_end
 
+        # in any case, adapt to the actual dates in the dataset
         start = as_first_date(start, dates)
         end = as_last_date(end, dates)
 
+        # and convert to datetime to isoformat
         start = start.astype(datetime.datetime)
         end = end.astype(datetime.datetime)
         return (start.isoformat(), end.isoformat())
@@ -227,7 +250,10 @@ class InitialiserLoader(Loader):
         z.create_group("_build")
 
     def initialise(self, check_name=True):
-        """Create empty dataset"""
+        """Create an empty dataset of the right final shape
+
+        Read a small part of the data to get the shape of the data and the resolution and more metadata.
+        """
 
         self.print("Config loaded ok:")
         LOG.info(self.main_config)
@@ -276,11 +302,10 @@
         metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
 
         metadata["description"] = self.main_config.description
-        metadata["version"] = VERSION
+        metadata["licence"] = self.main_config["licence"]
+        metadata["attribution"] = self.main_config["attribution"]
 
-        metadata["data_request"] = self.minimal_input.data_request
         metadata["remapping"] = self.output.remapping
-
         metadata["order_by"] = self.output.order_by_as_list
         metadata["flatten_grid"] = self.output.flatten_grid
 
@@ -288,26 +313,21 @@
         metadata["variables"] = variables
         metadata["variables_with_nans"] = variables_with_nans
         metadata["resolution"] = resolution
+
+        metadata["data_request"] = self.minimal_input.data_request
         metadata["field_shape"] = self.minimal_input.field_shape
         metadata["proj_string"] = self.minimal_input.proj_string
 
-        metadata["licence"] = self.main_config["licence"]
-        metadata["attribution"] = self.main_config["attribution"]
-
-        metadata["frequency"] = frequency
         metadata["start_date"] = dates[0].isoformat()
         metadata["end_date"] = dates[-1].isoformat()
+        metadata["frequency"] = frequency
         metadata["missing_dates"] = [_.isoformat() for _ in dates.missing]
 
+        metadata["version"] = VERSION
+
         if check_name:
             basename, ext = os.path.splitext(os.path.basename(self.path))  # noqa: F841
-            ds_name = DatasetName(
-                basename,
-                resolution,
-                dates[0],
-                dates[-1],
-                frequency,
-            )
+            ds_name = DatasetName(basename, resolution, dates[0], dates[-1], frequency)
             ds_name.raise_if_not_valid(print=self.print)
 
         if len(dates) != total_shape[0]:
@@ -316,17 +336,12 @@
                 f"does not match data shape {total_shape[0]}. {total_shape=}"
             )
 
-        dates = normalize_and_check_dates(
-            dates,
-            metadata["start_date"],
-            metadata["end_date"],
-            metadata["frequency"],
-        )
+        dates = normalize_and_check_dates(dates, metadata["start_date"], metadata["end_date"], metadata["frequency"])
 
         metadata.update(self.main_config.get("force_metadata", {}))
 
         ###############################################################
-        # write
+        # write metadata
         ###############################################################
 
        self.initialise_dataset_backend()
@@ -346,10 +361,7 @@
             self.main_config.statistics.get("start"),
             self.main_config.statistics.get("end"),
         )
-        self.update_metadata(
-            statistics_start_date=statistics_start,
-            statistics_end_date=statistics_end,
-        )
+        self.update_metadata(statistics_start_date=statistics_start, statistics_end_date=statistics_end)
         LOG.info(f"Will compute statistics from {statistics_start} to {statistics_end}")
 
         self.registry.add_to_history("init finished")
@@ -586,37 +598,22 @@ class GenericAdditions(GenericDatasetHandler):
 
     @property
     def tmp_storage_path(self):
-        raise NotImplementedError()
+        """This should be implemented in the subclass."""
+        raise NotImplementedError()
 
     @property
     def final_storage_path(self):
-        raise NotImplementedError()
+        """This should be implemented in the subclass."""
+        raise NotImplementedError()
 
     def initialise(self):
         self.tmp_storage.delete()
         self.tmp_storage.create()
         LOG.info(f"Dataset {self.path} additions initialized.")
 
-    @cached_property
-    def _variables_with_nans(self):
-        z = zarr.open(self.path, mode="r")
-        if "variables_with_nans" in z.attrs:
-            return z.attrs["variables_with_nans"]
-        return None
-
-    def allow_nan(self, name):
-        if self._variables_with_nans is not None:
-            return name in self._variables_with_nans
-        warnings.warn(f"❗Cannot find 'variables_with_nans' in {self.path}, Assuming nans allowed for {name}.")
-        return True
-
-    @classmethod
-    def _check_type_equal(cls, a, b):
-        a = list(a)
-        b = list(b)
-        a = a[0] if a else None
-        b = b[0] if b else None
-        assert type(a) is type(b), (type(a), type(b))
+    def run(self, parts):
+        """This should be implemented in the subclass."""
+        raise NotImplementedError()
 
     def finalise(self):
         shape = (len(self.dates), len(self.variables))
@@ -696,7 +693,7 @@
             variables_names=self.variables,
             has_nans=has_nans,
         )
-        LOG.info(f"Dataset {self.path} additions finalized.")
+        LOG.info(f"Dataset {self.path} additions finalised.")
         self.check_statistics()
         self._write(self.summary)
         self.tmp_storage.delete()
@@ -711,6 +708,19 @@
     def check_statistics(self):
         pass
 
+    @cached_property
+    def _variables_with_nans(self):
+        z = zarr.open(self.path, mode="r")
+        if "variables_with_nans" in z.attrs:
+            return z.attrs["variables_with_nans"]
+        return None
+
+    def allow_nan(self, name):
+        if self._variables_with_nans is not None:
+            return name in self._variables_with_nans
+        warnings.warn(f"❗Cannot find 'variables_with_nans' in {self.path}, Assuming nans allowed for {name}.")
+        return True
+
 
 class StatisticsAddition(GenericAdditions):
     def __init__(self, **kwargs):
@@ -798,7 +808,7 @@ class TendenciesStatisticsAddition(GenericAdditions):
         start = z.attrs["statistics_start_date"]
         end = z.attrs["statistics_end_date"]
         start = datetime.datetime.fromisoformat(start)
-        ds = open_dataset(self.path, start=start)
+        ds = open_dataset(self.path, start=start, end=end)
         self.dates = ds.dates
         self.total = len(self.dates)
 
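
The relocated `allow_nan` helper looks up the `variables_with_nans` attribute that the loader stores in the dataset; when the attribute is missing (datasets built by older versions), NaNs are allowed permissively with a warning. A minimal standalone sketch of that lookup:

import warnings
import zarr

def allow_nan(path, name):
    z = zarr.open(path, mode="r")
    with_nans = z.attrs.get("variables_with_nans")  # None for older datasets
    if with_nans is not None:
        return name in with_nans
    warnings.warn(f"Cannot find 'variables_with_nans' in {path}, assuming NaNs allowed for {name}.")
    return True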
anemoi/datasets/create/statistics/__init__.py
CHANGED

@@ -98,6 +98,8 @@ def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squares):
 
 
 def compute_statistics(array, check_variables_names=None, allow_nan=False):
+    """Compute statistics for a given array, provides minimum, maximum, sum, squares, count and has_nans as a dictionary."""
+
     nvars = array.shape[1]
 
     LOG.info(f"Stats {nvars}, {array.shape}, {check_variables_names}")
@@ -242,10 +244,7 @@ class StatAggregator:
         offset = 0
         for _, _dates, stats in self.owner._gather_data():
             assert isinstance(stats, dict), stats
-            assert stats["minimum"].shape[0] == len(_dates), (
-                stats["minimum"].shape,
-                len(_dates),
-            )
+            assert stats["minimum"].shape[0] == len(_dates), (stats["minimum"].shape, len(_dates))
             assert stats["minimum"].shape[1] == len(self.variables_names), (
                 stats["minimum"].shape,
                 len(self.variables_names),
@@ -270,19 +269,13 @@
             for k in self.NAMES:
                 stats[k] = stats[k][bitmap]
 
-            assert stats["minimum"].shape[0] == len(dates), (
-                stats["minimum"].shape,
-                len(dates),
-            )
+            assert stats["minimum"].shape[0] == len(dates), (stats["minimum"].shape, len(dates))
 
             # store data in self
             found |= set(dates)
             for name in self.NAMES:
                 array = getattr(self, name)
-                assert stats[name].shape[0] == len(dates), (
-                    stats[name].shape,
-                    len(dates),
-                )
+                assert stats[name].shape[0] == len(dates), (stats[name].shape, len(dates))
                 array[offset : offset + len(dates)] = stats[name]
                 offset += len(dates)
 
@@ -310,133 +303,7 @@
         stdev = np.sqrt(x)
 
         for j, name in enumerate(self.variables_names):
-            check_data_values(
-                np.array(
-                    [
-                        mean[j],
-                    ]
-                ),
-                name=name,
-                allow_nan=False,
-            )
-
-        return Summary(
-            minimum=minimum,
-            maximum=maximum,
-            mean=mean,
-            count=count,
-            sums=sums,
-            squares=squares,
-            stdev=stdev,
-            variables_names=self.variables_names,
-            has_nans=has_nans,
-        )
-
-
-class SummaryAggregator:
-    NAMES = ["minimum", "maximum", "sums", "squares", "count", "has_nans"]
-
-    def __init__(self, owner, dates, variables_names, allow_nan):
-        dates = sorted(dates)
-        dates = to_datetimes(dates)
-        assert dates, "No dates selected"
-        self.owner = owner
-        self.dates = dates
-        self.variables_names = variables_names
-        self.allow_nan = allow_nan
-
-        self.shape = (len(self.dates), len(self.variables_names))
-        LOG.info(f"Aggregating statistics on shape={self.shape}. Variables : {self.variables_names}")
-
-        self.minimum = np.full(self.shape, np.nan, dtype=np.float64)
-        self.maximum = np.full(self.shape, np.nan, dtype=np.float64)
-        self.sums = np.full(self.shape, np.nan, dtype=np.float64)
-        self.squares = np.full(self.shape, np.nan, dtype=np.float64)
-        self.count = np.full(self.shape, -1, dtype=np.int64)
-        self.has_nans = np.full(self.shape, False, dtype=np.bool_)
-
-        self._read()
-
-    def _read(self):
-        def check_type(a, b):
-            a = list(a)
-            b = list(b)
-            a = a[0] if a else None
-            b = b[0] if b else None
-            assert type(a) is type(b), (type(a), type(b))
-
-        found = set()
-        offset = 0
-        for _, _dates, stats in self.owner._gather_data():
-            for n in self.NAMES:
-                assert n in stats, (n, list(stats.keys()))
-            _dates = to_datetimes(_dates)
-            check_type(_dates, self.dates)
-            if found:
-                check_type(found, self.dates)
-                assert found.isdisjoint(_dates), "Duplicate dates found in precomputed statistics"
-
-            # filter dates
-            dates = set(_dates) & set(self.dates)
-
-            if not dates:
-                # dates have been completely filtered for this chunk
-                continue
-
-            # filter data
-            bitmap = np.isin(_dates, self.dates)
-            for k in self.NAMES:
-                stats[k] = stats[k][bitmap]
-
-            assert stats["minimum"].shape[0] == len(dates), (
-                stats["minimum"].shape,
-                len(dates),
-            )
-
-            # store data in self
-            found |= set(dates)
-            for name in self.NAMES:
-                array = getattr(self, name)
-                assert stats[name].shape[0] == len(dates), (
-                    stats[name].shape,
-                    len(dates),
-                )
-                array[offset : offset + len(dates)] = stats[name]
-                offset += len(dates)
-
-        for d in self.dates:
-            assert d in found, f"Statistics for date {d} not precomputed."
-        assert len(self.dates) == len(found), "Not all dates found in precomputed statistics"
-        assert len(self.dates) == offset, "Not all dates found in precomputed statistics."
-        LOG.info(f"Statistics for {len(found)} dates found.")
-
-    def aggregate(self):
-        minimum = np.nanmin(self.minimum, axis=0)
-        maximum = np.nanmax(self.maximum, axis=0)
-        sums = np.nansum(self.sums, axis=0)
-        squares = np.nansum(self.squares, axis=0)
-        count = np.nansum(self.count, axis=0)
-        has_nans = np.any(self.has_nans, axis=0)
-        mean = sums / count
-
-        assert sums.shape == count.shape == squares.shape == mean.shape == minimum.shape == maximum.shape
-
-        x = squares / count - mean * mean
-        # remove negative variance due to numerical errors
-        # x[- 1e-15 < (x / (np.sqrt(squares / count) + np.abs(mean))) < 0] = 0
-        check_variance(x, self.variables_names, minimum, maximum, mean, count, sums, squares)
-        stdev = np.sqrt(x)
-
-        for j, name in enumerate(self.variables_names):
-            check_data_values(
-                np.array(
-                    [
-                        mean[j],
-                    ]
-                ),
-                name=name,
-                allow_nan=False,
-            )
+            check_data_values(np.array([mean[j]]), name=name, allow_nan=False)
 
         return Summary(
             minimum=minimum,
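
The surviving `StatAggregator.aggregate` path reduces per-chunk partials with mean = sums/count and var = squares/count - mean*mean, then takes the square root for the standard deviation. A standalone numpy sketch of that reduction for one variable split over three chunks (values 4,6 | 5,7 | 3,5):

import numpy as np

sums = np.array([10.0, 12.0, 8.0])     # per-chunk sum of values
squares = np.array([52.0, 74.0, 34.0]) # per-chunk sum of squared values
count = np.array([2, 2, 2])            # per-chunk number of values

n = count.sum()
mean = sums.sum() / n                  # 5.0
var = squares.sum() / n - mean * mean  # 26.666... - 25 = 1.666...
var = max(var, 0.0)                    # guard against tiny negative variance from rounding
stdev = np.sqrt(var)                   # ~1.291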
anemoi/datasets/data/misc.py
CHANGED

@@ -49,7 +49,9 @@ def load_config():
     if CONFIG is not None:
         return CONFIG
 
-    conf = os.path.expanduser("~/.anemoi.toml")
+    conf = os.path.expanduser("~/.config/anemoi/settings.toml")
+    if not os.path.exists(conf):
+        conf = os.path.expanduser("~/.anemoi.toml")
 
     if os.path.exists(conf):
 
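
The settings file is now looked up in the XDG-style location first, with the legacy dotfile as a fallback. The same order as a standalone sketch:

import os

def find_config():
    conf = os.path.expanduser("~/.config/anemoi/settings.toml")
    if not os.path.exists(conf):
        conf = os.path.expanduser("~/.anemoi.toml")  # legacy fallback
    return conf if os.path.exists(conf) else None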
anemoi/datasets/data/select.py
CHANGED

@@ -23,15 +23,19 @@ LOG = logging.getLogger(__name__)
 class Select(Forwards):
     """Select a subset of the variables."""
 
-    def __init__(self, dataset, indices,
+    def __init__(self, dataset, indices, reason):
+
+        reason = reason.copy()
+
         while isinstance(dataset, Select):
             indices = [dataset.indices[i] for i in indices]
+            reason.update(dataset.reason)
             dataset = dataset.dataset
 
         self.dataset = dataset
         self.indices = list(indices)
         assert len(self.indices) > 0
-        self.
+        self.reason = reason or {"indices": self.indices}
 
         # Forward other properties to the main dataset
         super().__init__(dataset)
@@ -86,11 +90,11 @@ class Select(Forwards):
         return Source(self, index, self.dataset.source(self.indices[index]))
 
     def tree(self):
-        return Node(self, [self.dataset.tree()], **self.
+        return Node(self, [self.dataset.tree()], **self.reason)
 
     def subclass_metadata_specific(self):
         # return dict(indices=self.indices)
-        return
+        return dict(reason=self.reason)
 
 
 class Rename(Forwards):
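
When a `Select` wraps another `Select`, the constructor composes the two index mappings so the outer object indexes the base dataset directly, and now also merges the wrapped `reason` dictionaries. The index composition in isolation:

inner = [4, 7, 9]  # base-dataset columns kept by the inner Select
outer = [0, 2]     # positions kept by the outer Select, relative to inner
flattened = [inner[i] for i in outer]
print(flattened)   # [4, 9], i.e. columns of the base dataset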
{anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-datasets
-Version: 0.3.7
+Version: 0.3.9
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
{anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/RECORD
CHANGED

@@ -1,21 +1,21 @@
-anemoi/datasets/__init__.py,sha256=
+anemoi/datasets/__init__.py,sha256=Z1gqZWhecLcT0RZQqYBLlz01MUlUZd0kWEj_RavbITM,782
 anemoi/datasets/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
-anemoi/datasets/_version.py,sha256=
+anemoi/datasets/_version.py,sha256=nV2HEiFwTdaOZoFEyVxxG_D8Oq_nlSmX2vHL4jK4h6w,411
 anemoi/datasets/grids.py,sha256=3YBMMJodgYhavarXPAlMZHaMtDT9v2IbTmAXZTqf8Qo,8481
 anemoi/datasets/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
 anemoi/datasets/commands/compare.py,sha256=p2jQOAC3JhScCLF0GjTCO8goYLWLN8p7vzy_gf5fFcI,1473
-anemoi/datasets/commands/copy.py,sha256=
+anemoi/datasets/commands/copy.py,sha256=SxAeN51owyN5gwtwpt30xhJSIJRlJb9YOUt_4K4m-D8,11780
 anemoi/datasets/commands/create.py,sha256=POdOsVDlvRrHFFkI3SNXNgNIbSxkVUUPMoo660x7Ma0,987
 anemoi/datasets/commands/inspect.py,sha256=G3fzcgiLaU8jln7GKvgamN7Y06-qC_JnFw2SbNn1_E4,18646
 anemoi/datasets/commands/scan.py,sha256=HxsLdCgBMSdEXjlJfPq5M_9LxXHHQIoZ1ZEHO_AoPgA,2881
 anemoi/datasets/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 anemoi/datasets/compute/recentre.py,sha256=GRxI6rY_KyXJnZGPxU_UO9YDb-rY_raK70Fiwv1mjhs,4792
-anemoi/datasets/create/__init__.py,sha256=
+anemoi/datasets/create/__init__.py,sha256=Q8uXUdbE-SRYYaZd5cPQ2RVbSoHnGX7-eKdOJHYVhDk,5951
 anemoi/datasets/create/check.py,sha256=DLjw-eyaCNxPhoKFsP4Yn_l3SIr57YHdyPR-tE5vx80,5791
 anemoi/datasets/create/chunks.py,sha256=YEDcr0K2KiiceSTiBuZzj0TbRbzZ9J546XO7rrrTFQw,2441
 anemoi/datasets/create/config.py,sha256=uLIp1WHg3hbqwwMV9EepMwJQsXJAGImkbo0okBeEVd4,7683
 anemoi/datasets/create/input.py,sha256=3G7sqdn7R1pLBeeswXwwi8VRAHrBnjq1PdRYHJBe594,27741
-anemoi/datasets/create/loaders.py,sha256=
+anemoi/datasets/create/loaders.py,sha256=-fJ9qKjsCd8Wvnobn34WsQpE9uAjon5M4REgCpW5q_w,30594
 anemoi/datasets/create/patch.py,sha256=xjCLhvIQKRqmypsKInRU1CvFh1uoaB3YGSQP1UVZZik,3682
 anemoi/datasets/create/persistent.py,sha256=nT8gvhVPdI1H3zW_F7uViGKIlQQ94jCDrMSWTmhQ2_A,4290
 anemoi/datasets/create/size.py,sha256=A1w6RkaL0L9IlwIdmYsCTJTecmY_QtvbkGf__jvQle0,1068
@@ -31,19 +31,19 @@ anemoi/datasets/create/functions/filters/rename.py,sha256=cGoHr-IS-PhYEtZvXDpH03
 anemoi/datasets/create/functions/filters/rotate_winds.py,sha256=fUdh8ILcMzMzckGlvwzdgG-c7w5R9NnWfaijp28Bf5M,4092
 anemoi/datasets/create/functions/filters/unrotate_winds.py,sha256=nsa3EHly8ppWd2WH4ROoMczM8WFu5qKaIhO_UFcL9TY,3502
 anemoi/datasets/create/functions/sources/__init__.py,sha256=Xe9G54CKvCI3ji-7k0R5l0WZZdhlydRgawsXuBcX_hg,379
-anemoi/datasets/create/functions/sources/accumulations.py,sha256=
+anemoi/datasets/create/functions/sources/accumulations.py,sha256=klbp-akoZlOk9jByDFsgPfHRCdfLvpatTLMxDPZaNZc,12943
 anemoi/datasets/create/functions/sources/constants.py,sha256=aqquu6HDc8t-zsF9KRFLaj0eV4S0UPZ59BVna8E3bU8,785
 anemoi/datasets/create/functions/sources/empty.py,sha256=SBuAfC33imbfcRnFnnOR44y8Q3KSQcqx3juIcXfCa3c,481
 anemoi/datasets/create/functions/sources/forcings.py,sha256=EVcdu8puMSW451qj3LKCWWXaSf2LlmF8YXVs8hSMxkU,643
 anemoi/datasets/create/functions/sources/grib.py,sha256=YQNuGnlh2EYb2NIHYpzlipwUTmOhrmyQtP3zgk8MAUU,1661
 anemoi/datasets/create/functions/sources/hindcasts.py,sha256=0Psnsx2J0cRLMpJuNN-gESm1xJFC1gmQzI8sdnXCoYE,13042
-anemoi/datasets/create/functions/sources/mars.py,sha256=
+anemoi/datasets/create/functions/sources/mars.py,sha256=JWsbzyoXF95HPk2VWzmX53f_SJwXhKkaJvXtXJMGLig,5285
 anemoi/datasets/create/functions/sources/netcdf.py,sha256=kic6PH7SAK3gseXChD38IDXw6Zcg2zhF4SeDXB2LQ8Q,2084
 anemoi/datasets/create/functions/sources/opendap.py,sha256=T0CPinscfafrVLaye5ue-PbiCNbcNqf_3m6pphN9rCU,543
 anemoi/datasets/create/functions/sources/recentre.py,sha256=t07LIXG3Hp9gmPkPriILVt86TxubsHyS1EL1lzwgtXY,1810
 anemoi/datasets/create/functions/sources/source.py,sha256=hPQnV_6UIxFw97uRKcTA8TplcgG1kC8NlFHoEaaLet4,1418
 anemoi/datasets/create/functions/sources/tendencies.py,sha256=kwS_GZt8R9kpfs5RrvxPb0Gj-5nDP0sgJgfSRCAwwww,4057
-anemoi/datasets/create/statistics/__init__.py,sha256=
+anemoi/datasets/create/statistics/__init__.py,sha256=eXyOdlgXBt6QdVWM7ZVyUWdFMv6iNsFefkjvOVvZAlQ,11010
 anemoi/datasets/create/statistics/summary.py,sha256=sgmhA24y3VRyjmDUgTnPIqcHSlWBbFA0qynx6gJ9Xw8,3370
 anemoi/datasets/data/__init__.py,sha256=to9L_RZVQ4OgyHUpX6lcvt4GqJdZjBa5HCTaWx1aGKo,1046
 anemoi/datasets/data/concat.py,sha256=AkpyOs16OjW7X0cdyYFQfWSCV6dteXBp-x9WlokO-DI,3550
@@ -56,8 +56,8 @@ anemoi/datasets/data/grids.py,sha256=rooOeR6rvjl4U8B4LO3N23fcgxvGE7ZUmhVryk1QS4M
 anemoi/datasets/data/indexing.py,sha256=625m__JG5m_tDMrkz1hB6Vydenwt0oHuyAlc-o3Zwos,4799
 anemoi/datasets/data/join.py,sha256=dtCBbMTicqrRPxfBULi3RwEcQBLhQpIcvCjdN5A3XUU,4892
 anemoi/datasets/data/masked.py,sha256=czAv1ZfZ9q6Wr4RqI2Xj8SEm7yoCgJrwMl-CPDs_wSI,3857
-anemoi/datasets/data/misc.py,sha256=
-anemoi/datasets/data/select.py,sha256=
+anemoi/datasets/data/misc.py,sha256=tuNsUY06nWh3Raf_RCi8bzCXsMB4t2hOuIkNGV4epj8,10501
+anemoi/datasets/data/select.py,sha256=Oje3KG1shRawjuBy2-GM8s_Nk_68l-uujvx5SGW0tUM,3781
 anemoi/datasets/data/statistics.py,sha256=lZCcKw9s7ttMBEp6ANyxtbXoZZvchhE7SClq-D4AUR8,1645
 anemoi/datasets/data/stores.py,sha256=yy914zMHIYKm5q6mHOqGeK0dC_26VFeqKLXyb7x9NXE,11190
 anemoi/datasets/data/subset.py,sha256=9urVTXdnwCgqn0_BRYquMi8oiXn4ubAf0n4586hWfKw,3814
@@ -65,9 +65,9 @@ anemoi/datasets/data/unchecked.py,sha256=xhdMg-ToI1UfBWHNsWyn1y2meZWngZtHx-33L0K
 anemoi/datasets/dates/__init__.py,sha256=4ItowfLLh90T8L_JOjtv98lE6M7gAaWt7dV3niUrFvk,4473
 anemoi/datasets/dates/groups.py,sha256=iq310Pi7ullglOhcNblv14MmcT8FPgYCD5s45qAfV_s,3383
 anemoi/datasets/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-anemoi_datasets-0.3.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-anemoi_datasets-0.3.7.dist-info/METADATA,sha256=
-anemoi_datasets-0.3.7.dist-info/WHEEL,sha256=
-anemoi_datasets-0.3.7.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
-anemoi_datasets-0.3.7.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
-anemoi_datasets-0.3.7.dist-info/RECORD,,
+anemoi_datasets-0.3.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+anemoi_datasets-0.3.9.dist-info/METADATA,sha256=Gmz0Y5ihJDyIqDTTMbO-1s3QYBmbe1vsKvSqt0mh6_0,16019
+anemoi_datasets-0.3.9.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
+anemoi_datasets-0.3.9.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
+anemoi_datasets-0.3.9.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
+anemoi_datasets-0.3.9.dist-info/RECORD,,
{anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/LICENSE
File without changes

{anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/entry_points.txt
File without changes

{anemoi_datasets-0.3.7.dist-info → anemoi_datasets-0.3.9.dist-info}/top_level.txt
File without changes