anemoi-datasets 0.4.5__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/create.py +3 -2
- anemoi/datasets/commands/inspect.py +1 -1
- anemoi/datasets/commands/publish.py +30 -0
- anemoi/datasets/create/__init__.py +72 -35
- anemoi/datasets/create/check.py +6 -0
- anemoi/datasets/create/config.py +4 -3
- anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
- anemoi/datasets/create/functions/filters/rename.py +2 -3
- anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
- anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
- anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
- anemoi/datasets/create/functions/sources/__init__.py +7 -1
- anemoi/datasets/create/functions/sources/accumulations.py +2 -0
- anemoi/datasets/create/functions/sources/grib.py +87 -2
- anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
- anemoi/datasets/create/functions/sources/mars.py +9 -3
- anemoi/datasets/create/functions/sources/xarray/__init__.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +6 -1
- anemoi/datasets/create/functions/sources/xarray/field.py +20 -5
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +16 -16
- anemoi/datasets/create/functions/sources/xarray/flavour.py +126 -12
- anemoi/datasets/create/functions/sources/xarray/grid.py +106 -17
- anemoi/datasets/create/functions/sources/xarray/metadata.py +6 -12
- anemoi/datasets/create/functions/sources/xarray/time.py +1 -5
- anemoi/datasets/create/functions/sources/xarray/variable.py +10 -10
- anemoi/datasets/create/input/__init__.py +69 -0
- anemoi/datasets/create/input/action.py +123 -0
- anemoi/datasets/create/input/concat.py +92 -0
- anemoi/datasets/create/input/context.py +59 -0
- anemoi/datasets/create/input/data_sources.py +71 -0
- anemoi/datasets/create/input/empty.py +42 -0
- anemoi/datasets/create/input/filter.py +76 -0
- anemoi/datasets/create/input/function.py +122 -0
- anemoi/datasets/create/input/join.py +57 -0
- anemoi/datasets/create/input/misc.py +85 -0
- anemoi/datasets/create/input/pipe.py +33 -0
- anemoi/datasets/create/input/repeated_dates.py +217 -0
- anemoi/datasets/create/input/result.py +413 -0
- anemoi/datasets/create/input/step.py +99 -0
- anemoi/datasets/create/{template.py → input/template.py} +0 -42
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/statistics/__init__.py +1 -1
- anemoi/datasets/create/utils.py +3 -0
- anemoi/datasets/create/zarr.py +4 -2
- anemoi/datasets/data/dataset.py +11 -1
- anemoi/datasets/data/debug.py +5 -1
- anemoi/datasets/data/masked.py +2 -2
- anemoi/datasets/data/rescale.py +147 -0
- anemoi/datasets/data/stores.py +20 -7
- anemoi/datasets/dates/__init__.py +113 -30
- anemoi/datasets/dates/groups.py +92 -19
- anemoi/datasets/fields.py +66 -0
- anemoi/datasets/utils/fields.py +47 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/METADATA +10 -19
- anemoi_datasets-0.5.5.dist-info/RECORD +121 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/input.py +0 -1065
- anemoi_datasets-0.4.5.dist-info/RECORD +0 -96
- /anemoi/datasets/create/{trace.py → input/trace.py} +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.4.5.dist-info → anemoi_datasets-0.5.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# (C) Copyright 2024 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
import logging
|
|
10
|
+
from copy import deepcopy
|
|
11
|
+
|
|
12
|
+
from anemoi.utils.dates import as_datetime as as_datetime
|
|
13
|
+
from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta
|
|
14
|
+
|
|
15
|
+
from anemoi.datasets.dates import DatesProvider as DatesProvider
|
|
16
|
+
from anemoi.datasets.fields import FieldArray as FieldArray
|
|
17
|
+
from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField
|
|
18
|
+
|
|
19
|
+
from .action import Action
|
|
20
|
+
from .context import Context
|
|
21
|
+
from .misc import is_function
|
|
22
|
+
from .result import Result
|
|
23
|
+
from .template import notify_result
|
|
24
|
+
from .trace import trace_datasource
|
|
25
|
+
from .trace import trace_select
|
|
26
|
+
|
|
27
|
+
LOG = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class StepResult(Result):
    """Result produced by a step that sits on top of an upstream result.

    The instance keeps a reference to the action that created it and to the
    result of the previous step. The ``datasource`` property defined here
    always raises ``NotImplementedError``, so it is presumably meant to be
    overridden by subclasses.
    """

    def __init__(self, context, action_path, group_of_dates, action, upstream_result):
        super().__init__(context, action_path, group_of_dates)
        # A step can only be chained after something that is itself a Result.
        assert isinstance(upstream_result, Result), type(upstream_result)
        self.action = action
        self.upstream_result = upstream_result

    @property
    @notify_result
    @trace_datasource
    def datasource(self):
        """Placeholder: raise ``NotImplementedError`` naming the concrete class."""
        raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class StepAction(Action):
    """Action that is applied after a previous step.

    ``result_class`` is ``None`` on this class; ``select`` calls it, so it has
    to be set to a callable (presumably a ``StepResult`` subclass) by
    subclasses before ``select`` is used.
    """

    result_class = None

    def __init__(self, context, action_path, previous_step, *args, **kwargs):
        super().__init__(context, action_path, *args, **kwargs)
        self.previous_step = previous_step

    @trace_select
    def select(self, group_of_dates):
        """Return a ``result_class`` instance wrapping the previous step's selection.

        The previous step is asked to ``select`` the same ``group_of_dates``
        and its result is handed over as the upstream result.
        """
        make_result = self.result_class
        context, path = self.context, self.action_path
        upstream = self.previous_step.select(group_of_dates)
        return make_result(context, path, group_of_dates, self, upstream)

    def __repr__(self):
        """Delegate to the parent ``__repr__``, passing the previous step and the kwargs as text."""
        inline = str(self.kwargs)
        return super().__repr__(self.previous_step, _inline_=inline)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def step_factory(config, context, action_path, previous_step):
    """Build the step action described by a single-entry configuration dict.

    The only key of ``config`` names the step. ``"filter"`` maps to
    ``FilterStepAction``; any other key must be accepted by
    ``is_function(key, "filters")`` and is then handled by
    ``FunctionStepAction``, which receives the key as its first positional
    argument.

    The value attached to the key provides the arguments of the action:
    a list is used as positional arguments, a dict as keyword arguments and
    a string as a single positional argument.

    Raises
    ------
    ValueError
        If ``config`` is not a dict, if the key does not name a known step,
        or if the value is not a list, a dict or a string.
    """
    # Kept as function-level imports, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from .filter import FilterStepAction
    from .filter import FunctionStepAction

    assert isinstance(context, Context), type(context)
    if not isinstance(config, dict):
        raise ValueError(f"Invalid input config {config}")

    # Work on a private copy of the configuration.
    config = deepcopy(config)
    assert len(config) == 1, config

    key, value = next(iter(config.items()))

    cls = dict(
        filter=FilterStepAction,
        # rename=RenameAction,
        # remapping=RemappingAction,
    ).get(key)

    use_function = cls is None
    if use_function and not is_function(key, "filters"):
        raise ValueError(f"Unknown step {key}")

    if isinstance(value, list):
        args, kwargs = value, {}
    elif isinstance(value, dict):
        args, kwargs = [], value
    elif isinstance(value, str):
        args, kwargs = [value], {}
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``args``/``kwargs``.
        raise ValueError(
            f"Invalid arguments for step {key}: expected a list, a dict or a string, "
            f"got {type(value).__name__} ({value!r})"
        )

    if use_function:
        cls = FunctionStepAction
        # The generic function action needs to know which filter to call.
        args = [key] + args

    return cls(context, action_path, previous_step, *args, **kwargs)
|
|
@@ -9,14 +9,8 @@
|
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
11
|
import re
|
|
12
|
-
import textwrap
|
|
13
12
|
from functools import wraps
|
|
14
13
|
|
|
15
|
-
from anemoi.utils.humanize import plural
|
|
16
|
-
|
|
17
|
-
from .trace import step
|
|
18
|
-
from .trace import trace
|
|
19
|
-
|
|
20
14
|
LOG = logging.getLogger(__name__)
|
|
21
15
|
|
|
22
16
|
|
|
@@ -30,42 +24,6 @@ def notify_result(method):
|
|
|
30
24
|
return wrapper
|
|
31
25
|
|
|
32
26
|
|
|
33
|
-
class Context:
|
|
34
|
-
def __init__(self):
|
|
35
|
-
# used_references is a set of reference paths that will be needed
|
|
36
|
-
self.used_references = set()
|
|
37
|
-
# results is a dictionary of reference path -> obj
|
|
38
|
-
self.results = {}
|
|
39
|
-
|
|
40
|
-
def will_need_reference(self, key):
|
|
41
|
-
assert isinstance(key, (list, tuple)), key
|
|
42
|
-
key = tuple(key)
|
|
43
|
-
self.used_references.add(key)
|
|
44
|
-
|
|
45
|
-
def notify_result(self, key, result):
|
|
46
|
-
trace(
|
|
47
|
-
"🎯",
|
|
48
|
-
step(key),
|
|
49
|
-
"notify result",
|
|
50
|
-
textwrap.shorten(repr(result).replace(",", ", "), width=40),
|
|
51
|
-
plural(len(result), "field"),
|
|
52
|
-
)
|
|
53
|
-
assert isinstance(key, (list, tuple)), key
|
|
54
|
-
key = tuple(key)
|
|
55
|
-
if key in self.used_references:
|
|
56
|
-
if key in self.results:
|
|
57
|
-
raise ValueError(f"Duplicate result {key}")
|
|
58
|
-
self.results[key] = result
|
|
59
|
-
|
|
60
|
-
def get_result(self, key):
|
|
61
|
-
assert isinstance(key, (list, tuple)), key
|
|
62
|
-
key = tuple(key)
|
|
63
|
-
if key in self.results:
|
|
64
|
-
return self.results[key]
|
|
65
|
-
all_keys = sorted(list(self.results.keys()))
|
|
66
|
-
raise ValueError(f"Cannot find result {key} in {all_keys}")
|
|
67
|
-
|
|
68
|
-
|
|
69
27
|
class Substitution:
|
|
70
28
|
pass
|
|
71
29
|
|
|
@@ -68,7 +68,7 @@ class PersistentDict:
|
|
|
68
68
|
path = os.path.join(self.dirname, f"{h}.pickle")
|
|
69
69
|
|
|
70
70
|
if os.path.exists(path):
|
|
71
|
-
LOG.
|
|
71
|
+
LOG.warning(f"{path} already exists")
|
|
72
72
|
|
|
73
73
|
tmp_path = path + f".tmp-{os.getpid()}-on-{socket.gethostname()}"
|
|
74
74
|
with open(tmp_path, "wb") as f:
|
|
@@ -155,7 +155,7 @@ def compute_statistics(array, check_variables_names=None, allow_nans=False):
|
|
|
155
155
|
check_data_values(values[j, :], name=name, allow_nans=allow_nans)
|
|
156
156
|
if np.isnan(values[j, :]).all():
|
|
157
157
|
# LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
|
|
158
|
-
|
|
158
|
+
LOG.warning(f"All NaN values for {name} ({j}) for date {i}")
|
|
159
159
|
|
|
160
160
|
# Ignore NaN values
|
|
161
161
|
minimum[i] = np.nanmin(values, axis=1)
|
anemoi/datasets/create/utils.py
CHANGED
|
@@ -62,6 +62,9 @@ def make_list_int(value):
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
def normalize_and_check_dates(dates, start, end, frequency, dtype="datetime64[s]"):
|
|
65
|
+
|
|
66
|
+
dates = [d.hdate if hasattr(d, "hdate") else d for d in dates]
|
|
67
|
+
|
|
65
68
|
assert isinstance(frequency, datetime.timedelta), frequency
|
|
66
69
|
start = np.datetime64(start)
|
|
67
70
|
end = np.datetime64(end)
|
anemoi/datasets/create/zarr.py
CHANGED
|
@@ -128,7 +128,7 @@ class ZarrBuiltRegistry:
|
|
|
128
128
|
def add_to_history(self, action, **kwargs):
|
|
129
129
|
new = dict(
|
|
130
130
|
action=action,
|
|
131
|
-
timestamp=datetime.datetime.
|
|
131
|
+
timestamp=datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
|
|
132
132
|
)
|
|
133
133
|
new.update(kwargs)
|
|
134
134
|
|
|
@@ -151,7 +151,9 @@ class ZarrBuiltRegistry:
|
|
|
151
151
|
|
|
152
152
|
def set_flag(self, i, value=True):
|
|
153
153
|
z = self._open_write()
|
|
154
|
-
z.attrs["latest_write_timestamp"] =
|
|
154
|
+
z.attrs["latest_write_timestamp"] = (
|
|
155
|
+
datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
|
|
156
|
+
)
|
|
155
157
|
z["_build"][self.name_flags][i] = value
|
|
156
158
|
|
|
157
159
|
def ready(self):
|
anemoi/datasets/data/dataset.py
CHANGED
|
@@ -23,7 +23,11 @@ LOG = logging.getLogger(__name__)
|
|
|
23
23
|
class Dataset:
|
|
24
24
|
arguments = {}
|
|
25
25
|
|
|
26
|
-
def mutate(self):
|
|
26
|
+
def mutate(self) -> "Dataset":
|
|
27
|
+
"""
|
|
28
|
+
Give an opportunity to a subclass to return a new Dataset
|
|
29
|
+
object of a different class, if needed.
|
|
30
|
+
"""
|
|
27
31
|
return self
|
|
28
32
|
|
|
29
33
|
def swap_with_parent(self, parent):
|
|
@@ -90,6 +94,12 @@ class Dataset:
|
|
|
90
94
|
rename = kwargs.pop("rename")
|
|
91
95
|
return Rename(self, rename)._subset(**kwargs).mutate()
|
|
92
96
|
|
|
97
|
+
if "rescale" in kwargs:
|
|
98
|
+
from .rescale import Rescale
|
|
99
|
+
|
|
100
|
+
rescale = kwargs.pop("rescale")
|
|
101
|
+
return Rescale(self, rescale)._subset(**kwargs).mutate()
|
|
102
|
+
|
|
93
103
|
if "statistics" in kwargs:
|
|
94
104
|
from ..data import open_dataset
|
|
95
105
|
from .statistics import Statistics
|
anemoi/datasets/data/debug.py
CHANGED
|
@@ -209,10 +209,14 @@ def _debug_indexing(method):
|
|
|
209
209
|
return wrapper
|
|
210
210
|
|
|
211
211
|
|
|
212
|
+
def _identity(x):
|
|
213
|
+
return x
|
|
214
|
+
|
|
215
|
+
|
|
212
216
|
if DEBUG_ZARR_INDEXING:
|
|
213
217
|
debug_indexing = _debug_indexing
|
|
214
218
|
else:
|
|
215
|
-
debug_indexing =
|
|
219
|
+
debug_indexing = _identity
|
|
216
220
|
|
|
217
221
|
|
|
218
222
|
def debug_zarr_loading(on_off):
|
anemoi/datasets/data/masked.py
CHANGED
|
@@ -112,5 +112,5 @@ class Cropping(Masked):
|
|
|
112
112
|
def tree(self):
|
|
113
113
|
return Node(self, [self.forward.tree()], area=self.area)
|
|
114
114
|
|
|
115
|
-
def
|
|
116
|
-
return
|
|
115
|
+
def subclass_metadata_specific(self):
|
|
116
|
+
return dict(area=self.area)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
|
|
2
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
4
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
|
+
# nor does it submit to any jurisdiction.
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from functools import cached_property
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from .debug import Node
|
|
14
|
+
from .debug import debug_indexing
|
|
15
|
+
from .forwards import Forwards
|
|
16
|
+
from .indexing import apply_index_to_slices_changes
|
|
17
|
+
from .indexing import expand_list_indexing
|
|
18
|
+
from .indexing import index_to_slices
|
|
19
|
+
from .indexing import update_tuple
|
|
20
|
+
|
|
21
|
+
LOG = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_rescale(variable, rescale):
    """Turn a rescaling specification into a ``(scale, offset)`` pair.

    Accepted forms of ``rescale``:

    * a two-item tuple or list whose first item is an ``int`` or ``float``:
      returned unchanged and used as ``(scale, offset)`` by the caller;
    * a two-item tuple or list of anything else: the items are passed to
      ``cfunits.Units`` as source and target units, and the linear
      conversion between them is returned;
    * a dict with the keys ``"scale"`` and ``"offset"``.

    ``variable`` is only used to make the error message more helpful.

    Raises
    ------
    ValueError
        If ``rescale`` is neither a tuple/list nor a dict.
    AssertionError
        If a tuple/list does not have exactly two items, or a dict lacks
        ``"scale"`` or ``"offset"``.
    """
    if isinstance(rescale, (tuple, list)):
        assert len(rescale) == 2, rescale

        if isinstance(rescale[0], (int, float)):
            return rescale

        # Optional dependency: only needed when units are given.
        from cfunits import Units

        u0 = Units(rescale[0])
        u1 = Units(rescale[1])

        # Convert two reference values and fit the line y = a * x + b
        # through the resulting points.
        x1, x2 = 0.0, 1.0
        y1, y2 = Units.conform([x1, x2], u0, u1)

        a = (y2 - y1) / (x2 - x1)
        b = y1 - a * x1

        return a, b

    if isinstance(rescale, dict):
        assert "scale" in rescale, rescale
        assert "offset" in rescale, rescale
        return rescale["scale"], rescale["offset"]

    # An explicit exception (instead of ``assert False``) still fires when
    # Python runs with -O and tells the user what was wrong.
    raise ValueError(
        f"Invalid rescale specification for variable {variable!r}: {rescale!r} "
        "(expected a 2-item tuple/list or a dict with 'scale' and 'offset')"
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Rescale(Forwards):
    """Dataset wrapper that applies ``value * scale + offset`` per variable.

    Variables that are not mentioned in ``rescale`` keep a scale of 1 and an
    offset of 0.
    """

    def __init__(self, dataset, rescale):
        super().__init__(dataset)
        for name in rescale:
            assert name in dataset.variables, name

        names = dataset.variables
        count = len(names)

        # Identity transform by default.
        scales = np.ones(count)
        offsets = np.zeros(count)

        self.rescale = {}
        for position, name in enumerate(names):
            if name not in rescale:
                continue
            scale, offset = make_rescale(name, rescale[name])
            self.rescale[name] = scale, offset
            scales[position], offsets[position] = scale, offset

        # Shape (1, n_variables, 1, 1) so the coefficients broadcast along
        # axis 1 of 4-dimensional data; same dtype as the wrapped dataset.
        self._a = scales.reshape(1, count, 1, 1).astype(self.forward.dtype)
        self._b = offsets.reshape(1, count, 1, 1).astype(self.forward.dtype)

    def tree(self):
        """Return a ``Node`` for this object with the wrapped dataset's tree as only child."""
        children = [self.forward.tree()]
        return Node(self, children, rescale=self.rescale)

    def subclass_metadata_specific(self):
        """Return the ``(scale, offset)`` pairs in use, under the key ``rescale``."""
        return dict(rescale=self.rescale)

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, index):
        """Handle tuple indexing.

        The wrapped dataset is read with a full slice on axis 1 (presumably
        the variable axis -- TODO confirm), the transform is applied, and the
        caller's original selection on that axis is applied afterwards.
        """
        index, changes = index_to_slices(index, self.shape)
        index, requested = update_tuple(index, 1, slice(None))
        values = self.forward[index] * self._a + self._b
        values = values[:, requested]
        return apply_index_to_slices_changes(values, changes)

    @debug_indexing
    def __get_slice_(self, n):
        """Handle slice indexing on the first axis."""
        return self.forward[n] * self._a + self._b

    @debug_indexing
    def __getitem__(self, n):
        """Return rescaled data for a tuple, a slice or any other (single) index."""
        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self.__get_slice_(n)

        # Single item: the first axis is gone, so drop it from the
        # coefficients as well.
        item = self.forward[n]
        return item * self._a[0] + self._b[0]

    @cached_property
    def statistics(self):
        """Return the wrapped dataset's statistics with the transform applied.

        ``maximum``, ``minimum`` and ``mean`` get scale and offset, ``stdev``
        only the scale. Scales are asserted to be non-negative; any other
        statistic raises ``NotImplementedError``.
        """
        scale = self._a.squeeze()
        assert np.all(scale >= 0)
        offset = self._b.squeeze()

        rescaled = {}
        for key, values in self.forward.statistics.items():
            if key in ("maximum", "minimum", "mean"):
                rescaled[key] = values * scale + offset
            elif key in ("stdev",):
                rescaled[key] = values * scale
            else:
                raise NotImplementedError("rescale statistics", key)

        return rescaled

    def statistics_tendencies(self, delta=None):
        """Return the wrapped dataset's tendencies statistics multiplied by the scale.

        No offset is applied. Scales are asserted to be non-negative; any
        statistic other than ``maximum``, ``minimum``, ``mean`` and ``stdev``
        raises ``NotImplementedError``.
        """
        scale = self._a.squeeze()
        assert np.all(scale >= 0)

        rescaled = {}
        for key, values in self.forward.statistics_tendencies(delta).items():
            if key not in ("maximum", "minimum", "mean", "stdev"):
                raise NotImplementedError("rescale tendencies statistics", key)
            rescaled[key] = values * scale

        return rescaled
|
anemoi/datasets/data/stores.py
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
6
|
# nor does it submit to any jurisdiction.
|
|
7
7
|
|
|
8
|
+
|
|
8
9
|
import logging
|
|
9
10
|
import os
|
|
10
11
|
import warnings
|
|
@@ -83,6 +84,8 @@ class S3Store(ReadOnlyStore):
|
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
class DebugStore(ReadOnlyStore):
|
|
87
|
+
"""A store to debug the zarr loading."""
|
|
88
|
+
|
|
86
89
|
def __init__(self, store):
|
|
87
90
|
assert not isinstance(store, DebugStore)
|
|
88
91
|
self.store = store
|
|
@@ -148,6 +151,8 @@ def open_zarr(path, dont_fail=False, cache=None):
|
|
|
148
151
|
|
|
149
152
|
|
|
150
153
|
class Zarr(Dataset):
|
|
154
|
+
"""A zarr dataset."""
|
|
155
|
+
|
|
151
156
|
def __init__(self, path):
|
|
152
157
|
if isinstance(path, zarr.hierarchy.Group):
|
|
153
158
|
self.was_zarr = True
|
|
@@ -244,14 +249,20 @@ class Zarr(Dataset):
|
|
|
244
249
|
delta = self.frequency
|
|
245
250
|
if isinstance(delta, int):
|
|
246
251
|
delta = f"{delta}h"
|
|
247
|
-
from anemoi.
|
|
252
|
+
from anemoi.utils.dates import frequency_to_string
|
|
253
|
+
from anemoi.utils.dates import frequency_to_timedelta
|
|
254
|
+
|
|
255
|
+
delta = frequency_to_timedelta(delta)
|
|
256
|
+
delta = frequency_to_string(delta)
|
|
257
|
+
|
|
258
|
+
def func(k):
|
|
259
|
+
return f"statistics_tendencies_{delta}_{k}"
|
|
248
260
|
|
|
249
|
-
func = TendenciesStatisticsAddition.final_storage_name_from_delta
|
|
250
261
|
return dict(
|
|
251
|
-
mean=self.z[func("mean"
|
|
252
|
-
stdev=self.z[func("stdev"
|
|
253
|
-
maximum=self.z[func("maximum"
|
|
254
|
-
minimum=self.z[func("minimum"
|
|
262
|
+
mean=self.z[func("mean")][:],
|
|
263
|
+
stdev=self.z[func("stdev")][:],
|
|
264
|
+
maximum=self.z[func("maximum")][:],
|
|
265
|
+
minimum=self.z[func("minimum")][:],
|
|
255
266
|
)
|
|
256
267
|
|
|
257
268
|
@property
|
|
@@ -322,11 +333,13 @@ class Zarr(Dataset):
|
|
|
322
333
|
|
|
323
334
|
|
|
324
335
|
class ZarrWithMissingDates(Zarr):
|
|
336
|
+
"""A zarr dataset with missing dates."""
|
|
337
|
+
|
|
325
338
|
def __init__(self, path):
|
|
326
339
|
super().__init__(path)
|
|
327
340
|
|
|
328
341
|
missing_dates = self.z.attrs.get("missing_dates", [])
|
|
329
|
-
missing_dates = [np.datetime64(x) for x in missing_dates]
|
|
342
|
+
missing_dates = set([np.datetime64(x) for x in missing_dates])
|
|
330
343
|
self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
|
|
331
344
|
self.missing = set(self.missing_to_dates)
|
|
332
345
|
|