anemoi-datasets 0.5.28__py3-none-any.whl → 0.5.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/create/__init__.py +4 -12
- anemoi/datasets/create/config.py +50 -53
- anemoi/datasets/create/input/result/field.py +1 -3
- anemoi/datasets/create/sources/accumulate.py +517 -0
- anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
- anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
- anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +153 -0
- anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
- anemoi/datasets/create/sources/grib_index.py +79 -51
- anemoi/datasets/create/sources/mars.py +56 -27
- anemoi/datasets/create/sources/xarray_support/__init__.py +1 -0
- anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
- anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
- anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
- anemoi/datasets/data/complement.py +26 -17
- anemoi/datasets/data/dataset.py +6 -0
- anemoi/datasets/data/masked.py +74 -13
- anemoi/datasets/data/missing.py +5 -0
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/METADATA +8 -7
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/RECORD +25 -23
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/sources/accumulations.py +0 -1042
- anemoi/datasets/create/sources/accumulations2.py +0 -618
- anemoi/datasets/create/sources/tendencies.py +0 -171
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/top_level.txt +0 -0
|
@@ -10,13 +10,14 @@
|
|
|
10
10
|
|
|
11
11
|
import logging
|
|
12
12
|
from typing import Any
|
|
13
|
+
from typing import Literal
|
|
13
14
|
|
|
14
15
|
import xarray as xr
|
|
15
16
|
|
|
16
17
|
LOG = logging.getLogger(__name__)
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
def patch_attributes(ds: xr.Dataset, attributes: dict[str, dict[str, Any]]) ->
|
|
20
|
+
def patch_attributes(ds: xr.Dataset, attributes: dict[str, dict[str, Any]]) -> xr.Dataset:
|
|
20
21
|
"""Patch the attributes of the dataset.
|
|
21
22
|
|
|
22
23
|
Parameters
|
|
@@ -38,7 +39,7 @@ def patch_attributes(ds: xr.Dataset, attributes: dict[str, dict[str, Any]]) -> A
|
|
|
38
39
|
return ds
|
|
39
40
|
|
|
40
41
|
|
|
41
|
-
def patch_coordinates(ds: xr.Dataset, coordinates: list[str]) ->
|
|
42
|
+
def patch_coordinates(ds: xr.Dataset, coordinates: list[str]) -> xr.Dataset:
|
|
42
43
|
"""Patch the coordinates of the dataset.
|
|
43
44
|
|
|
44
45
|
Parameters
|
|
@@ -59,7 +60,7 @@ def patch_coordinates(ds: xr.Dataset, coordinates: list[str]) -> Any:
|
|
|
59
60
|
return ds
|
|
60
61
|
|
|
61
62
|
|
|
62
|
-
def patch_rename(ds: xr.Dataset, renames: dict[str, str]) ->
|
|
63
|
+
def patch_rename(ds: xr.Dataset, renames: dict[str, str]) -> xr.Dataset:
|
|
63
64
|
"""Rename variables in the dataset.
|
|
64
65
|
|
|
65
66
|
Parameters
|
|
@@ -77,7 +78,7 @@ def patch_rename(ds: xr.Dataset, renames: dict[str, str]) -> Any:
|
|
|
77
78
|
return ds.rename(renames)
|
|
78
79
|
|
|
79
80
|
|
|
80
|
-
def patch_sort_coordinate(ds: xr.Dataset, sort_coordinates: list[str]) ->
|
|
81
|
+
def patch_sort_coordinate(ds: xr.Dataset, sort_coordinates: list[str]) -> xr.Dataset:
|
|
81
82
|
"""Sort the coordinates of the dataset.
|
|
82
83
|
|
|
83
84
|
Parameters
|
|
@@ -98,11 +99,175 @@ def patch_sort_coordinate(ds: xr.Dataset, sort_coordinates: list[str]) -> Any:
|
|
|
98
99
|
return ds
|
|
99
100
|
|
|
100
101
|
|
|
102
|
+
def patch_subset_dataset(ds: xr.Dataset, selection: dict[str, Any]) -> xr.Dataset:
    """Return the subset of ``ds`` obtained by label-based selection.

    The selection is forwarded unchanged to ``ds.sel``.

    Parameters
    ----------
    ds : xr.Dataset
        The dataset to patch.
    selection : dict[str, Any]
        Mapping passed as the single positional argument of ``ds.sel``.
        Keys are expected to name dimensions of the dataset; values may be
        anything xarray's ``sel`` accepts (single labels, lists or arrays of
        labels, ``slice`` objects, ...).

    Returns
    -------
    xr.Dataset
        Whatever ``ds.sel(selection)`` returns, i.e. the selected subset.

    Examples
    --------
    >>> # Select specific time and pressure level
    >>> patch_subset_dataset(ds, {
    ...     'time': '2020-01-01',
    ...     'pressure': 500
    ... })

    >>> # Select a range using slice
    >>> patch_subset_dataset(ds, {
    ...     'lat': slice(-90, 90),
    ...     'lon': slice(0, 180)
    ... })
    """
    return ds.sel(selection)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def patch_analysis_lead_to_valid_time(
    ds: xr.Dataset,
    time_coord_names: dict[Literal["analysis_time_coordinate", "lead_time_coordinate", "valid_time_coordinate"], str],
) -> xr.Dataset:
    """Convert analysis time and lead time coordinates to valid time.

    This function creates a new valid time coordinate by adding the analysis time
    and lead time coordinates, then stacks and reorganizes the dataset to use
    valid time as the primary time dimension.

    Parameters
    ----------
    ds : xr.Dataset
        The dataset to patch.
    time_coord_names : dict[str, str]
        Dictionary mapping required keys to coordinate names in the dataset:

        - 'analysis_time_coordinate' : str
            Name of the analysis/initialization time coordinate.
        - 'lead_time_coordinate' : str
            Name of the forecast lead time coordinate.
        - 'valid_time_coordinate' : str
            Name for the new valid time coordinate to create.

    Returns
    -------
    xr.Dataset
        The patched dataset with valid time as the primary time coordinate.
        The analysis and lead time coordinates are removed.

    Raises
    ------
    ValueError
        If ``time_coord_names`` does not contain exactly the three required keys.

    Examples
    --------
    >>> patch_analysis_lead_to_valid_time(ds, {
    ...     'analysis_time_coordinate': 'forecast_reference_time',
    ...     'lead_time_coordinate': 'step',
    ...     'valid_time_coordinate': 'time'
    ... })
    """

    required_keys = {
        "analysis_time_coordinate",
        "lead_time_coordinate",
        "valid_time_coordinate",
    }
    # Raise explicitly instead of using ``assert``: assertions are removed when
    # Python runs with -O, which would let a malformed configuration through.
    if set(time_coord_names) != required_keys:
        raise ValueError(
            "time_coord_names must contain exactly keys 'analysis_time_coordinate', 'lead_time_coordinate', and 'valid_time_coordinate'"
        )

    analysis_time_coordinate = time_coord_names["analysis_time_coordinate"]
    lead_time_coordinate = time_coord_names["lead_time_coordinate"]
    valid_time_coordinate = time_coord_names["valid_time_coordinate"]

    valid_time = ds[analysis_time_coordinate] + ds[lead_time_coordinate]

    ds = (
        ds.assign_coords({valid_time_coordinate: valid_time})
        # Collapse the (analysis, lead) pair into a single stacked dimension ...
        .stack(time_index=[analysis_time_coordinate, lead_time_coordinate])
        # ... index that dimension by the computed valid time ...
        .set_index(time_index=valid_time_coordinate)
        # ... and give the dimension the requested valid-time name.
        .rename(time_index=valid_time_coordinate)
        .drop_vars([analysis_time_coordinate, lead_time_coordinate])
    )

    return ds
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def patch_rolling_operation(
    ds: xr.Dataset, vars_operation_config: dict[Literal["dim", "steps", "vars", "operation"], str | int | list[str]]
) -> xr.Dataset:
    """Apply a rolling operation to specified variables in the dataset.

    This function calculates a rolling operation over a specified dimension for selected
    variables. The rolling window requires all periods to be present (min_periods=steps).

    Parameters
    ----------
    ds : xr.Dataset
        The dataset to patch. The selected variables are replaced in place
        (``ds[var] = ...``) and the same object is returned.
    vars_operation_config: dict
        Configuration for the rolling operation with the following keys:

        - 'dim' : str
            The dimension along which to apply the rolling operation (e.g., 'time').
        - 'steps' : int
            The number of steps in the rolling window.
        - 'vars' : list[str]
            List of variable names to apply the rolling operation to.
        - 'operation' : str
            The operation to apply ('sum', 'mean', 'min', 'max', 'std', etc.).
            It is looked up by name on the rolling object.

    Returns
    -------
    xr.Dataset
        The patched dataset with rolling operations applied to the specified variables.

    Raises
    ------
    ValueError
        If ``vars_operation_config`` does not contain exactly the four required keys.

    Examples
    --------
    >>> patch_rolling_operation(ds, {
    ...     'dim': 'time',
    ...     'steps': 3,
    ...     'vars': ['precipitation', 'radiation'],
    ...     'operation': 'sum'
    ... })
    """

    required_keys = {"dim", "steps", "vars", "operation"}
    # Raise explicitly instead of using ``assert``: assertions are removed when
    # Python runs with -O, which would let a malformed configuration through.
    if set(vars_operation_config) != required_keys:
        raise ValueError("vars_operation_config must contain exactly keys 'dim', 'steps', 'vars', and 'operation'")

    dim = vars_operation_config["dim"]
    steps = vars_operation_config["steps"]
    # Named ``variable_names`` rather than ``vars`` to avoid shadowing the builtin.
    variable_names = vars_operation_config["vars"]
    operation = vars_operation_config["operation"]

    for var in variable_names:
        rolling = ds[var].rolling(dim={dim: steps}, min_periods=steps)
        # Dispatch by name, e.g. operation == "sum" calls rolling.sum().
        ds[var] = getattr(rolling, operation)()

    return ds
|
|
261
|
+
|
|
262
|
+
|
|
101
263
|
# Registry of the available patches: maps the patch name used as a key of the
# patch configuration to the function that implements it. ``patch_dataset``
# rejects any name that is not listed here, and sorts the requested patches
# with ``ORDER.index``, so every name added here must be added to ``ORDER`` too.
PATCHES = {
    "attributes": patch_attributes,
    "coordinates": patch_coordinates,
    "rename": patch_rename,
    "sort_coordinates": patch_sort_coordinate,
    "analysis_lead_to_valid_time": patch_analysis_lead_to_valid_time,
    "rolling_operation": patch_rolling_operation,
    "subset_dataset": patch_subset_dataset,
}
|
|
107
272
|
|
|
108
273
|
|
|
@@ -122,7 +287,15 @@ def patch_dataset(ds: xr.Dataset, patch: dict[str, dict[str, Any]]) -> Any:
|
|
|
122
287
|
The patched dataset.
|
|
123
288
|
"""
|
|
124
289
|
|
|
125
|
-
ORDER = [
|
|
290
|
+
ORDER = [
|
|
291
|
+
"coordinates",
|
|
292
|
+
"attributes",
|
|
293
|
+
"rename",
|
|
294
|
+
"sort_coordinates",
|
|
295
|
+
"subset_dataset",
|
|
296
|
+
"analysis_lead_to_valid_time",
|
|
297
|
+
"rolling_operation",
|
|
298
|
+
]
|
|
126
299
|
for what, values in sorted(patch.items(), key=lambda x: ORDER.index(x[0])):
|
|
127
300
|
if what not in PATCHES:
|
|
128
301
|
raise ValueError(f"Unknown patch type {what!r}")
|
|
@@ -293,21 +293,29 @@ class ComplementNearest(Complement):
|
|
|
293
293
|
index, previous = update_tuple(index, variable_index, slice(None))
|
|
294
294
|
source_index = [self._source.name_to_index[x] for x in self.variables[previous]]
|
|
295
295
|
source_data = self._source[index[0], source_index, index[2], ...]
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
296
|
+
if any(self._nearest_grid_points >= source_data.shape[-1]):
|
|
297
|
+
target_shape = source_data.shape[:-1] + self._target.shape[-1:]
|
|
298
|
+
target_data = np.full(target_shape, np.nan, dtype=self._target.dtype)
|
|
299
|
+
cond = self._nearest_grid_points < source_data.shape[-1]
|
|
300
|
+
reachable = np.where(cond)[0]
|
|
301
|
+
nearest_reachable = self._nearest_grid_points[cond]
|
|
302
|
+
target_data[..., reachable] = source_data[..., nearest_reachable]
|
|
303
|
+
result = target_data[..., index[3]]
|
|
304
|
+
else:
|
|
305
|
+
target_data = source_data[..., self._nearest_grid_points]
|
|
306
|
+
epsilon = 1e-8 # prevent division by zero
|
|
307
|
+
weights = 1.0 / (self._distances + epsilon)
|
|
308
|
+
weights = weights.astype(target_data.dtype)
|
|
309
|
+
weights /= weights.sum(axis=1, keepdims=True) # normalize
|
|
310
|
+
|
|
311
|
+
# Reshape weights to broadcast correctly
|
|
312
|
+
# Add leading singleton dimensions so it matches target_data shape
|
|
313
|
+
while weights.ndim < target_data.ndim:
|
|
314
|
+
weights = np.expand_dims(weights, axis=0)
|
|
315
|
+
|
|
316
|
+
# Compute weighted average along the last dimension
|
|
317
|
+
final_point = np.sum(target_data * weights, axis=-1)
|
|
318
|
+
result = final_point[..., index[3]]
|
|
311
319
|
|
|
312
320
|
return apply_index_to_slices_changes(result, changes)
|
|
313
321
|
|
|
@@ -353,8 +361,9 @@ def complement_factory(args: tuple, kwargs: dict) -> Dataset:
|
|
|
353
361
|
}[interpolation]
|
|
354
362
|
|
|
355
363
|
if interpolation == "nearest":
|
|
356
|
-
k = kwargs.pop("k",
|
|
357
|
-
|
|
364
|
+
k = kwargs.pop("k", 1)
|
|
365
|
+
max_distance = kwargs.pop("max_distance", None)
|
|
366
|
+
complement = Class(target=target, source=source, k=k, max_distance=max_distance)._subset(**kwargs)
|
|
358
367
|
|
|
359
368
|
else:
|
|
360
369
|
complement = Class(target=target, source=source)._subset(**kwargs)
|
anemoi/datasets/data/dataset.py
CHANGED
|
@@ -245,6 +245,12 @@ class Dataset(ABC, Sized):
|
|
|
245
245
|
|
|
246
246
|
return Statistics(self, open_dataset(statistics))._subset(**kwargs).mutate()
|
|
247
247
|
|
|
248
|
+
if "mask" in kwargs:
|
|
249
|
+
from .masked import Masking
|
|
250
|
+
|
|
251
|
+
mask_file = kwargs.pop("mask")
|
|
252
|
+
return Masking(self, mask_file)._subset(**kwargs).mutate()
|
|
253
|
+
|
|
248
254
|
# Note: trim_edge should go before thinning
|
|
249
255
|
if "trim_edge" in kwargs:
|
|
250
256
|
from .masked import TrimEdge
|
anemoi/datasets/data/masked.py
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import logging
|
|
12
12
|
from functools import cached_property
|
|
13
|
+
from pathlib import Path
|
|
13
14
|
from typing import Any
|
|
14
15
|
|
|
15
16
|
import numpy as np
|
|
@@ -66,6 +67,12 @@ class Masked(Forwards):
|
|
|
66
67
|
"""Get the masked longitudes."""
|
|
67
68
|
return self.forward.longitudes[self.mask]
|
|
68
69
|
|
|
70
|
+
@property
def grids(self) -> TupleIndex:
    """Return the grid size after masking, as a one-element tuple.

    The single entry is ``np.sum(self.mask)``, i.e. the number of points
    kept when the mask is boolean.
    """
    return (np.sum(self.mask),)
|
|
75
|
+
|
|
69
76
|
@debug_indexing
|
|
70
77
|
def __getitem__(self, index: FullIndex) -> NDArray[Any]:
|
|
71
78
|
"""Get the masked data at the specified index.
|
|
@@ -150,19 +157,9 @@ class Thinning(Masked):
|
|
|
150
157
|
if len(shape) != 2:
|
|
151
158
|
raise ValueError("Thinning only works latitude/longitude fields")
|
|
152
159
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
latitudes = forward_latitudes.reshape(shape)
|
|
158
|
-
longitudes = forward_longitudes.reshape(shape)
|
|
159
|
-
latitudes = latitudes[::thinning, ::thinning].flatten()
|
|
160
|
-
longitudes = longitudes[::thinning, ::thinning].flatten()
|
|
161
|
-
|
|
162
|
-
# TODO: This is not very efficient
|
|
163
|
-
|
|
164
|
-
mask = [lat in latitudes and lon in longitudes for lat, lon in zip(forward_latitudes, forward_longitudes)]
|
|
165
|
-
mask = np.array(mask, dtype=bool)
|
|
160
|
+
mask = np.full(shape, False, dtype=bool)
|
|
161
|
+
mask[::thinning, ::thinning] = True
|
|
162
|
+
mask = mask.flatten()
|
|
166
163
|
else:
|
|
167
164
|
mask = None
|
|
168
165
|
|
|
@@ -200,6 +197,70 @@ class Thinning(Masked):
|
|
|
200
197
|
"""
|
|
201
198
|
return dict(thinning=self.thinning, method=self.method)
|
|
202
199
|
|
|
200
|
+
@property
def field_shape(self) -> Shape:
    """Return the two-dimensional field shape after thinning.

    Without thinning (``self.thinning is None``) the shape of the forwarded
    dataset is returned unchanged. Otherwise each of the two extents is
    divided by the thinning step, rounding up, which is the number of
    elements a ``[::thinning]`` slice keeps.
    """
    step = self.thinning
    if step is None:
        return self.forward.field_shape

    first, second = self.forward.field_shape
    # (n + step - 1) // step is the integer ceiling of n / step.
    return (first + step - 1) // step, (second + step - 1) // step
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class Masking(Masked):
    """A class that applies a precomputed boolean mask from a .npy file."""

    def __init__(self, forward: Dataset, mask_file: str) -> None:
        """Initialize the Masking class.

        Parameters
        ----------
        forward : Dataset
            The dataset to be masked.
        mask_file : str
            Path to a .npy file containing a boolean mask of same shape as fields.

        Raises
        ------
        FileNotFoundError
            If ``mask_file`` does not exist.
        ValueError
            If the file cannot be loaded, does not hold boolean values, or its
            shape differs from ``forward.field_shape``.
        """
        self.mask_file = mask_file

        # Check path
        if not Path(self.mask_file).exists():
            raise FileNotFoundError(f"Mask file not found: {self.mask_file}")

        # Load mask
        try:
            mask = np.load(self.mask_file)
        except Exception as e:
            # Any loading failure is reported uniformly as a ValueError; the
            # original exception is chained so the root cause stays visible.
            raise ValueError(f"Could not load data from {mask_file}: {e}") from e

        if mask.dtype != bool:
            raise ValueError(f"Mask file {mask_file} does not contain boolean values.")
        if mask.shape != forward.field_shape:
            raise ValueError(f"Mask length {mask.shape} does not match field size {forward.field_shape}.")
        # ``mask.any()`` reduces over every axis. The builtin ``sum`` only
        # reduces the first axis, so for a multi-dimensional mask the
        # comparison with 0 yields an array whose truth value is ambiguous.
        if not mask.any():
            LOG.warning("Mask in %s eliminates all points in field.", mask_file)

        super().__init__(forward, mask)

    def tree(self) -> Node:
        """Get the tree representation of the dataset.

        Returns
        -------
        Node
            The tree representation of the dataset.
        """
        return Node(self, [self.forward.tree()], mask_file=self.mask_file)

    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
        """Get the metadata specific to the Masking subclass.

        Returns
        -------
        Dict[str, Any]
            The metadata specific to the Masking subclass.
        """
        return dict(mask_file=self.mask_file)
|
|
263
|
+
|
|
203
264
|
|
|
204
265
|
class Cropping(Masked):
|
|
205
266
|
"""A class to represent a cropped dataset."""
|
anemoi/datasets/data/missing.py
CHANGED
|
@@ -440,3 +440,8 @@ class MissingDataset(Forwards):
|
|
|
440
440
|
Metadata specific to the subclass.
|
|
441
441
|
"""
|
|
442
442
|
return {"start": self.start, "end": self.end}
|
|
443
|
+
|
|
444
|
+
@property
def shape(self) -> tuple[int, ...]:
    """Return the shape of the dataset.

    The leading entry is ``len(self)``; the remaining entries are taken
    from the forwarded dataset's shape with its first entry dropped.
    """
    leading = len(self)
    trailing = self.forward.shape[1:]
    return (leading,) + trailing
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: anemoi-datasets
|
|
3
|
-
Version: 0.5.28
|
|
3
|
+
Version: 0.5.30
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -216,26 +216,27 @@ Classifier: Intended Audience :: Developers
|
|
|
216
216
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
217
217
|
Classifier: Operating System :: OS Independent
|
|
218
218
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
219
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
220
219
|
Classifier: Programming Language :: Python :: 3.11
|
|
221
220
|
Classifier: Programming Language :: Python :: 3.12
|
|
222
221
|
Classifier: Programming Language :: Python :: 3.13
|
|
223
222
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
224
223
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
225
|
-
Requires-Python:
|
|
224
|
+
Requires-Python: <3.14,>=3.11
|
|
226
225
|
License-File: LICENSE
|
|
227
|
-
Requires-Dist: anemoi-transform>=0.1.
|
|
228
|
-
Requires-Dist: anemoi-utils
|
|
226
|
+
Requires-Dist: anemoi-transform>=0.1.12
|
|
227
|
+
Requires-Dist: anemoi-utils>=0.4.26
|
|
229
228
|
Requires-Dist: cfunits
|
|
230
229
|
Requires-Dist: glom
|
|
231
230
|
Requires-Dist: jsonschema
|
|
232
231
|
Requires-Dist: numcodecs<0.16
|
|
233
232
|
Requires-Dist: numpy
|
|
233
|
+
Requires-Dist: pytest>=8.4.1
|
|
234
|
+
Requires-Dist: pytest-xdist>=3.7
|
|
234
235
|
Requires-Dist: pyyaml
|
|
235
|
-
Requires-Dist: ruamel-yaml
|
|
236
|
+
Requires-Dist: ruamel-yaml>=0.16
|
|
236
237
|
Requires-Dist: semantic-version
|
|
237
238
|
Requires-Dist: tqdm
|
|
238
|
-
Requires-Dist: zarr<=2.18.
|
|
239
|
+
Requires-Dist: zarr<=2.18.7
|
|
239
240
|
Provides-Extra: all
|
|
240
241
|
Requires-Dist: anemoi-datasets[comparelam,create,remote,xarray]; extra == "all"
|
|
241
242
|
Provides-Extra: comparelam
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
anemoi/datasets/__init__.py,sha256=SPozLbLUFiQ9rtFVRlgeOobFUiFzj-jGo85Tt4YMJp4,1041
|
|
2
2
|
anemoi/datasets/__main__.py,sha256=ErwAqE3rBc7OaNO2JRsEOhWpB8ldjAt7BFSuRhbnlqQ,936
|
|
3
|
-
anemoi/datasets/_version.py,sha256=
|
|
3
|
+
anemoi/datasets/_version.py,sha256=nlDBoPHNkA36lz3zWwFvhf44VnMEtmYAEf4dXn6AUnU,706
|
|
4
4
|
anemoi/datasets/check.py,sha256=hbEMUurl2IjZbp56dBgOfAEsAmmgymgRM5ySaMJSTdk,2755
|
|
5
5
|
anemoi/datasets/dumper.py,sha256=Jud4qGooSQjJcPHsJrrYiJ909nM-hvJGXEDK4kfZ0k4,2505
|
|
6
6
|
anemoi/datasets/grids.py,sha256=ugJZznQ4frWH2qlYzV5ds4QLbzsGHwI_q5erVxocFxE,13926
|
|
@@ -30,10 +30,10 @@ anemoi/datasets/commands/recipe/format.py,sha256=roq7cXu-7ymjcxQ3IwLOV7beQkJzXvx
|
|
|
30
30
|
anemoi/datasets/commands/recipe/migrate.py,sha256=tBEj9E2nltxrFsvmWeiddg630_YIHTR2E4xToXDqx4M,15547
|
|
31
31
|
anemoi/datasets/compute/__init__.py,sha256=hCW0QcLHJmE-C1r38P27_ZOvCLNewex5iQEtZqx2ckI,393
|
|
32
32
|
anemoi/datasets/compute/recentre.py,sha256=XEyXZIIbSpmmSfuQWikLd562Ml48VdhWHAFK6xGtbeI,5835
|
|
33
|
-
anemoi/datasets/create/__init__.py,sha256=
|
|
33
|
+
anemoi/datasets/create/__init__.py,sha256=ZX7OStgc0ioSVnxdqjZW2EQfnpbgs8Bti8q0y98S-Is,51399
|
|
34
34
|
anemoi/datasets/create/check.py,sha256=w8_AhuWxqRjI1c0SdGgfFHcW5rYmBVQuyoS3ncsF9TI,10724
|
|
35
35
|
anemoi/datasets/create/chunks.py,sha256=MBEXTClQyOuYtl96ho8IqQr75hpnWbKe5Li5l6nx_48,3994
|
|
36
|
-
anemoi/datasets/create/config.py,sha256=
|
|
36
|
+
anemoi/datasets/create/config.py,sha256=b2Efl7pNYXz3PyAWTSnGakeHTSbiA5BSDawQnpVU6sE,13276
|
|
37
37
|
anemoi/datasets/create/patch.py,sha256=4852mUbEfM1GCUxIUq56gr6RI_lk8kuWCkG8N80OOqc,5367
|
|
38
38
|
anemoi/datasets/create/persistent.py,sha256=L8Ttk8J4rMWRYgzkFGb5QN4h0tDSizYBXNflCWoEZow,7779
|
|
39
39
|
anemoi/datasets/create/size.py,sha256=2AaD8toBkGO_l8m8UaYX5LSTS4UkU5ULdZAREXZ1JbE,1399
|
|
@@ -52,10 +52,9 @@ anemoi/datasets/create/input/trace.py,sha256=kVZI1eilg857sKRweGyyDZYb2IjT15a_KWp
|
|
|
52
52
|
anemoi/datasets/create/input/context/__init__.py,sha256=WKdIJv6ywkw7EmSLMxD5k0R-7N9sYrAxL3MF6UieLps,2263
|
|
53
53
|
anemoi/datasets/create/input/context/field.py,sha256=0NWkDYqyDX90gVrBZwz7y2sbICxBp38OUmNdisMFg1U,1605
|
|
54
54
|
anemoi/datasets/create/input/result/__init__.py,sha256=rER6c5hoVxCwPgbrIef74h2DT-V2CqdetOzp5gGD7ww,494
|
|
55
|
-
anemoi/datasets/create/input/result/field.py,sha256
|
|
55
|
+
anemoi/datasets/create/input/result/field.py,sha256=qcqqPEJBG7JmQFOHdFWM4RxGaEV75CWKVVols__T9zo,21607
|
|
56
56
|
anemoi/datasets/create/sources/__init__.py,sha256=XNiiGaC6NbxnGfl6glPw-gTJASi3vsGKwVlfkMqYGk4,950
|
|
57
|
-
anemoi/datasets/create/sources/
|
|
58
|
-
anemoi/datasets/create/sources/accumulations2.py,sha256=ytOURbfIdp2S95X1DLqmjxbK_wsW4QE8qS9kv-LCGRI,19940
|
|
57
|
+
anemoi/datasets/create/sources/accumulate.py,sha256=VKQRTFmNaaHhqQ2PwKNgZExdr8F7LHnZ2Tkfm6Ft1ZU,20183
|
|
59
58
|
anemoi/datasets/create/sources/anemoi_dataset.py,sha256=mu9URoxLj3zOX9FwSCvWTezQr_AgigJ2BMG5LKTy8Eg,2286
|
|
60
59
|
anemoi/datasets/create/sources/constants.py,sha256=rZ7pGal4ein6ta0CeyWUitgODo9Jhudx2OejPiiqiBQ,1665
|
|
61
60
|
anemoi/datasets/create/sources/eccc_fstd.py,sha256=8HK38f444HcWMvBhooP0XqTfMXYoCbN_8G9RI_Ne5rc,659
|
|
@@ -63,10 +62,10 @@ anemoi/datasets/create/sources/empty.py,sha256=QQzbI2NJ0Z4X9DvECKJkqDiCO_SfzpBUp
|
|
|
63
62
|
anemoi/datasets/create/sources/fdb.py,sha256=AkxvW7vZqJs2AbwYEzfkU5j3Udk4gTaGx2Pbwsa5qNk,4396
|
|
64
63
|
anemoi/datasets/create/sources/forcings.py,sha256=hy1oyC1Zjg1uzO2UWfNiTJXfQRaM_B8vs8X7GXXO4Nc,1356
|
|
65
64
|
anemoi/datasets/create/sources/grib.py,sha256=G1qLyZYZxUFr54IIrRQbeKTnpkZ5zjFH16_f-m6gurY,5060
|
|
66
|
-
anemoi/datasets/create/sources/grib_index.py,sha256=
|
|
65
|
+
anemoi/datasets/create/sources/grib_index.py,sha256=yHiT6ZSZLid6OJ74WPzI5UaebnIaAvL1ZMZmEiPnhk0,20725
|
|
67
66
|
anemoi/datasets/create/sources/hindcasts.py,sha256=4QuTSbaP5SfGFOdRSqm4jf__r7tMFx0lgPOlOVC6BEg,2773
|
|
68
67
|
anemoi/datasets/create/sources/legacy.py,sha256=Lv8MgFvdpP6ygu6_wGI47dpq7nuvuSbaHJAiUGqC4z8,1254
|
|
69
|
-
anemoi/datasets/create/sources/mars.py,sha256=
|
|
68
|
+
anemoi/datasets/create/sources/mars.py,sha256=OQVgdikBP__CYEPc82ClKgm-dxC9Pqsd44w55Vz1VJo,14625
|
|
70
69
|
anemoi/datasets/create/sources/netcdf.py,sha256=6PlDpskth2xn_ad7rCzhH5GOqwps6bCRq7zFdkoGJsQ,1372
|
|
71
70
|
anemoi/datasets/create/sources/opendap.py,sha256=ZvMqTgcDl8k_CY8CLzxaW79cBxvVOo0e9Jd1bhLGBoA,1382
|
|
72
71
|
anemoi/datasets/create/sources/patterns.py,sha256=dLCWz7nJtqMI4v9Qytiun-iUs4Lkv-4i7pbA19hoQe8,2243
|
|
@@ -74,28 +73,31 @@ anemoi/datasets/create/sources/planetary_computer.py,sha256=Erk6fKJt63gj_pgbklBW
|
|
|
74
73
|
anemoi/datasets/create/sources/recentre.py,sha256=hgBQFbgDXvr0DfFejdnLYkU7Hqs9634F7Sg3VMxIPk0,4155
|
|
75
74
|
anemoi/datasets/create/sources/repeated_dates.py,sha256=IM3YFep2ClC8eTJk7GGkIpyUO_gM3UqwPO87oJTXhg4,1730
|
|
76
75
|
anemoi/datasets/create/sources/source.py,sha256=Srf_68kXrkk5N8lnWmrazcsKVUw5PAhv5Ry-5-OAxRM,1643
|
|
77
|
-
anemoi/datasets/create/sources/tendencies.py,sha256=BpZXKAZC9BsG8V1heoS7ai1IIIFhh0Hng_j-gW8mnMA,5159
|
|
78
76
|
anemoi/datasets/create/sources/xarray.py,sha256=6c2Ss2I795Y3Xj9Qbr4Voiwcq7SaZFLaq1G_5lh_kAs,2491
|
|
79
77
|
anemoi/datasets/create/sources/xarray_kerchunk.py,sha256=vdFaFzze8VLjYUgIX8Lc39ELvwmgfT3ioyxBHAt4nrs,1136
|
|
80
78
|
anemoi/datasets/create/sources/xarray_zarr.py,sha256=tOcA1rUam0rwAYSIXe2QB9cSNW--NCLlGK_Ou78FIec,1360
|
|
81
79
|
anemoi/datasets/create/sources/zenodo.py,sha256=6NvK5KLMtb39a-YBs44sgPyc2k9NZU6oeYNbU8Lh98g,2259
|
|
80
|
+
anemoi/datasets/create/sources/accumulate_utils/__init__.py,sha256=iLLlOculEHrloIO13MSrYGEMNBZ1vVwK9x9rxKXLK-M,393
|
|
81
|
+
anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py,sha256=Hm0pIATDl9zK_tAjDkhxlRa9yUUbaHKpHH_0igSv3Tk,7642
|
|
82
|
+
anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py,sha256=maqgUvWVyNRJPsHvD1aMTB0D5QelmznoR5C6Wmeesk4,5909
|
|
83
|
+
anemoi/datasets/create/sources/accumulate_utils/interval_generators.py,sha256=vqtHiKFUavnK26uUYdOOsPbUcyzgUnwxhQ_cydooNyU,13092
|
|
82
84
|
anemoi/datasets/create/sources/xarray_support/README.md,sha256=56olM9Jh0vI0_bU9GI-IqbBcz4DZXWONqvdzN_VeAFE,78
|
|
83
|
-
anemoi/datasets/create/sources/xarray_support/__init__.py,sha256=
|
|
84
|
-
anemoi/datasets/create/sources/xarray_support/coordinates.py,sha256=
|
|
85
|
+
anemoi/datasets/create/sources/xarray_support/__init__.py,sha256=nYmNSwOX1g8JEEsnV9jnlK1jRbUqdGnvoL-AuqHNx7E,4943
|
|
86
|
+
anemoi/datasets/create/sources/xarray_support/coordinates.py,sha256=hi8sqmeRnCX2g_e_EcXQ55DjN-MkpdFjcWkOl-oGD4g,11042
|
|
85
87
|
anemoi/datasets/create/sources/xarray_support/field.py,sha256=lo5V7pMcVt3j_nbntt7mweGCXkUGNANq35stW69zrCo,6550
|
|
86
88
|
anemoi/datasets/create/sources/xarray_support/fieldlist.py,sha256=BqLeP8ObTd3Ze7O6jsFJqX9aXD7E352vIT8ZtySv9_0,8158
|
|
87
|
-
anemoi/datasets/create/sources/xarray_support/flavour.py,sha256=
|
|
89
|
+
anemoi/datasets/create/sources/xarray_support/flavour.py,sha256=ckYmUaenRLOb_e6EZFHw2bKQ3sMMW5g6BdHlzSWP858,33338
|
|
88
90
|
anemoi/datasets/create/sources/xarray_support/grid.py,sha256=4VLpWBCkFaRErFsjW8AzY4L-xeSoMYoQhxRor3hQqs8,6496
|
|
89
91
|
anemoi/datasets/create/sources/xarray_support/metadata.py,sha256=CR9MYneZH49kJw-G1Ie2Vhevs-fliPUzKohiMfO-sWs,7838
|
|
90
|
-
anemoi/datasets/create/sources/xarray_support/patch.py,sha256=
|
|
92
|
+
anemoi/datasets/create/sources/xarray_support/patch.py,sha256=M7YJQSzMc2KeBWYytGrcbVPDSSHLWGfydgfROwYPidI,8634
|
|
91
93
|
anemoi/datasets/create/sources/xarray_support/time.py,sha256=S-tvA5QZIMh0wQnQa51FotK9oxE8JTpUqfSYimF_Hwo,12401
|
|
92
94
|
anemoi/datasets/create/sources/xarray_support/variable.py,sha256=8Cekc5YOpZx_MNFEHMM2cE91ydjY-YBUhB7wX17wkVs,9288
|
|
93
95
|
anemoi/datasets/create/statistics/__init__.py,sha256=xe8WE7xezM6EDhRywOGRmK1c8mim1k6FhXnEw8yHnBQ,18199
|
|
94
96
|
anemoi/datasets/create/statistics/summary.py,sha256=JdtChTmsr1Y958_nka36HltTbeZkawuGbprbfZD7Ux8,4790
|
|
95
97
|
anemoi/datasets/data/__init__.py,sha256=WKWn_k4bKRUbzRO-5Mp89gV6DeuBuMb00WisD_M-ypI,3200
|
|
96
|
-
anemoi/datasets/data/complement.py,sha256=
|
|
98
|
+
anemoi/datasets/data/complement.py,sha256=Cx-mFoRPgNTEagrvawj7FcnNPM_VxKHkT8InCPZJoCQ,12533
|
|
97
99
|
anemoi/datasets/data/concat.py,sha256=ArOLD7mrZUw6yzaObSzNWlMKZWH74gaYBx0AvjSCFps,8921
|
|
98
|
-
anemoi/datasets/data/dataset.py,sha256=
|
|
100
|
+
anemoi/datasets/data/dataset.py,sha256=yeYckknSYYD6uz407Qe8OIPGv6CNqpw0Ea7qni6acmY,32578
|
|
99
101
|
anemoi/datasets/data/debug.css,sha256=z2X_ZDSnZ9C3pyZPWnQiEyAxuMxUaxJxET4oaCImTAQ,211
|
|
100
102
|
anemoi/datasets/data/debug.py,sha256=DUG_Rr5sYbXENSFMbtDZuG4IEwvDF-EoqF45z16tHso,10747
|
|
101
103
|
anemoi/datasets/data/ensemble.py,sha256=N_43HcgcvFmYnU3afyobjx9nZIGtb5WDTVSDXFQPwWE,5303
|
|
@@ -105,10 +107,10 @@ anemoi/datasets/data/grids.py,sha256=HbahQBCS1MIH-lj2PEwtZlROAhWHMaxsUT9hGC4bvPE
|
|
|
105
107
|
anemoi/datasets/data/indexing.py,sha256=5JhiRTBUrj2tOmhSmrFyugw_kOOIKOdtZnoq2Cfgj_k,7444
|
|
106
108
|
anemoi/datasets/data/interpolate.py,sha256=n1klwVIwIyBSnMtjOqPh09MkhVqO5CTCH6xYkaLVhlM,9099
|
|
107
109
|
anemoi/datasets/data/join.py,sha256=EjgnjXKNbEUVaQEMzoJ7sv63vApwhDtkcJutxhZ1H5g,9110
|
|
108
|
-
anemoi/datasets/data/masked.py,sha256=
|
|
110
|
+
anemoi/datasets/data/masked.py,sha256=epmbI-fgp3hMPSkCvfd4pKxLxoKY8yDlVQvCanWLwF4,11894
|
|
109
111
|
anemoi/datasets/data/merge.py,sha256=zdF7-OKouN-iE3-Km-g5MZfyARg85IepQWQrSeUsG7w,8637
|
|
110
112
|
anemoi/datasets/data/misc.py,sha256=iGbZg6EEheJlIH-AECTLnf-30lXaQXG-ngfbwFIt5CM,22862
|
|
111
|
-
anemoi/datasets/data/missing.py,sha256=
|
|
113
|
+
anemoi/datasets/data/missing.py,sha256=MGRhxw7XtB6o9a5-BJhno9sSDVGFz6pNPNBdzcXGpNA,12612
|
|
112
114
|
anemoi/datasets/data/padded.py,sha256=sunn3QRThlfUHJOGE04s008F-v7kT7gY_i2E6wulCdk,7834
|
|
113
115
|
anemoi/datasets/data/rescale.py,sha256=xTj2Vn8YuyB1KWW5y37B2HHilTVdCcuNCnYHoiQFRBg,6859
|
|
114
116
|
anemoi/datasets/data/rolling_average.py,sha256=Ng7Te_8bHp7wOH_ARnvqtLI0Weze-uXLO3FP60gaJlU,5107
|
|
@@ -126,9 +128,9 @@ anemoi/datasets/data/records/backends/__init__.py,sha256=u894d7duXMiGTOQh5WfuxTs
|
|
|
126
128
|
anemoi/datasets/dates/__init__.py,sha256=4WFEG8tujrXOT6nbpy6BufBqcqGfECnvG42rmxgLh9w,13476
|
|
127
129
|
anemoi/datasets/dates/groups.py,sha256=bdA6YjFtNlgFAXiov5-zFHZ3C_QtUrdQiSFgb2zWxVM,10034
|
|
128
130
|
anemoi/datasets/schemas/recipe.json,sha256=UvfOQYKcTz-OrJv4is-qe-rhUNBrrmpcpXF32jB0Oz4,3208
|
|
129
|
-
anemoi_datasets-0.5.
|
|
130
|
-
anemoi_datasets-0.5.
|
|
131
|
-
anemoi_datasets-0.5.
|
|
132
|
-
anemoi_datasets-0.5.
|
|
133
|
-
anemoi_datasets-0.5.
|
|
134
|
-
anemoi_datasets-0.5.
|
|
131
|
+
anemoi_datasets-0.5.30.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
|
|
132
|
+
anemoi_datasets-0.5.30.dist-info/METADATA,sha256=7jOQpPGb_2fRLu2Ov0dVcTvWDZoriS-X-JNDITuBXdk,16238
|
|
133
|
+
anemoi_datasets-0.5.30.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
134
|
+
anemoi_datasets-0.5.30.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
|
|
135
|
+
anemoi_datasets-0.5.30.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
|
|
136
|
+
anemoi_datasets-0.5.30.dist-info/RECORD,,
|