mxalign 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mxalign/__init__.py +36 -0
- mxalign/accessors/__init__.py +7 -0
- mxalign/accessors/space.py +205 -0
- mxalign/accessors/time.py +180 -0
- mxalign/align/__init__.py +7 -0
- mxalign/align/nans.py +72 -0
- mxalign/align/space.py +21 -0
- mxalign/align/time.py +62 -0
- mxalign/cli.py +157 -0
- mxalign/interpolations/__init__.py +9 -0
- mxalign/interpolations/base.py +29 -0
- mxalign/interpolations/delaunay.py +218 -0
- mxalign/interpolations/interpolate.py +29 -0
- mxalign/interpolations/registry.py +17 -0
- mxalign/interpolations/xarray.py +63 -0
- mxalign/loaders/__init__.py +11 -0
- mxalign/loaders/anemoi_datasets.py +92 -0
- mxalign/loaders/anemoi_inference.py +103 -0
- mxalign/loaders/base.py +103 -0
- mxalign/loaders/harp_obstable.py +81 -0
- mxalign/loaders/loader.py +8 -0
- mxalign/loaders/registry.py +17 -0
- mxalign/properties/__init__.py +0 -0
- mxalign/properties/properties.py +25 -0
- mxalign/properties/specs.py +54 -0
- mxalign/properties/utils.py +43 -0
- mxalign/properties/validation.py +48 -0
- mxalign/runner.py +167 -0
- mxalign/transformations/__init__.py +7 -0
- mxalign/transformations/base.py +38 -0
- mxalign/transformations/external.py +34 -0
- mxalign/transformations/registry.py +20 -0
- mxalign/transformations/transform.py +28 -0
- mxalign/utils/config.py +55 -0
- mxalign/utils/dates.py +76 -0
- mxalign/utils/projections.py +104 -0
- mxalign/utils/save.py +62 -0
- mxalign/verification.py +57 -0
- mxalign-0.1.0.dist-info/METADATA +136 -0
- mxalign-0.1.0.dist-info/RECORD +43 -0
- mxalign-0.1.0.dist-info/WHEEL +4 -0
- mxalign-0.1.0.dist-info/entry_points.txt +2 -0
- mxalign-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from .registry import register_transformation
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@register_transformation("rename")
def transform_rename(ds, rename_dict):
    """Rename entries of ``ds`` using a ``new name -> old name(s)`` mapping.

    Parameters
    ----------
    ds
        Object exposing ``keys()`` and ``rename(mapping)``
        (presumably an ``xarray.Dataset`` -- confirm against callers).
    rename_dict : dict
        Maps each new name to the old name, or to a collection of candidate
        old names, that should be renamed to it.

    Returns
    -------
    The value returned by ``ds.rename`` for the names that are actually
    present in ``ds.keys()``.
    """
    mapping = {}
    for new_name, old_names in rename_dict.items():
        # A bare string would be tested with substring semantics
        # ("t" in "t2m" is True), so treat it as a single candidate name.
        if isinstance(old_names, str):
            old_names = [old_names]
        for name in ds.keys():
            if name in old_names:
                mapping[name] = new_name
    return ds.rename(mapping)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@register_transformation("kelvin_to_celcius")
def transform_kelvin_to_celcius(ds, variables, inverse=False):
    """Shift the selected variables by the Kelvin/Celsius offset (273.15).

    Parameters
    ----------
    ds
        Container supporting ``ds[name]`` reads and writes; it is modified
        in place and also returned.
    variables : str or iterable of str
        Name(s) of the entries to convert.
    inverse : bool, optional
        When false (default) 273.15 is subtracted; when true it is added.

    Returns
    -------
    The same ``ds`` object that was passed in.
    """
    T_C2K = 273.15
    offset = T_C2K if inverse else -T_C2K
    names = [variables] if isinstance(variables, str) else variables

    for name in names:
        ds[name] = ds[name] + offset

    return ds
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@register_transformation("uv_to_speed")
def transform(ds, u, v, speed):
    """Store ``sqrt(ds[u]**2 + ds[v]**2)`` in ``ds[speed]`` and return ``ds``.

    Parameters
    ----------
    ds
        Container supporting ``ds[name]`` reads and writes; modified in place.
    u, v
        Keys of the two components to combine.
    speed
        Key under which the result is stored.
    """
    import numpy as np

    u_squared = ds[u] ** 2
    v_squared = ds[v] ** 2
    ds[speed] = np.sqrt(u_squared + v_squared)
    return ds
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from .registry import register_transformation
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@register_transformation("external")
def transform(ds, func_path, inputs, output, **kwargs):
    """Call an externally defined function on entries of ``ds``.

    Parameters
    ----------
    ds
        Container supporting ``ds[name]`` reads/writes and exposing ``dims``.
    func_path : str
        Dotted path handed to ``_resolve_function``.
    inputs : dict
        Maps argument names of the external function to keys of ``ds``.
    output
        Key under which the result is stored in ``ds``.
    **kwargs
        Extra keyword arguments for the external function; they take
        precedence over entries built from ``inputs`` with the same name.

    Returns
    -------
    The same ``ds`` object, with ``ds[output]`` assigned.
    """
    func = _resolve_function(func_path)

    call_kwargs = {}
    for arg_name, var_name in inputs.items():
        call_kwargs[arg_name] = ds[var_name]
    call_kwargs.update(kwargs)

    result = func(**call_kwargs)
    # NOTE(review): the result is paired with *all* of ds.dims -- this assumes
    # the function returns data spanning every dimension of ds; confirm.
    ds[output] = (ds.dims, result)
    return ds
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _resolve_function(func_path):
    """Import and return the object named by a dotted path.

    Parameters
    ----------
    func_path : str
        Dotted path of the form ``"package.module.attribute"``. Everything
        before the last dot is imported as a module; the remainder is looked
        up as an attribute of that module.

    Returns
    -------
    The attribute found on the imported module.

    Raises
    ------
    ValueError
        If ``func_path`` contains no dot, so no module part can be derived.
    ImportError
        If the module part cannot be imported.
    AttributeError
        If the module has no attribute with the requested name.
    """
    import importlib

    module_path, separator, func_name = func_path.rpartition(".")
    if not separator:
        # Without this guard the failure is an opaque tuple-unpacking error.
        raise ValueError(
            f"Invalid function path '{func_path}'. "
            f"Expected a dotted path such as 'package.module.function'."
        )
    try:
        module = importlib.import_module(module_path)
    except ImportError as e:
        raise ImportError(
            f"Could not import module '{module_path}' required for transform '{func_path}'. "
            f"Make sure it is installed. Original error: {e}"
        ) from e
    try:
        return getattr(module, func_name)
    except AttributeError as e:
        raise AttributeError(
            f"Module '{module_path}' has no function '{func_name}'. "
            "Check the function name in your config."
        ) from e
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Module-level registry: transformation name -> callable stored by
# register_transformation() and looked up by get_transformation().
_TRANSFORMATION_REGISTRY = {}
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def register_transformation(name):
    """Return a decorator that records a function under ``name``.

    The decorated function is stored in ``_TRANSFORMATION_REGISTRY`` (any
    earlier entry with the same name is overwritten) and handed back
    unchanged.
    """

    def _register(target):
        _TRANSFORMATION_REGISTRY[name] = target
        return target

    return _register
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def available_transformations():
    """Return a new list with the names currently in the registry."""
    return list(_TRANSFORMATION_REGISTRY)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_transformation(name):
    """Return the callable registered under ``name``.

    Raises
    ------
    ValueError
        If nothing is registered under ``name``.
    """
    try:
        registered = _TRANSFORMATION_REGISTRY[name]
    except KeyError:
        raise ValueError(f"Unknown transformation: {name}")
    return registered
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from .registry import get_transformation
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def transform(name, datasets, *args, **kwargs):
    """Apply the registered transformation ``name`` to one or more datasets.

    Each dataset is copied with ``.copy()`` before being handed to the
    transformation, so the inputs themselves are not passed through.

    Parameters
    ----------
    name : str
        Name of a registered transformation (see ``get_transformation``).
    datasets : dict, list or single dataset
        Dataset(s) to transform.
    *args, **kwargs
        Forwarded to the transformation after the dataset.

    Returns
    -------
    dict
        When ``datasets`` is a dict: a dict with the same keys, whatever its
        size. (Previously a one-entry dict raised ``KeyError`` and an empty
        dict came back as a list.)
    list or single result
        Otherwise: the single result when exactly one dataset was processed,
        else the list of results.
    """
    # Distinct local name: the original rebound ``transform`` inside itself.
    func = get_transformation(name)

    if isinstance(datasets, dict):
        return {
            key: func(ds.copy(), *args, **kwargs) for key, ds in datasets.items()
        }

    if not isinstance(datasets, list):
        datasets = [datasets]
    results = [func(ds.copy(), *args, **kwargs) for ds in datasets]

    # Historical behaviour kept: a lone result is returned unwrapped.
    return results[0] if len(results) == 1 else results
|
mxalign/utils/config.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
|
|
3
|
+
from .dates import Dates
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_yaml(fn: str) -> dict:
    """Read the YAML file at ``fn`` and return ``yaml.safe_load``'s result.

    The file is opened explicitly as UTF-8 so parsing does not depend on the
    platform's default text encoding.
    """
    with open(fn, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Config:
|
|
12
|
+
def __init__(self, config: str | dict):
|
|
13
|
+
self.config = load_yaml(config) if isinstance(config, str) else config
|
|
14
|
+
if not isinstance(self.config, dict):
|
|
15
|
+
raise TypeError("config should be a dictionary.")
|
|
16
|
+
self.dates = self.config.pop("dates", None)
|
|
17
|
+
self._init_datasets()
|
|
18
|
+
print("Config initialized")
|
|
19
|
+
|
|
20
|
+
def __getitem__(self, key):
|
|
21
|
+
config = self.config.get(key, None)
|
|
22
|
+
if config:
|
|
23
|
+
return config.copy()
|
|
24
|
+
else:
|
|
25
|
+
return config
|
|
26
|
+
|
|
27
|
+
def __call__(self):
|
|
28
|
+
return self.config
|
|
29
|
+
|
|
30
|
+
def _init_datasets(self):
|
|
31
|
+
for key, loader in self.config["datasets"].items():
|
|
32
|
+
dates_loader = loader.pop("dates", None)
|
|
33
|
+
if self.dates:
|
|
34
|
+
if dates_loader:
|
|
35
|
+
keys_all = list(set(self.dates.keys()).union(dates_loader.keys()))
|
|
36
|
+
dates = {
|
|
37
|
+
key: (
|
|
38
|
+
dates_loader[key]
|
|
39
|
+
if key in dates_loader.keys()
|
|
40
|
+
else self.dates[key]
|
|
41
|
+
)
|
|
42
|
+
for key in keys_all
|
|
43
|
+
}
|
|
44
|
+
else:
|
|
45
|
+
dates = self.dates.copy()
|
|
46
|
+
else:
|
|
47
|
+
if dates_loader:
|
|
48
|
+
dates = dates_loader.copy()
|
|
49
|
+
else:
|
|
50
|
+
dates = None
|
|
51
|
+
|
|
52
|
+
if dates:
|
|
53
|
+
dates = Dates(**dates)
|
|
54
|
+
loader["files"] = dates.substitute(loader["files"])
|
|
55
|
+
self.config["datasets"][key] = loader
|
mxalign/utils/dates.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from earthkit.data.utils.patterns import Pattern
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Dates:
|
|
6
|
+
def __init__(
|
|
7
|
+
self,
|
|
8
|
+
start: str | np.datetime64,
|
|
9
|
+
end: str | np.datetime64,
|
|
10
|
+
period: str | np.timedelta64,
|
|
11
|
+
range: str | np.timedelta64,
|
|
12
|
+
step: str | np.timedelta64,
|
|
13
|
+
):
|
|
14
|
+
self._start = (
|
|
15
|
+
np.datetime64(start) if not isinstance(start, np.datetime64) else start
|
|
16
|
+
)
|
|
17
|
+
self._end = np.datetime64(end) if not isinstance(end, np.datetime64) else end
|
|
18
|
+
self._period = to_timedelta64(period) if isinstance(period, str) else period
|
|
19
|
+
self._range = to_timedelta64(range) if isinstance(range, str) else range
|
|
20
|
+
self._step = to_timedelta64(step) if isinstance(step, str) else step
|
|
21
|
+
valid_times = set()
|
|
22
|
+
lead_times = set()
|
|
23
|
+
reference_times = set()
|
|
24
|
+
date = self._start
|
|
25
|
+
while date <= self._end:
|
|
26
|
+
# print(date)
|
|
27
|
+
reference_times.add(date)
|
|
28
|
+
delta = np.timedelta64(0, "s")
|
|
29
|
+
while delta <= self._range:
|
|
30
|
+
valid_times.add(date + delta)
|
|
31
|
+
lead_times.add(delta)
|
|
32
|
+
delta += self._step
|
|
33
|
+
date += self._period
|
|
34
|
+
self.valid_times = list(valid_times)
|
|
35
|
+
self.reference_times = list(reference_times)
|
|
36
|
+
# FIXME: can we simplify this? earthkit.data.utils.patterns.Pattern does not accept np.int64
|
|
37
|
+
self.lead_times = sorted([int(t.astype(int)) for t in lead_times])
|
|
38
|
+
|
|
39
|
+
def substitute(self, path: str):
|
|
40
|
+
pattern = Pattern(path)
|
|
41
|
+
paths = pattern.substitute(
|
|
42
|
+
dict(reference_time=self.reference_times),
|
|
43
|
+
# dict(lead_time=self.lead_times),
|
|
44
|
+
# dict(valid_time=self.valid_times),
|
|
45
|
+
allow_extra=True,
|
|
46
|
+
)
|
|
47
|
+
return sorted(paths)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def to_timedelta64(freq: str) -> np.timedelta64:
    """
    Convert a frequency string to a numpy timedelta64 object.
    The frequency string should be in the format of an integer followed by a
    time unit, e.g. '1D', '2h', '3M', '500ms'.
    The time unit is passed to ``np.timedelta64`` unchanged and can be, for
    example, one of the following (units are case sensitive):
    - 'Y' for years
    - 'M' for months
    - 'W' for weeks
    - 'D' for days
    - 'h' for hours
    - 'm' for minutes
    - 's' for seconds
    - 'ms' for milliseconds

    Parameters
    ----------
    freq : str
        The frequency string to convert.

    Returns
    -------
    np.timedelta64
        The converted numpy timedelta64 object.

    Raises
    ------
    ValueError
        If ``freq`` is not an integer followed by a unit.
    """
    import re

    # Split on the number/letters boundary so multi-letter units such as
    # 'ms' are kept whole instead of assuming a one-character unit.
    match = re.fullmatch(r"\s*([+-]?\d+)\s*([A-Za-z\u00b5\u03bc]+)\s*", freq)
    if match is None:
        raise ValueError(
            f"Invalid frequency string '{freq}'. "
            "Expected an integer followed by a time unit, e.g. '6h'."
        )
    value, unit = match.groups()
    return np.timedelta64(int(value), unit)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import cartopy.crs as ccrs
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def create_cartopy_crs(projection, kws_projection, kws_globe=None) -> ccrs.Projection:
    """Create a Cartopy coordinate reference system (CRS) based on the specified projection.

    This function creates a Cartopy projection object using the provided projection name
    and associated keyword arguments.

    Parameters
    ----------
    projection : str
        Name of the projection to create. Must be one of the supported projections
        defined in PROJECTIONS.
    kws_projection : dict
        Dictionary of keyword arguments to pass to the projection constructor.
        It is copied before use; ``None`` or an empty dict means no extra arguments.
    kws_globe : dict, optional
        Optional globe parameters which will be used to create a ccrs.Globe object.
        When omitted or empty, ``globe=None`` is passed to the projection.

    Returns
    -------
    ccrs.Projection
        The created Cartopy projection object.

    Raises
    ------
    ValueError
        If the specified projection is not supported (not in PROJECTIONS).

    Examples
    --------
    >>> kws_projection = {'central_longitude': 0}
    >>> kws_globe = {'ellipse': 'WGS84'}
    >>> crs = create_cartopy_crs('latlon', kws_projection, kws_globe)
    """

    # - Get the cartopy projection (crs)
    try:
        projection_cls = PROJECTIONS[projection]
    except KeyError:
        raise ValueError(f"Unsupported projection: {projection}")

    # Work on a copy so the caller's dictionary is never shared with cartopy.
    kws_projection = dict(kws_projection) if kws_projection else {}

    # - Build the globe only when parameters were given; previously ``globe``
    #   was left unbound in that case and the call below failed.
    globe = ccrs.Globe(**kws_globe) if kws_globe else None

    return projection_cls(globe=globe, **kws_projection)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Supported projection names -> cartopy projection classes.
PROJECTIONS = {
    "lcc": ccrs.LambertConformal,
    "latlon": ccrs.PlateCarree,
    "PlateCarree": ccrs.PlateCarree,
    "Mercator": ccrs.Mercator,
    "Orthographic": ccrs.Orthographic,
}
|
|
58
|
+
|
|
59
|
+
# Predefined domain definitions: projection name, globe keywords, projection
# keywords and grid description, keyed by domain name.
BUILTIN = {
    "cerra": {
        "projection": "lcc",
        "kws_globe": {
            "semimajor_axis": 6371229.0,
            "semiminor_axis": 6371229.0,
        },
        "kws_projection": {
            "central_longitude": 8.0,
            "central_latitude": 50.0,
            "standard_parallels": [50.0, 50.0],
        },
        "kws_grid": {
            "lon_ll": -17.4859,
            "lat_ll": 20.2923,
            "lon_ur": 74.1051,
            "lat_ur": 63.7695,
            "dx": 5500.0,
            "dy": 5500.0,
            "nx": 1069,
            "ny": 1069,
        },
    },
    "uwcw": {
        "projection": "lcc",
        "kws_globe": {
            "semimajor_axis": 6371229.0,
            "semiminor_axis": 6371229.0,
        },
        "kws_projection": {
            "central_longitude": -1.96590281,
            "central_latitude": 55.5164337,
            "standard_parallels": [55.499996, 55.499996],
        },
        "kws_grid": {
            "lon_ll": -25.4470005,
            "lat_ll": 39.6389999,
            "lon_ur": 40.1508102,
            "lat_ur": 62.6713715,
            "dx": 2000.0,
            "dy": 2000.0,
            "nx": 1909,
            "ny": 1609,
        },
    },
}
|
mxalign/utils/save.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from earthkit.data.utils.patterns import Pattern
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DatasetPath:
    """Derive ``name``/``year``/``month``/``day`` values for an output path.

    The date parts are the most frequent year of the dataset's time
    coordinate, then the most frequent month within that year, then the most
    frequent day within that month. The coordinate used is
    ``"reference_time"`` when ``ds.time.is_forecast()`` is true and
    ``"valid_time"`` when ``ds.time.is_observation()`` is true.

    Parameters
    ----------
    name
        Value substituted for ``name`` in path patterns.
    ds
        Dataset exposing the ``time`` accessor and the coordinate above
        (presumably an ``xarray.Dataset`` -- confirm against callers).

    Raises
    ------
    ValueError
        If the dataset is neither a forecast nor an observation. Previously
        the date attributes were silently left unset and ``substitute``
        failed later with ``AttributeError``.
    """

    def __init__(self, name, ds):
        self.name = name
        if ds.time.is_forecast():
            time_name = "reference_time"
        elif ds.time.is_observation():
            time_name = "valid_time"
        else:
            raise ValueError(
                "Cannot derive a date for the output path: dataset is neither "
                "a forecast nor an observation."
            )
        self.year, self.month, self.day = self._dominant_date(ds, time_name)

    @staticmethod
    def _dominant_date(ds, time_name):
        """Return the most frequent (year, month, day) along ``time_name``."""
        components = ("year", "month", "day")
        subset = ds
        parts = []
        for position, component in enumerate(components):
            times = subset[time_name]
            counts = times.groupby(getattr(times.dt, component)).count()
            value = int(counts.isel({component: counts.argmax()})[component].values)
            parts.append(value)
            # Narrow to the winning value before looking at the next, finer
            # component; nothing follows the day, so skip the last selection.
            if position < len(components) - 1:
                subset = subset.sel(
                    {time_name: getattr(subset[time_name].dt, component) == value}
                )
        return tuple(parts)

    def substitute(self, path: str):
        """Fill ``path`` (an earthkit ``Pattern``) with name, year, month, day.

        ``allow_extra=True`` is passed through to ``Pattern.substitute``;
        its return value is returned unchanged.
        """
        pattern = Pattern(path)
        path = pattern.substitute(
            dict(name=self.name),
            dict(year=self.year),
            dict(month=self.month),
            dict(day=self.day),
            allow_extra=True,
        )
        return path
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def save_dataset(method, name, ds, **kwargs):
    """Write ``ds`` using its ``method`` attribute to a date-derived path.

    The required ``path`` keyword is treated as a pattern and filled in via
    ``DatasetPath(name, ds).substitute``; the remaining keyword arguments are
    forwarded to the save method together with the resulting path.
    """
    writer = getattr(ds, method)
    target = DatasetPath(name, ds).substitute(kwargs.pop("path"))
    print(f"Saving to {target}")
    writer(target, **kwargs)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def save_metrics(method, ds, **kwargs):
    """Write ``ds`` using its ``method`` attribute to the given ``path``.

    ``path`` is a required keyword argument; it is removed from ``kwargs``
    and passed positionally, followed by the remaining keyword arguments.
    """
    writer = getattr(ds, method)
    target = kwargs.pop("path")
    print(f"Saving to {target}")
    writer(target, **kwargs)
|
mxalign/verification.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from .transformations.external import _resolve_function
|
|
2
|
+
from functools import partial
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Metric:
    """A verification function with its reference inputs bound in advance.

    Parameters
    ----------
    name
        Label stored on the instance as ``name``.
    func_path : str
        Dotted path resolved with ``_resolve_function``.
    ds_ref
        Reference dataset, bound to every argument whose entry in ``inputs``
        equals ``"reference"``.
    inputs : dict
        Maps argument names of the function to a dataset role. Exactly one
        argument must have a role other than ``"reference"``; it receives
        the dataset given to ``compute``.
    **kwargs
        Extra keyword arguments bound to the function. ``dim`` (if present)
        is also used to rechunk inputs for functions whose module name
        starts with ``"xskillscore"``.

    Raises
    ------
    ValueError
        If ``inputs`` defines more than one, or no, predictor argument.
    """

    def __init__(self, name, func_path, ds_ref, inputs, **kwargs):
        self.name = name
        func = _resolve_function(func_path)
        # Some callables have no (or a None) __module__; treat as "not xskillscore".
        module_name = getattr(func, "__module__", None) or ""
        self._is_xskillscore = module_name.startswith("xskillscore")
        self._dim = kwargs.get("dim", None)

        reference_kwargs = {}
        predictor_args = []
        for input_arg, ds_type in inputs.items():
            if ds_type == "reference":
                reference_kwargs[input_arg] = (
                    self._rechunk(ds_ref) if self._is_xskillscore else ds_ref
                )
            else:
                predictor_args.append(input_arg)
        if len(predictor_args) > 1:
            raise ValueError(
                f"More than one predictor-input argument defined for function {func_path}"
            )
        if not predictor_args:
            # Previously surfaced as a bare IndexError from ``kwarg_ds[0]``.
            raise ValueError(
                f"No predictor-input argument defined for function {func_path}"
            )
        partial_kwargs = {**reference_kwargs, **kwargs}
        self._func = partial(func, **partial_kwargs)
        self._kwarg_ds = predictor_args[0]

    def compute(self, ds):
        """Call the bound function with ``ds`` as the predictor argument."""
        if self._is_xskillscore:
            ds = self._rechunk(ds)
        kwarg_ds = {self._kwarg_ds: ds}
        return self._func(**kwarg_ds)

    def _rechunk(self, ds):
        """Return ``ds`` chunked for a reduction over ``dim``.

        Dimensions listed in ``dim`` get chunk size ``-1`` and every other
        dimension of ``ds`` gets chunk size ``1``. ``ds`` is returned
        unchanged when no ``dim`` was given.
        """
        if self._dim is None:
            return ds
        dim = [self._dim] if isinstance(self._dim, str) else self._dim
        chunks = {d: -1 for d in dim}
        for d in ds.dims:
            if d not in dim:
                chunks[d] = 1
        return ds.chunk(chunks)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def verify(fcst, obs, func_path, inputs, **kwargs):
    """Call the function at ``func_path`` on the forecast and observation.

    Parameters
    ----------
    fcst, obs
        Datasets made available under the roles ``"forecast"`` and
        ``"observation"``.
    func_path : str
        Dotted path resolved with ``_resolve_function``.
    inputs : dict
        Maps argument names of the function to one of the two roles.
    **kwargs
        Extra keyword arguments for the function; they take precedence over
        entries built from ``inputs`` with the same name.

    Returns
    -------
    Whatever the resolved function returns.
    """
    func = _resolve_function(func_path=func_path)
    by_role = {"forecast": fcst, "observation": obs}

    call_kwargs = {}
    for arg_name, ds_type in inputs.items():
        call_kwargs[arg_name] = by_role[ds_type]
    call_kwargs.update(kwargs)

    return func(**call_kwargs)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mxalign
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An xarray-based package for alignment of meteorological datasets
|
|
5
|
+
Author-email: Michiel Van Ginderachter <michiel.vanginderachter@meteo.be>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: bokeh>=3.8.2
|
|
9
|
+
Requires-Dist: cartopy>=0.25.0
|
|
10
|
+
Requires-Dist: dask>=2026.1.2
|
|
11
|
+
Requires-Dist: distributed>=2026.1.2
|
|
12
|
+
Requires-Dist: earthkit-data>=0.19.0
|
|
13
|
+
Requires-Dist: h5netcdf>=1.8.1
|
|
14
|
+
Requires-Dist: h5py>=3.15.1
|
|
15
|
+
Requires-Dist: netcdf4>=1.7.4
|
|
16
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
17
|
+
Requires-Dist: scipy>=1.17.0
|
|
18
|
+
Requires-Dist: xarray>=2026.1.0
|
|
19
|
+
Requires-Dist: zarr<3.0
|
|
20
|
+
Provides-Extra: earthkit
|
|
21
|
+
Requires-Dist: earthkit-meteo>=0.6.1; extra == 'earthkit'
|
|
22
|
+
Provides-Extra: jobqueue
|
|
23
|
+
Requires-Dist: dask-jobqueue>=0.9.0; extra == 'jobqueue'
|
|
24
|
+
Provides-Extra: verification
|
|
25
|
+
Requires-Dist: xskillscore>=0.0.29; extra == 'verification'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# Meteo-xAlign
|
|
29
|
+
|
|
30
|
+
**An xarray-based package for alignment of meteorological datasets**
|
|
31
|
+
|
|
32
|
+
## What is this?
|
|
33
|
+
|
|
34
|
+
`mxalign` is an `xarray`-based package designed for the alignment and verification of meteorological datasets. It standardizes operations across datasets by attaching properties along three main axes:
|
|
35
|
+
- **Space:** Grid or point-based data
|
|
36
|
+
- **Time:** Forecasts, observations, or climatology
|
|
37
|
+
- **Uncertainty:** Deterministic, ensemble, or quantile forecasts
|
|
38
|
+
|
|
39
|
+
Currently, `mxalign` also acts as a full execution engine. It can load datasets (e.g., Anemoi inference outputs, observation datasets), apply transformations, align datasets in both space and time to match a reference, safely broadcast NaNs, and execute verification metrics on scaled Dask clusters (Local or Slurm).
|
|
40
|
+
|
|
41
|
+
> ⚠️ **Roadmap & Future Architecture Changes (planned for v0.2.0):**
|
|
42
|
+
> Currently, `mxalign` handles both alignment and the execution of the verification tooling pipeline, including loading and validation. In the upcoming `v0.2.0` release, this architecture will be refactored:
|
|
43
|
+
> - **Loading** will be split out into [`mlwp-data-loaders`](https://github.com/mlwp-tools/mlwp-data-loaders).
|
|
44
|
+
> - **Validation** of loaded `xr.Dataset`s will be moved to [`mlwp-data-specs`](https://github.com/mlwp-tools/mlwp-data-specs) (which will contain the requirements for each of the dataset traits and the validation logic).
|
|
45
|
+
> - **Execution** of the full verification pipeline (loading, transformations, alignment, and verification) from configuration files may be moved to a separate package in future releases.
|
|
46
|
+
> - **Tests** will be added to `mxalign` (building on test datasets already integrated into `mlwp-data-loaders`) to ensure that all alignment operations work correctly. Testing notebook execution inside `mxalign` is explicitly excluded from the current roadmap.
|
|
47
|
+
|
|
48
|
+
## Python API
|
|
49
|
+
|
|
50
|
+
`mxalign` provides building blocks for manual alignment, transformations, and interpolations of `xarray` datasets. This is ideal for interactive use in Jupyter notebooks or custom Python scripts.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import xarray as xr
|
|
54
|
+
from mxalign import load, align_space, align_time, transform
|
|
55
|
+
|
|
56
|
+
# Load datasets (using registered loaders)
|
|
57
|
+
ds_obs = load(name="observations_loader", files=["obs.nc"])
|
|
58
|
+
ds_fcst = load(name="anemoi_inference", files=["forecast.nc"])
|
|
59
|
+
|
|
60
|
+
# Align the forecast spatially to match the observation reference
|
|
61
|
+
ds_fcst_aligned_space = align_space(ds_fcst, reference=ds_obs, method="interpolation")
|
|
62
|
+
|
|
63
|
+
# Align datasets temporally
|
|
64
|
+
datasets = {"obs": ds_obs, "fcst": ds_fcst_aligned_space}
|
|
65
|
+
aligned_datasets = align_time(datasets, method="intersection")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
For a more comprehensive interactive example, check out the [introductory notebook](./examples/introduction.ipynb).
|
|
69
|
+
|
|
70
|
+
## Executing via a Configuration
|
|
71
|
+
|
|
72
|
+
For full verification pipeline execution, `mxalign` uses a YAML configuration file. This allows you to declaratively define how datasets are loaded, transformed, aligned, and verified.
|
|
73
|
+
|
|
74
|
+
### Configuration Contents
|
|
75
|
+
|
|
76
|
+
The configuration file is divided into several main sections:
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
datasets:
|
|
80
|
+
# Define datasets to load, specifying the loader, files, and variables
|
|
81
|
+
obs_data:
|
|
82
|
+
loader: observations_loader
|
|
83
|
+
files: ["obs.nc"]
|
|
84
|
+
fcst_data:
|
|
85
|
+
loader: anemoi_inference
|
|
86
|
+
files: ["forecast.nc"]
|
|
87
|
+
|
|
88
|
+
transformations:
|
|
89
|
+
# Apply transformations to loaded datasets
|
|
90
|
+
|
|
91
|
+
alignment:
|
|
92
|
+
# Define reference dataset and alignment methods (space, time, NaN broadcasting)
|
|
93
|
+
reference: obs_data
|
|
94
|
+
time:
|
|
95
|
+
method: intersection
|
|
96
|
+
|
|
97
|
+
verification:
|
|
98
|
+
# Specify the reference dataset and the metrics to calculate
|
|
99
|
+
reference: obs_data
|
|
100
|
+
metrics:
|
|
101
|
+
# define metrics here
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Running from the Command Line
|
|
105
|
+
|
|
106
|
+
The CLI uses Dask to distribute the workload and supports both local execution and execution on Slurm-managed HPC clusters.
|
|
107
|
+
|
|
108
|
+
**Local Execution**
|
|
109
|
+
Run the pipeline on a local Dask cluster:
|
|
110
|
+
```bash
|
|
111
|
+
mxalign local path/to/config.yaml --n_workers 4 --threads_per_worker 1
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Slurm Execution**
|
|
115
|
+
Run the pipeline on a Slurm cluster:
|
|
116
|
+
```bash
|
|
117
|
+
mxalign slurm path/to/config.yaml --account your_account --queue your_queue --cores 8 --memory 64GB
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Running from Python
|
|
121
|
+
|
|
122
|
+
You can also execute the entire configuration-driven pipeline directly from Python using the `Runner` class.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from mxalign.runner import Runner
|
|
126
|
+
|
|
127
|
+
# Initialize the runner with a YAML config file or a dictionary
|
|
128
|
+
runner = Runner("path/to/config.yaml")
|
|
129
|
+
|
|
130
|
+
# Execute the pipeline: loads, transforms, aligns, and verifies the datasets
|
|
131
|
+
runner.run()
|
|
132
|
+
|
|
133
|
+
# The resulting aligned datasets and computed metrics are accessible via:
|
|
134
|
+
aligned_datasets = runner.datasets
|
|
135
|
+
metrics = runner.metrics
|
|
136
|
+
```
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
mxalign/__init__.py,sha256=zCGKgXlTyEMwLtd-Zy-RfLBxH6jbuR-dwnRSPxO2TZ8,1035
|
|
2
|
+
mxalign/cli.py,sha256=UpctNL68XOUwRIR9xRXaVD89LjovpUJ_l3COTr6DErU,4257
|
|
3
|
+
mxalign/runner.py,sha256=0TyWsHieToUKi-JSe2evqagPcP5wp3sLNtzA1rY15po,6105
|
|
4
|
+
mxalign/verification.py,sha256=jpRxCxfifSl5CQbSbP8sQbdChvyxmDNtLQKPpYSC184,1865
|
|
5
|
+
mxalign/accessors/__init__.py,sha256=NWKgy0F54cvFAUYZGoJjlqH1b7sFRWaqkuNkH0Sjr2w,79
|
|
6
|
+
mxalign/accessors/space.py,sha256=LF5L146ztvLI9V-t1oRsLhU3EDKuNyyGMP6Yr51LH2A,7469
|
|
7
|
+
mxalign/accessors/time.py,sha256=gwgMBsr88uE8ObwGNGGcIUG6KAUfb3n6u85buGB1NpI,7137
|
|
8
|
+
mxalign/align/__init__.py,sha256=cLlhuJYItabVtO73uaNcKmBa4-E1EWynwslzEDn824s,79
|
|
9
|
+
mxalign/align/nans.py,sha256=8sQhAoRAIXI27-Zg4SpjslidHRfYJcJi6yiQD7rYkrA,2511
|
|
10
|
+
mxalign/align/space.py,sha256=i4dt3D309xyqc1Pcbf5DbAJaysk73u-63X9m-c4mGVg,574
|
|
11
|
+
mxalign/align/time.py,sha256=ARuaz2KFeAYO84bgJUNORRxup-waNJ9w6qCHc8VnoIM,2298
|
|
12
|
+
mxalign/interpolations/__init__.py,sha256=Wgo_miRwfLn6moypehGkqCo1t_HhAP1ihAAnfAjcHUE,120
|
|
13
|
+
mxalign/interpolations/base.py,sha256=CqC7NQKH-1_3TxyhhzcCM1pa86ausiQ3CfBeFKQGq_Y,868
|
|
14
|
+
mxalign/interpolations/delaunay.py,sha256=KCkAtlqa7q-UwwOOIGcpWIx3KJItfm-Xg35owaO46c0,7925
|
|
15
|
+
mxalign/interpolations/interpolate.py,sha256=ApZzglT5ZDCG9X9h6ocwOc36mBiPEUaC6cGHSTRMVBA,972
|
|
16
|
+
mxalign/interpolations/registry.py,sha256=T3qez9s8SC86UvdErgv8DAXlCW-X4M3MrP_eEOGRxFw,333
|
|
17
|
+
mxalign/interpolations/xarray.py,sha256=uQC90t9JleoOPMmq33GpybUrvehTjKzST3K3XJi2cYM,2034
|
|
18
|
+
mxalign/loaders/__init__.py,sha256=QYZZPwpywvVNJhMYBMztS1prekJnHFK19xzUO-N5BoQ,203
|
|
19
|
+
mxalign/loaders/anemoi_datasets.py,sha256=TB4k1RnfSBd4GrXbQXRR5XISToysfdJMXLyXWI2nTaI,2764
|
|
20
|
+
mxalign/loaders/anemoi_inference.py,sha256=Siv-fHlDnEWwVE1ng5PvkyOKMFu8Fingc7_fdNYOBjw,2614
|
|
21
|
+
mxalign/loaders/base.py,sha256=SSgwYheu54WdZWs1IkW2q6C1NWignWWcyIx2eohZFo4,2999
|
|
22
|
+
mxalign/loaders/harp_obstable.py,sha256=rJFVf3tNSAZUnADlFN6jDVxlhaNZ_T96fX7oxgI23Sk,2290
|
|
23
|
+
mxalign/loaders/loader.py,sha256=cK3s6T2ORB1VDVfTrAv4ePMEFD-wF3YQIh54RfMocvM,229
|
|
24
|
+
mxalign/loaders/registry.py,sha256=yC32ta-35poHDzWNXbakcpgwRcPqx7VST2ZOvgCIt9A,282
|
|
25
|
+
mxalign/properties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
+
mxalign/properties/properties.py,sha256=B87GRFsfd_2nNXDRGLpS1h0kRzkRvCYBPJ1vVIHdN90,456
|
|
27
|
+
mxalign/properties/specs.py,sha256=OqmgwcuktrytJDwRx5r9jreMdKf_n8KoOnYXtuEGNLg,1601
|
|
28
|
+
mxalign/properties/utils.py,sha256=FndoAJgcAi4kNdBpErF61ThgTSs6sRM_KAUVaEN4Ckw,1252
|
|
29
|
+
mxalign/properties/validation.py,sha256=-b4FG-OuesN0H0dJ6Nrl9btkHNXF77qxy04LamwkWrI,1436
|
|
30
|
+
mxalign/transformations/__init__.py,sha256=GPYFFS2H3rbmEaGWSrsRWaixr6DTQes15Yl-kCrLFDM,85
|
|
31
|
+
mxalign/transformations/base.py,sha256=i9ZH9N2OHNoEJB_i8DIPAi-DSNt9dg5z-Su7Q4NvOTE,882
|
|
32
|
+
mxalign/transformations/external.py,sha256=pBoUEr0Sp4xNIbwNLsX2DAxKwdqEXbqdgjl6AANA6ws,1053
|
|
33
|
+
mxalign/transformations/registry.py,sha256=8IKlXJTR5o1Ql5i8eXUVvWW4VuiugLobeYwv7qV9GtQ,432
|
|
34
|
+
mxalign/transformations/transform.py,sha256=v6dL0066ijinO4JmGaKbsV1zpr9hAUNHMa32XW3fddE,855
|
|
35
|
+
mxalign/utils/config.py,sha256=MF0MA5rIlILYHkxwFmDtrv5DqTku6dv7iSd0xBxRf38,1719
|
|
36
|
+
mxalign/utils/dates.py,sha256=HFp8RxzLwiPWfnjixgFmBkkJNckJarHqD-KXfLfeub0,2522
|
|
37
|
+
mxalign/utils/projections.py,sha256=6Pa5KPIpSF40pYRHQvdvee8hh7zjY7GcDCZdqXPJfI8,2873
|
|
38
|
+
mxalign/utils/save.py,sha256=N9Fryh4anLQ0TKWSxb7QqEV8cQ6T5Rnw26T-5CJ7fzc,2431
|
|
39
|
+
mxalign-0.1.0.dist-info/METADATA,sha256=2Z50cQfZEfYGkdTizl98zE2ubl4EaL0-AYImb9ctBsc,5492
|
|
40
|
+
mxalign-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
41
|
+
mxalign-0.1.0.dist-info/entry_points.txt,sha256=WDMMkJE_x_dc_01YiSNXHoodh4XZtAS0W44b8u6by_w,45
|
|
42
|
+
mxalign-0.1.0.dist-info/licenses/LICENSE,sha256=ah0n-QlWlM2C19ceTpLEKA7h9SflIBXli6_wo8imRVg,1065
|
|
43
|
+
mxalign-0.1.0.dist-info/RECORD,,
|