mxalign 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. mxalign/__init__.py +36 -0
  2. mxalign/accessors/__init__.py +7 -0
  3. mxalign/accessors/space.py +205 -0
  4. mxalign/accessors/time.py +180 -0
  5. mxalign/align/__init__.py +7 -0
  6. mxalign/align/nans.py +72 -0
  7. mxalign/align/space.py +21 -0
  8. mxalign/align/time.py +62 -0
  9. mxalign/cli.py +157 -0
  10. mxalign/interpolations/__init__.py +9 -0
  11. mxalign/interpolations/base.py +29 -0
  12. mxalign/interpolations/delaunay.py +218 -0
  13. mxalign/interpolations/interpolate.py +29 -0
  14. mxalign/interpolations/registry.py +17 -0
  15. mxalign/interpolations/xarray.py +63 -0
  16. mxalign/loaders/__init__.py +11 -0
  17. mxalign/loaders/anemoi_datasets.py +92 -0
  18. mxalign/loaders/anemoi_inference.py +103 -0
  19. mxalign/loaders/base.py +103 -0
  20. mxalign/loaders/harp_obstable.py +81 -0
  21. mxalign/loaders/loader.py +8 -0
  22. mxalign/loaders/registry.py +17 -0
  23. mxalign/properties/__init__.py +0 -0
  24. mxalign/properties/properties.py +25 -0
  25. mxalign/properties/specs.py +54 -0
  26. mxalign/properties/utils.py +43 -0
  27. mxalign/properties/validation.py +48 -0
  28. mxalign/runner.py +167 -0
  29. mxalign/transformations/__init__.py +7 -0
  30. mxalign/transformations/base.py +38 -0
  31. mxalign/transformations/external.py +34 -0
  32. mxalign/transformations/registry.py +20 -0
  33. mxalign/transformations/transform.py +28 -0
  34. mxalign/utils/config.py +55 -0
  35. mxalign/utils/dates.py +76 -0
  36. mxalign/utils/projections.py +104 -0
  37. mxalign/utils/save.py +62 -0
  38. mxalign/verification.py +57 -0
  39. mxalign-0.1.0.dist-info/METADATA +136 -0
  40. mxalign-0.1.0.dist-info/RECORD +43 -0
  41. mxalign-0.1.0.dist-info/WHEEL +4 -0
  42. mxalign-0.1.0.dist-info/entry_points.txt +2 -0
  43. mxalign-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,38 @@
1
+ from .registry import register_transformation
2
+
3
+
4
+ @register_transformation("rename")
5
def transform_rename(ds, rename_dict):
    """Rename variables of ``ds`` to harmonised names.

    Parameters
    ----------
    ds
        Object exposing ``keys()`` and ``rename(mapping)``.
    rename_dict : dict
        Maps each *new* name to the old name(s) it should replace. The value
        may be a single name or a collection of candidate names.

    Returns
    -------
    Whatever ``ds.rename`` returns for the ``{old_name: new_name}`` mapping
    built from the names actually present in ``ds``.
    """
    mapping = {}
    for new_name, old_names in rename_dict.items():
        # A bare string has to be compared as one whole name: ``name in "t2m"``
        # is a substring test and would also match e.g. "t" or "2m".
        if isinstance(old_names, str):
            old_names = [old_names]
        for name in ds.keys():
            if name in old_names:
                mapping[name] = new_name
    return ds.rename(mapping)
14
+
15
+
16
+ @register_transformation("kelvin_to_celcius")
17
def transform_kelvin_to_celcius(ds, variables, inverse=False):
    """Shift the given variables between Kelvin and degrees Celsius.

    Parameters
    ----------
    ds
        Container supporting ``ds[name]`` get and set; it is modified in
        place and returned.
    variables : str or iterable of str
        Name(s) of the variables to convert.
    inverse : bool, optional
        When true, add 273.15 (Celsius to Kelvin) instead of subtracting it.

    Returns
    -------
    The same ``ds`` object with the selected variables shifted.
    """
    T_C2K = 273.15
    offset = T_C2K if inverse else -T_C2K
    names = [variables] if isinstance(variables, str) else variables

    for name in names:
        ds[name] = ds[name] + offset

    return ds
30
+
31
+
32
+ @register_transformation("uv_to_speed")
33
def transform(ds, u, v, speed):
    """Derive a speed variable from two vector components.

    Parameters
    ----------
    ds
        Container supporting ``ds[name]`` get and set; modified in place.
    u, v
        Names of the two component variables in ``ds``.
    speed
        Name under which ``sqrt(u**2 + v**2)`` is stored.

    Returns
    -------
    The same ``ds`` object, now holding the ``speed`` variable.
    """
    import numpy as np

    squared_sum = ds[u] ** 2 + ds[v] ** 2
    ds[speed] = np.sqrt(squared_sum)
    return ds
@@ -0,0 +1,34 @@
1
+ from .registry import register_transformation
2
+
3
+
4
+ @register_transformation("external")
5
def transform(ds, func_path, inputs, output, **kwargs):
    """Apply a function given by dotted path and store its result in ``ds``.

    Parameters
    ----------
    ds
        Dataset-like object; read via ``ds[var_name]`` and written via
        ``ds[output] = ...``. It is modified in place and returned.
    func_path : str
        Dotted path resolved with ``_resolve_function``.
    inputs : dict
        Maps each keyword-argument name of the function to the name of the
        variable in ``ds`` that is passed for it.
    output
        Name under which the result is stored in ``ds``.
    **kwargs
        Extra keyword arguments forwarded to the function.

    Returns
    -------
    The same ``ds`` object with ``output`` assigned.
    """
    func = _resolve_function(func_path)

    # Bind every function argument to the corresponding variable of ``ds``.
    input_kwargs = {arg_name: ds[var_name] for arg_name, var_name in inputs.items()}

    # On a name clash the explicit keyword arguments override the input bindings.
    all_kwargs = {**input_kwargs, **kwargs}
    result = func(**all_kwargs)
    # NOTE(review): the result is labelled with *all* of ``ds.dims``, so it
    # presumably must span every dimension of ``ds`` in that order — confirm.
    # NOTE(review): presumably ``func`` must return a raw array rather than a
    # labelled xarray object for this tuple assignment to be accepted — verify.
    ds[output] = (ds.dims, result)
    return ds
15
+
16
+
17
+ def _resolve_function(func_path):
18
+ import importlib
19
+
20
+ module_path, func_name = func_path.rsplit(".", 1)
21
+ try:
22
+ module = importlib.import_module(module_path)
23
+ except ImportError as e:
24
+ raise ImportError(
25
+ f"Could not import module '{module_path}' required for transform '{func_path}'. "
26
+ f"Make sure it is installed. Original error: {e}"
27
+ )
28
+ try:
29
+ return getattr(module, func_name)
30
+ except AttributeError:
31
+ raise AttributeError(
32
+ f"Module '{module_path}' has no function '{func_name}'. "
33
+ f"Check the function name in your config."
34
+ )
@@ -0,0 +1,20 @@
1
# Maps a transformation name to the callable registered under that name.
_TRANSFORMATION_REGISTRY = {}
2
+
3
+
4
def register_transformation(name):
    """Return a decorator that records a callable under ``name``.

    The decorated callable is stored in ``_TRANSFORMATION_REGISTRY`` and
    handed back unchanged. Registering a second callable under the same
    name replaces the earlier entry.
    """

    def _register(fn):
        _TRANSFORMATION_REGISTRY[name] = fn
        return fn

    return _register
10
+
11
+
12
def available_transformations():
    """Return the names of all registered transformations as a new list."""
    return [*_TRANSFORMATION_REGISTRY]
14
+
15
+
16
def get_transformation(name):
    """Look up the callable registered under ``name``.

    Raises
    ------
    ValueError
        If no transformation is registered under ``name``. The message lists
        the names that are registered.
    """
    try:
        return _TRANSFORMATION_REGISTRY[name]
    except KeyError:
        known = ", ".join(map(str, _TRANSFORMATION_REGISTRY)) or "<none>"
        # ``from None``: the KeyError adds nothing beyond the message below.
        raise ValueError(
            f"Unknown transformation: {name}. Available transformations: {known}"
        ) from None
@@ -0,0 +1,28 @@
1
+ from .registry import get_transformation
2
+
3
+
4
def transform(name, datasets, *args, **kwargs):
    """Apply the registered transformation ``name`` to one or more datasets.

    Each dataset is copied with ``.copy()`` before it is handed to the
    transformation, so the transformation never receives the caller's object.

    Parameters
    ----------
    name
        Name of a registered transformation (see ``get_transformation``).
    datasets
        A single dataset, a list of datasets, or a dict of datasets.
    *args, **kwargs
        Forwarded to the transformation after the dataset.

    Returns
    -------
    - dict input: a dict with the same keys (also for one or zero entries);
    - list input with exactly one element, or a single dataset: the single
      transformed dataset;
    - any other list: a list of transformed datasets.
    """
    func = get_transformation(name)

    if isinstance(datasets, dict):
        # Always answer a mapping with a mapping. The former shared
        # "unwrap if length 1" step indexed the result dict with ``[0]`` and
        # raised KeyError for a single-entry dict.
        return {
            key: func(ds.copy(), *args, **kwargs) for key, ds in datasets.items()
        }

    if not isinstance(datasets, list):
        datasets = [datasets]
    results = [func(ds.copy(), *args, **kwargs) for ds in datasets]
    return results[0] if len(results) == 1 else results
@@ -0,0 +1,55 @@
1
+ import yaml
2
+
3
+ from .dates import Dates
4
+
5
+
6
def load_yaml(fn: str) -> dict:
    """Parse the YAML file at ``fn`` with ``yaml.safe_load`` and return the result.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.
    """
    with open(fn, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
9
+
10
+
11
+ class Config:
12
+ def __init__(self, config: str | dict):
13
+ self.config = load_yaml(config) if isinstance(config, str) else config
14
+ if not isinstance(self.config, dict):
15
+ raise TypeError("config should be a dictionary.")
16
+ self.dates = self.config.pop("dates", None)
17
+ self._init_datasets()
18
+ print("Config initialized")
19
+
20
+ def __getitem__(self, key):
21
+ config = self.config.get(key, None)
22
+ if config:
23
+ return config.copy()
24
+ else:
25
+ return config
26
+
27
+ def __call__(self):
28
+ return self.config
29
+
30
+ def _init_datasets(self):
31
+ for key, loader in self.config["datasets"].items():
32
+ dates_loader = loader.pop("dates", None)
33
+ if self.dates:
34
+ if dates_loader:
35
+ keys_all = list(set(self.dates.keys()).union(dates_loader.keys()))
36
+ dates = {
37
+ key: (
38
+ dates_loader[key]
39
+ if key in dates_loader.keys()
40
+ else self.dates[key]
41
+ )
42
+ for key in keys_all
43
+ }
44
+ else:
45
+ dates = self.dates.copy()
46
+ else:
47
+ if dates_loader:
48
+ dates = dates_loader.copy()
49
+ else:
50
+ dates = None
51
+
52
+ if dates:
53
+ dates = Dates(**dates)
54
+ loader["files"] = dates.substitute(loader["files"])
55
+ self.config["datasets"][key] = loader
mxalign/utils/dates.py ADDED
@@ -0,0 +1,76 @@
1
+ import numpy as np
2
+ from earthkit.data.utils.patterns import Pattern
3
+
4
+
5
+ class Dates:
6
+ def __init__(
7
+ self,
8
+ start: str | np.datetime64,
9
+ end: str | np.datetime64,
10
+ period: str | np.timedelta64,
11
+ range: str | np.timedelta64,
12
+ step: str | np.timedelta64,
13
+ ):
14
+ self._start = (
15
+ np.datetime64(start) if not isinstance(start, np.datetime64) else start
16
+ )
17
+ self._end = np.datetime64(end) if not isinstance(end, np.datetime64) else end
18
+ self._period = to_timedelta64(period) if isinstance(period, str) else period
19
+ self._range = to_timedelta64(range) if isinstance(range, str) else range
20
+ self._step = to_timedelta64(step) if isinstance(step, str) else step
21
+ valid_times = set()
22
+ lead_times = set()
23
+ reference_times = set()
24
+ date = self._start
25
+ while date <= self._end:
26
+ # print(date)
27
+ reference_times.add(date)
28
+ delta = np.timedelta64(0, "s")
29
+ while delta <= self._range:
30
+ valid_times.add(date + delta)
31
+ lead_times.add(delta)
32
+ delta += self._step
33
+ date += self._period
34
+ self.valid_times = list(valid_times)
35
+ self.reference_times = list(reference_times)
36
+ # FIXME: can we simplify this? earthkit.data.utils.patterns.Pattern does not accept np.int64
37
+ self.lead_times = sorted([int(t.astype(int)) for t in lead_times])
38
+
39
+ def substitute(self, path: str):
40
+ pattern = Pattern(path)
41
+ paths = pattern.substitute(
42
+ dict(reference_time=self.reference_times),
43
+ # dict(lead_time=self.lead_times),
44
+ # dict(valid_time=self.valid_times),
45
+ allow_extra=True,
46
+ )
47
+ return sorted(paths)
48
+
49
+
50
def to_timedelta64(freq: str) -> np.timedelta64:
    """
    Convert a frequency string to a numpy timedelta64 object.

    The frequency string is an integer followed by a numpy time unit,
    e.g. '1D', '2h', '30m' or '500ms'. Units are case-sensitive:
        - 'Y' for years
        - 'M' for months
        - 'W' for weeks
        - 'D' for days
        - 'h' for hours
        - 'm' for minutes
        - 's' for seconds
        - 'ms' for milliseconds

    Parameters
    ----------
    freq : str
        The frequency string to convert.

    Returns
    -------
    np.timedelta64
        The converted numpy timedelta64 object.

    Raises
    ------
    ValueError
        If ``freq`` is not an integer followed by a unit.
    """
    import re

    # Split on the number/unit boundary rather than at the last character, so
    # that multi-letter units such as 'ms' are kept whole.
    match = re.fullmatch(r"\s*([+-]?\d+)\s*([A-Za-z]+)\s*", freq)
    if match is None:
        raise ValueError(
            f"Invalid frequency string {freq!r}: expected an integer followed "
            f"by a time unit, e.g. '6h'."
        )
    value, unit = match.groups()
    return np.timedelta64(int(value), unit)
@@ -0,0 +1,104 @@
1
+ import cartopy.crs as ccrs
2
+
3
+
4
def create_cartopy_crs(projection, kws_projection, kws_globe=None) -> ccrs.Projection:
    """Create a Cartopy coordinate reference system (CRS) for a named projection.

    Parameters
    ----------
    projection : str
        Name of the projection to create. Must be a key of PROJECTIONS.
    kws_projection : dict
        Keyword arguments passed to the projection constructor. The dict is
        copied, not modified. ``None`` or an empty dict means no extra
        arguments.
    kws_globe : dict, optional
        Keyword arguments used to build a ``ccrs.Globe``. When omitted or
        empty, ``globe=None`` is passed to the projection constructor.

    Returns
    -------
    ccrs.Projection
        The created Cartopy projection object.

    Raises
    ------
    ValueError
        If the specified projection is not supported (not in PROJECTIONS).

    Examples
    --------
    >>> kws_projection = {'central_longitude': 0}
    >>> kws_globe = {'ellipse': 'WGS84'}
    >>> crs = create_cartopy_crs('latlon', kws_projection, kws_globe)
    """
    # - Get the cartopy projection class
    try:
        projection_cls = PROJECTIONS[projection]
    except KeyError:
        raise ValueError(f"Unsupported projection: {projection}") from None

    # - Work on a copy of the caller's keyword dict
    kws_projection = dict(kws_projection) if kws_projection else {}

    # - Build the globe only when parameters were given; without this default
    #   the name was unbound and the call failed for kws_globe=None
    globe = ccrs.Globe(**kws_globe) if kws_globe else None

    return projection_cls(globe=globe, **kws_projection)
49
+
50
+
51
# Supported projection names mapped to the cartopy CRS class that
# create_cartopy_crs instantiates for them.
PROJECTIONS = dict(
    lcc=ccrs.LambertConformal,
    latlon=ccrs.PlateCarree,
    PlateCarree=ccrs.PlateCarree,
    Mercator=ccrs.Mercator,
    Orthographic=ccrs.Orthographic,
)

# Predefined domain definitions keyed by name. The "projection",
# "kws_projection" and "kws_globe" entries use the same names as the
# parameters of create_cartopy_crs; "kws_grid" holds grid corner
# coordinates, spacing and point counts.
# NOTE(review): dx/dy are presumably in metres and lon/lat in degrees — confirm.
BUILTIN = dict(
    cerra=dict(
        projection="lcc",
        kws_globe=dict(
            semimajor_axis=6371229.0,
            semiminor_axis=6371229.0,
        ),
        kws_projection=dict(
            central_longitude=8.0,
            central_latitude=50.0,
            standard_parallels=[50.0, 50.0],
        ),
        kws_grid=dict(
            lon_ll=-17.4859,
            lat_ll=20.2923,
            lon_ur=74.1051,
            lat_ur=63.7695,
            dx=5500.0,
            dy=5500.0,
            nx=1069,
            ny=1069,
        ),
    ),
    uwcw=dict(
        projection="lcc",
        kws_globe=dict(
            semimajor_axis=6371229.0,
            semiminor_axis=6371229.0,
        ),
        kws_projection=dict(
            central_longitude=-1.96590281,
            central_latitude=55.5164337,
            standard_parallels=[55.499996, 55.499996],
        ),
        kws_grid=dict(
            lon_ll=-25.4470005,
            lat_ll=39.6389999,
            lon_ur=40.1508102,
            lat_ur=62.6713715,
            dx=2000.0,
            dy=2000.0,
            nx=1909,
            ny=1609,
        ),
    ),
)
mxalign/utils/save.py ADDED
@@ -0,0 +1,62 @@
1
+ from earthkit.data.utils.patterns import Pattern
2
+
3
+
4
class DatasetPath:
    """Fill an output path template from a dataset's name and dominant date.

    For a forecast dataset the date is taken from ``reference_time``, for an
    observation dataset from ``valid_time``. The year is the one with the
    most entries; the month is the most frequent one within that year; the
    day is the most frequent one within that month. For any other dataset
    no ``year``/``month``/``day`` attribute is set.
    """

    def __init__(self, name, ds):
        self.name = name
        if ds.time.is_forecast():
            self._set_dominant_date(ds, "reference_time")
        elif ds.time.is_observation():
            self._set_dominant_date(ds, "valid_time")

    def _set_dominant_date(self, ds, coord):
        """Store the most frequent year, month and day of ``ds[coord]`` on self."""
        per_year = ds[coord].groupby(ds[coord].dt.year).count()
        self.year = int(per_year.isel(year=per_year.argmax())["year"].values)

        in_year = ds.sel({coord: ds[coord].dt.year == self.year})
        per_month = in_year[coord].groupby(in_year[coord].dt.month).count()
        self.month = int(per_month.isel(month=per_month.argmax())["month"].values)

        in_month = in_year.sel({coord: in_year[coord].dt.month == self.month})
        per_day = in_month[coord].groupby(in_month[coord].dt.day).count()
        self.day = int(per_day.isel(day=per_day.argmax())["day"].values)

    def substitute(self, path: str):
        """Return ``path`` with name, year, month and day substituted via ``Pattern``."""
        template = Pattern(path)
        return template.substitute(
            dict(name=self.name),
            dict(year=self.year),
            dict(month=self.month),
            dict(day=self.day),
            allow_extra=True,
        )
48
+
49
+
50
def save_dataset(method, name, ds, **kwargs):
    """Write ``ds`` using its method named ``method``.

    The required ``path`` keyword is a template that is filled in through
    ``DatasetPath(name, ds)``; all remaining keyword arguments are forwarded
    to the writer method.
    """
    writer = getattr(ds, method)
    resolver = DatasetPath(name, ds)
    template = kwargs.pop("path")
    path = resolver.substitute(template)
    print(f"Saving to {path}")
    writer(path, **kwargs)
56
+
57
+
58
def save_metrics(method, ds, **kwargs):
    """Write ``ds`` using its method named ``method``.

    The required ``path`` keyword is passed to the writer as its first
    positional argument; all remaining keyword arguments are forwarded.
    """
    writer = getattr(ds, method)
    target = kwargs.pop("path")
    print(f"Saving to {target}")
    writer(target, **kwargs)
@@ -0,0 +1,57 @@
1
+ from .transformations.external import _resolve_function
2
+ from functools import partial
3
+
4
+
5
class Metric:
    """A verification function with its reference input bound in advance.

    Parameters
    ----------
    name
        Label stored as ``self.name``.
    func_path : str
        Dotted path of the metric function, resolved with ``_resolve_function``.
    ds_ref
        Reference dataset, bound to every input declared as ``"reference"``.
    inputs : dict
        Maps argument names of the function to a dataset role. Arguments with
        role ``"reference"`` receive ``ds_ref``; exactly one argument must
        have another role and receives the dataset given to ``compute``.
    **kwargs
        Extra keyword arguments bound to the function. ``dim`` is also used
        to decide how inputs are rechunked for ``xskillscore`` functions.

    Raises
    ------
    ValueError
        If ``inputs`` declares no, or more than one, non-reference argument.
    """

    def __init__(self, name, func_path, ds_ref, inputs, **kwargs):
        self.name = name
        func = _resolve_function(func_path)
        # Some callables have no module name; treat them as non-xskillscore.
        module_name = getattr(func, "__module__", None) or ""
        self._is_xskillscore = module_name.startswith("xskillscore")
        self._dim = kwargs.get("dim", None)

        kwarg_ref = {}
        kwarg_ds = []
        for input_arg, ds_type in inputs.items():
            if ds_type == "reference":
                kwarg_ref[input_arg] = (
                    self._rechunk(ds_ref) if self._is_xskillscore else ds_ref
                )
            else:
                kwarg_ds.append(input_arg)
        if len(kwarg_ds) > 1:
            raise ValueError(
                f"More than one predictor-input argument defined for function {func_path}"
            )
        if not kwarg_ds:
            # Without this check the lookup below failed with a bare IndexError.
            raise ValueError(
                f"No predictor-input argument defined for function {func_path}"
            )
        partial_kwargs = {**kwarg_ref, **kwargs}
        self._func = partial(func, **partial_kwargs)
        self._kwarg_ds = kwarg_ds[0]

    def compute(self, ds):
        """Evaluate the metric with ``ds`` as the single non-reference input."""
        if self._is_xskillscore:
            ds = self._rechunk(ds)
        kwarg_ds = {self._kwarg_ds: ds}
        return self._func(**kwarg_ds)

    def _rechunk(self, ds):
        """Rechunk ``ds``: one chunk along the ``dim`` dimension(s), size 1 elsewhere.

        Returns ``ds`` unchanged when no ``dim`` was configured.
        """
        if self._dim is None:
            return ds
        dim = [self._dim] if isinstance(self._dim, str) else self._dim
        chunks = {d: -1 for d in dim}
        for d in ds.dims:
            if d not in dim:
                chunks[d] = 1
        return ds.chunk(chunks)
44
+
45
+
46
def verify(fcst, obs, func_path, inputs, **kwargs):
    """Call a verification function on a forecast/observation pair.

    Parameters
    ----------
    fcst, obs
        Forecast and observation datasets.
    func_path : str
        Dotted path of the function, resolved with ``_resolve_function``.
    inputs : dict
        Maps argument names of the function to ``"forecast"`` or
        ``"observation"``; any other role raises ``KeyError``.
    **kwargs
        Extra keyword arguments; they override ``inputs`` on a name clash.

    Returns
    -------
    Whatever the resolved function returns.
    """
    metric_fn = _resolve_function(func_path=func_path)
    by_role = {"forecast": fcst, "observation": obs}

    call_kwargs = {arg_name: by_role[role] for arg_name, role in inputs.items()}
    call_kwargs.update(kwargs)

    return metric_fn(**call_kwargs)
@@ -0,0 +1,136 @@
1
+ Metadata-Version: 2.4
2
+ Name: mxalign
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Author-email: Michiel Van Ginderachter <michiel.vanginderachter@meteo.be>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: bokeh>=3.8.2
9
+ Requires-Dist: cartopy>=0.25.0
10
+ Requires-Dist: dask>=2026.1.2
11
+ Requires-Dist: distributed>=2026.1.2
12
+ Requires-Dist: earthkit-data>=0.19.0
13
+ Requires-Dist: h5netcdf>=1.8.1
14
+ Requires-Dist: h5py>=3.15.1
15
+ Requires-Dist: netcdf4>=1.7.4
16
+ Requires-Dist: pyyaml>=6.0.3
17
+ Requires-Dist: scipy>=1.17.0
18
+ Requires-Dist: xarray>=2026.1.0
19
+ Requires-Dist: zarr<3.0
20
+ Provides-Extra: earthkit
21
+ Requires-Dist: earthkit-meteo>=0.6.1; extra == 'earthkit'
22
+ Provides-Extra: jobqueue
23
+ Requires-Dist: dask-jobqueue>=0.9.0; extra == 'jobqueue'
24
+ Provides-Extra: verification
25
+ Requires-Dist: xskillscore>=0.0.29; extra == 'verification'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # Meteo-xAlign
29
+
30
+ **An xarray-based package for the alignment of meteorological datasets**
31
+
32
+ ## What is this?
33
+
34
+ `mxalign` is an `xarray`-based package designed for the alignment and verification of meteorological datasets. It standardizes operations across datasets by attaching properties along three main axes:
35
+ - **Space:** Grid or point-based data
36
+ - **Time:** Forecasts, observations, or climatology
37
+ - **Uncertainty:** Deterministic, ensemble, or quantile forecasts
38
+
39
+ Currently, `mxalign` also acts as a full execution engine. It can load datasets (e.g., Anemoi inference outputs, observation datasets), apply transformations, align datasets in both space and time to match a reference, safely broadcast NaNs, and execute verification metrics on scaled Dask clusters (Local or Slurm).
40
+
41
+ > ⚠️ **Roadmap & Future Architecture Changes (planned for v0.2.0):**
42
+ > Currently, `mxalign` handles both alignment and the execution of the verification tooling pipeline, including loading and validation. In the upcoming `v0.2.0` release, this architecture will be refactored:
43
+ > - **Loading** will be split out into [`mlwp-data-loaders`](https://github.com/mlwp-tools/mlwp-data-loaders).
44
+ > - **Validation** of loaded `xr.Dataset`s will be moved to [`mlwp-data-specs`](https://github.com/mlwp-tools/mlwp-data-specs) (which will contain the requirements for each of the dataset traits and the validation logic).
45
+ > - **Execution** of the full verification pipeline (loading, transformations, alignment, and verification) from configuration files may be moved to a separate package in future releases.
46
+ > - **Tests** will be added to `mxalign` (building on test datasets already integrated into `mlwp-data-loaders`) to ensure that all alignment operations work correctly. Testing notebook execution inside `mxalign` is explicitly excluded from the current roadmap.
47
+
48
+ ## Python API
49
+
50
+ `mxalign` provides building blocks for manual alignment, transformations, and interpolations of `xarray` datasets. This is ideal for interactive use in Jupyter notebooks or custom Python scripts.
51
+
52
+ ```python
53
+ import xarray as xr
54
+ from mxalign import load, align_space, align_time, transform
55
+
56
+ # Load datasets (using registered loaders)
57
+ ds_obs = load(name="observations_loader", files=["obs.nc"])
58
+ ds_fcst = load(name="anemoi_inference", files=["forecast.nc"])
59
+
60
+ # Align the forecast spatially to match the observation reference
61
+ ds_fcst_aligned_space = align_space(ds_fcst, reference=ds_obs, method="interpolation")
62
+
63
+ # Align datasets temporally
64
+ datasets = {"obs": ds_obs, "fcst": ds_fcst_aligned_space}
65
+ aligned_datasets = align_time(datasets, method="intersection")
66
+ ```
67
+
68
+ For a more comprehensive interactive example, check out the [introductory notebook](./examples/introduction.ipynb).
69
+
70
+ ## Executing via a Configuration
71
+
72
+ For full verification pipeline execution, `mxalign` uses a YAML configuration file. This allows you to declaratively define how datasets are loaded, transformed, aligned, and verified.
73
+
74
+ ### Configuration Contents
75
+
76
+ The configuration file is divided into several main sections:
77
+
78
+ ```yaml
79
+ datasets:
80
+ # Define datasets to load, specifying the loader, files, and variables
81
+ obs_data:
82
+ loader: observations_loader
83
+ files: ["obs.nc"]
84
+ fcst_data:
85
+ loader: anemoi_inference
86
+ files: ["forecast.nc"]
87
+
88
+ transformations:
89
+ # Apply transformations to loaded datasets
90
+
91
+ alignment:
92
+ # Define reference dataset and alignment methods (space, time, NaN broadcasting)
93
+ reference: obs_data
94
+ time:
95
+ method: intersection
96
+
97
+ verification:
98
+ # Specify the reference dataset and the metrics to calculate
99
+ reference: obs_data
100
+ metrics:
101
+ # define metrics here
102
+ ```
103
+
104
+ ### Running from the Command Line
105
+
106
+ The CLI uses Dask to distribute the workload and supports both local execution and execution on Slurm-managed HPC clusters.
107
+
108
+ **Local Execution**
109
+ Run the pipeline on a local Dask cluster:
110
+ ```bash
111
+ mxalign local path/to/config.yaml --n_workers 4 --threads_per_worker 1
112
+ ```
113
+
114
+ **Slurm Execution**
115
+ Run the pipeline on a Slurm cluster:
116
+ ```bash
117
+ mxalign slurm path/to/config.yaml --account your_account --queue your_queue --cores 8 --memory 64GB
118
+ ```
119
+
120
+ ### Running from Python
121
+
122
+ You can also execute the entire configuration-driven pipeline directly from Python using the `Runner` class.
123
+
124
+ ```python
125
+ from mxalign.runner import Runner
126
+
127
+ # Initialize the runner with a YAML config file or a dictionary
128
+ runner = Runner("path/to/config.yaml")
129
+
130
+ # Execute the pipeline: loads, transforms, aligns, and verifies the datasets
131
+ runner.run()
132
+
133
+ # The resulting aligned datasets and computed metrics are accessible via:
134
+ aligned_datasets = runner.datasets
135
+ metrics = runner.metrics
136
+ ```
@@ -0,0 +1,43 @@
1
+ mxalign/__init__.py,sha256=zCGKgXlTyEMwLtd-Zy-RfLBxH6jbuR-dwnRSPxO2TZ8,1035
2
+ mxalign/cli.py,sha256=UpctNL68XOUwRIR9xRXaVD89LjovpUJ_l3COTr6DErU,4257
3
+ mxalign/runner.py,sha256=0TyWsHieToUKi-JSe2evqagPcP5wp3sLNtzA1rY15po,6105
4
+ mxalign/verification.py,sha256=jpRxCxfifSl5CQbSbP8sQbdChvyxmDNtLQKPpYSC184,1865
5
+ mxalign/accessors/__init__.py,sha256=NWKgy0F54cvFAUYZGoJjlqH1b7sFRWaqkuNkH0Sjr2w,79
6
+ mxalign/accessors/space.py,sha256=LF5L146ztvLI9V-t1oRsLhU3EDKuNyyGMP6Yr51LH2A,7469
7
+ mxalign/accessors/time.py,sha256=gwgMBsr88uE8ObwGNGGcIUG6KAUfb3n6u85buGB1NpI,7137
8
+ mxalign/align/__init__.py,sha256=cLlhuJYItabVtO73uaNcKmBa4-E1EWynwslzEDn824s,79
9
+ mxalign/align/nans.py,sha256=8sQhAoRAIXI27-Zg4SpjslidHRfYJcJi6yiQD7rYkrA,2511
10
+ mxalign/align/space.py,sha256=i4dt3D309xyqc1Pcbf5DbAJaysk73u-63X9m-c4mGVg,574
11
+ mxalign/align/time.py,sha256=ARuaz2KFeAYO84bgJUNORRxup-waNJ9w6qCHc8VnoIM,2298
12
+ mxalign/interpolations/__init__.py,sha256=Wgo_miRwfLn6moypehGkqCo1t_HhAP1ihAAnfAjcHUE,120
13
+ mxalign/interpolations/base.py,sha256=CqC7NQKH-1_3TxyhhzcCM1pa86ausiQ3CfBeFKQGq_Y,868
14
+ mxalign/interpolations/delaunay.py,sha256=KCkAtlqa7q-UwwOOIGcpWIx3KJItfm-Xg35owaO46c0,7925
15
+ mxalign/interpolations/interpolate.py,sha256=ApZzglT5ZDCG9X9h6ocwOc36mBiPEUaC6cGHSTRMVBA,972
16
+ mxalign/interpolations/registry.py,sha256=T3qez9s8SC86UvdErgv8DAXlCW-X4M3MrP_eEOGRxFw,333
17
+ mxalign/interpolations/xarray.py,sha256=uQC90t9JleoOPMmq33GpybUrvehTjKzST3K3XJi2cYM,2034
18
+ mxalign/loaders/__init__.py,sha256=QYZZPwpywvVNJhMYBMztS1prekJnHFK19xzUO-N5BoQ,203
19
+ mxalign/loaders/anemoi_datasets.py,sha256=TB4k1RnfSBd4GrXbQXRR5XISToysfdJMXLyXWI2nTaI,2764
20
+ mxalign/loaders/anemoi_inference.py,sha256=Siv-fHlDnEWwVE1ng5PvkyOKMFu8Fingc7_fdNYOBjw,2614
21
+ mxalign/loaders/base.py,sha256=SSgwYheu54WdZWs1IkW2q6C1NWignWWcyIx2eohZFo4,2999
22
+ mxalign/loaders/harp_obstable.py,sha256=rJFVf3tNSAZUnADlFN6jDVxlhaNZ_T96fX7oxgI23Sk,2290
23
+ mxalign/loaders/loader.py,sha256=cK3s6T2ORB1VDVfTrAv4ePMEFD-wF3YQIh54RfMocvM,229
24
+ mxalign/loaders/registry.py,sha256=yC32ta-35poHDzWNXbakcpgwRcPqx7VST2ZOvgCIt9A,282
25
+ mxalign/properties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ mxalign/properties/properties.py,sha256=B87GRFsfd_2nNXDRGLpS1h0kRzkRvCYBPJ1vVIHdN90,456
27
+ mxalign/properties/specs.py,sha256=OqmgwcuktrytJDwRx5r9jreMdKf_n8KoOnYXtuEGNLg,1601
28
+ mxalign/properties/utils.py,sha256=FndoAJgcAi4kNdBpErF61ThgTSs6sRM_KAUVaEN4Ckw,1252
29
+ mxalign/properties/validation.py,sha256=-b4FG-OuesN0H0dJ6Nrl9btkHNXF77qxy04LamwkWrI,1436
30
+ mxalign/transformations/__init__.py,sha256=GPYFFS2H3rbmEaGWSrsRWaixr6DTQes15Yl-kCrLFDM,85
31
+ mxalign/transformations/base.py,sha256=i9ZH9N2OHNoEJB_i8DIPAi-DSNt9dg5z-Su7Q4NvOTE,882
32
+ mxalign/transformations/external.py,sha256=pBoUEr0Sp4xNIbwNLsX2DAxKwdqEXbqdgjl6AANA6ws,1053
33
+ mxalign/transformations/registry.py,sha256=8IKlXJTR5o1Ql5i8eXUVvWW4VuiugLobeYwv7qV9GtQ,432
34
+ mxalign/transformations/transform.py,sha256=v6dL0066ijinO4JmGaKbsV1zpr9hAUNHMa32XW3fddE,855
35
+ mxalign/utils/config.py,sha256=MF0MA5rIlILYHkxwFmDtrv5DqTku6dv7iSd0xBxRf38,1719
36
+ mxalign/utils/dates.py,sha256=HFp8RxzLwiPWfnjixgFmBkkJNckJarHqD-KXfLfeub0,2522
37
+ mxalign/utils/projections.py,sha256=6Pa5KPIpSF40pYRHQvdvee8hh7zjY7GcDCZdqXPJfI8,2873
38
+ mxalign/utils/save.py,sha256=N9Fryh4anLQ0TKWSxb7QqEV8cQ6T5Rnw26T-5CJ7fzc,2431
39
+ mxalign-0.1.0.dist-info/METADATA,sha256=2Z50cQfZEfYGkdTizl98zE2ubl4EaL0-AYImb9ctBsc,5492
40
+ mxalign-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
41
+ mxalign-0.1.0.dist-info/entry_points.txt,sha256=WDMMkJE_x_dc_01YiSNXHoodh4XZtAS0W44b8u6by_w,45
42
+ mxalign-0.1.0.dist-info/licenses/LICENSE,sha256=ah0n-QlWlM2C19ceTpLEKA7h9SflIBXli6_wo8imRVg,1065
43
+ mxalign-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mxalign = mxalign.cli:main