mxalign 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mxalign-0.1.0/.github/dependabot.yaml +22 -0
- mxalign-0.1.0/.github/workflows/linting.yaml +16 -0
- mxalign-0.1.0/.gitignore +10 -0
- mxalign-0.1.0/.pre-commit-config.yaml +26 -0
- mxalign-0.1.0/.python-version +1 -0
- mxalign-0.1.0/CHANGELOG.md +20 -0
- mxalign-0.1.0/LICENSE +21 -0
- mxalign-0.1.0/PKG-INFO +136 -0
- mxalign-0.1.0/README.md +109 -0
- mxalign-0.1.0/doc/adr/ADR-001_seperate-validation-and-loading.md +39 -0
- mxalign-0.1.0/doc/adr/ADR-002_mxalign-loader-interface.md +74 -0
- mxalign-0.1.0/doc/adr/template.md +74 -0
- mxalign-0.1.0/examples/advanced.ipynb +805 -0
- mxalign-0.1.0/examples/config.yaml +67 -0
- mxalign-0.1.0/examples/introduction.ipynb +86607 -0
- mxalign-0.1.0/pyproject.toml +46 -0
- mxalign-0.1.0/src/mxalign/__init__.py +36 -0
- mxalign-0.1.0/src/mxalign/accessors/__init__.py +7 -0
- mxalign-0.1.0/src/mxalign/accessors/space.py +205 -0
- mxalign-0.1.0/src/mxalign/accessors/time.py +180 -0
- mxalign-0.1.0/src/mxalign/align/__init__.py +7 -0
- mxalign-0.1.0/src/mxalign/align/nans.py +72 -0
- mxalign-0.1.0/src/mxalign/align/space.py +21 -0
- mxalign-0.1.0/src/mxalign/align/time.py +62 -0
- mxalign-0.1.0/src/mxalign/cli.py +157 -0
- mxalign-0.1.0/src/mxalign/interpolations/__init__.py +9 -0
- mxalign-0.1.0/src/mxalign/interpolations/base.py +29 -0
- mxalign-0.1.0/src/mxalign/interpolations/delaunay.py +218 -0
- mxalign-0.1.0/src/mxalign/interpolations/interpolate.py +29 -0
- mxalign-0.1.0/src/mxalign/interpolations/registry.py +17 -0
- mxalign-0.1.0/src/mxalign/interpolations/xarray.py +63 -0
- mxalign-0.1.0/src/mxalign/loaders/__init__.py +11 -0
- mxalign-0.1.0/src/mxalign/loaders/anemoi_datasets.py +92 -0
- mxalign-0.1.0/src/mxalign/loaders/anemoi_inference.py +103 -0
- mxalign-0.1.0/src/mxalign/loaders/base.py +103 -0
- mxalign-0.1.0/src/mxalign/loaders/harp_obstable.py +81 -0
- mxalign-0.1.0/src/mxalign/loaders/loader.py +8 -0
- mxalign-0.1.0/src/mxalign/loaders/registry.py +17 -0
- mxalign-0.1.0/src/mxalign/properties/__init__.py +0 -0
- mxalign-0.1.0/src/mxalign/properties/properties.py +25 -0
- mxalign-0.1.0/src/mxalign/properties/specs.py +54 -0
- mxalign-0.1.0/src/mxalign/properties/utils.py +43 -0
- mxalign-0.1.0/src/mxalign/properties/validation.py +48 -0
- mxalign-0.1.0/src/mxalign/runner.py +167 -0
- mxalign-0.1.0/src/mxalign/transformations/__init__.py +7 -0
- mxalign-0.1.0/src/mxalign/transformations/base.py +38 -0
- mxalign-0.1.0/src/mxalign/transformations/external.py +34 -0
- mxalign-0.1.0/src/mxalign/transformations/registry.py +20 -0
- mxalign-0.1.0/src/mxalign/transformations/transform.py +28 -0
- mxalign-0.1.0/src/mxalign/utils/config.py +55 -0
- mxalign-0.1.0/src/mxalign/utils/dates.py +76 -0
- mxalign-0.1.0/src/mxalign/utils/projections.py +104 -0
- mxalign-0.1.0/src/mxalign/utils/save.py +62 -0
- mxalign-0.1.0/src/mxalign/verification.py +57 -0
- mxalign-0.1.0/uv.lock +2415 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: "github-actions"
|
|
4
|
+
directory: "/"
|
|
5
|
+
schedule:
|
|
6
|
+
interval: "monthly"
|
|
7
|
+
cooldown:
|
|
8
|
+
default-days: 7
|
|
9
|
+
- package-ecosystem: "pre-commit"
|
|
10
|
+
directory: "/"
|
|
11
|
+
schedule:
|
|
12
|
+
interval: "monthly"
|
|
13
|
+
- package-ecosystem: "uv"
|
|
14
|
+
directory: "/"
|
|
15
|
+
schedule:
|
|
16
|
+
interval: "monthly"
|
|
17
|
+
cooldown:
|
|
18
|
+
default-days: 7
|
|
19
|
+
groups:
|
|
20
|
+
uv.lock-patches:
|
|
21
|
+
update-types:
|
|
22
|
+
- patch
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
name: Linting Checks
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main, develop]
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
pre-commit:
|
|
11
|
+
name: Run Linters
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v6
|
|
15
|
+
- uses: actions/setup-python@v6
|
|
16
|
+
- uses: pre-commit/action@v3.0.1
|
mxalign-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v6.0.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: trailing-whitespace
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: check-yaml
|
|
8
|
+
- id: check-json
|
|
9
|
+
- id: check-toml
|
|
10
|
+
- id: check-added-large-files
|
|
11
|
+
- id: check-merge-conflict
|
|
12
|
+
- id: mixed-line-ending
|
|
13
|
+
- id: debug-statements
|
|
14
|
+
|
|
15
|
+
- repo: https://github.com/codespell-project/codespell
|
|
16
|
+
rev: v2.4.2
|
|
17
|
+
hooks:
|
|
18
|
+
- id: codespell
|
|
19
|
+
args: [--ignore-words-list=COO, --ignore-regex=^\s*"image\/png":\s.*]
|
|
20
|
+
|
|
21
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
22
|
+
rev: v0.15.6
|
|
23
|
+
hooks:
|
|
24
|
+
- id: ruff-check
|
|
25
|
+
args: [--fix]
|
|
26
|
+
- id: ruff-format
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0](https://github.com/mlwp-tools/mxalign/releases/tag/v0.1.0)
|
|
9
|
+
|
|
10
|
+
First release of `mxalign`, an xarray-based package for alignment of meteorological datasets, with the following functionality and configuration:
|
|
11
|
+
|
|
12
|
+
- Execution of the verification tooling pipeline.
|
|
13
|
+
- Core alignment capabilities (handling of space, time, and NaNs).
|
|
14
|
+
- Base data loaders, including support for Anemoi datasets and inference.
|
|
15
|
+
- Validation functionality for datasets.
|
|
16
|
+
- Implementations for interpolations (e.g., Delaunay, xarray).
|
|
17
|
+
- Accessors (space, time), transformations, and properties management tools.
|
|
18
|
+
- Introductory Jupyter notebook (`examples/introduction.ipynb`) demonstrating interactive usage.
|
|
19
|
+
- Integration of `uv.lock` dependency locking to ensure specific versions are used in testing, allowing for safe and reliable releases.
|
|
20
|
+
- Dependabot configuration with a strategy where PRs for flexible requirements will only be merged once tests confirm that `mxalign` works correctly with the newer upstream packages.
|
mxalign-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 rmi-mlwp
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mxalign-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mxalign
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Add your description here
|
|
5
|
+
Author-email: Michiel Van Ginderachter <michiel.vanginderachter@meteo.be>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: bokeh>=3.8.2
|
|
9
|
+
Requires-Dist: cartopy>=0.25.0
|
|
10
|
+
Requires-Dist: dask>=2026.1.2
|
|
11
|
+
Requires-Dist: distributed>=2026.1.2
|
|
12
|
+
Requires-Dist: earthkit-data>=0.19.0
|
|
13
|
+
Requires-Dist: h5netcdf>=1.8.1
|
|
14
|
+
Requires-Dist: h5py>=3.15.1
|
|
15
|
+
Requires-Dist: netcdf4>=1.7.4
|
|
16
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
17
|
+
Requires-Dist: scipy>=1.17.0
|
|
18
|
+
Requires-Dist: xarray>=2026.1.0
|
|
19
|
+
Requires-Dist: zarr<3.0
|
|
20
|
+
Provides-Extra: earthkit
|
|
21
|
+
Requires-Dist: earthkit-meteo>=0.6.1; extra == 'earthkit'
|
|
22
|
+
Provides-Extra: jobqueue
|
|
23
|
+
Requires-Dist: dask-jobqueue>=0.9.0; extra == 'jobqueue'
|
|
24
|
+
Provides-Extra: verification
|
|
25
|
+
Requires-Dist: xskillscore>=0.0.29; extra == 'verification'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# Meteo-xAlign
|
|
29
|
+
|
|
30
|
+
**An xarray based package for alignment of meteorological datasets**
|
|
31
|
+
|
|
32
|
+
## What is this?
|
|
33
|
+
|
|
34
|
+
`mxalign` is an `xarray`-based package designed for the alignment and verification of meteorological datasets. It standardizes operations across datasets by attaching properties along three main axes:
|
|
35
|
+
- **Space:** Grid or point-based data
|
|
36
|
+
- **Time:** Forecasts, observations, or climatology
|
|
37
|
+
- **Uncertainty:** Deterministic, ensemble, or quantile forecasts
|
|
38
|
+
|
|
39
|
+
Currently, `mxalign` also acts as a full execution engine. It can load datasets (e.g., Anemoi inference outputs, observation datasets), apply transformations, align datasets in both space and time to match a reference, safely broadcast NaNs, and execute verification metrics on scaled Dask clusters (Local or Slurm).
|
|
40
|
+
|
|
41
|
+
> ⚠️ **Roadmap & Future Architecture Changes (planned for v0.2.0):**
|
|
42
|
+
> Currently, `mxalign` handles both alignment and the execution of the verification tooling pipeline, including loading and validation. In the upcoming `v0.2.0` release, this architecture will be refactored:
|
|
43
|
+
> - **Loading** will be split out into [`mlwp-data-loaders`](https://github.com/mlwp-tools/mlwp-data-loaders).
|
|
44
|
+
> - **Validation** of loaded `xr.Dataset`s will be moved to [`mlwp-data-specs`](https://github.com/mlwp-tools/mlwp-data-specs) (which will contain the requirements for each of the dataset traits and the validation logic).
|
|
45
|
+
> - **Execution** of the full verification pipeline (loading, transformations, alignment, and verification) from configuration files may be moved to a separate package in future releases.
|
|
46
|
+
> - **Tests** will be added to `mxalign` (building on test datasets already integrated into `mlwp-data-loaders`) that ensure that all alignment operations work correctly (Testing notebook execution inside `mxalign` is explicitly excluded from the current roadmap).
|
|
47
|
+
|
|
48
|
+
## Python API
|
|
49
|
+
|
|
50
|
+
`mxalign` provides building blocks for manual alignment, transformations, and interpolations of `xarray` datasets. This is ideal for interactive use in Jupyter notebooks or custom Python scripts.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import xarray as xr
|
|
54
|
+
from mxalign import load, align_space, align_time, transform
|
|
55
|
+
|
|
56
|
+
# Load datasets (using registered loaders)
|
|
57
|
+
ds_obs = load(name="observations_loader", files=["obs.nc"])
|
|
58
|
+
ds_fcst = load(name="anemoi_inference", files=["forecast.nc"])
|
|
59
|
+
|
|
60
|
+
# Align the forecast spatially to match the observation reference
|
|
61
|
+
ds_fcst_aligned_space = align_space(ds_fcst, reference=ds_obs, method="interpolation")
|
|
62
|
+
|
|
63
|
+
# Align datasets temporally
|
|
64
|
+
datasets = {"obs": ds_obs, "fcst": ds_fcst_aligned_space}
|
|
65
|
+
aligned_datasets = align_time(datasets, method="intersection")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
For a more comprehensive interactive example, check out the [introductory notebook](./examples/introduction.ipynb).
|
|
69
|
+
|
|
70
|
+
## Executing via a Configuration
|
|
71
|
+
|
|
72
|
+
For full verification pipeline execution, `mxalign` uses a YAML configuration file. This allows you to declaratively define how datasets are loaded, transformed, aligned, and verified.
|
|
73
|
+
|
|
74
|
+
### Configuration Contents
|
|
75
|
+
|
|
76
|
+
The configuration file is divided into several main sections:
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
datasets:
|
|
80
|
+
# Define datasets to load, specifying the loader, files, and variables
|
|
81
|
+
obs_data:
|
|
82
|
+
loader: observations_loader
|
|
83
|
+
files: ["obs.nc"]
|
|
84
|
+
fcst_data:
|
|
85
|
+
loader: anemoi_inference
|
|
86
|
+
files: ["forecast.nc"]
|
|
87
|
+
|
|
88
|
+
transformations:
|
|
89
|
+
# Apply transformations to loaded datasets
|
|
90
|
+
|
|
91
|
+
alignment:
|
|
92
|
+
# Define reference dataset and alignment methods (space, time, NaN broadcasting)
|
|
93
|
+
reference: obs_data
|
|
94
|
+
time:
|
|
95
|
+
method: intersection
|
|
96
|
+
|
|
97
|
+
verification:
|
|
98
|
+
# Specify the reference dataset and the metrics to calculate
|
|
99
|
+
reference: obs_data
|
|
100
|
+
metrics:
|
|
101
|
+
# define metrics here
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Running from the Command Line
|
|
105
|
+
|
|
106
|
+
The CLI uses Dask to distribute the workload and supports both local execution and execution on Slurm-managed HPC clusters.
|
|
107
|
+
|
|
108
|
+
**Local Execution**
|
|
109
|
+
Run the pipeline on a local Dask cluster:
|
|
110
|
+
```bash
|
|
111
|
+
mxalign local path/to/config.yaml --n_workers 4 --threads_per_worker 1
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Slurm Execution**
|
|
115
|
+
Run the pipeline on a Slurm cluster:
|
|
116
|
+
```bash
|
|
117
|
+
mxalign slurm path/to/config.yaml --account your_account --queue your_queue --cores 8 --memory 64GB
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Running from Python
|
|
121
|
+
|
|
122
|
+
You can also execute the entire configuration-driven pipeline directly from Python using the `Runner` class.
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from mxalign.runner import Runner
|
|
126
|
+
|
|
127
|
+
# Initialize the runner with a YAML config file or a dictionary
|
|
128
|
+
runner = Runner("path/to/config.yaml")
|
|
129
|
+
|
|
130
|
+
# Execute the pipeline: loads, transforms, aligns, and verifies the datasets
|
|
131
|
+
runner.run()
|
|
132
|
+
|
|
133
|
+
# The resulting aligned datasets and computed metrics are accessible via:
|
|
134
|
+
aligned_datasets = runner.datasets
|
|
135
|
+
metrics = runner.metrics
|
|
136
|
+
```
|
mxalign-0.1.0/README.md
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Meteo-xAlign
|
|
2
|
+
|
|
3
|
+
**An xarray-based package for alignment of meteorological datasets**
|
|
4
|
+
|
|
5
|
+
## What is this?
|
|
6
|
+
|
|
7
|
+
`mxalign` is an `xarray`-based package designed for the alignment and verification of meteorological datasets. It standardizes operations across datasets by attaching properties along three main axes:
|
|
8
|
+
- **Space:** Grid or point-based data
|
|
9
|
+
- **Time:** Forecasts, observations, or climatology
|
|
10
|
+
- **Uncertainty:** Deterministic, ensemble, or quantile forecasts
|
|
11
|
+
|
|
12
|
+
Currently, `mxalign` also acts as a full execution engine. It can load datasets (e.g., Anemoi inference outputs, observation datasets), apply transformations, align datasets in both space and time to match a reference, safely broadcast NaNs, and execute verification metrics on scaled Dask clusters (Local or Slurm).
|
|
13
|
+
|
|
14
|
+
> ⚠️ **Roadmap & Future Architecture Changes (planned for v0.2.0):**
|
|
15
|
+
> Currently, `mxalign` handles both alignment and the execution of the verification tooling pipeline, including loading and validation. In the upcoming `v0.2.0` release, this architecture will be refactored:
|
|
16
|
+
> - **Loading** will be split out into [`mlwp-data-loaders`](https://github.com/mlwp-tools/mlwp-data-loaders).
|
|
17
|
+
> - **Validation** of loaded `xr.Dataset`s will be moved to [`mlwp-data-specs`](https://github.com/mlwp-tools/mlwp-data-specs) (which will contain the requirements for each of the dataset traits and the validation logic).
|
|
18
|
+
> - **Execution** of the full verification pipeline (loading, transformations, alignment, and verification) from configuration files may be moved to a separate package in future releases.
|
|
19
|
+
> - **Tests** will be added to `mxalign` (building on test datasets already integrated into `mlwp-data-loaders`) that ensure that all alignment operations work correctly (Testing notebook execution inside `mxalign` is explicitly excluded from the current roadmap).
|
|
20
|
+
|
|
21
|
+
## Python API
|
|
22
|
+
|
|
23
|
+
`mxalign` provides building blocks for manual alignment, transformations, and interpolations of `xarray` datasets. This is ideal for interactive use in Jupyter notebooks or custom Python scripts.
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import xarray as xr
|
|
27
|
+
from mxalign import load, align_space, align_time, transform
|
|
28
|
+
|
|
29
|
+
# Load datasets (using registered loaders)
|
|
30
|
+
ds_obs = load(name="observations_loader", files=["obs.nc"])
|
|
31
|
+
ds_fcst = load(name="anemoi_inference", files=["forecast.nc"])
|
|
32
|
+
|
|
33
|
+
# Align the forecast spatially to match the observation reference
|
|
34
|
+
ds_fcst_aligned_space = align_space(ds_fcst, reference=ds_obs, method="interpolation")
|
|
35
|
+
|
|
36
|
+
# Align datasets temporally
|
|
37
|
+
datasets = {"obs": ds_obs, "fcst": ds_fcst_aligned_space}
|
|
38
|
+
aligned_datasets = align_time(datasets, method="intersection")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
For a more comprehensive interactive example, check out the [introductory notebook](./examples/introduction.ipynb).
|
|
42
|
+
|
|
43
|
+
## Executing via a Configuration
|
|
44
|
+
|
|
45
|
+
For full verification pipeline execution, `mxalign` uses a YAML configuration file. This allows you to declaratively define how datasets are loaded, transformed, aligned, and verified.
|
|
46
|
+
|
|
47
|
+
### Configuration Contents
|
|
48
|
+
|
|
49
|
+
The configuration file is divided into several main sections:
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
datasets:
|
|
53
|
+
# Define datasets to load, specifying the loader, files, and variables
|
|
54
|
+
obs_data:
|
|
55
|
+
loader: observations_loader
|
|
56
|
+
files: ["obs.nc"]
|
|
57
|
+
fcst_data:
|
|
58
|
+
loader: anemoi_inference
|
|
59
|
+
files: ["forecast.nc"]
|
|
60
|
+
|
|
61
|
+
transformations:
|
|
62
|
+
# Apply transformations to loaded datasets
|
|
63
|
+
|
|
64
|
+
alignment:
|
|
65
|
+
# Define reference dataset and alignment methods (space, time, NaN broadcasting)
|
|
66
|
+
reference: obs_data
|
|
67
|
+
time:
|
|
68
|
+
method: intersection
|
|
69
|
+
|
|
70
|
+
verification:
|
|
71
|
+
# Specify the reference dataset and the metrics to calculate
|
|
72
|
+
reference: obs_data
|
|
73
|
+
metrics:
|
|
74
|
+
# define metrics here
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Running from the Command Line
|
|
78
|
+
|
|
79
|
+
The CLI uses Dask to distribute the workload and supports both local execution and execution on Slurm-managed HPC clusters.
|
|
80
|
+
|
|
81
|
+
**Local Execution**
|
|
82
|
+
Run the pipeline on a local Dask cluster:
|
|
83
|
+
```bash
|
|
84
|
+
mxalign local path/to/config.yaml --n_workers 4 --threads_per_worker 1
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Slurm Execution**
|
|
88
|
+
Run the pipeline on a Slurm cluster:
|
|
89
|
+
```bash
|
|
90
|
+
mxalign slurm path/to/config.yaml --account your_account --queue your_queue --cores 8 --memory 64GB
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Running from Python
|
|
94
|
+
|
|
95
|
+
You can also execute the entire configuration-driven pipeline directly from Python using the `Runner` class.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from mxalign.runner import Runner
|
|
99
|
+
|
|
100
|
+
# Initialize the runner with a YAML config file or a dictionary
|
|
101
|
+
runner = Runner("path/to/config.yaml")
|
|
102
|
+
|
|
103
|
+
# Execute the pipeline: loads, transforms, aligns, and verifies the datasets
|
|
104
|
+
runner.run()
|
|
105
|
+
|
|
106
|
+
# The resulting aligned datasets and computed metrics are accessible via:
|
|
107
|
+
aligned_datasets = runner.datasets
|
|
108
|
+
metrics = runner.metrics
|
|
109
|
+
```
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
---
|
|
2
|
+
# These are optional metadata elements. Feel free to remove any of them.
|
|
3
|
+
status: accepted
|
|
4
|
+
date: 2026-03-25
|
|
5
|
+
decision-makers: Denby L., Van Ginderachter M.
|
|
6
|
+
informed: Francois B., Buurman S.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Separation of concerns between dataset-loading, -validation and -alignment
|
|
10
|
+
|
|
11
|
+
## Context
|
|
12
|
+
|
|
13
|
+
Currently `mxalign` implements dataset loading functionality, validation functionality (checking if the loaded dataset has all the correct metadata) and alignment functionality. However, dataset-loading and -validation fall outside the main scope of the `mxalign` package.
|
|
14
|
+
|
|
15
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
16
|
+
## Decision Drivers
|
|
17
|
+
|
|
18
|
+
* Provide clarity on what the package does by separating the different concerns
|
|
19
|
+
* Provide a clear interface for users who want to bring their own dataset
|
|
20
|
+
* Provide flexibility for users
|
|
21
|
+
* Ease of maintenance by decoupling concerns
|
|
22
|
+
|
|
23
|
+
## Considered Options
|
|
24
|
+
|
|
25
|
+
1. Split out both dataset validation (into `mlwp-data-specs`) and dataset loading (into `mlwp-data-loaders`)
|
|
26
|
+
2. Split out dataset-validation and loading but keep those two together
|
|
27
|
+
|
|
28
|
+
## Decision Outcome
|
|
29
|
+
|
|
30
|
+
Chosen option: 1, because it allows for the most flexibility and provides a clear entry point for users who want to bring their own loader.
|
|
31
|
+
|
|
32
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
33
|
+
### Consequences
|
|
34
|
+
* Users can now validate their dataset with a CLI
|
|
35
|
+
* Simplification of the loader structure; only functions, no classes
|
|
36
|
+
* `mxalign` is now only responsible for the alignment tasks
|
|
37
|
+
|
|
38
|
+
## More Information
|
|
39
|
+
Currently the interface between a dataset loaded with an `mlwp-data-loaders` loader and `mxalign` is not defined. Ideally `mxalign` should know the traits of the dataset to correctly align it. How do we inform `mxalign` of the traits? See [ADR-002](./ADR-002_mxalign-loader-interface.md) for possible options.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
---
|
|
2
|
+
# These are optional metadata elements. Feel free to remove any of them.
|
|
3
|
+
status: "{proposed | rejected | accepted | deprecated | … | superseded by ADR-0123}"
|
|
4
|
+
date: {YYYY-MM-DD when the decision was last updated}
|
|
5
|
+
decision-makers: {list everyone involved in the decision}
|
|
6
|
+
consulted: {list everyone whose opinions are sought (typically subject-matter experts); and with whom there is a two-way communication}
|
|
7
|
+
informed: {list everyone who is kept up-to-date on progress; and with whom there is a one-way communication}
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# {short title, representative of solved problem and found solution}
|
|
11
|
+
|
|
12
|
+
## Context and Problem Statement
|
|
13
|
+
|
|
14
|
+
{Describe the context and problem statement, e.g., in free form using two to three sentences or in the form of an illustrative story. You may want to articulate the problem in form of a question and add links to collaboration boards or issue management systems.}
|
|
15
|
+
|
|
16
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
17
|
+
## Decision Drivers
|
|
18
|
+
|
|
19
|
+
* {decision driver 1, e.g., a force, facing concern, …}
|
|
20
|
+
* {decision driver 2, e.g., a force, facing concern, …}
|
|
21
|
+
* … <!-- numbers of drivers can vary -->
|
|
22
|
+
|
|
23
|
+
## Considered Options
|
|
24
|
+
|
|
25
|
+
* {title of option 1}
|
|
26
|
+
* {title of option 2}
|
|
27
|
+
* {title of option 3}
|
|
28
|
+
* … <!-- numbers of options can vary -->
|
|
29
|
+
|
|
30
|
+
## Decision Outcome
|
|
31
|
+
|
|
32
|
+
Chosen option: "{title of option 1}", because {justification. e.g., only option, which meets k.o. criterion decision driver | which resolves force {force} | … | comes out best (see below)}.
|
|
33
|
+
|
|
34
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
35
|
+
### Consequences
|
|
36
|
+
|
|
37
|
+
* Good, because {positive consequence, e.g., improvement of one or more desired qualities, …}
|
|
38
|
+
* Bad, because {negative consequence, e.g., compromising one or more desired qualities, …}
|
|
39
|
+
* … <!-- numbers of consequences can vary -->
|
|
40
|
+
|
|
41
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
42
|
+
### Confirmation
|
|
43
|
+
|
|
44
|
+
{Describe how the implementation of/compliance with the ADR can/will be confirmed. Are the design that was decided for and its implementation in line with the decision made? E.g., a design/code review or a test with a library such as ArchUnit can help validate this. Note that although we classify this element as optional, it is included in many ADRs.}
|
|
45
|
+
|
|
46
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
47
|
+
## Pros and Cons of the Options
|
|
48
|
+
|
|
49
|
+
### {title of option 1}
|
|
50
|
+
|
|
51
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
52
|
+
{example | description | pointer to more information | …}
|
|
53
|
+
|
|
54
|
+
* Good, because {argument a}
|
|
55
|
+
* Good, because {argument b}
|
|
56
|
+
<!-- use "neutral" if the given argument weighs neither for good nor bad -->
|
|
57
|
+
* Neutral, because {argument c}
|
|
58
|
+
* Bad, because {argument d}
|
|
59
|
+
* … <!-- numbers of pros and cons can vary -->
|
|
60
|
+
|
|
61
|
+
### {title of other option}
|
|
62
|
+
|
|
63
|
+
{example | description | pointer to more information | …}
|
|
64
|
+
|
|
65
|
+
* Good, because {argument a}
|
|
66
|
+
* Good, because {argument b}
|
|
67
|
+
* Neutral, because {argument c}
|
|
68
|
+
* Bad, because {argument d}
|
|
69
|
+
* …
|
|
70
|
+
|
|
71
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
72
|
+
## More Information
|
|
73
|
+
|
|
74
|
+
{You might want to provide additional evidence/confidence for the decision outcome here and/or document the team agreement on the decision and/or define when/how this decision should be realized and if/when it should be re-visited. Links to other decisions and resources might appear here as well.}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
---
|
|
2
|
+
# These are optional metadata elements. Feel free to remove any of them.
|
|
3
|
+
status: "{proposed | rejected | accepted | deprecated | … | superseded by ADR-0123}"
|
|
4
|
+
date: {YYYY-MM-DD when the decision was last updated}
|
|
5
|
+
decision-makers: {list everyone involved in the decision}
|
|
6
|
+
consulted: {list everyone whose opinions are sought (typically subject-matter experts); and with whom there is a two-way communication}
|
|
7
|
+
informed: {list everyone who is kept up-to-date on progress; and with whom there is a one-way communication}
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# {short title, representative of solved problem and found solution}
|
|
11
|
+
|
|
12
|
+
## Context and Problem Statement
|
|
13
|
+
|
|
14
|
+
{Describe the context and problem statement, e.g., in free form using two to three sentences or in the form of an illustrative story. You may want to articulate the problem in form of a question and add links to collaboration boards or issue management systems.}
|
|
15
|
+
|
|
16
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
17
|
+
## Decision Drivers
|
|
18
|
+
|
|
19
|
+
* {decision driver 1, e.g., a force, facing concern, …}
|
|
20
|
+
* {decision driver 2, e.g., a force, facing concern, …}
|
|
21
|
+
* … <!-- numbers of drivers can vary -->
|
|
22
|
+
|
|
23
|
+
## Considered Options
|
|
24
|
+
|
|
25
|
+
* {title of option 1}
|
|
26
|
+
* {title of option 2}
|
|
27
|
+
* {title of option 3}
|
|
28
|
+
* … <!-- numbers of options can vary -->
|
|
29
|
+
|
|
30
|
+
## Decision Outcome
|
|
31
|
+
|
|
32
|
+
Chosen option: "{title of option 1}", because {justification. e.g., only option, which meets k.o. criterion decision driver | which resolves force {force} | … | comes out best (see below)}.
|
|
33
|
+
|
|
34
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
35
|
+
### Consequences
|
|
36
|
+
|
|
37
|
+
* Good, because {positive consequence, e.g., improvement of one or more desired qualities, …}
|
|
38
|
+
* Bad, because {negative consequence, e.g., compromising one or more desired qualities, …}
|
|
39
|
+
* … <!-- numbers of consequences can vary -->
|
|
40
|
+
|
|
41
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
42
|
+
### Confirmation
|
|
43
|
+
|
|
44
|
+
{Describe how the implementation of/compliance with the ADR can/will be confirmed. Are the design that was decided for and its implementation in line with the decision made? E.g., a design/code review or a test with a library such as ArchUnit can help validate this. Note that although we classify this element as optional, it is included in many ADRs.}
|
|
45
|
+
|
|
46
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
47
|
+
## Pros and Cons of the Options
|
|
48
|
+
|
|
49
|
+
### {title of option 1}
|
|
50
|
+
|
|
51
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
52
|
+
{example | description | pointer to more information | …}
|
|
53
|
+
|
|
54
|
+
* Good, because {argument a}
|
|
55
|
+
* Good, because {argument b}
|
|
56
|
+
<!-- use "neutral" if the given argument weighs neither for good nor bad -->
|
|
57
|
+
* Neutral, because {argument c}
|
|
58
|
+
* Bad, because {argument d}
|
|
59
|
+
* … <!-- numbers of pros and cons can vary -->
|
|
60
|
+
|
|
61
|
+
### {title of other option}
|
|
62
|
+
|
|
63
|
+
{example | description | pointer to more information | …}
|
|
64
|
+
|
|
65
|
+
* Good, because {argument a}
|
|
66
|
+
* Good, because {argument b}
|
|
67
|
+
* Neutral, because {argument c}
|
|
68
|
+
* Bad, because {argument d}
|
|
69
|
+
* …
|
|
70
|
+
|
|
71
|
+
<!-- This is an optional element. Feel free to remove. -->
|
|
72
|
+
## More Information
|
|
73
|
+
|
|
74
|
+
{You might want to provide additional evidence/confidence for the decision outcome here and/or document the team agreement on the decision and/or define when/how this decision should be realized and if/when it should be re-visited. Links to other decisions and resources might appear here as well.}
|