esgf-qa 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgf_qa/_constants.py +63 -1
- esgf_qa/_version.py +2 -2
- esgf_qa/cluster_results.py +467 -0
- esgf_qa/con_checks.py +209 -11
- esgf_qa/run_qa.py +356 -463
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/METADATA +47 -31
- esgf_qa-0.5.0.dist-info/RECORD +19 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/WHEEL +1 -1
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/top_level.txt +1 -1
- tests/test_cli.py +271 -0
- tests/test_cluster_results.py +166 -0
- tests/test_con_checks.py +263 -0
- tests/test_qaviewer.py +147 -0
- tests/test_run_dummy_qa.py +191 -0
- tests/test_run_qa.py +181 -0
- docs/esgf-qa_Logo.png +0 -0
- esgf_qa-0.3.0.dist-info/RECORD +0 -13
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/entry_points.txt +0 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: esgf-qa
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: QA based on compliance-checker
|
|
5
5
|
Author-email: Martin Schupfner <schupfner@dkrz.de>
|
|
6
6
|
Maintainer-email: Martin Schupfner <schupfner@dkrz.de>
|
|
@@ -21,6 +21,7 @@ Requires-Dist: cf_xarray
|
|
|
21
21
|
Requires-Dist: compliance-checker>=5.3.0
|
|
22
22
|
Requires-Dist: dask
|
|
23
23
|
Requires-Dist: netCDF4
|
|
24
|
+
Requires-Dist: packaging
|
|
24
25
|
Requires-Dist: pandas
|
|
25
26
|
Requires-Dist: textual
|
|
26
27
|
Requires-Dist: xarray
|
|
@@ -38,6 +39,7 @@ Requires-Dist: flake8-print; extra == "dev"
|
|
|
38
39
|
Requires-Dist: pre-commit; extra == "dev"
|
|
39
40
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
40
41
|
Requires-Dist: pytest-flake8; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
41
43
|
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
42
44
|
Requires-Dist: twine; extra == "dev"
|
|
43
45
|
Requires-Dist: wheel; extra == "dev"
|
|
@@ -45,34 +47,36 @@ Dynamic: license-file
|
|
|
45
47
|
|
|
46
48
|
[](https://pypi.org/project/esgf-qa/)
|
|
47
49
|
|
|
48
|
-
# esgf-qa
|
|
49
|
-
|
|
50
|
+
# esgf-qa
|
|
51
|
+
### Quality Assurance Workflow Based on `compliance-checker` and `cc-plugin-wcrp` (or other cc-plugins)
|
|
52
|
+
<img src="https://raw.githubusercontent.com/ESGF/esgf-qa/master/docs/esgf-qa_Logo.png" align="left" width="120">
|
|
50
53
|
|
|
51
|
-
`esgf-qa`
|
|
52
|
-
[ioos/compliance-checker](https://github.com/ioos/compliance-checker)
|
|
54
|
+
`esgf-qa` provides a flexible quality assurance (QA) workflow for evaluating dataset compliance using the
|
|
55
|
+
[ioos/compliance-checker](https://github.com/ioos/compliance-checker) framework
|
|
56
|
+
(including [CF](https://cfconventions.org/) compliance checks)
|
|
57
|
+
and any community plugins (`cc-plugin`s), such as
|
|
53
58
|
[ESGF/cc-plugin-wcrp](https://github.com/ESGF/cc-plugin-wcrp) and
|
|
54
59
|
[euro-cordex/cc-plugin-cc6](https://github.com/euro-cordex/cc-plugin-cc6).
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
consistency checks) as well as to summarize the test results.
|
|
61
|
+
The tool executes file-based quality control (QC) tests through the Compliance Checker,
|
|
62
|
+
and, where applicable, performs additional dataset-level checks to test inter-file time-axis continuity
|
|
63
|
+
and consistency in variable, coordinate and attribute definitions.
|
|
64
|
+
Results from both file- and dataset-level checks are aggregated, summarized, and clustered for easier interpretation.
|
|
61
65
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
66
|
+
### Currently supported checkers
|
|
67
|
+
|
|
68
|
+
While `esgf-qa` has been primarily developed for workflows assessing compliance with WCRP project data specifications
|
|
69
|
+
(e.g., CMIP, CORDEX), it can also be used for general CF-compliance testing and generally supports any
|
|
70
|
+
`cc-plugin`. It can be easily extended to support any projects following CORDEX- or CMIP-style CMOR table conventions.
|
|
65
71
|
|
|
66
72
|
| Standard | Checker Name |
|
|
67
73
|
| ---------------------------------------------------------------------------------------------------- | ------------ |
|
|
68
|
-
| [
|
|
69
|
-
| [
|
|
70
|
-
| [CORDEX-CMIP6 Archive Specifications](https://doi.org/10.5281/zenodo.10961069)
|
|
71
|
-
| [CMIP6
|
|
72
|
-
| [
|
|
73
|
-
|
|
|
74
|
-
| [EERIE CMOR Tables & CV](https://github.com/eerie-project/dreq_tools) | eerie |
|
|
75
|
-
| Custom MIP | mip |
|
|
74
|
+
| [CF Conventions](https://cfconventions.org/) (shipped with [ioos/compliance-checker](https://github.com/ioos/compliance-checker)) | cf |
|
|
75
|
+
| [WCRP CMIP6](https://pcmdi.llnl.gov/CMIP6/):<br><ul><li>[CMIP6 DRS](https://wcrp-cmip.github.io/WGCM_Infrastructure_Panel/Papers/CMIP6_global_attributes_filenames_CVs_v6.2.7.pdf)</li><li>[CMIP6 CVs](https://github.com/WCRP-CMIP/CMIP6_CVs) (esgvoc)</li><li>[cmip6-cmor-tables](https://github.com/PCMDI/cmip6-cmor-tables) (esgvoc)</li></ul> | wcrp_cmip6 |
|
|
76
|
+
| [WCRP CORDEX-CMIP6](https://cordex.org/):<br><ul><li>[CORDEX-CMIP6 Archive Specifications](https://doi.org/10.5281/zenodo.10961069)</li><li>[cordex-cmip6-cv](https://github.com/WCRP-CORDEX/cordex-cmip6-cv) (esgvoc)</li><li>[cordex-cmip6-cmor-tables](https://github.com/WCRP-CORDEX/cordex-cmip6-cmor-tables) (esgvoc)</li></ul> | wcrp_cordex_cmip6 |
|
|
77
|
+
| [WCRP CORDEX-CMIP6](https://cordex.org/):<br><ul><li>[CORDEX-CMIP6 Archive Specifications](https://doi.org/10.5281/zenodo.10961069)</li><li>[cordex-cmip6-cv](https://github.com/WCRP-CORDEX/cordex-cmip6-cv)</li><li>[cordex-cmip6-cmor-tables](https://github.com/WCRP-CORDEX/cordex-cmip6-cmor-tables)</li></ul> | cc6 |
|
|
78
|
+
| [EERIE](https://eerie-project.eu/):<br>[EERIE CMOR Tables & CV](https://github.com/eerie-project/dreq_tools) | eerie |
|
|
79
|
+
| Custom MIP (CMOR/MIP tables have to be specified) | mip |
|
|
76
80
|
|
|
77
81
|
## Installation
|
|
78
82
|
|
|
@@ -109,35 +113,41 @@ esgvoc install
|
|
|
109
113
|
|
|
110
114
|
- Test your installation
|
|
111
115
|
|
|
112
|
-
The following command should now also list the `
|
|
116
|
+
The following command should now also list the `cc-plugin-wcrp` checks next to all `cc_plugin_cc6` and `compliance_checker` checks:
|
|
113
117
|
```
|
|
114
118
|
cchecker.py -l
|
|
115
119
|
```
|
|
116
120
|
|
|
121
|
+
The following command should now list the necessary projects with metadata sources for `esgvoc`:
|
|
122
|
+
```
|
|
123
|
+
esgvoc status
|
|
124
|
+
```
|
|
125
|
+
|
|
117
126
|
## Usage
|
|
118
127
|
|
|
119
128
|
```shell
|
|
120
|
-
$ esgqa [-h] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <parent_dir>
|
|
129
|
+
$ esgqa [-h] [-P <parallel_processes>] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <parent_dir>
|
|
121
130
|
```
|
|
122
131
|
|
|
123
132
|
- positional arguments:
|
|
124
133
|
- `parent_dir`: Parent directory to scan for netCDF-files to check
|
|
125
134
|
- options:
|
|
126
135
|
- `-h, --help`: show this help message and exit
|
|
136
|
+
- `-P, --parallel_processes`: Specify the maximum number of parallel processes. Default: 0 (= number of cores).
|
|
127
137
|
- `-o, --output_dir OUTPUT_DIR`: Directory to store QA results. Needs to be non-existing or empty or from previous QA run. If not specified, will store results in `./cc-qa-check-results/YYYYMMDD-HHmm_<hash>`.
|
|
128
|
-
- `-t, --test TEST`: The test to run ('wcrp_cmip6:latest'
|
|
129
|
-
- `-O, --option OPTION`: Additional options to be passed to the checkers. Format: '<checker>:<option_name>[:<option_value>]'
|
|
138
|
+
- `-t, --test TEST`: The test to run (e.g. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`; can be specified multiple times, e.g. `'-t wcrp_cmip6:latest -t cf:1.7'`). Default: the latest CF checks (`'cf:latest'`). If the version is omitted, `latest` will be used.
|
|
139
|
+
- `-O, --option OPTION`: Additional options to be passed to the checkers. Format: `'<checker>:<option_name>[:<option_value>]'`. Multiple invocations possible.
|
|
130
140
|
- `-i, --info INFO`: Information used to tag the QA results, e.g. the simulation id to identify the checked run. The original experiment id you gave the run is suggested.
|
|
131
|
-
- `-r, --resume`: Specify to continue a previous QC run. Requires the
|
|
141
|
+
- `-r, --resume`: Specify to continue a previous QC run. Requires the `<output_dir>` argument to be set.
|
|
132
142
|
- `-C, --include_consistency_checks`: Include basic consistency and continuity checks. When using the `wcrp-*`, `cc6`, `mip` or `eerie` checkers, they are included by default.
|
|
133
143
|
|
|
134
144
|
### Example Usage
|
|
135
145
|
|
|
136
146
|
```shell
|
|
137
|
-
$ esgqa -t wcrp_cordex_cmip6:latest -t cf:1.11 -o QA_results/IAEVALL02_2025-10-20 -i "IAEVALL02" ESGF_Buff/IAEVALL02/CORDEX-CMIP6
|
|
147
|
+
$ esgqa -P 8 -t wcrp_cordex_cmip6:latest -t cf:1.11 -o QA_results/IAEVALL02_2025-10-20 -i "IAEVALL02" ESGF_Buff/IAEVALL02/CORDEX-CMIP6
|
|
138
148
|
```
|
|
139
149
|
|
|
140
|
-
To resume at a later date, eg. if the QA run did not finish in time or more files have been added to the
|
|
150
|
+
To resume at a later date, e.g. if the QA run did not finish in time or more files have been added to the `<parent_dir>`
|
|
141
151
|
(note that the last modification date of files is NOT taken into account - once a certain file path has been checked
|
|
142
152
|
it will be marked as checked and checks will only be repeated if runtime errors occurred):
|
|
143
153
|
|
|
@@ -148,7 +158,7 @@ $ esgqa -o QA_results/IAEVALL02_2025-10-20 -r
|
|
|
148
158
|
For a custom MIP with defined CMOR tables (`"mip"` is not a placeholder but an actual basic checker of the `cc_plugin_cc6`):
|
|
149
159
|
|
|
150
160
|
```shell
|
|
151
|
-
$ esgqa -o /path/to/test/results -t "mip:latest" -O "mip:tables:/path/to/mip_cmor_tables/Tables" /path/to/MIP/datasets
|
|
161
|
+
$ esgqa -o /path/to/test/results -t "mip:latest" -O "mip:tables:/path/to/mip_cmor_tables/Tables" /path/to/MIP/datasets/
|
|
152
162
|
```
|
|
153
163
|
|
|
154
164
|
For CF checks and basic time and consistency / continuity checks:
|
|
@@ -164,8 +174,13 @@ The results will be stored in two `json` files:
|
|
|
164
174
|
|
|
165
175
|
### Web view
|
|
166
176
|
The clustered results can be viewed using the following website:
|
|
167
|
-
|
|
177
|
+
|
|
178
|
+
- DKRZ: [https://cmiphub.dkrz.de/info/display_qc_results.html](https://cmiphub.dkrz.de/info/display_qc_results.html).
|
|
179
|
+
- IPSL: coming soon
|
|
180
|
+
|
|
168
181
|
This website runs entirely in the user's browser using JavaScript, without requiring interaction with a web server.
|
|
182
|
+
You can select one of the recent QA runs conducted at the respective site or select a local QA run result file to be displayed.
|
|
183
|
+
|
|
169
184
|
Alternatively, you can open the included `display_qc_results.html` file directly in your browser.
|
|
170
185
|
While the web view also supports the full (unclustered) results, it is recommended not to use the web view for files larger than a few megabytes.
|
|
171
186
|
|
|
@@ -188,8 +203,9 @@ in the GitLab Repository [qa-results](https://gitlab.dkrz.de/udag/qa-results). Y
|
|
|
188
203
|
|
|
189
204
|
This project is licensed under the Apache License 2.0, and includes the Inter font, which is licensed under the SIL Open Font License 1.1. See the [LICENSE](./LICENSE) file for more details.
|
|
190
205
|
|
|
206
|
+
|
|
191
207
|
> [!NOTE]
|
|
192
|
-
> **This project was originally developed by [DKRZ](https://www.dkrz.de)** under the name **cc-qa** (see [DKRZ GitLab](https://gitlab.dkrz.de/udag/cc-qa)), with funding from the
|
|
208
|
+
> **This project was originally developed by [DKRZ](https://www.dkrz.de)** under the name **cc-qa** (see [DKRZ GitLab](https://gitlab.dkrz.de/udag/cc-qa)), with funding from the _German Ministry of Research, Technology and Space_ ([BMFTR](https://www.bmftr.bund.de/en), reference `01LP2326E`).
|
|
193
209
|
> It has since been renamed to **esgf-qa** and is now maintained under the **Earth System Grid Federation (ESGF)** organization on GitHub.
|
|
194
210
|
>
|
|
195
211
|
> If you previously used `cc-qa`, please update your installations as described above.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
esgf_qa/__init__.py,sha256=iLmy2rOkHS_4KZWMD8BgT7R3tLMKeaTCDVf3B4FyYxM,91
|
|
2
|
+
esgf_qa/_constants.py,sha256=OgogPt2RdTzP0Cg9BYjMZ7Y7R_sZR1391iVyG15XSGY,3182
|
|
3
|
+
esgf_qa/_version.py,sha256=fvHpBU3KZKRinkriKdtAt3crenOyysELF-M9y3ozg3U,704
|
|
4
|
+
esgf_qa/cluster_results.py,sha256=3sN4vFv0ps1cS3YyvvdZWpsLs-SB2UKM7CPGqZBgiPw,19131
|
|
5
|
+
esgf_qa/con_checks.py,sha256=BAqbDcEmDB1kiRBaSaB76mNfKxoHtTNWqJHbtALcpIg,29074
|
|
6
|
+
esgf_qa/qaviewer.py,sha256=myt9lq47E40sD7KrMjVcAvy8sqocVinBSUYf4nOPD80,8843
|
|
7
|
+
esgf_qa/run_qa.py,sha256=e1pI1uv7fgO6f9biHHMRbe7KSaJlw3iPLxFjcieLy60,44893
|
|
8
|
+
esgf_qa-0.5.0.dist-info/licenses/LICENSE,sha256=S1WmzAIRoXFV26FENC3SW_XsmvkGtCs-4_gm7PrPYWg,12636
|
|
9
|
+
tests/test_cli.py,sha256=JmDIeGum8RDhgcy0ZpF9Rq1fMn15MXG4-Tzn1UYiYIU,10990
|
|
10
|
+
tests/test_cluster_results.py,sha256=ahwtG6666mP7VdVxHwPy7I8vV9rPxl2VRPdnH8VQk-w,5894
|
|
11
|
+
tests/test_con_checks.py,sha256=VCj_0jt_fbBqo_VWCrpHMHPs9IWxb5PtJs6Yh1jrxxU,8853
|
|
12
|
+
tests/test_qaviewer.py,sha256=ZEH7LkPIl3ocV0Xk4D4Zv6VIH9397hB71FtXLeo7NwY,4635
|
|
13
|
+
tests/test_run_dummy_qa.py,sha256=6pIQkvzP8c-mKynk3n19UvZAhvsPMpnu32YznWFDB2k,6213
|
|
14
|
+
tests/test_run_qa.py,sha256=H2K935lJi-6Znj9DMUn4DH7sR17qTrs4dGLMmIIC0bs,6130
|
|
15
|
+
esgf_qa-0.5.0.dist-info/METADATA,sha256=6TupoWWcXVuprpiSQ8dLC9NqhtdGSL5mwUkiC40vtB8,11306
|
|
16
|
+
esgf_qa-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
17
|
+
esgf_qa-0.5.0.dist-info/entry_points.txt,sha256=ZGMG_3eS7nyUJE6ZJ9v23Thcf-r29ZSZ7e8voBVwbf4,82
|
|
18
|
+
esgf_qa-0.5.0.dist-info/top_level.txt,sha256=BtbDH91jFtWygUPsLIr1g5CKU7Jmp4K-CU8yzCaONt0,14
|
|
19
|
+
esgf_qa-0.5.0.dist-info/RECORD,,
|
tests/test_cli.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
import tempfile
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pytest
|
|
10
|
+
import xarray as xr
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestQACommandLine:
|
|
14
|
+
"""
|
|
15
|
+
End-to-end pytest test class for esgqa CLI using synthetic CMIP6 and CORDEX-CMIP6 data.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
@classmethod
|
|
19
|
+
def setup_class(cls):
|
|
20
|
+
"""
|
|
21
|
+
Generate lightweight synthetic CMIP6 and CORDEX-CMIP6 test datasets.
|
|
22
|
+
"""
|
|
23
|
+
cls.test_data_dir = tempfile.mkdtemp(prefix="esgf_qa_testdata_")
|
|
24
|
+
cls.cmip6_dir = os.path.join(cls.test_data_dir, "cmip6")
|
|
25
|
+
cls.cordex_dir = os.path.join(cls.test_data_dir, "cordex_cmip6")
|
|
26
|
+
cls.custom_dir = os.path.join(cls.test_data_dir, "custom")
|
|
27
|
+
os.makedirs(cls.cmip6_dir, exist_ok=True)
|
|
28
|
+
os.makedirs(cls.cordex_dir, exist_ok=True)
|
|
29
|
+
|
|
30
|
+
# Generate lightweight CMIP6 test data
|
|
31
|
+
for var in ["tas", "huss"]:
|
|
32
|
+
base_path = (
|
|
33
|
+
Path(cls.cmip6_dir)
|
|
34
|
+
/ f"MPI-ESM1-2-LR/historical/r1i1p1f1/Amon/{var}/gn/v20210215"
|
|
35
|
+
)
|
|
36
|
+
base_path.mkdir(parents=True, exist_ok=True)
|
|
37
|
+
for start_year in [1850, 1855]:
|
|
38
|
+
ntime = 60 # 5 years monthly data
|
|
39
|
+
times = np.array(np.arange(ntime), dtype=np.float64)
|
|
40
|
+
lats = np.arange(-90, 91, 10)
|
|
41
|
+
lons = np.arange(0, 360, 10)
|
|
42
|
+
data = np.zeros((len(times), len(lats), len(lons)))
|
|
43
|
+
ds = xr.Dataset(
|
|
44
|
+
{var: (("time", "lat", "lon"), data)},
|
|
45
|
+
coords={"time": times, "lat": lats, "lon": lons},
|
|
46
|
+
)
|
|
47
|
+
file_name = f"{var}_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_{start_year:04d}01-"
|
|
48
|
+
file_name += f"{start_year+4:04d}12.nc"
|
|
49
|
+
ds.to_netcdf(base_path / file_name)
|
|
50
|
+
|
|
51
|
+
# Generate lightweight CORDEX-CMIP6 test data
|
|
52
|
+
for var in ["ta600", "tas"]:
|
|
53
|
+
base_path = (
|
|
54
|
+
Path(cls.cordex_dir)
|
|
55
|
+
/ f"DD/EUR-12/CLMcom-DWD/MPI-ESM1-2-HR/historical/r1i1p1f1/ICON-CLM-202407-1-1/v1-r1/mon/{var}/v20240920"
|
|
56
|
+
)
|
|
57
|
+
base_path.mkdir(parents=True, exist_ok=True)
|
|
58
|
+
for start_year, end_year in [(1950, 1950), (1951, 1960)]:
|
|
59
|
+
ntime = (end_year - start_year + 1) * 12
|
|
60
|
+
times = np.array(np.arange(ntime), dtype=np.float64)
|
|
61
|
+
rlat = np.arange(0, 41, 10)
|
|
62
|
+
rlon = np.arange(0, 41, 10)
|
|
63
|
+
data = np.zeros((len(times), len(rlat), len(rlon)))
|
|
64
|
+
ds = xr.Dataset(
|
|
65
|
+
{var: (("time", "rlat", "rlon"), data)},
|
|
66
|
+
coords={"time": times, "rlat": rlat, "rlon": rlon},
|
|
67
|
+
)
|
|
68
|
+
file_name = f"{var}_EUR-12_MPI-ESM1-2-HR_historical_r1i1p1f1_CLMcom-DWD_ICON-CLM-202407-1-1_v1-r1_mon_{start_year:04d}01-{end_year:04d}12.nc"
|
|
69
|
+
ds.to_netcdf(base_path / file_name)
|
|
70
|
+
|
|
71
|
+
# Generate lightweight custom data
|
|
72
|
+
for var in ["temp2", "huss"]:
|
|
73
|
+
base_path = Path(cls.custom_dir) / "model_output"
|
|
74
|
+
base_path.mkdir(parents=True, exist_ok=True)
|
|
75
|
+
for start_year in range(1850, 1860):
|
|
76
|
+
times = np.arange(0, 12) # 1 years monthly data
|
|
77
|
+
lats = np.arange(-90, 91, 10)
|
|
78
|
+
lons = np.arange(0, 360, 10)
|
|
79
|
+
data = np.zeros((len(times), len(lats), len(lons)))
|
|
80
|
+
ds = xr.Dataset(
|
|
81
|
+
{var: (("time", "lat", "lon"), data)},
|
|
82
|
+
coords={"time": times, "lat": lats, "lon": lons},
|
|
83
|
+
)
|
|
84
|
+
file_name = f"{var}_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_{start_year:04d}01-"
|
|
85
|
+
file_name += f"{start_year+4:04d}12.nc"
|
|
86
|
+
ds.to_netcdf(base_path / file_name)
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def teardown_class(cls):
|
|
90
|
+
"""Clean up temporary test data."""
|
|
91
|
+
shutil.rmtree(cls.test_data_dir)
|
|
92
|
+
|
|
93
|
+
def _run_cli(self, args, expect_error=False, expected_err_msg=None):
|
|
94
|
+
"""Run the esgqa CLI and optionally check for errors."""
|
|
95
|
+
cmd = ["python", "-m", "esgf_qa.run_qa"] + args
|
|
96
|
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
97
|
+
if expect_error:
|
|
98
|
+
assert (
|
|
99
|
+
result.returncode != 0
|
|
100
|
+
), f"Expected error but CLI succeeded:\n{result.stdout}\n{result.stderr}"
|
|
101
|
+
if expected_err_msg:
|
|
102
|
+
combined = result.stdout + "\n" + result.stderr
|
|
103
|
+
assert (
|
|
104
|
+
expected_err_msg in combined
|
|
105
|
+
), f"Expected error message '{expected_err_msg}' not found.\nOutput:\n{combined}"
|
|
106
|
+
else:
|
|
107
|
+
assert (
|
|
108
|
+
result.returncode == 0
|
|
109
|
+
), f"CLI failed unexpectedly:\n{result.stdout}\n{result.stderr}"
|
|
110
|
+
return result.stdout, result.stderr
|
|
111
|
+
|
|
112
|
+
@pytest.mark.parametrize(
|
|
113
|
+
"test_args",
|
|
114
|
+
[
|
|
115
|
+
["-t", "cc6:latest", "-o", "OUTPUT", "cmip6"],
|
|
116
|
+
["-t", "cc6", "-o", "OUTPUT", "cordex_cmip6"],
|
|
117
|
+
["-t", "cc6:latest", "-t", "cf", "-o", "OUTPUT", "cordex_cmip6"],
|
|
118
|
+
["-t", "cf:latest", "-o", "OUTPUT", "cmip6"],
|
|
119
|
+
["-t", "cf:1.7", "-C", "-o", "OUTPUT", "cmip6"],
|
|
120
|
+
[
|
|
121
|
+
"-t",
|
|
122
|
+
"wcrp_cmip6:latest",
|
|
123
|
+
"-t",
|
|
124
|
+
"cf:1.7",
|
|
125
|
+
"-o",
|
|
126
|
+
"OUTPUT",
|
|
127
|
+
"cmip6",
|
|
128
|
+
"-i",
|
|
129
|
+
"test_info",
|
|
130
|
+
],
|
|
131
|
+
[
|
|
132
|
+
"-t",
|
|
133
|
+
"wcrp_cordex_cmip6",
|
|
134
|
+
"-t",
|
|
135
|
+
"cf:1.7",
|
|
136
|
+
"-o",
|
|
137
|
+
"OUTPUT",
|
|
138
|
+
"cordex_cmip6",
|
|
139
|
+
"-i",
|
|
140
|
+
"test_info",
|
|
141
|
+
],
|
|
142
|
+
],
|
|
143
|
+
)
|
|
144
|
+
def test_cli_runs_successfully(self, test_args, tmp_path):
|
|
145
|
+
temp_dir = tempfile.mkdtemp()
|
|
146
|
+
try:
|
|
147
|
+
result_dir = tmp_path / "results"
|
|
148
|
+
args = [
|
|
149
|
+
(
|
|
150
|
+
os.path.join(self.test_data_dir, "cmip6")
|
|
151
|
+
if arg == "cmip6"
|
|
152
|
+
else (
|
|
153
|
+
os.path.join(self.test_data_dir, "cordex_cmip6")
|
|
154
|
+
if arg == "cordex_cmip6"
|
|
155
|
+
else arg.replace("OUTPUT", str(result_dir))
|
|
156
|
+
)
|
|
157
|
+
)
|
|
158
|
+
for arg in test_args
|
|
159
|
+
]
|
|
160
|
+
stdout, stderr = self._run_cli(args)
|
|
161
|
+
output_dir_index = args.index("-o") + 1
|
|
162
|
+
result_dir = args[output_dir_index]
|
|
163
|
+
result_files = os.listdir(result_dir)
|
|
164
|
+
assert any(
|
|
165
|
+
f.startswith("qa_result_") and f.endswith(".json") for f in result_files
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
# Check clustered summary if exists
|
|
169
|
+
clustered_files = [
|
|
170
|
+
f for f in result_files if "clustered" in f and f.endswith(".json")
|
|
171
|
+
]
|
|
172
|
+
for cf in clustered_files:
|
|
173
|
+
with open(os.path.join(result_dir, cf)) as f:
|
|
174
|
+
data = json.load(f)
|
|
175
|
+
for key in ["error", "fail", "info"]:
|
|
176
|
+
assert key in data
|
|
177
|
+
info = data["info"]
|
|
178
|
+
for field in [
|
|
179
|
+
"id",
|
|
180
|
+
"date",
|
|
181
|
+
"files",
|
|
182
|
+
"datasets",
|
|
183
|
+
"cc_version",
|
|
184
|
+
"checkers",
|
|
185
|
+
]:
|
|
186
|
+
assert field in info
|
|
187
|
+
for sev_dict in [data["fail"], data["error"]]:
|
|
188
|
+
for _, issues in sev_dict.items():
|
|
189
|
+
for issue_name, messages in issues.items():
|
|
190
|
+
for msg, files in messages.items():
|
|
191
|
+
assert isinstance(files, list)
|
|
192
|
+
assert (
|
|
193
|
+
len(files) == 1
|
|
194
|
+
), f"Clustered summary should have one example file for {msg}"
|
|
195
|
+
assert isinstance(files[0], str)
|
|
196
|
+
finally:
|
|
197
|
+
shutil.rmtree(temp_dir)
|
|
198
|
+
|
|
199
|
+
def test_cli_resume_functionality(self):
|
|
200
|
+
temp_dir = tempfile.mkdtemp()
|
|
201
|
+
output_dir = os.path.join(temp_dir, "output")
|
|
202
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
203
|
+
resume_file = os.path.join(output_dir, ".resume_info")
|
|
204
|
+
Path(os.path.join(output_dir, "progress.txt")).touch()
|
|
205
|
+
Path(os.path.join(output_dir, "progress_datasets.txt")).touch()
|
|
206
|
+
os.makedirs(os.path.join(output_dir, "tables"), exist_ok=True)
|
|
207
|
+
with open(resume_file, "w") as f:
|
|
208
|
+
json.dump(
|
|
209
|
+
{
|
|
210
|
+
"parent_dir": self.cmip6_dir,
|
|
211
|
+
"info": "test_resume",
|
|
212
|
+
"tests": ["cf:latest"],
|
|
213
|
+
},
|
|
214
|
+
f,
|
|
215
|
+
)
|
|
216
|
+
stdout, stderr = self._run_cli(["-r", "-o", output_dir])
|
|
217
|
+
assert "Resuming previous QA run" in stdout
|
|
218
|
+
shutil.rmtree(temp_dir)
|
|
219
|
+
|
|
220
|
+
@pytest.mark.parametrize(
|
|
221
|
+
"test_args, expected_err_msg",
|
|
222
|
+
[
|
|
223
|
+
(
|
|
224
|
+
["-t", "cf:latest", "-o", "some_dir"],
|
|
225
|
+
"Missing required argument <parent_dir>",
|
|
226
|
+
),
|
|
227
|
+
(
|
|
228
|
+
["-t", "invalid_checker:latest", "-o", "some_dir", "cmip6"],
|
|
229
|
+
"Invalid test(s) specified",
|
|
230
|
+
),
|
|
231
|
+
(
|
|
232
|
+
["-r", "-t", "cf:latest", "-o", "some_dir"],
|
|
233
|
+
"When using -r/--resume, the following arguments are not allowed",
|
|
234
|
+
),
|
|
235
|
+
],
|
|
236
|
+
)
|
|
237
|
+
def test_cli_fails_on_invalid_arguments(self, test_args, expected_err_msg):
|
|
238
|
+
temp_dir = tempfile.mkdtemp()
|
|
239
|
+
try:
|
|
240
|
+
args = [arg if arg != "cmip6" else self.cmip6_dir for arg in test_args]
|
|
241
|
+
self._run_cli(args, expect_error=True, expected_err_msg=expected_err_msg)
|
|
242
|
+
finally:
|
|
243
|
+
shutil.rmtree(temp_dir)
|
|
244
|
+
|
|
245
|
+
def test_cli_produces_valid_json(self):
|
|
246
|
+
temp_dir = Path(tempfile.mkdtemp())
|
|
247
|
+
try:
|
|
248
|
+
output_dir = temp_dir / "output"
|
|
249
|
+
output_dir.mkdir()
|
|
250
|
+
self._run_cli(
|
|
251
|
+
["-t", "cf:latest", "-o", str(output_dir), str(self.cmip6_dir)]
|
|
252
|
+
)
|
|
253
|
+
json_files = list(output_dir.glob("*.json"))
|
|
254
|
+
assert len(json_files) == 6
|
|
255
|
+
json_result_files = [
|
|
256
|
+
f for f in json_files if f.name.startswith("qa_result_")
|
|
257
|
+
]
|
|
258
|
+
assert len(json_result_files) == 2
|
|
259
|
+
with open(json_result_files[0]) as f:
|
|
260
|
+
data = json.load(f)
|
|
261
|
+
# "info" is the only required field
|
|
262
|
+
assert "info" in data
|
|
263
|
+
# "error" and "fail" are optional, others are not allowed
|
|
264
|
+
assert all([key in ["fail", "info", "error"] for key in data])
|
|
265
|
+
info = data["info"]
|
|
266
|
+
for field in ["id", "date", "files", "datasets", "cc_version", "checkers"]:
|
|
267
|
+
assert field in info
|
|
268
|
+
assert isinstance(data.get("error", {}), dict)
|
|
269
|
+
assert isinstance(data.get("fail", {}), dict)
|
|
270
|
+
finally:
|
|
271
|
+
shutil.rmtree(temp_dir)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
import esgf_qa.cluster_results as esgqacr
|
|
6
|
+
from esgf_qa.cluster_results import QAResultAggregator
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture(autouse=True)
|
|
10
|
+
def patch_checker_dicts(monkeypatch):
|
|
11
|
+
"""
|
|
12
|
+
Patch module-level checker_dict and checker_dict_ext
|
|
13
|
+
to avoid dependency on real ESGF constants.
|
|
14
|
+
"""
|
|
15
|
+
mock_checker_dict = {"cf": "CF", "cc6": "C-C6"}
|
|
16
|
+
mock_checker_dict_ext = {"cf": "CF-EXT", "cc6": "C-C6-EXT"}
|
|
17
|
+
monkeypatch.setattr(esgqacr, "checker_dict", mock_checker_dict)
|
|
18
|
+
monkeypatch.setattr(esgqacr, "checker_dict_ext", mock_checker_dict_ext)
|
|
19
|
+
yield
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
|
|
23
|
+
def aggregator():
|
|
24
|
+
"""Provide a fresh aggregator instance."""
|
|
25
|
+
return QAResultAggregator()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_initial_summary_structure(aggregator):
|
|
29
|
+
"""Ensure the summary structure initializes correctly."""
|
|
30
|
+
assert "error" in aggregator.summary
|
|
31
|
+
assert "fail" in aggregator.summary
|
|
32
|
+
assert isinstance(aggregator.summary["fail"], defaultdict)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_update_adds_fail_entries(aggregator):
|
|
36
|
+
"""Verify that a failed test adds entries to the summary."""
|
|
37
|
+
result_dict = {
|
|
38
|
+
"cf": {
|
|
39
|
+
"check_units": {
|
|
40
|
+
"value": (0, 1),
|
|
41
|
+
"weight": 2,
|
|
42
|
+
"msgs": ["Missing attribute 'units'"],
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
aggregator.update(result_dict, dsid="ds1", file_name="file1.nc")
|
|
48
|
+
|
|
49
|
+
fail_summary = aggregator.summary["fail"]
|
|
50
|
+
assert 2 in fail_summary
|
|
51
|
+
test_name = "[CF] check_units"
|
|
52
|
+
assert test_name in fail_summary[2]
|
|
53
|
+
assert "Missing attribute 'units'" in fail_summary[2][test_name]
|
|
54
|
+
assert "ds1" in fail_summary[2][test_name]["Missing attribute 'units'"]
|
|
55
|
+
assert "file1.nc" in fail_summary[2][test_name]["Missing attribute 'units'"]["ds1"]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_update_adds_error_entries(aggregator):
|
|
59
|
+
"""Verify that an error test adds entries to the summary."""
|
|
60
|
+
result_dict = {"cf": {"errors": {"test_func": "Some internal error"}}}
|
|
61
|
+
|
|
62
|
+
aggregator.update(result_dict, dsid="dsX", file_name="fX.nc")
|
|
63
|
+
|
|
64
|
+
error_summary = aggregator.summary["error"]
|
|
65
|
+
assert "[CF] test_func" in error_summary
|
|
66
|
+
assert "Some internal error" in error_summary["[CF] test_func"]
|
|
67
|
+
assert "dsX" in error_summary["[CF] test_func"]["Some internal error"]
|
|
68
|
+
assert "fX.nc" in error_summary["[CF] test_func"]["Some internal error"]["dsX"]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_update_ds_uses_checker_dict_ext(aggregator):
|
|
72
|
+
"""Ensure update_ds uses checker_dict_ext for extended checkers."""
|
|
73
|
+
result_dict = {
|
|
74
|
+
"cf": {
|
|
75
|
+
"errors": {
|
|
76
|
+
"check1": {"msg": "Something broke", "files": ["fileA.nc", "fileB.nc"]}
|
|
77
|
+
},
|
|
78
|
+
"test2": {"weight": 3, "msgs": {"Bad value": ["fileC.nc"]}},
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
aggregator.update_ds(result_dict, dsid="dataset_42")
|
|
83
|
+
|
|
84
|
+
error_summary = aggregator.summary["error"]
|
|
85
|
+
fail_summary = aggregator.summary["fail"]
|
|
86
|
+
|
|
87
|
+
# Check both sections populated and use extended prefix
|
|
88
|
+
assert any("[CF-EXT]" in key for key in error_summary.keys())
|
|
89
|
+
assert any("[CF-EXT]" in key for key in fail_summary[3].keys())
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_sort_orders_failures_by_weight(aggregator):
|
|
93
|
+
"""Check that sorting produces a descending order by weight."""
|
|
94
|
+
aggregator.summary["fail"][1]["[CF] test1"] = {}
|
|
95
|
+
aggregator.summary["fail"][5]["[CF] test5"] = {}
|
|
96
|
+
aggregator.sort()
|
|
97
|
+
weights = list(aggregator.summary["fail"].keys())
|
|
98
|
+
assert weights == sorted(weights, reverse=True)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_cluster_messages_basic():
|
|
102
|
+
"""Cluster messages with small differences using threshold."""
|
|
103
|
+
messages = [
|
|
104
|
+
"Missing value for var1",
|
|
105
|
+
"Missing value for var2",
|
|
106
|
+
"Completely different",
|
|
107
|
+
]
|
|
108
|
+
clusters = QAResultAggregator.cluster_messages(messages[:], threshold=0.8)
|
|
109
|
+
|
|
110
|
+
# Expect two clusters: similar ones together
|
|
111
|
+
assert len(clusters) == 2
|
|
112
|
+
assert any("var1" in msg or "var2" in msg for msg in clusters[0])
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_generalize_message_group_single():
|
|
116
|
+
"""If there is one message, return it unchanged."""
|
|
117
|
+
msg, placeholders = QAResultAggregator.generalize_message_group(["Missing X"])
|
|
118
|
+
assert msg == "Missing X"
|
|
119
|
+
assert placeholders == {}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def test_generalize_message_group_multiple():
|
|
123
|
+
"""Generalization should replace differing tokens with placeholders."""
|
|
124
|
+
msgs = ["Missing variable A", "Missing variable B"]
|
|
125
|
+
generalized, placeholders = QAResultAggregator.generalize_message_group(msgs)
|
|
126
|
+
assert "Missing variable" in generalized
|
|
127
|
+
assert "{" in generalized
|
|
128
|
+
assert isinstance(placeholders, dict)
|
|
129
|
+
assert list(placeholders.keys()) # at least one placeholder
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_merge_placeholders_merges_close():
|
|
133
|
+
"""Test merging adjacent placeholders."""
|
|
134
|
+
tokens = ["{A}", "-", "{B}"]
|
|
135
|
+
dictionary = {"A": "foo", "B": "bar"}
|
|
136
|
+
merged_tokens, merged_dict = QAResultAggregator.merge_placeholders(
|
|
137
|
+
tokens, dictionary
|
|
138
|
+
)
|
|
139
|
+
# The placeholders should merge since only one char between them
|
|
140
|
+
assert len(merged_dict) <= 1
|
|
141
|
+
assert "{" in merged_tokens[0]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_cluster_summary_produces_clustered_summary(aggregator):
|
|
145
|
+
"""Integration-like test for cluster_summary on simple data."""
|
|
146
|
+
result_dict = {
|
|
147
|
+
"cf": {
|
|
148
|
+
"check_attrs": {
|
|
149
|
+
"value": (0, 1),
|
|
150
|
+
"weight": 3,
|
|
151
|
+
"msgs": ["Missing attr 'long_name'", "Missing attr 'standard_name'"],
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
aggregator.update(result_dict, dsid="ds1", file_name="file1.nc")
|
|
156
|
+
aggregator.sort()
|
|
157
|
+
aggregator.cluster_summary(threshold=0.7)
|
|
158
|
+
clustered = aggregator.clustered_summary["fail"]
|
|
159
|
+
|
|
160
|
+
# should contain weight 3 and a generalized message
|
|
161
|
+
assert 3 in clustered
|
|
162
|
+
test_name = next(iter(clustered[3].keys()))
|
|
163
|
+
assert "[CF]" in test_name
|
|
164
|
+
# at least one generalized message with "Missing attr"
|
|
165
|
+
found_msg_keys = list(clustered[3]["[CF] check_attrs"].keys())
|
|
166
|
+
assert any("Missing attr" in k for k in found_msg_keys)
|