esgf-qa 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgf_qa/_constants.py +36 -15
- esgf_qa/_version.py +2 -2
- esgf_qa/cluster_results.py +5 -4
- esgf_qa/run_qa.py +127 -63
- {esgf_qa-0.4.0.dist-info → esgf_qa-0.5.0.dist-info}/METADATA +10 -8
- esgf_qa-0.5.0.dist-info/RECORD +19 -0
- {esgf_qa-0.4.0.dist-info → esgf_qa-0.5.0.dist-info}/WHEEL +1 -1
- tests/test_cli.py +7 -3
- tests/test_run_qa.py +1 -1
- esgf_qa-0.4.0.dist-info/RECORD +0 -19
- {esgf_qa-0.4.0.dist-info → esgf_qa-0.5.0.dist-info}/entry_points.txt +0 -0
- {esgf_qa-0.4.0.dist-info → esgf_qa-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {esgf_qa-0.4.0.dist-info → esgf_qa-0.5.0.dist-info}/top_level.txt +0 -0
esgf_qa/_constants.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
from datetime import timedelta
|
|
2
2
|
|
|
3
|
-
#
|
|
3
|
+
# Mapping of checker names to project names for better readability
|
|
4
4
|
checker_dict = {
|
|
5
5
|
"cc6": "CORDEX-CMIP6",
|
|
6
6
|
"cf": "CF-Conventions",
|
|
7
7
|
"mip": "MIP",
|
|
8
|
-
"plugin_cmip6": "CMIP6",
|
|
9
8
|
# "wcrp-cmip5": "CMIP5",
|
|
10
9
|
"wcrp_cmip6": "CMIP6",
|
|
11
|
-
# "
|
|
12
|
-
|
|
10
|
+
# "wcrp_cmip7aft": "CMIP7-AFT",
|
|
11
|
+
"wcrp_cmip7": "CMIP7",
|
|
13
12
|
# "wcrp_cordex": "CORDEX",
|
|
14
13
|
"wcrp_cordex_cmip6": "CORDEX-CMIP6",
|
|
15
14
|
# "obs4mips": "Obs4MIPs",
|
|
@@ -24,17 +23,39 @@ checker_dict_ext = {
|
|
|
24
23
|
}
|
|
25
24
|
checker_release_versions = {}
|
|
26
25
|
|
|
27
|
-
#
|
|
28
|
-
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
26
|
+
# Checkers for which consistency checks should be run
|
|
27
|
+
checker_supporting_consistency_checks = [
|
|
28
|
+
"wcrp_cmip7",
|
|
29
|
+
"wcrp_cmip6",
|
|
30
|
+
"wcrp_cordex_cmip6",
|
|
31
|
+
"cc6",
|
|
32
|
+
"mip",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
# DRS parent directory names (for identifying project root and building dataset id)
|
|
36
|
+
supported_project_ids = [
|
|
37
|
+
"cmip7",
|
|
38
|
+
"cmip6plus",
|
|
39
|
+
"cmip6",
|
|
40
|
+
"cmip5",
|
|
41
|
+
"cordex",
|
|
42
|
+
"cordex-cmip6",
|
|
43
|
+
"cordex-fpsconv",
|
|
44
|
+
"obs4mips",
|
|
45
|
+
"input4mips",
|
|
46
|
+
"c3scordex",
|
|
47
|
+
"c3scmip5",
|
|
48
|
+
"c3scmip6",
|
|
49
|
+
"c3s-ipcc-ar6-atlas",
|
|
50
|
+
"c3satlas",
|
|
51
|
+
"c3s-cica-atlas",
|
|
52
|
+
"c3satlas_v1",
|
|
53
|
+
"c3s-atlas-dataset",
|
|
54
|
+
"c3satlas_v2",
|
|
55
|
+
"eerie",
|
|
56
|
+
"happi",
|
|
57
|
+
"cosmo-rea",
|
|
58
|
+
]
|
|
38
59
|
|
|
39
60
|
# Definition of maximum permitted deviations from the given frequency
|
|
40
61
|
deltdic = {}
|
esgf_qa/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.
|
|
32
|
-
__version_tuple__ = version_tuple = (0,
|
|
31
|
+
__version__ = version = '0.5.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
esgf_qa/cluster_results.py
CHANGED
|
@@ -101,7 +101,7 @@ class QAResultAggregator:
|
|
|
101
101
|
"errors"
|
|
102
102
|
].items():
|
|
103
103
|
self.summary["error"][
|
|
104
|
-
f"[{checker_dict
|
|
104
|
+
f"[{checker_dict.get(checker, checker)}] " + function_name
|
|
105
105
|
][error_msg][dsid].append(file_name)
|
|
106
106
|
else:
|
|
107
107
|
score, max_score = result_dict[checker][test]["value"]
|
|
@@ -110,7 +110,7 @@ class QAResultAggregator:
|
|
|
110
110
|
if score < max_score: # test outcome: fail
|
|
111
111
|
for msg in msgs:
|
|
112
112
|
self.summary["fail"][weight][
|
|
113
|
-
f"[{checker_dict
|
|
113
|
+
f"[{checker_dict.get(checker, checker)}] " + test
|
|
114
114
|
][msg][dsid].append(file_name)
|
|
115
115
|
|
|
116
116
|
def update_ds(self, result_dict, dsid):
|
|
@@ -132,7 +132,8 @@ class QAResultAggregator:
|
|
|
132
132
|
].items():
|
|
133
133
|
for file_name in errdict["files"]:
|
|
134
134
|
self.summary["error"][
|
|
135
|
-
f"[{checker_dict_ext
|
|
135
|
+
f"[{checker_dict_ext.get(checker, checker)}] "
|
|
136
|
+
+ function_name
|
|
136
137
|
][errdict["msg"]][dsid].append(file_name)
|
|
137
138
|
else:
|
|
138
139
|
weight = result_dict[checker][test].get("weight", 3)
|
|
@@ -140,7 +141,7 @@ class QAResultAggregator:
|
|
|
140
141
|
for msg, file_names in fails.items():
|
|
141
142
|
for file_name in file_names:
|
|
142
143
|
self.summary["fail"][weight][
|
|
143
|
-
f"[{checker_dict_ext
|
|
144
|
+
f"[{checker_dict_ext.get(checker, checker)}] " + test
|
|
144
145
|
][msg][dsid].append(file_name)
|
|
145
146
|
|
|
146
147
|
def sort(self):
|
esgf_qa/run_qa.py
CHANGED
|
@@ -12,12 +12,14 @@ from pathlib import Path
|
|
|
12
12
|
|
|
13
13
|
from compliance_checker import __version__ as cc_version
|
|
14
14
|
from compliance_checker.runner import CheckSuite
|
|
15
|
+
from packaging import version as pversion
|
|
15
16
|
|
|
16
17
|
from esgf_qa._constants import (
|
|
17
|
-
DRS_path_parent,
|
|
18
18
|
checker_dict,
|
|
19
19
|
checker_dict_ext,
|
|
20
20
|
checker_release_versions,
|
|
21
|
+
checker_supporting_consistency_checks,
|
|
22
|
+
supported_project_ids,
|
|
21
23
|
)
|
|
22
24
|
from esgf_qa._version import version
|
|
23
25
|
from esgf_qa.cluster_results import QAResultAggregator
|
|
@@ -53,7 +55,7 @@ def get_default_result_dir():
|
|
|
53
55
|
)
|
|
54
56
|
|
|
55
57
|
|
|
56
|
-
def get_dsid(files_to_check_dict, dataset_files_map_ext, file_path,
|
|
58
|
+
def get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, project_ids):
|
|
57
59
|
"""
|
|
58
60
|
Get the dataset id for a file.
|
|
59
61
|
|
|
@@ -65,8 +67,8 @@ def get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, project_id):
|
|
|
65
67
|
Dictionary of dataset files.
|
|
66
68
|
file_path : str
|
|
67
69
|
Path to the file.
|
|
68
|
-
|
|
69
|
-
|
|
70
|
+
project_ids: list of str
|
|
71
|
+
List of supported project_ids
|
|
70
72
|
|
|
71
73
|
Returns
|
|
72
74
|
-------
|
|
@@ -75,16 +77,46 @@ def get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, project_id):
|
|
|
75
77
|
"""
|
|
76
78
|
dir_id = files_to_check_dict[file_path]["id_dir"].split("/")
|
|
77
79
|
fn_id = files_to_check_dict[file_path]["id_fn"].split("_")
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
dsid = ".".join(dir_id)
|
|
81
|
+
dir_id_lower = [el.lower() for el in dir_id]
|
|
82
|
+
for project_id in project_ids:
|
|
83
|
+
if project_id in dir_id_lower:
|
|
84
|
+
last_index = len(dir_id_lower) - 1 - dir_id_lower[::-1].index(project_id)
|
|
85
|
+
dsid = ".".join(dir_id[last_index:])
|
|
86
|
+
break
|
|
83
87
|
if len(dataset_files_map_ext[files_to_check_dict[file_path]["id_dir"]].keys()) > 1:
|
|
84
88
|
dsid += "." + ".".join(fn_id)
|
|
85
89
|
return dsid
|
|
86
90
|
|
|
87
91
|
|
|
92
|
+
def get_installed_checker_versions():
|
|
93
|
+
"""
|
|
94
|
+
Get all available versions of installed cc-plugins.
|
|
95
|
+
|
|
96
|
+
Returns
|
|
97
|
+
-------
|
|
98
|
+
dict
|
|
99
|
+
A dictionary of {checker_name: [version1, version2, latest], ...}.
|
|
100
|
+
"""
|
|
101
|
+
check_suite = CheckSuite()
|
|
102
|
+
check_suite.load_all_available_checkers()
|
|
103
|
+
installed_versions = {}
|
|
104
|
+
for checker in check_suite.checkers:
|
|
105
|
+
try:
|
|
106
|
+
name, version = checker.split(":")
|
|
107
|
+
except ValueError:
|
|
108
|
+
name, version = checker, "latest"
|
|
109
|
+
if version == "latest":
|
|
110
|
+
continue
|
|
111
|
+
if name not in installed_versions:
|
|
112
|
+
installed_versions[name] = []
|
|
113
|
+
installed_versions[name].append(version)
|
|
114
|
+
for name, versions in installed_versions.items():
|
|
115
|
+
installed_versions[name] = sorted(versions, key=pversion.parse) + ["latest"]
|
|
116
|
+
|
|
117
|
+
return installed_versions
|
|
118
|
+
|
|
119
|
+
|
|
88
120
|
def get_checker_release_versions(checkers, checker_options={}):
|
|
89
121
|
"""
|
|
90
122
|
Get the release versions of the checkers.
|
|
@@ -117,6 +149,12 @@ def get_checker_release_versions(checkers, checker_options={}):
|
|
|
117
149
|
)
|
|
118
150
|
elif checker.split(":")[0] in checker_dict_ext:
|
|
119
151
|
checker_release_versions[checker.split(":")[0]] = version
|
|
152
|
+
else:
|
|
153
|
+
checker_release_versions[checker.split(":")[0]] = (
|
|
154
|
+
check_suite.checkers.get(
|
|
155
|
+
checker, "unknown version"
|
|
156
|
+
)._cc_spec_version
|
|
157
|
+
)
|
|
120
158
|
|
|
121
159
|
|
|
122
160
|
def run_compliance_checker(file_path, checkers, checker_options={}):
|
|
@@ -166,8 +204,13 @@ def run_compliance_checker(file_path, checkers, checker_options={}):
|
|
|
166
204
|
ds, [checker], include_checks=None, skip_checks=[]
|
|
167
205
|
)
|
|
168
206
|
)
|
|
207
|
+
if hasattr(ds, "close"):
|
|
208
|
+
ds.close()
|
|
169
209
|
return results
|
|
170
|
-
|
|
210
|
+
results = check_suite.run_all(ds, checkers, include_checks=None, skip_checks=[])
|
|
211
|
+
if hasattr(ds, "close"):
|
|
212
|
+
ds.close()
|
|
213
|
+
return results
|
|
171
214
|
|
|
172
215
|
|
|
173
216
|
def track_checked_datasets(checked_datasets_file, checked_datasets):
|
|
@@ -263,14 +306,6 @@ def process_file(
|
|
|
263
306
|
checker = checkerv.split(":")[0]
|
|
264
307
|
check_results[checker] = dict()
|
|
265
308
|
check_results[checker]["errors"] = {}
|
|
266
|
-
# print()
|
|
267
|
-
# print("name",result[checker][0][0].name)
|
|
268
|
-
# print("weight", result[checker][0][0].weight)
|
|
269
|
-
# print("value", result[checker][0][0].value)
|
|
270
|
-
# print("msgs", result[checker][0][0].msgs)
|
|
271
|
-
# print("method", result[checker][0][0].check_method)
|
|
272
|
-
# print("children", result[checker][0][0].children)
|
|
273
|
-
# quit()
|
|
274
309
|
for check in result[checkerv][0]:
|
|
275
310
|
check_results[checker][check.name] = {}
|
|
276
311
|
check_results[checker][check.name]["weight"] = check.weight
|
|
@@ -481,13 +516,17 @@ def main():
|
|
|
481
516
|
"--option",
|
|
482
517
|
default=[],
|
|
483
518
|
action="append",
|
|
484
|
-
help="Additional options to be passed to the checkers. Format: '<checker>:<option_name>[:<option_value>]'.
|
|
519
|
+
help="Additional options to be passed to the checkers. Format: '<checker>:<option_name>[:<option_value>]'. "
|
|
520
|
+
"Multiple invocations possible.",
|
|
485
521
|
)
|
|
486
522
|
parser.add_argument(
|
|
487
523
|
"-t",
|
|
488
524
|
"--test",
|
|
489
525
|
action="append",
|
|
490
|
-
help="The test to run
|
|
526
|
+
help="The test(s) to run in format '<checker>[:<version>]', (eg. 'wcrp_cmip7', "
|
|
527
|
+
"'wcrp_cmip6:latest' or 'cf:<version>', can be specified multiple times, "
|
|
528
|
+
"eg.: '-t cc6:latest -t cf:1.11') - default: running 'cf:latest'. "
|
|
529
|
+
"The default version selected for each checker is 'latest'.",
|
|
491
530
|
)
|
|
492
531
|
parser.add_argument(
|
|
493
532
|
"-i",
|
|
@@ -507,6 +546,13 @@ def main():
|
|
|
507
546
|
action="store_true",
|
|
508
547
|
help="Include basic consistency and continuity checks. Default: False.",
|
|
509
548
|
)
|
|
549
|
+
parser.add_argument(
|
|
550
|
+
"-P",
|
|
551
|
+
"--parallel_processes",
|
|
552
|
+
type=int,
|
|
553
|
+
default=0,
|
|
554
|
+
help="Specify the maximum number of parallel processes. Default: 0 (= number of cores).",
|
|
555
|
+
)
|
|
510
556
|
args = parser.parse_args()
|
|
511
557
|
|
|
512
558
|
result_dir = os.path.abspath(args.output_dir)
|
|
@@ -518,6 +564,7 @@ def main():
|
|
|
518
564
|
args.include_consistency_checks if args.include_consistency_checks else False
|
|
519
565
|
)
|
|
520
566
|
cl_checker_options = parse_options(args.option)
|
|
567
|
+
parallel_processes = args.parallel_processes
|
|
521
568
|
|
|
522
569
|
# Progress file to track already checked files
|
|
523
570
|
progress_file = Path(result_dir, "progress.txt")
|
|
@@ -527,15 +574,15 @@ def main():
|
|
|
527
574
|
# Resume information stored in a json file
|
|
528
575
|
resume_info_file = Path(result_dir, ".resume_info")
|
|
529
576
|
|
|
530
|
-
# Do not allow
|
|
577
|
+
# Do not allow any but certain arguments if resuming previous QA run
|
|
531
578
|
if resume:
|
|
532
|
-
allowed_with_resume = {"output_dir", "info", "resume"}
|
|
579
|
+
allowed_with_resume = {"output_dir", "info", "resume", "parallel_processes"}
|
|
533
580
|
# Convert Namespace to dict for easier checking
|
|
534
581
|
set_args = {k for k, v in vars(args).items() if v not in (None, False, [], "")}
|
|
535
582
|
invalid_args = set_args - allowed_with_resume
|
|
536
583
|
if invalid_args:
|
|
537
584
|
parser.error(
|
|
538
|
-
f"When using -r/--resume,
|
|
585
|
+
f"When using -r/--resume, the following arguments are not allowed: {', '.join(invalid_args)}"
|
|
539
586
|
)
|
|
540
587
|
|
|
541
588
|
# Deal with result_dir
|
|
@@ -622,12 +669,13 @@ def main():
|
|
|
622
669
|
checker_options = defaultdict(dict)
|
|
623
670
|
else:
|
|
624
671
|
# Require versions to be specified:
|
|
625
|
-
# test_regex = re.compile(r"^[a-
|
|
672
|
+
# test_regex = re.compile(r"^[a-zA-Z0-9_-]+:(latest|[0-9]+(\.[0-9]+)*)$")
|
|
626
673
|
# Allow versions to be omitted:
|
|
627
|
-
test_regex = re.compile(r"^[a-
|
|
674
|
+
test_regex = re.compile(r"^[a-zA-Z0-9_-]+(?::(latest|[0-9]+(?:\.[0-9]+)*))?$")
|
|
675
|
+
# Check format of specified checkers and separate checker, version, options
|
|
628
676
|
if not all([test_regex.match(test) for test in tests]):
|
|
629
677
|
raise Exception(
|
|
630
|
-
|
|
678
|
+
"Invalid test(s) specified. Please specify tests in the format 'checker_name' or'checker_name:version'."
|
|
631
679
|
)
|
|
632
680
|
checkers = [test.split(":")[0] for test in tests]
|
|
633
681
|
if sorted(checkers) != sorted(list(set(checkers))):
|
|
@@ -641,6 +689,29 @@ def main():
|
|
|
641
689
|
for test in tests
|
|
642
690
|
}
|
|
643
691
|
checker_options = defaultdict(dict)
|
|
692
|
+
# Check if specified checkers (or their requested versions) exist / are currently installed
|
|
693
|
+
cc_checker_versions = get_installed_checker_versions()
|
|
694
|
+
invalid_checkers = []
|
|
695
|
+
invalid_checkers_versions = []
|
|
696
|
+
invalid_checkers_errmsg = ""
|
|
697
|
+
for checker_i, checker_iv in checkers_versions.items():
|
|
698
|
+
if checker_i not in cc_checker_versions and checker_i != "eerie":
|
|
699
|
+
invalid_checkers.append(checker_i)
|
|
700
|
+
elif checker_i == "eerie":
|
|
701
|
+
pass
|
|
702
|
+
elif checker_iv not in cc_checker_versions[checker_i] and checker_i not in [
|
|
703
|
+
"cc6",
|
|
704
|
+
"mip",
|
|
705
|
+
]:
|
|
706
|
+
invalid_checkers_versions.append(checker_i)
|
|
707
|
+
if invalid_checkers:
|
|
708
|
+
invalid_checkers_errmsg = f"ERROR: Invalid test(s) specified. The following checkers are not supported or installed: {', '.join(invalid_checkers)}. "
|
|
709
|
+
for checker_i in invalid_checkers_versions:
|
|
710
|
+
if not invalid_checkers_errmsg:
|
|
711
|
+
invalid_checkers_errmsg = "ERROR: Invalid test(s) specified. "
|
|
712
|
+
invalid_checkers_errmsg += f"For checker {checker_i} only the following versions are currently supported / installed: {', '.join(cc_checker_versions[checker_i])}. "
|
|
713
|
+
if invalid_checkers_errmsg:
|
|
714
|
+
raise ValueError(invalid_checkers_errmsg)
|
|
644
715
|
if "cc6" in checkers_versions and checkers_versions["cc6"] != "latest":
|
|
645
716
|
checkers_versions["cc6"] = "latest"
|
|
646
717
|
warnings.warn("Version of checker 'cc6' must be 'latest'. Using 'latest'.")
|
|
@@ -665,10 +736,6 @@ def main():
|
|
|
665
736
|
raise Exception(
|
|
666
737
|
"ERROR: Cannot run both 'cc6' and 'mip' checkers at the same time."
|
|
667
738
|
)
|
|
668
|
-
if any(test not in checker_dict.keys() for test in checkers_versions):
|
|
669
|
-
raise Exception(
|
|
670
|
-
f"Invalid test(s) specified. Supported are: {', '.join(checker_dict.keys())}"
|
|
671
|
-
)
|
|
672
739
|
|
|
673
740
|
# Combine checkers and versions
|
|
674
741
|
# (checker_options are hardcoded)
|
|
@@ -709,15 +776,6 @@ def main():
|
|
|
709
776
|
progress_file.touch()
|
|
710
777
|
dataset_file.touch()
|
|
711
778
|
|
|
712
|
-
DRS_parent = "CORDEX-CMIP6"
|
|
713
|
-
for cname in checkers:
|
|
714
|
-
DRS_parent_tmp = DRS_path_parent.get(
|
|
715
|
-
checker_dict.get(cname.split(":")[0], ""), ""
|
|
716
|
-
)
|
|
717
|
-
if DRS_parent_tmp:
|
|
718
|
-
DRS_parent = DRS_parent_tmp
|
|
719
|
-
break
|
|
720
|
-
|
|
721
779
|
# Check if progress files exist and read already processed files/datasets
|
|
722
780
|
processed_files = set()
|
|
723
781
|
with open(progress_file) as file:
|
|
@@ -816,7 +874,7 @@ def main():
|
|
|
816
874
|
files_to_check = sorted(files_to_check)
|
|
817
875
|
for file_path in files_to_check:
|
|
818
876
|
files_to_check_dict[file_path]["id"] = get_dsid(
|
|
819
|
-
files_to_check_dict, dataset_files_map_ext, file_path,
|
|
877
|
+
files_to_check_dict, dataset_files_map_ext, file_path, supported_project_ids
|
|
820
878
|
)
|
|
821
879
|
files_to_check_dict[file_path]["result_file_ds"] = (
|
|
822
880
|
result_dir
|
|
@@ -884,22 +942,27 @@ def main():
|
|
|
884
942
|
raise Exception("No files found to check.")
|
|
885
943
|
else:
|
|
886
944
|
print(
|
|
887
|
-
f"
|
|
945
|
+
f"\nFound {len(files_to_check)} files (organized in {len(dataset_files_map)} datasets) to check."
|
|
888
946
|
)
|
|
889
947
|
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
print(
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
print(
|
|
948
|
+
# Save dictionaries to disk for information
|
|
949
|
+
with open(os.path.join(result_dir, "files_to_check.json"), "w") as f:
|
|
950
|
+
json.dump(files_to_check, f, indent=4)
|
|
951
|
+
with open(os.path.join(result_dir, "files_to_check_dict.json"), "w") as f:
|
|
952
|
+
json.dump(files_to_check_dict, f, indent=4)
|
|
953
|
+
with open(os.path.join(result_dir, "dataset_files_map.json"), "w") as f:
|
|
954
|
+
json.dump(dataset_files_map, f, indent=4)
|
|
955
|
+
with open(os.path.join(result_dir, "dataset_files_map_ext.json"), "w") as f:
|
|
956
|
+
json.dump(dataset_files_map_ext, f, indent=4)
|
|
957
|
+
print(
|
|
958
|
+
"Information on which files have been found and how these are organized into datasets was saved to disk:"
|
|
959
|
+
)
|
|
960
|
+
print(
|
|
961
|
+
f" - {os.path.join(result_dir, 'files_to_check.json')}\n"
|
|
962
|
+
f" - {os.path.join(result_dir, 'files_to_check_dict.json')}\n"
|
|
963
|
+
f" - {os.path.join(result_dir, 'dataset_files_map.json')}\n"
|
|
964
|
+
f" - {os.path.join(result_dir, 'dataset_files_map_ext.json')}"
|
|
965
|
+
)
|
|
903
966
|
|
|
904
967
|
#########################################################
|
|
905
968
|
# QA Part 1 - Run all compliance-checker checks
|
|
@@ -917,6 +980,8 @@ def main():
|
|
|
917
980
|
|
|
918
981
|
# Calculate the number of processes
|
|
919
982
|
num_processes = max(multiprocessing.cpu_count() - 4, 1)
|
|
983
|
+
if parallel_processes > 0:
|
|
984
|
+
num_processes = min(num_processes, parallel_processes)
|
|
920
985
|
print(f"Using {num_processes} parallel processes for cc checks.")
|
|
921
986
|
print()
|
|
922
987
|
|
|
@@ -963,13 +1028,8 @@ def main():
|
|
|
963
1028
|
|
|
964
1029
|
# Skip continuity and consistency checks if no checker supporting them was run
|
|
965
1030
|
# (and thus no consistency output file was created)
|
|
966
|
-
if (
|
|
967
|
-
"
|
|
968
|
-
or "mip:latest" in checkers
|
|
969
|
-
or "wcrp_cmip6:1.0" in checkers
|
|
970
|
-
or "wcrp_cmip6:latest" in checkers
|
|
971
|
-
or "wcrp_cordex_cmip6:1.0" in checkers
|
|
972
|
-
or "wcrp_cordex_cmip6:latest" in checkers
|
|
1031
|
+
if any(
|
|
1032
|
+
ch.split(":", 1)[0] in checker_supporting_consistency_checks for ch in checkers
|
|
973
1033
|
):
|
|
974
1034
|
#########################################################
|
|
975
1035
|
# QA Part 2 - Run all consistency & continuity checks
|
|
@@ -996,6 +1056,8 @@ def main():
|
|
|
996
1056
|
# Limit the number of processes for consistency checks since a lot
|
|
997
1057
|
# of files will be opened at the same time
|
|
998
1058
|
num_processes = min(num_processes, 10)
|
|
1059
|
+
if parallel_processes > 0:
|
|
1060
|
+
num_processes = min(num_processes, parallel_processes)
|
|
999
1061
|
print(f"Using {num_processes} parallel processes for dataset checks.")
|
|
1000
1062
|
print()
|
|
1001
1063
|
|
|
@@ -1046,7 +1108,9 @@ def main():
|
|
|
1046
1108
|
else:
|
|
1047
1109
|
print()
|
|
1048
1110
|
warnings.warn(
|
|
1049
|
-
"Continuity &
|
|
1111
|
+
"Continuity & consistency checks skipped since no appropriate checkers were run."
|
|
1112
|
+
" The following checkers support the continuity & consistency checks: "
|
|
1113
|
+
f"{', '.join(checker_supporting_consistency_checks)}"
|
|
1050
1114
|
)
|
|
1051
1115
|
|
|
1052
1116
|
#########################################################
|
|
@@ -1074,7 +1138,7 @@ def main():
|
|
|
1074
1138
|
"cc_version": cc_version,
|
|
1075
1139
|
"checkers": ", ".join(
|
|
1076
1140
|
[
|
|
1077
|
-
f"{checker_dict.get(checker.split(':')[0], '')} {checker.split(':')[0]}:{checker_release_versions[checker.split(':')[0]]}"
|
|
1141
|
+
f"{checker_dict.get(checker.split(':')[0], '')} {checker.split(':')[0]}:{checker_release_versions[checker.split(':')[0]]}".strip()
|
|
1078
1142
|
for checker in checkers
|
|
1079
1143
|
]
|
|
1080
1144
|
),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: esgf-qa
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: QA based on compliance-checker
|
|
5
5
|
Author-email: Martin Schupfner <schupfner@dkrz.de>
|
|
6
6
|
Maintainer-email: Martin Schupfner <schupfner@dkrz.de>
|
|
@@ -21,6 +21,7 @@ Requires-Dist: cf_xarray
|
|
|
21
21
|
Requires-Dist: compliance-checker>=5.3.0
|
|
22
22
|
Requires-Dist: dask
|
|
23
23
|
Requires-Dist: netCDF4
|
|
24
|
+
Requires-Dist: packaging
|
|
24
25
|
Requires-Dist: pandas
|
|
25
26
|
Requires-Dist: textual
|
|
26
27
|
Requires-Dist: xarray
|
|
@@ -53,7 +54,7 @@ Dynamic: license-file
|
|
|
53
54
|
`esgf-qa` provides a flexible quality assurance (QA) workflow for evaluating dataset compliance using the
|
|
54
55
|
[ioos/compliance-checker](https://github.com/ioos/compliance-checker) framework
|
|
55
56
|
(including [CF](https://cfconventions.org/) compliance checks)
|
|
56
|
-
and
|
|
57
|
+
and any community plugins (`cc-plugin`s), such as
|
|
57
58
|
[ESGF/cc-plugin-wcrp](https://github.com/ESGF/cc-plugin-wcrp) and
|
|
58
59
|
[euro-cordex/cc-plugin-cc6](https://github.com/euro-cordex/cc-plugin-cc6).
|
|
59
60
|
|
|
@@ -65,8 +66,8 @@ Results from both file- and dataset-level checks are aggregated, summarized, and
|
|
|
65
66
|
### Currently supported checkers
|
|
66
67
|
|
|
67
68
|
While `esgf-qa` has been primarily developed for workflows assessing compliance with WCRP project data specifications
|
|
68
|
-
(e.g., CMIP, CORDEX), it can also be used for general CF-compliance testing and
|
|
69
|
-
`cc-plugin
|
|
69
|
+
(e.g., CMIP, CORDEX), it can also be used for general CF-compliance testing and generally supports any
|
|
70
|
+
`cc-plugin`. It can be easily extended to support any projects following CORDEX- or CMIP-style CMOR table conventions.
|
|
70
71
|
|
|
71
72
|
| Standard | Checker Name |
|
|
72
73
|
| ---------------------------------------------------------------------------------------------------- | ------------ |
|
|
@@ -125,15 +126,16 @@ esgvoc status
|
|
|
125
126
|
## Usage
|
|
126
127
|
|
|
127
128
|
```shell
|
|
128
|
-
$ esgqa [-h] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <parent_dir>
|
|
129
|
+
$ esgqa [-h] [-P <parallel_processes>] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <parent_dir>
|
|
129
130
|
```
|
|
130
131
|
|
|
131
132
|
- positional arguments:
|
|
132
133
|
- `parent_dir`: Parent directory to scan for netCDF-files to check
|
|
133
134
|
- options:
|
|
134
135
|
- `-h, --help`: show this help message and exit
|
|
136
|
+
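- `-P, --parallel_processes`: Specify the maximum number of parallel processes. Default: 0 (= no user-defined limit; the number of CPU cores minus 4 is used, but at least 1). A positive value only caps this number; dataset-level consistency checks additionally use at most 10 processes.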
|
|
135
137
|
- `-o, --output_dir OUTPUT_DIR`: Directory to store QA results. Needs to be non-existing or empty or from previous QA run. If not specified, will store results in `./cc-qa-check-results/YYYYMMDD-HHmm_<hash>`.
|
|
136
|
-
- `-t, --test TEST`: The test to run (`'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`.
|
|
138
|
+
- `-t, --test TEST`: The test to run (eg. `'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`. If the version is omitted, `latest` will be used.
|
|
137
139
|
- `-O, --option OPTION`: Additional options to be passed to the checkers. Format: `'<checker>:<option_name>[:<option_value>]'`. Multiple invocations possible.
|
|
138
140
|
- `-i, --info INFO`: Information used to tag the QA results, eg. the simulation id to identify the checked run. Suggested is the original experiment-id you gave the run.
|
|
139
141
|
- `-r, --resume`: Specify to continue a previous QA run. Requires the `<output_dir>` argument to be set.
|
|
@@ -142,7 +144,7 @@ $ esgqa [-h] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <pa
|
|
|
142
144
|
### Example Usage
|
|
143
145
|
|
|
144
146
|
```shell
|
|
145
|
-
$ esgqa -t wcrp_cordex_cmip6:latest -t cf:1.11 -o QA_results/IAEVALL02_2025-10-20 -i "IAEVALL02" ESGF_Buff/IAEVALL02/CORDEX-CMIP6
|
|
147
|
+
$ esgqa -P 8 -t wcrp_cordex_cmip6:latest -t cf:1.11 -o QA_results/IAEVALL02_2025-10-20 -i "IAEVALL02" ESGF_Buff/IAEVALL02/CORDEX-CMIP6
|
|
146
148
|
```
|
|
147
149
|
|
|
148
150
|
To resume at a later date, eg. if the QA run did not finish in time or more files have been added to the `<parent_dir>`
|
|
@@ -156,7 +158,7 @@ $ esgqa -o QA_results/IAEVALL02_2025-10-20 -r
|
|
|
156
158
|
For a custom MIP with defined CMOR tables (`"mip"` is not a placeholder but an actual basic checker of the `cc_plugin_cc6`):
|
|
157
159
|
|
|
158
160
|
```shell
|
|
159
|
-
$ esgqa -o /path/to/test/results -t "mip:latest" -O "mip:tables:/path/to/mip_cmor_tables/Tables" /path/to/MIP/datasets
|
|
161
|
+
$ esgqa -o /path/to/test/results -t "mip:latest" -O "mip:tables:/path/to/mip_cmor_tables/Tables" /path/to/MIP/datasets/
|
|
160
162
|
```
|
|
161
163
|
|
|
162
164
|
For CF checks and basic time and consistency / continuity checks:
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
esgf_qa/__init__.py,sha256=iLmy2rOkHS_4KZWMD8BgT7R3tLMKeaTCDVf3B4FyYxM,91
|
|
2
|
+
esgf_qa/_constants.py,sha256=OgogPt2RdTzP0Cg9BYjMZ7Y7R_sZR1391iVyG15XSGY,3182
|
|
3
|
+
esgf_qa/_version.py,sha256=fvHpBU3KZKRinkriKdtAt3crenOyysELF-M9y3ozg3U,704
|
|
4
|
+
esgf_qa/cluster_results.py,sha256=3sN4vFv0ps1cS3YyvvdZWpsLs-SB2UKM7CPGqZBgiPw,19131
|
|
5
|
+
esgf_qa/con_checks.py,sha256=BAqbDcEmDB1kiRBaSaB76mNfKxoHtTNWqJHbtALcpIg,29074
|
|
6
|
+
esgf_qa/qaviewer.py,sha256=myt9lq47E40sD7KrMjVcAvy8sqocVinBSUYf4nOPD80,8843
|
|
7
|
+
esgf_qa/run_qa.py,sha256=e1pI1uv7fgO6f9biHHMRbe7KSaJlw3iPLxFjcieLy60,44893
|
|
8
|
+
esgf_qa-0.5.0.dist-info/licenses/LICENSE,sha256=S1WmzAIRoXFV26FENC3SW_XsmvkGtCs-4_gm7PrPYWg,12636
|
|
9
|
+
tests/test_cli.py,sha256=JmDIeGum8RDhgcy0ZpF9Rq1fMn15MXG4-Tzn1UYiYIU,10990
|
|
10
|
+
tests/test_cluster_results.py,sha256=ahwtG6666mP7VdVxHwPy7I8vV9rPxl2VRPdnH8VQk-w,5894
|
|
11
|
+
tests/test_con_checks.py,sha256=VCj_0jt_fbBqo_VWCrpHMHPs9IWxb5PtJs6Yh1jrxxU,8853
|
|
12
|
+
tests/test_qaviewer.py,sha256=ZEH7LkPIl3ocV0Xk4D4Zv6VIH9397hB71FtXLeo7NwY,4635
|
|
13
|
+
tests/test_run_dummy_qa.py,sha256=6pIQkvzP8c-mKynk3n19UvZAhvsPMpnu32YznWFDB2k,6213
|
|
14
|
+
tests/test_run_qa.py,sha256=H2K935lJi-6Znj9DMUn4DH7sR17qTrs4dGLMmIIC0bs,6130
|
|
15
|
+
esgf_qa-0.5.0.dist-info/METADATA,sha256=6TupoWWcXVuprpiSQ8dLC9NqhtdGSL5mwUkiC40vtB8,11306
|
|
16
|
+
esgf_qa-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
17
|
+
esgf_qa-0.5.0.dist-info/entry_points.txt,sha256=ZGMG_3eS7nyUJE6ZJ9v23Thcf-r29ZSZ7e8voBVwbf4,82
|
|
18
|
+
esgf_qa-0.5.0.dist-info/top_level.txt,sha256=BtbDH91jFtWygUPsLIr1g5CKU7Jmp4K-CU8yzCaONt0,14
|
|
19
|
+
esgf_qa-0.5.0.dist-info/RECORD,,
|
tests/test_cli.py
CHANGED
|
@@ -230,7 +230,7 @@ class TestQACommandLine:
|
|
|
230
230
|
),
|
|
231
231
|
(
|
|
232
232
|
["-r", "-t", "cf:latest", "-o", "some_dir"],
|
|
233
|
-
"When using -r/--resume,
|
|
233
|
+
"When using -r/--resume, the following arguments are not allowed",
|
|
234
234
|
),
|
|
235
235
|
],
|
|
236
236
|
)
|
|
@@ -251,8 +251,12 @@ class TestQACommandLine:
|
|
|
251
251
|
["-t", "cf:latest", "-o", str(output_dir), str(self.cmip6_dir)]
|
|
252
252
|
)
|
|
253
253
|
json_files = list(output_dir.glob("*.json"))
|
|
254
|
-
assert len(json_files) ==
|
|
255
|
-
|
|
254
|
+
assert len(json_files) == 6
|
|
255
|
+
json_result_files = [
|
|
256
|
+
f for f in json_files if f.name.startswith("qa_result_")
|
|
257
|
+
]
|
|
258
|
+
assert len(json_result_files) == 2
|
|
259
|
+
with open(json_result_files[0]) as f:
|
|
256
260
|
data = json.load(f)
|
|
257
261
|
# "info" is the only required field
|
|
258
262
|
assert "info" in data
|
tests/test_run_qa.py
CHANGED
|
@@ -61,7 +61,7 @@ def test_get_dsid():
|
|
|
61
61
|
},
|
|
62
62
|
}
|
|
63
63
|
file_path = f"/path/to/{project_id}/drs/elements/until/file1_1950-1960.nc"
|
|
64
|
-
dsid = get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, project_id)
|
|
64
|
+
dsid = get_dsid(files_to_check_dict, dataset_files_map_ext, file_path, [project_id])
|
|
65
65
|
assert dsid == "my_project.drs.elements.until"
|
|
66
66
|
|
|
67
67
|
|
esgf_qa-0.4.0.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
esgf_qa/__init__.py,sha256=iLmy2rOkHS_4KZWMD8BgT7R3tLMKeaTCDVf3B4FyYxM,91
|
|
2
|
-
esgf_qa/_constants.py,sha256=CKWF9DCe3cen0Wp10yuKC49n8yQ8Ge5jPavd-JiJVro,2783
|
|
3
|
-
esgf_qa/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
|
|
4
|
-
esgf_qa/cluster_results.py,sha256=cy-Qc3SRbrYA6QGU_ROgum1Fxmd4wspPVvsJ6m5mVo0,19047
|
|
5
|
-
esgf_qa/con_checks.py,sha256=BAqbDcEmDB1kiRBaSaB76mNfKxoHtTNWqJHbtALcpIg,29074
|
|
6
|
-
esgf_qa/qaviewer.py,sha256=myt9lq47E40sD7KrMjVcAvy8sqocVinBSUYf4nOPD80,8843
|
|
7
|
-
esgf_qa/run_qa.py,sha256=VXuYWBHCzYN4Cjv80HOlCSyLKgj3tceB7qf0fAkuH6g,41724
|
|
8
|
-
esgf_qa-0.4.0.dist-info/licenses/LICENSE,sha256=S1WmzAIRoXFV26FENC3SW_XsmvkGtCs-4_gm7PrPYWg,12636
|
|
9
|
-
tests/test_cli.py,sha256=OcJ1Pq5l5vKnPP96r3_mBWyn3hWFQ7p7Xb7YJp4tAms,10821
|
|
10
|
-
tests/test_cluster_results.py,sha256=ahwtG6666mP7VdVxHwPy7I8vV9rPxl2VRPdnH8VQk-w,5894
|
|
11
|
-
tests/test_con_checks.py,sha256=VCj_0jt_fbBqo_VWCrpHMHPs9IWxb5PtJs6Yh1jrxxU,8853
|
|
12
|
-
tests/test_qaviewer.py,sha256=ZEH7LkPIl3ocV0Xk4D4Zv6VIH9397hB71FtXLeo7NwY,4635
|
|
13
|
-
tests/test_run_dummy_qa.py,sha256=6pIQkvzP8c-mKynk3n19UvZAhvsPMpnu32YznWFDB2k,6213
|
|
14
|
-
tests/test_run_qa.py,sha256=DUi7KpgpL80b9pL6XP4uFAw-8b0YqhMcwCixS4z8ZEI,6128
|
|
15
|
-
esgf_qa-0.4.0.dist-info/METADATA,sha256=anvxnx7EeAxbM5M7zp6MHKguY6wiecGsrlelTTyFmZk,11057
|
|
16
|
-
esgf_qa-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
-
esgf_qa-0.4.0.dist-info/entry_points.txt,sha256=ZGMG_3eS7nyUJE6ZJ9v23Thcf-r29ZSZ7e8voBVwbf4,82
|
|
18
|
-
esgf_qa-0.4.0.dist-info/top_level.txt,sha256=BtbDH91jFtWygUPsLIr1g5CKU7Jmp4K-CU8yzCaONt0,14
|
|
19
|
-
esgf_qa-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|