esgf-qa 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgf_qa/_constants.py +63 -1
- esgf_qa/_version.py +2 -2
- esgf_qa/cluster_results.py +467 -0
- esgf_qa/con_checks.py +209 -11
- esgf_qa/run_qa.py +356 -463
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/METADATA +47 -31
- esgf_qa-0.5.0.dist-info/RECORD +19 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/WHEEL +1 -1
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/top_level.txt +1 -1
- tests/test_cli.py +271 -0
- tests/test_cluster_results.py +166 -0
- tests/test_con_checks.py +263 -0
- tests/test_qaviewer.py +147 -0
- tests/test_run_dummy_qa.py +191 -0
- tests/test_run_qa.py +181 -0
- docs/esgf-qa_Logo.png +0 -0
- esgf_qa-0.3.0.dist-info/RECORD +0 -13
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/entry_points.txt +0 -0
- {esgf_qa-0.3.0.dist-info → esgf_qa-0.5.0.dist-info}/licenses/LICENSE +0 -0
esgf_qa/con_checks.py
CHANGED
|
@@ -32,7 +32,29 @@ def printtimedelta(d):
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def truncate_str(s, max_length=16):
|
|
35
|
-
|
|
35
|
+
"""
|
|
36
|
+
Truncate string if too long.
|
|
37
|
+
|
|
38
|
+
Parameters
|
|
39
|
+
----------
|
|
40
|
+
s : str
|
|
41
|
+
String to truncate.
|
|
42
|
+
max_length : int, optional
|
|
43
|
+
Maximum length of string. Default is 16.
|
|
44
|
+
|
|
45
|
+
Returns
|
|
46
|
+
-------
|
|
47
|
+
str
|
|
48
|
+
Truncated string.
|
|
49
|
+
|
|
50
|
+
Examples
|
|
51
|
+
--------
|
|
52
|
+
>>> truncate_str("This is a long string", 10)
|
|
53
|
+
'This...string'
|
|
54
|
+
>>> truncate_str("This is a short string", 16)
|
|
55
|
+
'This is a short string'
|
|
56
|
+
"""
|
|
57
|
+
if max_length <= 0 or max_length is None or len(s) <= max_length:
|
|
36
58
|
return s
|
|
37
59
|
|
|
38
60
|
# Select start and end of string
|
|
@@ -58,6 +80,23 @@ def truncate_str(s, max_length=16):
|
|
|
58
80
|
|
|
59
81
|
|
|
60
82
|
def compare_dicts(dict1, dict2, exclude_keys=None):
|
|
83
|
+
"""
|
|
84
|
+
Compare two dictionaries and return keys with differing values.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
dict1 : dict
|
|
89
|
+
First dictionary to compare.
|
|
90
|
+
dict2 : dict
|
|
91
|
+
Second dictionary to compare.
|
|
92
|
+
exclude_keys : list, optional
|
|
93
|
+
List of keys to exclude from comparison.
|
|
94
|
+
|
|
95
|
+
Returns
|
|
96
|
+
-------
|
|
97
|
+
list
|
|
98
|
+
List of keys with differing values.
|
|
99
|
+
"""
|
|
61
100
|
if exclude_keys is None:
|
|
62
101
|
exclude_keys = set()
|
|
63
102
|
else:
|
|
@@ -67,17 +106,36 @@ def compare_dicts(dict1, dict2, exclude_keys=None):
|
|
|
67
106
|
all_keys = (set(dict1) | set(dict2)) - exclude_keys
|
|
68
107
|
|
|
69
108
|
# Collect keys with differing values
|
|
70
|
-
differing_keys = [
|
|
109
|
+
differing_keys = [
|
|
110
|
+
key for key in sorted(list(all_keys)) if dict1.get(key) != dict2.get(key)
|
|
111
|
+
]
|
|
71
112
|
|
|
72
113
|
return differing_keys
|
|
73
114
|
|
|
74
115
|
|
|
75
116
|
def compare_nested_dicts(dict1, dict2, exclude_keys=None):
|
|
117
|
+
"""
|
|
118
|
+
Compare two nested dictionaries and return keys with differing values.
|
|
119
|
+
|
|
120
|
+
Parameters
|
|
121
|
+
----------
|
|
122
|
+
dict1 : dict
|
|
123
|
+
First dictionary to compare.
|
|
124
|
+
dict2 : dict
|
|
125
|
+
Second dictionary to compare.
|
|
126
|
+
exclude_keys : list, optional
|
|
127
|
+
List of keys to exclude from comparison.
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
dict
|
|
132
|
+
Dictionary of keys with differing values.
|
|
133
|
+
"""
|
|
76
134
|
diffs = {}
|
|
77
135
|
|
|
78
136
|
all_root_keys = set(dict1) | set(dict2)
|
|
79
137
|
|
|
80
|
-
for root_key in all_root_keys:
|
|
138
|
+
for root_key in sorted(list(all_root_keys)):
|
|
81
139
|
subdict1 = dict1.get(root_key, {})
|
|
82
140
|
subdict2 = dict2.get(root_key, {})
|
|
83
141
|
|
|
@@ -95,6 +153,32 @@ def compare_nested_dicts(dict1, dict2, exclude_keys=None):
|
|
|
95
153
|
|
|
96
154
|
|
|
97
155
|
def consistency_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
156
|
+
"""
|
|
157
|
+
Consistency checks.
|
|
158
|
+
|
|
159
|
+
Runs inter-file consistency checks on a dataset:
|
|
160
|
+
|
|
161
|
+
- Global attributes (values and data types)
|
|
162
|
+
- Variable attributes (values and data types)
|
|
163
|
+
- Coordinates (values)
|
|
164
|
+
- Dimensions (names and sizes)
|
|
165
|
+
|
|
166
|
+
Parameters
|
|
167
|
+
----------
|
|
168
|
+
ds : str
|
|
169
|
+
Dataset to process.
|
|
170
|
+
ds_map : dict
|
|
171
|
+
Dictionary mapping dataset IDs to file paths.
|
|
172
|
+
files_to_check_dict : dict
|
|
173
|
+
A special dictionary mapping files to check to datasets.
|
|
174
|
+
checker_options : dict
|
|
175
|
+
Dictionary of checker options.
|
|
176
|
+
|
|
177
|
+
Returns
|
|
178
|
+
-------
|
|
179
|
+
dict
|
|
180
|
+
A dictionary containing the results of the consistency checks.
|
|
181
|
+
"""
|
|
98
182
|
results = defaultdict(level1_factory)
|
|
99
183
|
filelist = sorted(ds_map[ds])
|
|
100
184
|
consistency_files = OrderedDict(
|
|
@@ -251,6 +335,27 @@ def consistency_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
251
335
|
|
|
252
336
|
|
|
253
337
|
def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
338
|
+
"""
|
|
339
|
+
Checks inter-file time and time_bnds continuity for a dataset.
|
|
340
|
+
|
|
341
|
+
This check identifies gaps in time or time_bnds between files of a dataset.
|
|
342
|
+
|
|
343
|
+
Parameters
|
|
344
|
+
----------
|
|
345
|
+
ds : str
|
|
346
|
+
Dataset to process.
|
|
347
|
+
ds_map : dict
|
|
348
|
+
Dictionary mapping dataset IDs to file paths.
|
|
349
|
+
files_to_check_dict : dict
|
|
350
|
+
A special dictionary mapping files to check to datasets.
|
|
351
|
+
checker_options : dict
|
|
352
|
+
Dictionary of checker options.
|
|
353
|
+
|
|
354
|
+
Returns
|
|
355
|
+
-------
|
|
356
|
+
dict
|
|
357
|
+
Dictionary of results.
|
|
358
|
+
"""
|
|
254
359
|
results = defaultdict(level1_factory)
|
|
255
360
|
filelist = sorted(ds_map[ds])
|
|
256
361
|
consistency_files = OrderedDict(
|
|
@@ -276,6 +381,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
276
381
|
calendar=data["time_info"]["calendar"],
|
|
277
382
|
)
|
|
278
383
|
if data["time_info"]["timen"]
|
|
384
|
+
and data["time_info"]["units"]
|
|
385
|
+
and data["time_info"]["calendar"]
|
|
279
386
|
else None
|
|
280
387
|
)
|
|
281
388
|
boundn = (
|
|
@@ -285,6 +392,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
285
392
|
calendar=data["time_info"]["calendar"],
|
|
286
393
|
)
|
|
287
394
|
if data["time_info"]["boundn"]
|
|
395
|
+
and data["time_info"]["units"]
|
|
396
|
+
and data["time_info"]["calendar"]
|
|
288
397
|
else None
|
|
289
398
|
)
|
|
290
399
|
if i == 1:
|
|
@@ -296,6 +405,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
296
405
|
calendar=data["time_info"]["calendar"],
|
|
297
406
|
)
|
|
298
407
|
if data["time_info"]["time0"]
|
|
408
|
+
and data["time_info"]["units"]
|
|
409
|
+
and data["time_info"]["calendar"]
|
|
299
410
|
else None
|
|
300
411
|
)
|
|
301
412
|
bound0 = (
|
|
@@ -305,6 +416,8 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
305
416
|
calendar=data["time_info"]["calendar"],
|
|
306
417
|
)
|
|
307
418
|
if data["time_info"]["bound0"]
|
|
419
|
+
and data["time_info"]["units"]
|
|
420
|
+
and data["time_info"]["calendar"]
|
|
308
421
|
else None
|
|
309
422
|
)
|
|
310
423
|
freq = data["time_info"]["frequency"]
|
|
@@ -337,23 +450,55 @@ def continuity_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
337
450
|
|
|
338
451
|
|
|
339
452
|
def compatibility_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
453
|
+
"""
|
|
454
|
+
Compatibility checks for a dataset.
|
|
455
|
+
|
|
456
|
+
Checks for:
|
|
457
|
+
|
|
458
|
+
- xarray open_mfdataset (compat='override', join='outer')
|
|
459
|
+
- xarray open_mfdataset (compat='no_conflicts', join='exact')
|
|
460
|
+
|
|
461
|
+
Parameters
|
|
462
|
+
----------
|
|
463
|
+
ds : str
|
|
464
|
+
Dataset to process.
|
|
465
|
+
ds_map : dict
|
|
466
|
+
Dictionary mapping dataset IDs to file paths.
|
|
467
|
+
files_to_check_dict : dict
|
|
468
|
+
A special dictionary mapping files to check to datasets.
|
|
469
|
+
checker_options : dict
|
|
470
|
+
Dictionary of checker options.
|
|
471
|
+
|
|
472
|
+
Returns
|
|
473
|
+
-------
|
|
474
|
+
dict
|
|
475
|
+
Dictionary of results.
|
|
476
|
+
"""
|
|
340
477
|
results = defaultdict(level1_factory)
|
|
341
478
|
filelist = sorted(ds_map[ds])
|
|
342
479
|
|
|
343
480
|
# open_mfdataset - override
|
|
344
|
-
test = "xarray open_mfdataset
|
|
481
|
+
test = "xarray open_mfdataset (compat='override', join='outer')"
|
|
345
482
|
results[test]["weight"] = 3
|
|
346
483
|
try:
|
|
347
|
-
with xr.open_mfdataset(
|
|
484
|
+
with xr.open_mfdataset(
|
|
485
|
+
filelist, coords="minimal", compat="override", data_vars="all", join="outer"
|
|
486
|
+
) as ds:
|
|
348
487
|
pass
|
|
349
488
|
except Exception as e:
|
|
350
489
|
results[test]["msgs"][str(e)].extend(filelist)
|
|
351
490
|
|
|
352
491
|
# open_mfdataset - no_conflicts
|
|
353
|
-
test = "xarray open_mfdataset
|
|
492
|
+
test = "xarray open_mfdataset (compat='no_conflicts', join='exact')"
|
|
354
493
|
results[test]["weight"] = 3
|
|
355
494
|
try:
|
|
356
|
-
with xr.open_mfdataset(
|
|
495
|
+
with xr.open_mfdataset(
|
|
496
|
+
filelist,
|
|
497
|
+
coords="minimal",
|
|
498
|
+
compat="no_conflicts",
|
|
499
|
+
data_vars="all",
|
|
500
|
+
join="exact",
|
|
501
|
+
) as ds:
|
|
357
502
|
pass
|
|
358
503
|
except Exception as e:
|
|
359
504
|
results[test]["msgs"][str(e)].extend(filelist)
|
|
@@ -362,6 +507,25 @@ def compatibility_checks(ds, ds_map, files_to_check_dict, checker_options):
|
|
|
362
507
|
|
|
363
508
|
|
|
364
509
|
def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
|
|
510
|
+
"""
|
|
511
|
+
Checks consistency of dataset time coverage.
|
|
512
|
+
|
|
513
|
+
Variables that differ in their time coverage are reported.
|
|
514
|
+
|
|
515
|
+
Parameters
|
|
516
|
+
----------
|
|
517
|
+
ds_map : dict
|
|
518
|
+
Dictionary mapping dataset IDs to file paths.
|
|
519
|
+
files_to_check_dict : dict
|
|
520
|
+
A special dictionary mapping files to check to datasets.
|
|
521
|
+
checker_options : dict
|
|
522
|
+
Dictionary of checker options.
|
|
523
|
+
|
|
524
|
+
Returns
|
|
525
|
+
-------
|
|
526
|
+
dict
|
|
527
|
+
Dictionary of results.
|
|
528
|
+
"""
|
|
365
529
|
results = defaultdict(level0_factory)
|
|
366
530
|
test = "Time coverage"
|
|
367
531
|
|
|
@@ -420,17 +584,27 @@ def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
|
|
|
420
584
|
|
|
421
585
|
# Compare coverage
|
|
422
586
|
if len(coverage_start.keys()) > 1:
|
|
423
|
-
|
|
424
|
-
|
|
587
|
+
try:
|
|
588
|
+
scov = min(coverage_start.values())
|
|
589
|
+
except ValueError:
|
|
590
|
+
scov = None
|
|
591
|
+
try:
|
|
592
|
+
ecov = max(coverage_end.values())
|
|
593
|
+
except ValueError:
|
|
594
|
+
ecov = None
|
|
425
595
|
# Get all ds where coverage_start differs
|
|
426
596
|
for ds in coverage_start.keys():
|
|
427
597
|
fl = sorted(ds_map[ds])
|
|
428
|
-
if
|
|
598
|
+
if scov is None:
|
|
599
|
+
pass
|
|
600
|
+
elif coverage_start[ds] != scov:
|
|
429
601
|
results[ds][test]["weight"] = 1
|
|
430
602
|
results[ds][test]["msgs"][
|
|
431
603
|
f"Time series starts at '{coverage_start[ds]}' while other time series start at '{scov}'"
|
|
432
604
|
] = [fl[0]]
|
|
433
|
-
if
|
|
605
|
+
if ecov is None:
|
|
606
|
+
pass
|
|
607
|
+
elif ds in coverage_end and coverage_end[ds] != ecov:
|
|
434
608
|
results[ds][test]["weight"] = 1
|
|
435
609
|
results[ds][test]["msgs"][
|
|
436
610
|
f"Time series ends at '{coverage_end[ds]}' while other time series end at '{ecov}'"
|
|
@@ -440,6 +614,30 @@ def dataset_coverage_checks(ds_map, files_to_check_dict, checker_options):
|
|
|
440
614
|
|
|
441
615
|
|
|
442
616
|
def inter_dataset_consistency_checks(ds_map, files_to_check_dict, checker_options):
|
|
617
|
+
"""
|
|
618
|
+
Inter-dataset consistency checks.
|
|
619
|
+
|
|
620
|
+
Will group datasets by realm and grid for certain checks.
|
|
621
|
+
Runs inter-dataset consistency checks:
|
|
622
|
+
|
|
623
|
+
- Required and non-required global attributes (values and data types)
|
|
624
|
+
- Coordinates (values)
|
|
625
|
+
- Dimensions (names and sizes)
|
|
626
|
+
|
|
627
|
+
Parameters
|
|
628
|
+
----------
|
|
629
|
+
ds_map : dict
|
|
630
|
+
Dictionary mapping dataset IDs to file paths.
|
|
631
|
+
files_to_check_dict : dict
|
|
632
|
+
A special dictionary mapping files to check to datasets.
|
|
633
|
+
checker_options : dict
|
|
634
|
+
Dictionary of checker options.
|
|
635
|
+
|
|
636
|
+
Returns
|
|
637
|
+
-------
|
|
638
|
+
dict
|
|
639
|
+
Dictionary of results.
|
|
640
|
+
"""
|
|
443
641
|
results = defaultdict(level0_factory)
|
|
444
642
|
filedict = {}
|
|
445
643
|
consistency_data = {}
|